Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-arm-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-next

Changes for KVM for arm/arm64 for 3.18

This includes a bunch of changes:
- Support read-only memory slots on arm/arm64
- Various changes to fix Sparse warnings
- Correctly detect write vs. read Stage-2 faults
- Various VGIC cleanups and fixes
- Dynamic VGIC data structure sizing
- Fix SGI set_clear_pend offset bug
- Fix VTTBR_BADDR Mask
- Correctly report the FSC on Stage-2 faults

Conflicts:
virt/kvm/eventfd.c
[duplicate, different patch where the kvm-arm version broke x86.
The kvm tree instead has the right one]

+677 -214
+10
Documentation/virtual/kvm/devices/arm-vgic.txt
··· 71 71 Errors: 72 72 -ENODEV: Getting or setting this register is not yet supported 73 73 -EBUSY: One or more VCPUs are running 74 + 75 + KVM_DEV_ARM_VGIC_GRP_NR_IRQS 76 + Attributes: 77 + A value describing the number of interrupts (SGI, PPI and SPI) for 78 + this GIC instance, ranging from 64 to 1024, in increments of 32. 79 + 80 + Errors: 81 + -EINVAL: Value set is out of the expected range 82 + -EBUSY: Value has already been set, or GIC has already been initialized 83 + with default values.
+5
arch/arm/include/asm/kvm_emulate.h
··· 149 149 150 150 static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) 151 151 { 152 + return kvm_vcpu_get_hsr(vcpu) & HSR_FSC; 153 + } 154 + 155 + static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) 156 + { 152 157 return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; 153 158 } 154 159
+1 -1
arch/arm/include/asm/kvm_host.h
··· 43 43 #include <kvm/arm_vgic.h> 44 44 45 45 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); 46 - int kvm_target_cpu(void); 46 + int __attribute_const__ kvm_target_cpu(void); 47 47 int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 48 48 void kvm_reset_coprocs(struct kvm_vcpu *vcpu); 49 49
-11
arch/arm/include/asm/kvm_mmu.h
··· 78 78 flush_pmd_entry(pte); 79 79 } 80 80 81 - static inline bool kvm_is_write_fault(unsigned long hsr) 82 - { 83 - unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; 84 - if (hsr_ec == HSR_EC_IABT) 85 - return false; 86 - else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR)) 87 - return false; 88 - else 89 - return true; 90 - } 91 - 92 81 static inline void kvm_clean_pgd(pgd_t *pgd) 93 82 { 94 83 clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+2
arch/arm/include/uapi/asm/kvm.h
··· 25 25 26 26 #define __KVM_HAVE_GUEST_DEBUG 27 27 #define __KVM_HAVE_IRQ_LINE 28 + #define __KVM_HAVE_READONLY_MEM 28 29 29 30 #define KVM_REG_SIZE(id) \ 30 31 (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) ··· 174 173 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) 175 174 #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 176 175 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) 176 + #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 177 177 178 178 /* KVM_IRQ_LINE irq field index values */ 179 179 #define KVM_ARM_IRQ_TYPE_SHIFT 24
+7 -10
arch/arm/kvm/arm.c
··· 82 82 /** 83 83 * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. 84 84 */ 85 - struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) 85 + struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) 86 86 { 87 87 return &kvm_arm_running_vcpu; 88 88 } ··· 161 161 kvm->vcpus[i] = NULL; 162 162 } 163 163 } 164 + 165 + kvm_vgic_destroy(kvm); 164 166 } 165 167 166 168 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ··· 179 177 case KVM_CAP_ONE_REG: 180 178 case KVM_CAP_ARM_PSCI: 181 179 case KVM_CAP_ARM_PSCI_0_2: 180 + case KVM_CAP_READONLY_MEM: 182 181 r = 1; 183 182 break; 184 183 case KVM_CAP_COALESCED_MMIO: ··· 245 242 { 246 243 kvm_mmu_free_memory_caches(vcpu); 247 244 kvm_timer_vcpu_terminate(vcpu); 245 + kvm_vgic_vcpu_destroy(vcpu); 248 246 kmem_cache_free(kvm_vcpu_cache, vcpu); 249 247 } 250 248 ··· 261 257 262 258 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 263 259 { 264 - int ret; 265 - 266 260 /* Force users to call KVM_ARM_VCPU_INIT */ 267 261 vcpu->arch.target = -1; 268 - 269 - /* Set up VGIC */ 270 - ret = kvm_vgic_vcpu_init(vcpu); 271 - if (ret) 272 - return ret; 273 262 274 263 /* Set up the timer */ 275 264 kvm_timer_vcpu_init(vcpu); ··· 410 413 411 414 /* update vttbr to be used with the new vmid */ 412 415 pgd_phys = virt_to_phys(kvm->arch.pgd); 416 + BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); 413 417 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; 414 - kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK; 415 - kvm->arch.vttbr |= vmid; 418 + kvm->arch.vttbr = pgd_phys | vmid; 416 419 417 420 spin_unlock(&kvm_vmid_lock); 418 421 }
+1 -1
arch/arm/kvm/coproc.c
··· 791 791 u32 level, ctype; 792 792 793 793 if (val >= CSSELR_MAX) 794 - return -ENOENT; 794 + return false; 795 795 796 796 /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ 797 797 level = (val >> 1);
+1 -1
arch/arm/kvm/guest.c
··· 163 163 164 164 ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); 165 165 if (ret != 0) 166 - return ret; 166 + return -EFAULT; 167 167 168 168 return kvm_arm_timer_set_reg(vcpu, reg->id, val); 169 169 }
+22 -18
arch/arm/kvm/mmu.c
··· 746 746 return false; 747 747 } 748 748 749 + static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) 750 + { 751 + if (kvm_vcpu_trap_is_iabt(vcpu)) 752 + return false; 753 + 754 + return kvm_vcpu_dabt_iswrite(vcpu); 755 + } 756 + 749 757 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, 750 - struct kvm_memory_slot *memslot, 758 + struct kvm_memory_slot *memslot, unsigned long hva, 751 759 unsigned long fault_status) 752 760 { 753 761 int ret; 754 762 bool write_fault, writable, hugetlb = false, force_pte = false; 755 763 unsigned long mmu_seq; 756 764 gfn_t gfn = fault_ipa >> PAGE_SHIFT; 757 - unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); 758 765 struct kvm *kvm = vcpu->kvm; 759 766 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 760 767 struct vm_area_struct *vma; 761 768 pfn_t pfn; 762 769 pgprot_t mem_type = PAGE_S2; 763 770 764 - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); 771 + write_fault = kvm_is_write_fault(vcpu); 765 772 if (fault_status == FSC_PERM && !write_fault) { 766 773 kvm_err("Unexpected L2 read permission error\n"); 767 774 return -EFAULT; ··· 870 863 unsigned long fault_status; 871 864 phys_addr_t fault_ipa; 872 865 struct kvm_memory_slot *memslot; 873 - bool is_iabt; 866 + unsigned long hva; 867 + bool is_iabt, write_fault, writable; 874 868 gfn_t gfn; 875 869 int ret, idx; 876 870 ··· 882 874 kvm_vcpu_get_hfar(vcpu), fault_ipa); 883 875 884 876 /* Check the stage-2 fault is trans. 
fault or write fault */ 885 - fault_status = kvm_vcpu_trap_get_fault(vcpu); 877 + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); 886 878 if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { 887 - kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", 888 - kvm_vcpu_trap_get_class(vcpu), fault_status); 879 + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", 880 + kvm_vcpu_trap_get_class(vcpu), 881 + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), 882 + (unsigned long)kvm_vcpu_get_hsr(vcpu)); 889 883 return -EFAULT; 890 884 } 891 885 892 886 idx = srcu_read_lock(&vcpu->kvm->srcu); 893 887 894 888 gfn = fault_ipa >> PAGE_SHIFT; 895 - if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { 889 + memslot = gfn_to_memslot(vcpu->kvm, gfn); 890 + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); 891 + write_fault = kvm_is_write_fault(vcpu); 892 + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { 896 893 if (is_iabt) { 897 894 /* Prefetch Abort on I/O address */ 898 895 kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); 899 896 ret = 1; 900 - goto out_unlock; 901 - } 902 - 903 - if (fault_status != FSC_FAULT) { 904 - kvm_err("Unsupported fault status on io memory: %#lx\n", 905 - fault_status); 906 - ret = -EFAULT; 907 897 goto out_unlock; 908 898 } 909 899 ··· 916 910 goto out_unlock; 917 911 } 918 912 919 - memslot = gfn_to_memslot(vcpu->kvm, gfn); 920 - 921 - ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); 913 + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); 922 914 if (ret == 0) 923 915 ret = 1; 924 916 out_unlock:
+12 -1
arch/arm64/include/asm/kvm_arm.h
··· 122 122 #define VTCR_EL2_T0SZ_MASK 0x3f 123 123 #define VTCR_EL2_T0SZ_40B 24 124 124 125 + /* 126 + * We configure the Stage-2 page tables to always restrict the IPA space to be 127 + * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are 128 + * not known to exist and will break with this configuration. 129 + * 130 + * Note that when using 4K pages, we concatenate two first level page tables 131 + * together. 132 + * 133 + * The magic numbers used for VTTBR_X in this patch can be found in Tables 134 + * D4-23 and D4-25 in ARM DDI 0487A.b. 135 + */ 125 136 #ifdef CONFIG_ARM64_64K_PAGES 126 137 /* 127 138 * Stage2 translation configuration: ··· 160 149 #endif 161 150 162 151 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) 163 - #define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) 152 + #define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) 164 153 #define VTTBR_VMID_SHIFT (48LLU) 165 154 #define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) 166 155
+5
arch/arm64/include/asm/kvm_emulate.h
··· 174 174 175 175 static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) 176 176 { 177 + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; 178 + } 179 + 180 + static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) 181 + { 177 182 return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; 178 183 } 179 184
+2 -2
arch/arm64/include/asm/kvm_host.h
··· 43 43 44 44 #define KVM_VCPU_MAX_FEATURES 3 45 45 46 - int kvm_target_cpu(void); 46 + int __attribute_const__ kvm_target_cpu(void); 47 47 int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 48 48 int kvm_arch_dev_ioctl_check_extension(long ext); 49 49 ··· 197 197 } 198 198 199 199 struct kvm_vcpu *kvm_arm_get_running_vcpu(void); 200 - struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); 200 + struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); 201 201 202 202 u64 kvm_call_hyp(void *hypfn, ...); 203 203
+2 -16
arch/arm64/include/asm/kvm_mmu.h
··· 59 59 #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) 60 60 61 61 /* 62 - * Align KVM with the kernel's view of physical memory. Should be 63 - * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. 62 + * We currently only support a 40bit IPA. 64 63 */ 65 - #define KVM_PHYS_SHIFT PHYS_MASK_SHIFT 64 + #define KVM_PHYS_SHIFT (40) 66 65 #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) 67 66 #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) 68 67 ··· 91 92 92 93 #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) 93 94 #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) 94 - 95 - static inline bool kvm_is_write_fault(unsigned long esr) 96 - { 97 - unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; 98 - 99 - if (esr_ec == ESR_EL2_EC_IABT) 100 - return false; 101 - 102 - if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) 103 - return false; 104 - 105 - return true; 106 - } 107 95 108 96 static inline void kvm_clean_pgd(pgd_t *pgd) {} 109 97 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
+2
arch/arm64/include/uapi/asm/kvm.h
··· 37 37 38 38 #define __KVM_HAVE_GUEST_DEBUG 39 39 #define __KVM_HAVE_IRQ_LINE 40 + #define __KVM_HAVE_READONLY_MEM 40 41 41 42 #define KVM_REG_SIZE(id) \ 42 43 (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) ··· 160 159 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) 161 160 #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 162 161 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) 162 + #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 163 163 164 164 /* KVM_IRQ_LINE irq field index values */ 165 165 #define KVM_ARM_IRQ_TYPE_SHIFT 24
+1 -1
arch/arm64/kvm/guest.c
··· 174 174 175 175 ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); 176 176 if (ret != 0) 177 - return ret; 177 + return -EFAULT; 178 178 179 179 return kvm_arm_timer_set_reg(vcpu, reg->id, val); 180 180 }
+1 -1
arch/arm64/kvm/sys_regs.c
··· 1218 1218 u32 level, ctype; 1219 1219 1220 1220 if (val >= CSSELR_MAX) 1221 - return -ENOENT; 1221 + return false; 1222 1222 1223 1223 /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ 1224 1224 level = (val >> 1);
+81 -29
include/kvm/arm_vgic.h
··· 25 25 #include <linux/spinlock.h> 26 26 #include <linux/types.h> 27 27 28 - #define VGIC_NR_IRQS 256 28 + #define VGIC_NR_IRQS_LEGACY 256 29 29 #define VGIC_NR_SGIS 16 30 30 #define VGIC_NR_PPIS 16 31 31 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) 32 - #define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) 33 - #define VGIC_MAX_CPUS KVM_MAX_VCPUS 34 32 35 33 #define VGIC_V2_MAX_LRS (1 << 6) 36 34 #define VGIC_V3_MAX_LRS 16 35 + #define VGIC_MAX_IRQS 1024 37 36 38 37 /* Sanity checks... */ 39 - #if (VGIC_MAX_CPUS > 8) 38 + #if (KVM_MAX_VCPUS > 8) 40 39 #error Invalid number of CPU interfaces 41 40 #endif 42 41 43 - #if (VGIC_NR_IRQS & 31) 42 + #if (VGIC_NR_IRQS_LEGACY & 31) 44 43 #error "VGIC_NR_IRQS must be a multiple of 32" 45 44 #endif 46 45 47 - #if (VGIC_NR_IRQS > 1024) 46 + #if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS) 48 47 #error "VGIC_NR_IRQS must be <= 1024" 49 48 #endif 50 49 ··· 53 54 * - a bunch of shared interrupts (SPI) 54 55 */ 55 56 struct vgic_bitmap { 56 - union { 57 - u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; 58 - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); 59 - } percpu[VGIC_MAX_CPUS]; 60 - union { 61 - u32 reg[VGIC_NR_SHARED_IRQS / 32]; 62 - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); 63 - } shared; 57 + /* 58 + * - One UL per VCPU for private interrupts (assumes UL is at 59 + * least 32 bits) 60 + * - As many UL as necessary for shared interrupts. 61 + * 62 + * The private interrupts are accessed via the "private" 63 + * field, one UL per vcpu (the state for vcpu n is in 64 + * private[n]). The shared interrupts are accessed via the 65 + * "shared" pointer (IRQn state is at bit n-32 in the bitmap). 66 + */ 67 + unsigned long *private; 68 + unsigned long *shared; 64 69 }; 65 70 66 71 struct vgic_bytemap { 67 - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; 68 - u32 shared[VGIC_NR_SHARED_IRQS / 4]; 72 + /* 73 + * - 8 u32 per VCPU for private interrupts 74 + * - As many u32 as necessary for shared interrupts. 
75 + * 76 + * The private interrupts are accessed via the "private" 77 + * field, (the state for vcpu n is in private[n*8] to 78 + * private[n*8 + 7]). The shared interrupts are accessed via 79 + * the "shared" pointer (IRQn state is at byte (n-32)%4 of the 80 + * shared[(n-32)/4] word). 81 + */ 82 + u32 *private; 83 + u32 *shared; 69 84 }; 70 85 71 86 struct kvm_vcpu; ··· 140 127 bool in_kernel; 141 128 bool ready; 142 129 130 + int nr_cpus; 131 + int nr_irqs; 132 + 143 133 /* Virtual control interface mapping */ 144 134 void __iomem *vctrl_base; 145 135 ··· 156 140 /* Interrupt enabled (one bit per IRQ) */ 157 141 struct vgic_bitmap irq_enabled; 158 142 159 - /* Interrupt 'pin' level */ 160 - struct vgic_bitmap irq_state; 143 + /* Level-triggered interrupt external input is asserted */ 144 + struct vgic_bitmap irq_level; 161 145 162 - /* Level-triggered interrupt in progress */ 163 - struct vgic_bitmap irq_active; 146 + /* 147 + * Interrupt state is pending on the distributor 148 + */ 149 + struct vgic_bitmap irq_pending; 150 + 151 + /* 152 + * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered 153 + * interrupts. Essentially holds the state of the flip-flop in 154 + * Figure 4-10 on page 4-101 in ARM IHI 0048B.b. 155 + * Once set, it is only cleared for level-triggered interrupts on 156 + * guest ACKs (when we queue it) or writes to GICD_ICPENDRn. 157 + */ 158 + struct vgic_bitmap irq_soft_pend; 159 + 160 + /* Level-triggered interrupt queued on VCPU interface */ 161 + struct vgic_bitmap irq_queued; 164 162 165 163 /* Interrupt priority. Not used yet. 
*/ 166 164 struct vgic_bytemap irq_priority; ··· 182 152 /* Level/edge triggered */ 183 153 struct vgic_bitmap irq_cfg; 184 154 185 - /* Source CPU per SGI and target CPU */ 186 - u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; 155 + /* 156 + * Source CPU per SGI and target CPU: 157 + * 158 + * Each byte represent a SGI observable on a VCPU, each bit of 159 + * this byte indicating if the corresponding VCPU has 160 + * generated this interrupt. This is a GICv2 feature only. 161 + * 162 + * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are 163 + * the SGIs observable on VCPUn. 164 + */ 165 + u8 *irq_sgi_sources; 187 166 188 - /* Target CPU for each IRQ */ 189 - u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; 190 - struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; 167 + /* 168 + * Target CPU for each SPI: 169 + * 170 + * Array of available SPI, each byte indicating the target 171 + * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32]. 172 + */ 173 + u8 *irq_spi_cpu; 174 + 175 + /* 176 + * Reverse lookup of irq_spi_cpu for faster compute pending: 177 + * 178 + * Array of bitmaps, one per VCPU, describing if IRQn is 179 + * routed to a particular VCPU. 
180 + */ 181 + struct vgic_bitmap *irq_spi_target; 191 182 192 183 /* Bitmap indicating which CPU has something pending */ 193 - unsigned long irq_pending_on_cpu; 184 + unsigned long *irq_pending_on_cpu; 194 185 #endif 195 186 }; 196 187 ··· 241 190 struct vgic_cpu { 242 191 #ifdef CONFIG_KVM_ARM_VGIC 243 192 /* per IRQ to LR mapping */ 244 - u8 vgic_irq_lr_map[VGIC_NR_IRQS]; 193 + u8 *vgic_irq_lr_map; 245 194 246 195 /* Pending interrupts on this VCPU */ 247 196 DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); 248 - DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); 197 + unsigned long *pending_shared; 249 198 250 199 /* Bitmap of used/free list registers */ 251 200 DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); ··· 276 225 int kvm_vgic_hyp_init(void); 277 226 int kvm_vgic_init(struct kvm *kvm); 278 227 int kvm_vgic_create(struct kvm *kvm); 279 - int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); 228 + void kvm_vgic_destroy(struct kvm *kvm); 229 + void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); 280 230 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); 281 231 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); 282 232 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+2
include/linux/kvm_host.h
··· 536 536 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); 537 537 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); 538 538 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); 539 + unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, 540 + bool *writable); 539 541 void kvm_release_page_clean(struct page *page); 540 542 void kvm_release_page_dirty(struct page *page); 541 543 void kvm_set_page_accessed(struct page *page);
+511 -120
virt/kvm/arm/vgic.c
··· 36 36 * How the whole thing works (courtesy of Christoffer Dall): 37 37 * 38 38 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if 39 - * something is pending 40 - * - VGIC pending interrupts are stored on the vgic.irq_state vgic 41 - * bitmap (this bitmap is updated by both user land ioctls and guest 42 - * mmio ops, and other in-kernel peripherals such as the 43 - * arch. timers) and indicate the 'wire' state. 39 + * something is pending on the CPU interface. 40 + * - Interrupts that are pending on the distributor are stored on the 41 + * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land 42 + * ioctls and guest mmio ops, and other in-kernel peripherals such as the 43 + * arch. timers). 44 44 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is 45 45 * recalculated 46 46 * - To calculate the oracle, we need info for each cpu from 47 47 * compute_pending_for_cpu, which considers: 48 - * - PPI: dist->irq_state & dist->irq_enable 49 - * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target 50 - * - irq_spi_target is a 'formatted' version of the GICD_ICFGR 48 + * - PPI: dist->irq_pending & dist->irq_enable 49 + * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target 50 + * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn 51 51 * registers, stored on each vcpu. We only keep one bit of 52 52 * information per interrupt, making sure that only one vcpu can 53 53 * accept the interrupt. 54 + * - If any of the above state changes, we must recalculate the oracle. 54 55 * - The same is true when injecting an interrupt, except that we only 55 56 * consider a single interrupt at a time. The irq_spi_cpu array 56 57 * contains the target CPU for each SPI. ··· 61 60 * the 'line' again. This is achieved as such: 62 61 * 63 62 * - When a level interrupt is moved onto a vcpu, the corresponding 64 - * bit in irq_active is set. 
As long as this bit is set, the line 63 + * bit in irq_queued is set. As long as this bit is set, the line 65 64 * will be ignored for further interrupts. The interrupt is injected 66 65 * into the vcpu with the GICH_LR_EOI bit set (generate a 67 66 * maintenance interrupt on EOI). 68 67 * - When the interrupt is EOIed, the maintenance interrupt fires, 69 - * and clears the corresponding bit in irq_active. This allow the 68 + * and clears the corresponding bit in irq_queued. This allows the 70 69 * interrupt line to be sampled again. 70 + * - Note that level-triggered interrupts can also be set to pending from 71 + * writes to GICD_ISPENDRn and lowering the external input line does not 72 + * cause the interrupt to become inactive in such a situation. 73 + * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become 74 + * inactive as long as the external input line is held high. 71 75 */ 72 76 73 77 #define VGIC_ADDR_UNDEF (-1) ··· 95 89 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 96 90 static void vgic_update_state(struct kvm *kvm); 97 91 static void vgic_kick_vcpus(struct kvm *kvm); 92 + static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); 98 93 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); 99 94 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); 100 95 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); ··· 106 99 static const struct vgic_params *vgic; 107 100 108 101 /* 109 - * struct vgic_bitmap contains unions that provide two views of 110 - * the same data. In one case it is an array of registers of 111 - * u32's, and in the other case it is a bitmap of unsigned 112 - * longs. 102 + * struct vgic_bitmap contains a bitmap made of unsigned longs, but 103 + * extracts u32s out of them. 
113 104 * 114 105 * This does not work on 64-bit BE systems, because the bitmap access 115 106 * will store two consecutive 32-bit words with the higher-addressed ··· 123 118 #define REG_OFFSET_SWIZZLE 0 124 119 #endif 125 120 121 + static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs) 122 + { 123 + int nr_longs; 124 + 125 + nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); 126 + 127 + b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL); 128 + if (!b->private) 129 + return -ENOMEM; 130 + 131 + b->shared = b->private + nr_cpus; 132 + 133 + return 0; 134 + } 135 + 136 + static void vgic_free_bitmap(struct vgic_bitmap *b) 137 + { 138 + kfree(b->private); 139 + b->private = NULL; 140 + b->shared = NULL; 141 + } 142 + 126 143 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, 127 144 int cpuid, u32 offset) 128 145 { 129 146 offset >>= 2; 130 147 if (!offset) 131 - return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); 148 + return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE; 132 149 else 133 - return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); 150 + return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE); 134 151 } 135 152 136 153 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, 137 154 int cpuid, int irq) 138 155 { 139 156 if (irq < VGIC_NR_PRIVATE_IRQS) 140 - return test_bit(irq, x->percpu[cpuid].reg_ul); 157 + return test_bit(irq, x->private + cpuid); 141 158 142 - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); 159 + return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); 143 160 } 144 161 145 162 static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, ··· 170 143 unsigned long *reg; 171 144 172 145 if (irq < VGIC_NR_PRIVATE_IRQS) { 173 - reg = x->percpu[cpuid].reg_ul; 146 + reg = x->private + cpuid; 174 147 } else { 175 - reg = x->shared.reg_ul; 148 + reg = x->shared; 176 149 irq -= VGIC_NR_PRIVATE_IRQS; 177 150 } 178 151 ··· 184 157 185 158 
static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) 186 159 { 187 - if (unlikely(cpuid >= VGIC_MAX_CPUS)) 188 - return NULL; 189 - return x->percpu[cpuid].reg_ul; 160 + return x->private + cpuid; 190 161 } 191 162 192 163 static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) 193 164 { 194 - return x->shared.reg_ul; 165 + return x->shared; 166 + } 167 + 168 + static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs) 169 + { 170 + int size; 171 + 172 + size = nr_cpus * VGIC_NR_PRIVATE_IRQS; 173 + size += nr_irqs - VGIC_NR_PRIVATE_IRQS; 174 + 175 + x->private = kzalloc(size, GFP_KERNEL); 176 + if (!x->private) 177 + return -ENOMEM; 178 + 179 + x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32); 180 + return 0; 181 + } 182 + 183 + static void vgic_free_bytemap(struct vgic_bytemap *b) 184 + { 185 + kfree(b->private); 186 + b->private = NULL; 187 + b->shared = NULL; 195 188 } 196 189 197 190 static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) 198 191 { 199 - offset >>= 2; 200 - BUG_ON(offset > (VGIC_NR_IRQS / 4)); 201 - if (offset < 8) 202 - return x->percpu[cpuid] + offset; 203 - else 204 - return x->shared + offset - 8; 192 + u32 *reg; 193 + 194 + if (offset < VGIC_NR_PRIVATE_IRQS) { 195 + reg = x->private; 196 + offset += cpuid * VGIC_NR_PRIVATE_IRQS; 197 + } else { 198 + reg = x->shared; 199 + offset -= VGIC_NR_PRIVATE_IRQS; 200 + } 201 + 202 + return reg + (offset / sizeof(u32)); 205 203 } 206 204 207 205 #define VGIC_CFG_LEVEL 0 ··· 248 196 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); 249 197 } 250 198 251 - static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) 199 + static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) 252 200 { 253 201 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 254 202 255 - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); 203 + return 
vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); 256 204 } 257 205 258 - static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) 206 + static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) 259 207 { 260 208 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 261 209 262 - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); 210 + vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1); 263 211 } 264 212 265 - static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) 213 + static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) 266 214 { 267 215 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 268 216 269 - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); 217 + vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); 218 + } 219 + 220 + static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) 221 + { 222 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 223 + 224 + return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq); 225 + } 226 + 227 + static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq) 228 + { 229 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 230 + 231 + vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1); 232 + } 233 + 234 + static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq) 235 + { 236 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 237 + 238 + vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0); 239 + } 240 + 241 + static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq) 242 + { 243 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 244 + 245 + return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq); 246 + } 247 + 248 + static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) 249 + { 250 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 251 + 252 + vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); 270 253 } 271 
254 272 255 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) 273 256 { 274 257 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 275 258 276 - return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); 259 + return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); 277 260 } 278 261 279 - static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) 262 + static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) 280 263 { 281 264 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 282 265 283 - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); 266 + vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); 284 267 } 285 268 286 - static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) 269 + static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) 287 270 { 288 271 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 289 272 290 - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); 273 + vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0); 291 274 } 292 275 293 276 static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) ··· 341 254 else 342 255 clear_bit(irq - VGIC_NR_PRIVATE_IRQS, 343 256 vcpu->arch.vgic_cpu.pending_shared); 257 + } 258 + 259 + static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) 260 + { 261 + return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); 344 262 } 345 263 346 264 static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) ··· 439 347 440 348 case 4: /* GICD_TYPER */ 441 349 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; 442 - reg |= (VGIC_NR_IRQS >> 5) - 1; 350 + reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; 443 351 vgic_reg_access(mmio, &reg, word_offset, 444 352 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); 445 353 break; ··· 501 409 struct kvm_exit_mmio *mmio, 502 410 phys_addr_t offset) 503 411 { 504 - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, 505 - 
vcpu->vcpu_id, offset); 412 + u32 *reg, orig; 413 + u32 level_mask; 414 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 415 + 416 + reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); 417 + level_mask = (~(*reg)); 418 + 419 + /* Mark both level and edge triggered irqs as pending */ 420 + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); 421 + orig = *reg; 506 422 vgic_reg_access(mmio, reg, offset, 507 423 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); 424 + 508 425 if (mmio->is_write) { 426 + /* Set the soft-pending flag only for level-triggered irqs */ 427 + reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, 428 + vcpu->vcpu_id, offset); 429 + vgic_reg_access(mmio, reg, offset, 430 + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); 431 + *reg &= level_mask; 432 + 433 + /* Ignore writes to SGIs */ 434 + if (offset < 2) { 435 + *reg &= ~0xffff; 436 + *reg |= orig & 0xffff; 437 + } 438 + 509 439 vgic_update_state(vcpu->kvm); 510 440 return true; 511 441 } ··· 539 425 struct kvm_exit_mmio *mmio, 540 426 phys_addr_t offset) 541 427 { 542 - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, 543 - vcpu->vcpu_id, offset); 428 + u32 *level_active; 429 + u32 *reg, orig; 430 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 431 + 432 + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); 433 + orig = *reg; 544 434 vgic_reg_access(mmio, reg, offset, 545 435 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); 546 436 if (mmio->is_write) { 437 + /* Re-set level triggered level-active interrupts */ 438 + level_active = vgic_bitmap_get_reg(&dist->irq_level, 439 + vcpu->vcpu_id, offset); 440 + reg = vgic_bitmap_get_reg(&dist->irq_pending, 441 + vcpu->vcpu_id, offset); 442 + *reg |= *level_active; 443 + 444 + /* Ignore writes to SGIs */ 445 + if (offset < 2) { 446 + *reg &= ~0xffff; 447 + *reg |= orig & 0xffff; 448 + } 449 + 450 + /* Clear soft-pending flags */ 451 + reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, 452 + vcpu->vcpu_id, 
offset); 453 + vgic_reg_access(mmio, reg, offset, 454 + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); 455 + 547 456 vgic_update_state(vcpu->kvm); 548 457 return true; 549 458 } ··· 788 651 * is fine, then we are only setting a few bits that were 789 652 * already set. 790 653 */ 791 - vgic_dist_irq_set(vcpu, lr.irq); 654 + vgic_dist_irq_set_pending(vcpu, lr.irq); 792 655 if (lr.irq < VGIC_NR_SGIS) 793 - dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; 656 + *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; 794 657 lr.state &= ~LR_STATE_PENDING; 795 658 vgic_set_lr(vcpu, i, lr); 796 659 ··· 799 662 * active), then the LR does not hold any useful info and can 800 663 * be marked as free for other use. 801 664 */ 802 - if (!(lr.state & LR_STATE_MASK)) 665 + if (!(lr.state & LR_STATE_MASK)) { 803 666 vgic_retire_lr(i, lr.irq, vcpu); 667 + vgic_irq_clear_queued(vcpu, lr.irq); 668 + } 804 669 805 670 /* Finally update the VGIC state. */ 806 671 vgic_update_state(vcpu->kvm); ··· 816 677 { 817 678 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 818 679 int sgi; 819 - int min_sgi = (offset & ~0x3) * 4; 680 + int min_sgi = (offset & ~0x3); 820 681 int max_sgi = min_sgi + 3; 821 682 int vcpu_id = vcpu->vcpu_id; 822 683 u32 reg = 0; ··· 824 685 /* Copy source SGIs from distributor side */ 825 686 for (sgi = min_sgi; sgi <= max_sgi; sgi++) { 826 687 int shift = 8 * (sgi - min_sgi); 827 - reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; 688 + reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift; 828 689 } 829 690 830 691 mmio_data_write(mmio, ~0, reg); ··· 837 698 { 838 699 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 839 700 int sgi; 840 - int min_sgi = (offset & ~0x3) * 4; 701 + int min_sgi = (offset & ~0x3); 841 702 int max_sgi = min_sgi + 3; 842 703 int vcpu_id = vcpu->vcpu_id; 843 704 u32 reg; ··· 848 709 /* Clear pending SGIs on the distributor */ 849 710 for (sgi = min_sgi; sgi <= max_sgi; sgi++) { 850 711 u8 mask = reg >> (8 * 
(sgi - min_sgi)); 712 + u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); 851 713 if (set) { 852 - if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) 714 + if ((*src & mask) != mask) 853 715 updated = true; 854 - dist->irq_sgi_sources[vcpu_id][sgi] |= mask; 716 + *src |= mask; 855 717 } else { 856 - if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) 718 + if (*src & mask) 857 719 updated = true; 858 - dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; 720 + *src &= ~mask; 859 721 } 860 722 } 861 723 ··· 895 755 struct mmio_range { 896 756 phys_addr_t base; 897 757 unsigned long len; 758 + int bits_per_irq; 898 759 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, 899 760 phys_addr_t offset); 900 761 }; ··· 904 763 { 905 764 .base = GIC_DIST_CTRL, 906 765 .len = 12, 766 + .bits_per_irq = 0, 907 767 .handle_mmio = handle_mmio_misc, 908 768 }, 909 769 { 910 770 .base = GIC_DIST_IGROUP, 911 - .len = VGIC_NR_IRQS / 8, 771 + .len = VGIC_MAX_IRQS / 8, 772 + .bits_per_irq = 1, 912 773 .handle_mmio = handle_mmio_raz_wi, 913 774 }, 914 775 { 915 776 .base = GIC_DIST_ENABLE_SET, 916 - .len = VGIC_NR_IRQS / 8, 777 + .len = VGIC_MAX_IRQS / 8, 778 + .bits_per_irq = 1, 917 779 .handle_mmio = handle_mmio_set_enable_reg, 918 780 }, 919 781 { 920 782 .base = GIC_DIST_ENABLE_CLEAR, 921 - .len = VGIC_NR_IRQS / 8, 783 + .len = VGIC_MAX_IRQS / 8, 784 + .bits_per_irq = 1, 922 785 .handle_mmio = handle_mmio_clear_enable_reg, 923 786 }, 924 787 { 925 788 .base = GIC_DIST_PENDING_SET, 926 - .len = VGIC_NR_IRQS / 8, 789 + .len = VGIC_MAX_IRQS / 8, 790 + .bits_per_irq = 1, 927 791 .handle_mmio = handle_mmio_set_pending_reg, 928 792 }, 929 793 { 930 794 .base = GIC_DIST_PENDING_CLEAR, 931 - .len = VGIC_NR_IRQS / 8, 795 + .len = VGIC_MAX_IRQS / 8, 796 + .bits_per_irq = 1, 932 797 .handle_mmio = handle_mmio_clear_pending_reg, 933 798 }, 934 799 { 935 800 .base = GIC_DIST_ACTIVE_SET, 936 - .len = VGIC_NR_IRQS / 8, 801 + .len = VGIC_MAX_IRQS / 8, 802 + .bits_per_irq = 1, 937 
803 .handle_mmio = handle_mmio_raz_wi, 938 804 }, 939 805 { 940 806 .base = GIC_DIST_ACTIVE_CLEAR, 941 - .len = VGIC_NR_IRQS / 8, 807 + .len = VGIC_MAX_IRQS / 8, 808 + .bits_per_irq = 1, 942 809 .handle_mmio = handle_mmio_raz_wi, 943 810 }, 944 811 { 945 812 .base = GIC_DIST_PRI, 946 - .len = VGIC_NR_IRQS, 813 + .len = VGIC_MAX_IRQS, 814 + .bits_per_irq = 8, 947 815 .handle_mmio = handle_mmio_priority_reg, 948 816 }, 949 817 { 950 818 .base = GIC_DIST_TARGET, 951 - .len = VGIC_NR_IRQS, 819 + .len = VGIC_MAX_IRQS, 820 + .bits_per_irq = 8, 952 821 .handle_mmio = handle_mmio_target_reg, 953 822 }, 954 823 { 955 824 .base = GIC_DIST_CONFIG, 956 - .len = VGIC_NR_IRQS / 4, 825 + .len = VGIC_MAX_IRQS / 4, 826 + .bits_per_irq = 2, 957 827 .handle_mmio = handle_mmio_cfg_reg, 958 828 }, 959 829 { ··· 1000 848 } 1001 849 1002 850 return NULL; 851 + } 852 + 853 + static bool vgic_validate_access(const struct vgic_dist *dist, 854 + const struct mmio_range *range, 855 + unsigned long offset) 856 + { 857 + int irq; 858 + 859 + if (!range->bits_per_irq) 860 + return true; /* Not an irq-based access */ 861 + 862 + irq = offset * 8 / range->bits_per_irq; 863 + if (irq >= dist->nr_irqs) 864 + return false; 865 + 866 + return true; 1003 867 } 1004 868 1005 869 /** ··· 1057 889 1058 890 spin_lock(&vcpu->kvm->arch.vgic.lock); 1059 891 offset = mmio->phys_addr - range->base - base; 1060 - updated_state = range->handle_mmio(vcpu, mmio, offset); 892 + if (vgic_validate_access(dist, range, offset)) { 893 + updated_state = range->handle_mmio(vcpu, mmio, offset); 894 + } else { 895 + vgic_reg_access(mmio, NULL, offset, 896 + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); 897 + updated_state = false; 898 + } 1061 899 spin_unlock(&vcpu->kvm->arch.vgic.lock); 1062 900 kvm_prepare_mmio(run, mmio); 1063 901 kvm_handle_mmio_return(vcpu, run); ··· 1072 898 vgic_kick_vcpus(vcpu->kvm); 1073 899 1074 900 return true; 901 + } 902 + 903 + static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int 
sgi) 904 + { 905 + return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; 1075 906 } 1076 907 1077 908 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) ··· 1111 932 kvm_for_each_vcpu(c, vcpu, kvm) { 1112 933 if (target_cpus & 1) { 1113 934 /* Flag the SGI as pending */ 1114 - vgic_dist_irq_set(vcpu, sgi); 1115 - dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; 935 + vgic_dist_irq_set_pending(vcpu, sgi); 936 + *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; 1116 937 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); 1117 938 } 1118 939 ··· 1120 941 } 1121 942 } 1122 943 944 + static int vgic_nr_shared_irqs(struct vgic_dist *dist) 945 + { 946 + return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; 947 + } 948 + 1123 949 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) 1124 950 { 1125 951 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1126 952 unsigned long *pending, *enabled, *pend_percpu, *pend_shared; 1127 953 unsigned long pending_private, pending_shared; 954 + int nr_shared = vgic_nr_shared_irqs(dist); 1128 955 int vcpu_id; 1129 956 1130 957 vcpu_id = vcpu->vcpu_id; 1131 958 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; 1132 959 pend_shared = vcpu->arch.vgic_cpu.pending_shared; 1133 960 1134 - pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); 961 + pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); 1135 962 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); 1136 963 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); 1137 964 1138 - pending = vgic_bitmap_get_shared_map(&dist->irq_state); 965 + pending = vgic_bitmap_get_shared_map(&dist->irq_pending); 1139 966 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); 1140 - bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); 967 + bitmap_and(pend_shared, pending, enabled, nr_shared); 1141 968 bitmap_and(pend_shared, pend_shared, 1142 969 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), 1143 - 
VGIC_NR_SHARED_IRQS); 970 + nr_shared); 1144 971 1145 972 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); 1146 - pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); 973 + pending_shared = find_first_bit(pend_shared, nr_shared); 1147 974 return (pending_private < VGIC_NR_PRIVATE_IRQS || 1148 - pending_shared < VGIC_NR_SHARED_IRQS); 975 + pending_shared < vgic_nr_shared_irqs(dist)); 1149 976 } 1150 977 1151 978 /* ··· 1165 980 int c; 1166 981 1167 982 if (!dist->enabled) { 1168 - set_bit(0, &dist->irq_pending_on_cpu); 983 + set_bit(0, dist->irq_pending_on_cpu); 1169 984 return; 1170 985 } 1171 986 1172 987 kvm_for_each_vcpu(c, vcpu, kvm) { 1173 988 if (compute_pending_for_cpu(vcpu)) { 1174 989 pr_debug("CPU%d has pending interrupts\n", c); 1175 - set_bit(c, &dist->irq_pending_on_cpu); 990 + set_bit(c, dist->irq_pending_on_cpu); 1176 991 } 1177 992 } 1178 993 } ··· 1264 1079 1265 1080 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { 1266 1081 vgic_retire_lr(lr, vlr.irq, vcpu); 1267 - if (vgic_irq_is_active(vcpu, vlr.irq)) 1268 - vgic_irq_clear_active(vcpu, vlr.irq); 1082 + if (vgic_irq_is_queued(vcpu, vlr.irq)) 1083 + vgic_irq_clear_queued(vcpu, vlr.irq); 1269 1084 } 1270 1085 } 1271 1086 } ··· 1277 1092 static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) 1278 1093 { 1279 1094 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1095 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1280 1096 struct vgic_lr vlr; 1281 1097 int lr; 1282 1098 1283 1099 /* Sanitize the input... 
*/ 1284 1100 BUG_ON(sgi_source_id & ~7); 1285 1101 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); 1286 - BUG_ON(irq >= VGIC_NR_IRQS); 1102 + BUG_ON(irq >= dist->nr_irqs); 1287 1103 1288 1104 kvm_debug("Queue IRQ%d\n", irq); 1289 1105 ··· 1330 1144 int vcpu_id = vcpu->vcpu_id; 1331 1145 int c; 1332 1146 1333 - sources = dist->irq_sgi_sources[vcpu_id][irq]; 1147 + sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); 1334 1148 1335 - for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { 1149 + for_each_set_bit(c, &sources, dist->nr_cpus) { 1336 1150 if (vgic_queue_irq(vcpu, c, irq)) 1337 1151 clear_bit(c, &sources); 1338 1152 } 1339 1153 1340 - dist->irq_sgi_sources[vcpu_id][irq] = sources; 1154 + *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; 1341 1155 1342 1156 /* 1343 1157 * If the sources bitmap has been cleared it means that we ··· 1346 1160 * our emulated gic and can get rid of them. 1347 1161 */ 1348 1162 if (!sources) { 1349 - vgic_dist_irq_clear(vcpu, irq); 1163 + vgic_dist_irq_clear_pending(vcpu, irq); 1350 1164 vgic_cpu_irq_clear(vcpu, irq); 1351 1165 return true; 1352 1166 } ··· 1356 1170 1357 1171 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) 1358 1172 { 1359 - if (vgic_irq_is_active(vcpu, irq)) 1173 + if (!vgic_can_sample_irq(vcpu, irq)) 1360 1174 return true; /* level interrupt, already queued */ 1361 1175 1362 1176 if (vgic_queue_irq(vcpu, 0, irq)) { 1363 1177 if (vgic_irq_is_edge(vcpu, irq)) { 1364 - vgic_dist_irq_clear(vcpu, irq); 1178 + vgic_dist_irq_clear_pending(vcpu, irq); 1365 1179 vgic_cpu_irq_clear(vcpu, irq); 1366 1180 } else { 1367 - vgic_irq_set_active(vcpu, irq); 1181 + vgic_irq_set_queued(vcpu, irq); 1368 1182 } 1369 1183 1370 1184 return true; ··· 1409 1223 } 1410 1224 1411 1225 /* SPIs */ 1412 - for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { 1226 + for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { 1413 1227 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) 1414 1228 
overflow = 1; 1415 1229 } ··· 1425 1239 * us. Claim we don't have anything pending. We'll 1426 1240 * adjust that if needed while exiting. 1427 1241 */ 1428 - clear_bit(vcpu_id, &dist->irq_pending_on_cpu); 1242 + clear_bit(vcpu_id, dist->irq_pending_on_cpu); 1429 1243 } 1430 1244 } 1431 1245 ··· 1447 1261 1448 1262 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { 1449 1263 struct vgic_lr vlr = vgic_get_lr(vcpu, lr); 1264 + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); 1450 1265 1451 - vgic_irq_clear_active(vcpu, vlr.irq); 1266 + vgic_irq_clear_queued(vcpu, vlr.irq); 1452 1267 WARN_ON(vlr.state & LR_STATE_MASK); 1453 1268 vlr.state = 0; 1454 1269 vgic_set_lr(vcpu, lr, vlr); 1455 1270 1271 + /* 1272 + * If the IRQ was EOIed it was also ACKed and we we 1273 + * therefore assume we can clear the soft pending 1274 + * state (should it had been set) for this interrupt. 1275 + * 1276 + * Note: if the IRQ soft pending state was set after 1277 + * the IRQ was acked, it actually shouldn't be 1278 + * cleared, but we have no way of knowing that unless 1279 + * we start trapping ACKs when the soft-pending state 1280 + * is set. 1281 + */ 1282 + vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); 1283 + 1456 1284 /* Any additional pending interrupt? */ 1457 - if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { 1285 + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { 1458 1286 vgic_cpu_irq_set(vcpu, vlr.irq); 1459 1287 level_pending = true; 1460 1288 } else { 1289 + vgic_dist_irq_clear_pending(vcpu, vlr.irq); 1461 1290 vgic_cpu_irq_clear(vcpu, vlr.irq); 1462 1291 } 1463 1292 ··· 1516 1315 1517 1316 vlr = vgic_get_lr(vcpu, lr); 1518 1317 1519 - BUG_ON(vlr.irq >= VGIC_NR_IRQS); 1318 + BUG_ON(vlr.irq >= dist->nr_irqs); 1520 1319 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; 1521 1320 } 1522 1321 1523 1322 /* Check if we still have something up our sleeve... 
*/ 1524 1323 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); 1525 1324 if (level_pending || pending < vgic->nr_lr) 1526 - set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); 1325 + set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); 1527 1326 } 1528 1327 1529 1328 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) ··· 1557 1356 if (!irqchip_in_kernel(vcpu->kvm)) 1558 1357 return 0; 1559 1358 1560 - return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); 1359 + return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); 1561 1360 } 1562 1361 1563 1362 static void vgic_kick_vcpus(struct kvm *kvm) ··· 1577 1376 1578 1377 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) 1579 1378 { 1580 - int is_edge = vgic_irq_is_edge(vcpu, irq); 1581 - int state = vgic_dist_irq_is_pending(vcpu, irq); 1379 + int edge_triggered = vgic_irq_is_edge(vcpu, irq); 1582 1380 1583 1381 /* 1584 1382 * Only inject an interrupt if: 1585 1383 * - edge triggered and we have a rising edge 1586 1384 * - level triggered and we change level 1587 1385 */ 1588 - if (is_edge) 1386 + if (edge_triggered) { 1387 + int state = vgic_dist_irq_is_pending(vcpu, irq); 1589 1388 return level > state; 1590 - else 1389 + } else { 1390 + int state = vgic_dist_irq_get_level(vcpu, irq); 1591 1391 return level != state; 1392 + } 1592 1393 } 1593 1394 1594 - static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, 1395 + static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, 1595 1396 unsigned int irq_num, bool level) 1596 1397 { 1597 1398 struct vgic_dist *dist = &kvm->arch.vgic; 1598 1399 struct kvm_vcpu *vcpu; 1599 - int is_edge, is_level; 1400 + int edge_triggered, level_triggered; 1600 1401 int enabled; 1601 1402 bool ret = true; 1602 1403 1603 1404 spin_lock(&dist->lock); 1604 1405 1605 1406 vcpu = kvm_get_vcpu(kvm, cpuid); 1606 - is_edge = vgic_irq_is_edge(vcpu, irq_num); 1607 - is_level = !is_edge; 1407 + edge_triggered = vgic_irq_is_edge(vcpu, irq_num); 1408 + 
level_triggered = !edge_triggered; 1608 1409 1609 1410 if (!vgic_validate_injection(vcpu, irq_num, level)) { 1610 1411 ret = false; ··· 1620 1417 1621 1418 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); 1622 1419 1623 - if (level) 1624 - vgic_dist_irq_set(vcpu, irq_num); 1625 - else 1626 - vgic_dist_irq_clear(vcpu, irq_num); 1420 + if (level) { 1421 + if (level_triggered) 1422 + vgic_dist_irq_set_level(vcpu, irq_num); 1423 + vgic_dist_irq_set_pending(vcpu, irq_num); 1424 + } else { 1425 + if (level_triggered) { 1426 + vgic_dist_irq_clear_level(vcpu, irq_num); 1427 + if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) 1428 + vgic_dist_irq_clear_pending(vcpu, irq_num); 1429 + } else { 1430 + vgic_dist_irq_clear_pending(vcpu, irq_num); 1431 + } 1432 + } 1627 1433 1628 1434 enabled = vgic_irq_is_enabled(vcpu, irq_num); 1629 1435 ··· 1641 1429 goto out; 1642 1430 } 1643 1431 1644 - if (is_level && vgic_irq_is_active(vcpu, irq_num)) { 1432 + if (!vgic_can_sample_irq(vcpu, irq_num)) { 1645 1433 /* 1646 1434 * Level interrupt in progress, will be picked up 1647 1435 * when EOId. 
··· 1652 1440 1653 1441 if (level) { 1654 1442 vgic_cpu_irq_set(vcpu, irq_num); 1655 - set_bit(cpuid, &dist->irq_pending_on_cpu); 1443 + set_bit(cpuid, dist->irq_pending_on_cpu); 1656 1444 } 1657 1445 1658 1446 out: ··· 1678 1466 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 1679 1467 bool level) 1680 1468 { 1681 - if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) 1469 + if (likely(vgic_initialized(kvm)) && 1470 + vgic_update_irq_pending(kvm, cpuid, irq_num, level)) 1682 1471 vgic_kick_vcpus(kvm); 1683 1472 1684 1473 return 0; ··· 1696 1483 return IRQ_HANDLED; 1697 1484 } 1698 1485 1486 + void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) 1487 + { 1488 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1489 + 1490 + kfree(vgic_cpu->pending_shared); 1491 + kfree(vgic_cpu->vgic_irq_lr_map); 1492 + vgic_cpu->pending_shared = NULL; 1493 + vgic_cpu->vgic_irq_lr_map = NULL; 1494 + } 1495 + 1496 + static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) 1497 + { 1498 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1499 + 1500 + int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; 1501 + vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); 1502 + vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); 1503 + 1504 + if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { 1505 + kvm_vgic_vcpu_destroy(vcpu); 1506 + return -ENOMEM; 1507 + } 1508 + 1509 + return 0; 1510 + } 1511 + 1699 1512 /** 1700 1513 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state 1701 1514 * @vcpu: pointer to the vcpu struct ··· 1729 1490 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to 1730 1491 * this vcpu and enable the VGIC for this VCPU 1731 1492 */ 1732 - int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) 1493 + static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) 1733 1494 { 1734 1495 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1735 1496 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1736 1497 int i; 1737 1498 1738 - 
if (vcpu->vcpu_id >= VGIC_MAX_CPUS) 1739 - return -EBUSY; 1740 - 1741 - for (i = 0; i < VGIC_NR_IRQS; i++) { 1499 + for (i = 0; i < dist->nr_irqs; i++) { 1742 1500 if (i < VGIC_NR_PPIS) 1743 1501 vgic_bitmap_set_irq_val(&dist->irq_enabled, 1744 1502 vcpu->vcpu_id, i, 1); ··· 1754 1518 vgic_cpu->nr_lr = vgic->nr_lr; 1755 1519 1756 1520 vgic_enable(vcpu); 1521 + } 1757 1522 1758 - return 0; 1523 + void kvm_vgic_destroy(struct kvm *kvm) 1524 + { 1525 + struct vgic_dist *dist = &kvm->arch.vgic; 1526 + struct kvm_vcpu *vcpu; 1527 + int i; 1528 + 1529 + kvm_for_each_vcpu(i, vcpu, kvm) 1530 + kvm_vgic_vcpu_destroy(vcpu); 1531 + 1532 + vgic_free_bitmap(&dist->irq_enabled); 1533 + vgic_free_bitmap(&dist->irq_level); 1534 + vgic_free_bitmap(&dist->irq_pending); 1535 + vgic_free_bitmap(&dist->irq_soft_pend); 1536 + vgic_free_bitmap(&dist->irq_queued); 1537 + vgic_free_bitmap(&dist->irq_cfg); 1538 + vgic_free_bytemap(&dist->irq_priority); 1539 + if (dist->irq_spi_target) { 1540 + for (i = 0; i < dist->nr_cpus; i++) 1541 + vgic_free_bitmap(&dist->irq_spi_target[i]); 1542 + } 1543 + kfree(dist->irq_sgi_sources); 1544 + kfree(dist->irq_spi_cpu); 1545 + kfree(dist->irq_spi_target); 1546 + kfree(dist->irq_pending_on_cpu); 1547 + dist->irq_sgi_sources = NULL; 1548 + dist->irq_spi_cpu = NULL; 1549 + dist->irq_spi_target = NULL; 1550 + dist->irq_pending_on_cpu = NULL; 1551 + } 1552 + 1553 + /* 1554 + * Allocate and initialize the various data structures. Must be called 1555 + * with kvm->lock held! 1556 + */ 1557 + static int vgic_init_maps(struct kvm *kvm) 1558 + { 1559 + struct vgic_dist *dist = &kvm->arch.vgic; 1560 + struct kvm_vcpu *vcpu; 1561 + int nr_cpus, nr_irqs; 1562 + int ret, i; 1563 + 1564 + if (dist->nr_cpus) /* Already allocated */ 1565 + return 0; 1566 + 1567 + nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); 1568 + if (!nr_cpus) /* No vcpus? Can't be good... 
*/ 1569 + return -EINVAL; 1570 + 1571 + /* 1572 + * If nobody configured the number of interrupts, use the 1573 + * legacy one. 1574 + */ 1575 + if (!dist->nr_irqs) 1576 + dist->nr_irqs = VGIC_NR_IRQS_LEGACY; 1577 + 1578 + nr_irqs = dist->nr_irqs; 1579 + 1580 + ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); 1581 + ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); 1582 + ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); 1583 + ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); 1584 + ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); 1585 + ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); 1586 + ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); 1587 + 1588 + if (ret) 1589 + goto out; 1590 + 1591 + dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL); 1592 + dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL); 1593 + dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus, 1594 + GFP_KERNEL); 1595 + dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), 1596 + GFP_KERNEL); 1597 + if (!dist->irq_sgi_sources || 1598 + !dist->irq_spi_cpu || 1599 + !dist->irq_spi_target || 1600 + !dist->irq_pending_on_cpu) { 1601 + ret = -ENOMEM; 1602 + goto out; 1603 + } 1604 + 1605 + for (i = 0; i < nr_cpus; i++) 1606 + ret |= vgic_init_bitmap(&dist->irq_spi_target[i], 1607 + nr_cpus, nr_irqs); 1608 + 1609 + if (ret) 1610 + goto out; 1611 + 1612 + kvm_for_each_vcpu(i, vcpu, kvm) { 1613 + ret = vgic_vcpu_init_maps(vcpu, nr_irqs); 1614 + if (ret) { 1615 + kvm_err("VGIC: Failed to allocate vcpu memory\n"); 1616 + break; 1617 + } 1618 + } 1619 + 1620 + for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) 1621 + vgic_set_target_reg(kvm, 0, i); 1622 + 1623 + out: 1624 + if (ret) 1625 + kvm_vgic_destroy(kvm); 1626 + 1627 + return ret; 1759 1628 } 1760 1629 1761 1630 /** ··· 1874 1533 */ 1875 1534 int kvm_vgic_init(struct kvm *kvm) 
1876 1535 { 1536 + struct kvm_vcpu *vcpu; 1877 1537 int ret = 0, i; 1878 1538 1879 1539 if (!irqchip_in_kernel(kvm)) ··· 1892 1550 goto out; 1893 1551 } 1894 1552 1553 + ret = vgic_init_maps(kvm); 1554 + if (ret) { 1555 + kvm_err("Unable to allocate maps\n"); 1556 + goto out; 1557 + } 1558 + 1895 1559 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, 1896 1560 vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); 1897 1561 if (ret) { ··· 1905 1557 goto out; 1906 1558 } 1907 1559 1908 - for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) 1909 - vgic_set_target_reg(kvm, 0, i); 1560 + kvm_for_each_vcpu(i, vcpu, kvm) 1561 + kvm_vgic_vcpu_init(vcpu); 1910 1562 1911 1563 kvm->arch.vgic.ready = true; 1912 1564 out: 1565 + if (ret) 1566 + kvm_vgic_destroy(kvm); 1913 1567 mutex_unlock(&kvm->lock); 1914 1568 return ret; 1915 1569 } ··· 1963 1613 return ret; 1964 1614 } 1965 1615 1966 - static bool vgic_ioaddr_overlap(struct kvm *kvm) 1616 + static int vgic_ioaddr_overlap(struct kvm *kvm) 1967 1617 { 1968 1618 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; 1969 1619 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; ··· 2152 1802 2153 1803 mutex_lock(&dev->kvm->lock); 2154 1804 1805 + ret = vgic_init_maps(dev->kvm); 1806 + if (ret) 1807 + goto out; 1808 + 2155 1809 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { 2156 1810 ret = -EINVAL; 2157 1811 goto out; ··· 2253 1899 2254 1900 return vgic_attr_regs_access(dev, attr, &reg, true); 2255 1901 } 1902 + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { 1903 + u32 __user *uaddr = (u32 __user *)(long)attr->addr; 1904 + u32 val; 1905 + int ret = 0; 1906 + 1907 + if (get_user(val, uaddr)) 1908 + return -EFAULT; 1909 + 1910 + /* 1911 + * We require: 1912 + * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs 1913 + * - at most 1024 interrupts 1914 + * - a multiple of 32 interrupts 1915 + */ 1916 + if (val < (VGIC_NR_PRIVATE_IRQS + 32) || 1917 + val > VGIC_MAX_IRQS || 1918 + (val & 31)) 1919 + return -EINVAL; 1920 + 1921 + 
mutex_lock(&dev->kvm->lock); 1922 + 1923 + if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) 1924 + ret = -EBUSY; 1925 + else 1926 + dev->kvm->arch.vgic.nr_irqs = val; 1927 + 1928 + mutex_unlock(&dev->kvm->lock); 1929 + 1930 + return ret; 1931 + } 2256 1932 2257 1933 } 2258 1934 ··· 2317 1933 if (r) 2318 1934 return r; 2319 1935 r = put_user(reg, uaddr); 1936 + break; 1937 + } 1938 + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { 1939 + u32 __user *uaddr = (u32 __user *)(long)attr->addr; 1940 + r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); 2320 1941 break; 2321 1942 } 2322 1943 ··· 2360 1971 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: 2361 1972 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; 2362 1973 return vgic_has_attr_regs(vgic_cpu_ranges, offset); 1974 + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: 1975 + return 0; 2363 1976 } 2364 1977 return -ENXIO; 2365 1978 } ··· 2420 2029 int kvm_vgic_hyp_init(void) 2421 2030 { 2422 2031 const struct of_device_id *matched_id; 2423 - int (*vgic_probe)(struct device_node *,const struct vgic_ops **, 2424 - const struct vgic_params **); 2032 + const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, 2033 + const struct vgic_params **); 2425 2034 struct device_node *vgic_node; 2426 2035 int ret; 2427 2036
+9 -2
virt/kvm/kvm_main.c
··· 1095 1095 * If writable is set to false, the hva returned by this function is only 1096 1096 * allowed to be read. 1097 1097 */ 1098 - unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) 1098 + unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, 1099 + gfn_t gfn, bool *writable) 1099 1100 { 1100 - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 1101 1101 unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); 1102 1102 1103 1103 if (!kvm_is_error_hva(hva) && writable) 1104 1104 *writable = !memslot_is_readonly(slot); 1105 1105 1106 1106 return hva; 1107 + } 1108 + 1109 + unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) 1110 + { 1111 + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 1112 + 1113 + return gfn_to_hva_memslot_prot(slot, gfn, writable); 1107 1114 } 1108 1115 1109 1116 static int kvm_read_hva(void *data, void __user *hva, int len)