Merge tag 'kvm-arm-for-4.11-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm

KVM/ARM updates for v4.11-rc2

vgic updates:
- Honour disabling the ITS
- Don't deadlock when deactivating own interrupts via MMIO
- Correctly expose the lack of IRQ/FIQ bypass on GICv3

I/O virtualization:
- Make KVM_CAP_NR_MEMSLOTS big enough for large guests with
many PCIe devices

General bug fixes:
- Gracefully handle exceptions generated with syndromes that
the host doesn't understand
- Properly invalidate TLBs on VHE systems

Changed files: +183 -79
+4
Documentation/virtual/kvm/api.txt
···
 slot. When changing an existing slot, it may be moved in the guest
 physical memory space, or its flags may be modified. It may not be
 resized. Slots may not overlap in guest physical address space.
+Bits 0-15 of "slot" specifies the slot id and this value should be
+less than the maximum number of user memory slots supported per VM.
+The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
+if this capability is supported by the architecture.
 
 If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
 specifies the address space which is being modified. They must be
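
Aside, not part of this series: a minimal userspace sketch of querying the limit. KVM_CHECK_EXTENSION returns 0 for capabilities the architecture doesn't support, so a VMM can fall back to a conservative slot count.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);

	if (kvm_fd < 0)
		return 1;

	/* Returns the slot limit, or 0 if the capability is unsupported. */
	int nr_slots = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);

	printf("user memory slots: %d\n", nr_slots);
	return 0;
}
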
+1
arch/arm/include/asm/kvm_arm.h
···
 #define HSR_EC_IABT_HYP	(0x21)
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
+#define HSR_EC_MAX	(0x3f)
 
 #define HSR_WFI_IS_WFE		(_AC(1, UL) << 0)
 
-1
arch/arm/include/asm/kvm_host.h
···
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
 #define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 #define KVM_HALT_POLL_NS_DEFAULT 500000
+3
arch/arm/kvm/arm.c
···
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_USER_MEM_SLOTS;
+		break;
 	case KVM_CAP_MSI_DEVID:
 		if (!kvm)
 			r = -EINVAL;
+12 -7
arch/arm/kvm/handle_exit.c
···
 	return 1;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+		      hsr);
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+	[0 ... HSR_EC_MAX]	= kvm_handle_unknown_ec,
 	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
···
 static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 {
 	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
-
-	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unknown exception class: hsr: %#08x\n",
-			(unsigned int)kvm_vcpu_get_hsr(vcpu));
-		BUG();
-	}
 
 	return arm_exit_handlers[hsr_ec];
 }
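
The [0 ... HSR_EC_MAX] entry uses GCC's range designators: the range seeds every index with the fallback handler, and the later per-index designators override individual slots, which is what makes the old bounds check and BUG() unnecessary. A stand-alone illustration (all names made up):

typedef int (*handler_fn)(void);

static int handle_unknown(void) { return -1; }
static int handle_wfi(void)     { return 1; }

/* GNU C: [first ... last] fills the whole range first; the more
 * specific designator below then overrides index 1. */
static handler_fn handlers[16] = {
	[0 ... 15]	= handle_unknown,
	[1]		= handle_wfi,
};
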
+1 -2
arch/arm64/include/asm/kvm_host.h
···
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_USER_MEM_SLOTS 512
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 
+12 -7
arch/arm64/kvm/handle_exit.c
···
 	return ret;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+		      hsr, esr_get_class_string(hsr));
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+	[0 ... ESR_ELx_EC_MAX]	= kvm_handle_unknown_ec,
 	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
 	[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,
···
 {
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 	u8 hsr_ec = ESR_ELx_EC(hsr);
-
-	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
-			hsr, esr_get_class_string(hsr));
-		BUG();
-	}
 
 	return arm_exit_handlers[hsr_ec];
 }
+55 -9
arch/arm64/kvm/hyp/tlb.c
···
 #include <asm/kvm_hyp.h>
 #include <asm/tlbflush.h>
 
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+{
+	u64 val;
+
+	/*
+	 * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+	 * most TLB operations target EL2/EL0. In order to affect the
+	 * guest TLBs (EL1/EL0), we need to change one of these two
+	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+	 * let's flip TGE before executing the TLB operation.
+	 */
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	val = read_sysreg(hcr_el2);
+	val &= ~HCR_TGE;
+	write_sysreg(val, hcr_el2);
+	isb();
+}
+
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+{
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+}
+
+static hyp_alternate_select(__tlb_switch_to_guest,
+			    __tlb_switch_to_guest_nvhe,
+			    __tlb_switch_to_guest_vhe,
+			    ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+{
+	/*
+	 * We're done with the TLB operation, let's restore the host's
+	 * view of HCR_EL2.
+	 */
+	write_sysreg(0, vttbr_el2);
+	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+}
+
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+{
+	write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__tlb_switch_to_host,
+			    __tlb_switch_to_host_nvhe,
+			    __tlb_switch_to_host_vhe,
+			    ARM64_HAS_VIRT_HOST_EXTN);
+
 void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
 	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	/*
 	 * We could do so much better if we had the VA as well.
···
 	dsb(ish);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
···
 
 	/* Switch to requested VMID */
 	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	__tlbi(vmalls12e1is);
 	dsb(ish);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
···
 	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
 
 	/* Switch to requested VMID */
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	__tlbi(vmalle1);
 	dsb(nsh);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_flush_vm_context(void)
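
A note on the call shape: __tlb_switch_to_guest()(kvm) works because hyp_alternate_select defines a function that returns one of the two helpers, selected per CPU capability (and patched at boot rather than tested at runtime). A rough userspace model of that pattern, with all names made up and a plain runtime check standing in for the boot-time patching:

#include <stdio.h>
#include <stdbool.h>

struct vm { unsigned long vttbr; };

typedef void (*tlb_switch_fn)(struct vm *);

static bool has_vhe(void) { return false; }	/* stand-in feature probe */

static void switch_to_guest_vhe(struct vm *vm)
{
	printf("VHE: load VTTBR %#lx, clear HCR_EL2.TGE, isb\n", vm->vttbr);
}

static void switch_to_guest_nvhe(struct vm *vm)
{
	printf("nVHE: load VTTBR %#lx, isb\n", vm->vttbr);
}

/* Models hyp_alternate_select: return the helper for this CPU. */
static tlb_switch_fn tlb_switch_to_guest(void)
{
	return has_vhe() ? switch_to_guest_vhe : switch_to_guest_nvhe;
}

int main(void)
{
	struct vm vm = { .vttbr = 0x8001000UL };

	tlb_switch_to_guest()(&vm);	/* same call shape as the hyp code */
	return 0;
}
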
+2
include/linux/irqchip/arm-gic-v3.h
···
 #define ICC_IGRPEN0_EL1_MASK		(1 << ICC_IGRPEN0_EL1_SHIFT)
 #define ICC_IGRPEN1_EL1_SHIFT		0
 #define ICC_IGRPEN1_EL1_MASK		(1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB			(1U << 2)
+#define ICC_SRE_EL1_DFB			(1U << 1)
 #define ICC_SRE_EL1_SRE			(1U << 0)
 
 /*
+65 -44
virt/kvm/arm/vgic/vgic-its.c
···
 	return ret;
 }
 
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
-					     struct vgic_its *its,
-					     gpa_t addr, unsigned int len)
-{
-	u32 reg = 0;
-
-	mutex_lock(&its->cmd_lock);
-	if (its->creadr == its->cwriter)
-		reg |= GITS_CTLR_QUIESCENT;
-	if (its->enabled)
-		reg |= GITS_CTLR_ENABLE;
-	mutex_unlock(&its->cmd_lock);
-
-	return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
-				     gpa_t addr, unsigned int len,
-				     unsigned long val)
-{
-	its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
 static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
 					      struct vgic_its *its,
 					      gpa_t addr, unsigned int len)
···
 #define ITS_CMD_SIZE			32
 #define ITS_CMD_OFFSET(reg)		((reg) & GENMASK(19, 5))
 
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
-					gpa_t addr, unsigned int len,
-					unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
 {
 	gpa_t cbaser;
 	u64 cmd_buf[4];
-	u32 reg;
 
-	if (!its)
+	/* Commands are only processed when the ITS is enabled. */
+	if (!its->enabled)
 		return;
 
-	mutex_lock(&its->cmd_lock);
-
-	reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
-	reg = ITS_CMD_OFFSET(reg);
-	if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
-		mutex_unlock(&its->cmd_lock);
-		return;
-	}
-
-	its->cwriter = reg;
 	cbaser = CBASER_ADDRESS(its->cbaser);
 
 	while (its->cwriter != its->creadr) {
···
 		if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
 			its->creadr = 0;
 	}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+					gpa_t addr, unsigned int len,
+					unsigned long val)
+{
+	u64 reg;
+
+	if (!its)
+		return;
+
+	mutex_lock(&its->cmd_lock);
+
+	reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+	reg = ITS_CMD_OFFSET(reg);
+	if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+		mutex_unlock(&its->cmd_lock);
+		return;
+	}
+	its->cwriter = reg;
+
+	vgic_its_process_commands(kvm, its);
 
 	mutex_unlock(&its->cmd_lock);
 }
···
 	reg = vgic_sanitise_its_baser(reg);
 
 	*regptr = reg;
+}
+
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+					     struct vgic_its *its,
+					     gpa_t addr, unsigned int len)
+{
+	u32 reg = 0;
+
+	mutex_lock(&its->cmd_lock);
+	if (its->creadr == its->cwriter)
+		reg |= GITS_CTLR_QUIESCENT;
+	if (its->enabled)
+		reg |= GITS_CTLR_ENABLE;
+	mutex_unlock(&its->cmd_lock);
+
+	return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+				     gpa_t addr, unsigned int len,
+				     unsigned long val)
+{
+	mutex_lock(&its->cmd_lock);
+
+	its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+	/*
+	 * Try to process any pending commands. This function bails out early
+	 * if the ITS is disabled or no commands have been queued.
+	 */
+	vgic_its_process_commands(kvm, its);
+
+	mutex_unlock(&its->cmd_lock);
 }
 
 #define REGISTER_ITS_DESC(off, rd, wr, length, acc)			\
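
The effect of this rework, in miniature: CWRITER writes still advance the queue while the ITS is disabled, but nothing is executed until GITS_CTLR.Enabled is set, at which point the backlog drains under the same cmd_lock. A toy model of that ordering (illustrative names only, a pthread mutex standing in for the kernel's mutex):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_its {
	pthread_mutex_t cmd_lock;
	unsigned int creadr, cwriter;
	bool enabled;
};

/* Must be called with cmd_lock held; bails out while disabled. */
static void toy_process_commands(struct toy_its *its)
{
	if (!its->enabled)
		return;

	while (its->creadr != its->cwriter) {
		printf("processing command %u\n", its->creadr);
		its->creadr++;
	}
}

static void toy_write_cwriter(struct toy_its *its, unsigned int val)
{
	pthread_mutex_lock(&its->cmd_lock);
	its->cwriter = val;		/* queued even when disabled */
	toy_process_commands(its);
	pthread_mutex_unlock(&its->cmd_lock);
}

static void toy_write_ctlr(struct toy_its *its, bool enable)
{
	pthread_mutex_lock(&its->cmd_lock);
	its->enabled = enable;
	toy_process_commands(its);	/* drain the backlog on enable */
	pthread_mutex_unlock(&its->cmd_lock);
}

int main(void)
{
	struct toy_its its = { .cmd_lock = PTHREAD_MUTEX_INITIALIZER };

	toy_write_cwriter(&its, 2);	/* disabled: commands 0 and 1 queue up */
	toy_write_ctlr(&its, true);	/* enabled: both are processed now */
	return 0;
}
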
+24 -8
virt/kvm/arm/vgic/vgic-mmio.c
···
 static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 				    bool new_active_state)
 {
+	struct kvm_vcpu *requester_vcpu;
 	spin_lock(&irq->irq_lock);
+
+	/*
+	 * The vcpu parameter here can mean multiple things depending on how
+	 * this function is called; when handling a trap from the kernel it
+	 * depends on the GIC version, and these functions are also called as
+	 * part of save/restore from userspace.
+	 *
+	 * Therefore, we have to figure out the requester in a reliable way.
+	 *
+	 * When accessing VGIC state from user space, the requester_vcpu is
+	 * NULL, which is fine, because we guarantee that no VCPUs are running
+	 * when accessing VGIC state from user space so irq->vcpu->cpu is
+	 * always -1.
+	 */
+	requester_vcpu = kvm_arm_get_running_vcpu();
+
 	/*
 	 * If this virtual IRQ was written into a list register, we
 	 * have to make sure the CPU that runs the VCPU thread has
-	 * synced back LR state to the struct vgic_irq. We can only
-	 * know this for sure, when either this irq is not assigned to
-	 * anyone's AP list anymore, or the VCPU thread is not
-	 * running on any CPUs.
+	 * synced back the LR state to the struct vgic_irq.
 	 *
-	 * In the opposite case, we know the VCPU thread may be on its
-	 * way back from the guest and still has to sync back this
-	 * IRQ, so we release and re-acquire the spin_lock to let the
-	 * other thread sync back the IRQ.
+	 * As long as the conditions below are true, we know the VCPU thread
+	 * may be on its way back from the guest (we kicked the VCPU thread in
+	 * vgic_change_active_prepare) and still has to sync back this IRQ,
+	 * so we release and re-acquire the spin_lock to let the other thread
+	 * sync back the IRQ.
 	 */
 	while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+	       irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
 	       irq->vcpu->cpu != -1) /* VCPU thread is running */
 		cond_resched_lock(&irq->irq_lock);
 
+4 -1
virt/kvm/arm/vgic/vgic-v3.c
···
 	/*
 	 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
 	 * way, so we force SRE to 1 to demonstrate this to the guest.
+	 * Also, we don't support any form of IRQ/FIQ bypass.
 	 * This goes with the spec allowing the value to be RAO/WI.
 	 */
 	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
-		vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+		vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+				     ICC_SRE_EL1_DFB |
+				     ICC_SRE_EL1_SRE);
 		vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
 	} else {
 		vgic_v3->vgic_sre = 0;