Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] KVM: Improve interrupt response

The current interrupt injection mechanism might delay an interrupt under
the following circumstances:

- if injection fails because the guest is not interruptible (rflags.IF clear,
or after a 'mov ss' or 'sti' instruction). Userspace can check rflags,
but the other cases are not testable under the current API.
- if injection fails because of a fault during delivery. This probably
never happens under normal guests.
- if injection fails due to a physical interrupt causing a vmexit so that
it can be handled by the host.

In all cases the guest proceeds without processing the interrupt, reducing
the interactive feel and interrupt throughput of the guest.

This patch fixes the situation by allowing userspace to request an exit
when the 'interrupt window' opens, so that it can re-inject the interrupt
at the right time. Guest interactivity is very visibly improved.

Signed-off-by: Dor Laor <dor.laor@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Dor Laor and committed by
Linus Torvalds
c1150d8c e097f35c

+180 -28
+4
drivers/kvm/kvm.h
··· 173 173 struct mutex mutex; 174 174 int cpu; 175 175 int launched; 176 + int interrupt_window_open; 176 177 unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ 177 178 #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) 178 179 unsigned long irq_pending[NR_IRQ_WORDS]; ··· 248 247 u32 io_exits; 249 248 u32 mmio_exits; 250 249 u32 signal_exits; 250 + u32 irq_window_exits; 251 + u32 halt_exits; 252 + u32 request_irq_exits; 251 253 u32 irq_exits; 252 254 }; 253 255
+7 -4
drivers/kvm/kvm_main.c
··· 58 58 { "io_exits", &kvm_stat.io_exits }, 59 59 { "mmio_exits", &kvm_stat.mmio_exits }, 60 60 { "signal_exits", &kvm_stat.signal_exits }, 61 + { "irq_window", &kvm_stat.irq_window_exits }, 62 + { "halt_exits", &kvm_stat.halt_exits }, 63 + { "request_irq", &kvm_stat.request_irq_exits }, 61 64 { "irq_exits", &kvm_stat.irq_exits }, 62 65 { 0, 0 } 63 66 }; ··· 1696 1693 if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run)) 1697 1694 goto out; 1698 1695 r = kvm_dev_ioctl_run(kvm, &kvm_run); 1699 - if (r < 0) 1696 + if (r < 0 && r != -EINTR) 1700 1697 goto out; 1701 - r = -EFAULT; 1702 - if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) 1698 + if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) { 1699 + r = -EFAULT; 1703 1700 goto out; 1704 - r = 0; 1701 + } 1705 1702 break; 1706 1703 } 1707 1704 case KVM_GET_REGS: {
+87 -7
drivers/kvm/svm.c
··· 235 235 236 236 vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip; 237 237 vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 238 + 239 + vcpu->interrupt_window_open = 1; 238 240 } 239 241 240 242 static int has_svm(void) ··· 1033 1031 { 1034 1032 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; 1035 1033 skip_emulated_instruction(vcpu); 1036 - if (vcpu->irq_summary && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)) 1034 + if (vcpu->irq_summary) 1037 1035 return 1; 1038 1036 1039 1037 kvm_run->exit_reason = KVM_EXIT_HLT; 1038 + ++kvm_stat.halt_exits; 1040 1039 return 0; 1041 1040 } 1042 1041 ··· 1189 1186 return rdmsr_interception(vcpu, kvm_run); 1190 1187 } 1191 1188 1189 + static int interrupt_window_interception(struct kvm_vcpu *vcpu, 1190 + struct kvm_run *kvm_run) 1191 + { 1192 + /* 1193 + * If the user space waits to inject interrupts, exit as soon as 1194 + * possible 1195 + */ 1196 + if (kvm_run->request_interrupt_window && 1197 + !vcpu->irq_summary && 1198 + (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)) { 1199 + ++kvm_stat.irq_window_exits; 1200 + kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 1201 + return 0; 1202 + } 1203 + 1204 + return 1; 1205 + } 1206 + 1192 1207 static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, 1193 1208 struct kvm_run *kvm_run) = { 1194 1209 [SVM_EXIT_READ_CR0] = emulate_on_interception, ··· 1231 1210 [SVM_EXIT_NMI] = nop_on_interception, 1232 1211 [SVM_EXIT_SMI] = nop_on_interception, 1233 1212 [SVM_EXIT_INIT] = nop_on_interception, 1213 + [SVM_EXIT_VINTR] = interrupt_window_interception, 1234 1214 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ 1235 1215 [SVM_EXIT_CPUID] = cpuid_interception, 1236 1216 [SVM_EXIT_HLT] = halt_interception, ··· 1300 1278 } 1301 1279 1302 1280 1303 - static inline void kvm_try_inject_irq(struct kvm_vcpu *vcpu) 1281 + static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu) 1304 1282 { 1305 1283 struct vmcb_control_area *control; 1306 1284 1307 - 
if (!vcpu->irq_summary) 1308 - return; 1309 - 1310 1285 control = &vcpu->svm->vmcb->control; 1311 - 1312 1286 control->int_vector = pop_irq(vcpu); 1313 1287 control->int_ctl &= ~V_INTR_PRIO_MASK; 1314 1288 control->int_ctl |= V_IRQ_MASK | ··· 1319 1301 control->int_ctl &= ~V_IRQ_MASK; 1320 1302 push_irq(vcpu, control->int_vector); 1321 1303 } 1304 + 1305 + vcpu->interrupt_window_open = 1306 + !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); 1307 + } 1308 + 1309 + static void do_interrupt_requests(struct kvm_vcpu *vcpu, 1310 + struct kvm_run *kvm_run) 1311 + { 1312 + struct vmcb_control_area *control = &vcpu->svm->vmcb->control; 1313 + 1314 + vcpu->interrupt_window_open = 1315 + (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && 1316 + (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); 1317 + 1318 + if (vcpu->interrupt_window_open && vcpu->irq_summary) 1319 + /* 1320 + * If interrupts enabled, and not blocked by sti or mov ss. Good. 1321 + */ 1322 + kvm_do_inject_irq(vcpu); 1323 + 1324 + /* 1325 + * Interrupts blocked. Wait for unblock. 1326 + */ 1327 + if (!vcpu->interrupt_window_open && 1328 + (vcpu->irq_summary || kvm_run->request_interrupt_window)) { 1329 + control->intercept |= 1ULL << INTERCEPT_VINTR; 1330 + } else 1331 + control->intercept &= ~(1ULL << INTERCEPT_VINTR); 1332 + } 1333 + 1334 + static void post_kvm_run_save(struct kvm_vcpu *vcpu, 1335 + struct kvm_run *kvm_run) 1336 + { 1337 + kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && 1338 + vcpu->irq_summary == 0); 1339 + kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; 1340 + kvm_run->cr8 = vcpu->cr8; 1341 + kvm_run->apic_base = vcpu->apic_base; 1342 + } 1343 + 1344 + /* 1345 + * Check if userspace requested an interrupt window, and that the 1346 + * interrupt window is open. 1347 + * 1348 + * No need to exit to userspace if we already have an interrupt queued. 
1349 + */ 1350 + static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, 1351 + struct kvm_run *kvm_run) 1352 + { 1353 + return (!vcpu->irq_summary && 1354 + kvm_run->request_interrupt_window && 1355 + vcpu->interrupt_window_open && 1356 + (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); 1322 1357 } 1323 1358 1324 1359 static void save_db_regs(unsigned long *db_regs) ··· 1397 1326 u16 ldt_selector; 1398 1327 1399 1328 again: 1400 - kvm_try_inject_irq(vcpu); 1329 + do_interrupt_requests(vcpu, kvm_run); 1401 1330 1402 1331 clgi(); 1403 1332 ··· 1558 1487 if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 1559 1488 kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; 1560 1489 kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; 1490 + post_kvm_run_save(vcpu, kvm_run); 1561 1491 return 0; 1562 1492 } 1563 1493 1564 1494 if (handle_exit(vcpu, kvm_run)) { 1565 1495 if (signal_pending(current)) { 1566 1496 ++kvm_stat.signal_exits; 1497 + post_kvm_run_save(vcpu, kvm_run); 1498 + return -EINTR; 1499 + } 1500 + 1501 + if (dm_request_for_irq_injection(vcpu, kvm_run)) { 1502 + ++kvm_stat.request_irq_exits; 1503 + post_kvm_run_save(vcpu, kvm_run); 1567 1504 return -EINTR; 1568 1505 } 1569 1506 kvm_resched(vcpu); 1570 1507 goto again; 1571 1508 } 1509 + post_kvm_run_save(vcpu, kvm_run); 1572 1510 return 0; 1573 1511 } 1574 1512
+72 -16
drivers/kvm/vmx.c
··· 263 263 if (interruptibility & 3) 264 264 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 265 265 interruptibility & ~3); 266 + vcpu->interrupt_window_open = 1; 266 267 } 267 268 268 269 static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) ··· 1215 1214 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 1216 1215 } 1217 1216 1218 - static void kvm_try_inject_irq(struct kvm_vcpu *vcpu) 1217 + 1218 + static void do_interrupt_requests(struct kvm_vcpu *vcpu, 1219 + struct kvm_run *kvm_run) 1219 1220 { 1220 - if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) 1221 - && (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0) 1221 + u32 cpu_based_vm_exec_control; 1222 + 1223 + vcpu->interrupt_window_open = 1224 + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && 1225 + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); 1226 + 1227 + if (vcpu->interrupt_window_open && 1228 + vcpu->irq_summary && 1229 + !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) 1222 1230 /* 1223 - * Interrupts enabled, and not blocked by sti or mov ss. Good. 1231 + * If interrupts enabled, and not blocked by sti or mov ss. Good. 1224 1232 */ 1225 1233 kvm_do_inject_irq(vcpu); 1226 - else 1234 + 1235 + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 1236 + if (!vcpu->interrupt_window_open && 1237 + (vcpu->irq_summary || kvm_run->request_interrupt_window)) 1227 1238 /* 1228 1239 * Interrupts blocked. Wait for unblock. 
1229 1240 */ 1230 - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1231 - vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) 1232 - | CPU_BASED_VIRTUAL_INTR_PENDING); 1241 + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; 1242 + else 1243 + cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; 1244 + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 1233 1245 } 1234 1246 1235 1247 static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) ··· 1579 1565 return 1; 1580 1566 } 1581 1567 1568 + static void post_kvm_run_save(struct kvm_vcpu *vcpu, 1569 + struct kvm_run *kvm_run) 1570 + { 1571 + kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0; 1572 + kvm_run->cr8 = vcpu->cr8; 1573 + kvm_run->apic_base = vcpu->apic_base; 1574 + kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && 1575 + vcpu->irq_summary == 0); 1576 + } 1577 + 1582 1578 static int handle_interrupt_window(struct kvm_vcpu *vcpu, 1583 1579 struct kvm_run *kvm_run) 1584 1580 { 1585 - /* Turn off interrupt window reporting. 
*/ 1586 - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1587 - vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) 1588 - & ~CPU_BASED_VIRTUAL_INTR_PENDING); 1581 + /* 1582 + * If the user space waits to inject interrupts, exit as soon as 1583 + * possible 1584 + */ 1585 + if (kvm_run->request_interrupt_window && 1586 + !vcpu->irq_summary && 1587 + (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)) { 1588 + kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 1589 + ++kvm_stat.irq_window_exits; 1590 + return 0; 1591 + } 1589 1592 return 1; 1590 1593 } 1591 1594 1592 1595 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1593 1596 { 1594 1597 skip_emulated_instruction(vcpu); 1595 - if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)) 1598 + if (vcpu->irq_summary) 1596 1599 return 1; 1597 1600 1598 1601 kvm_run->exit_reason = KVM_EXIT_HLT; 1602 + ++kvm_stat.halt_exits; 1599 1603 return 0; 1600 1604 } 1601 1605 ··· 1664 1632 return 0; 1665 1633 } 1666 1634 1635 + /* 1636 + * Check if userspace requested an interrupt window, and that the 1637 + * interrupt window is open. 1638 + * 1639 + * No need to exit to userspace if we already have an interrupt queued. 
1640 + */ 1641 + static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, 1642 + struct kvm_run *kvm_run) 1643 + { 1644 + return (!vcpu->irq_summary && 1645 + kvm_run->request_interrupt_window && 1646 + vcpu->interrupt_window_open && 1647 + (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); 1648 + } 1649 + 1667 1650 static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1668 1651 { 1669 1652 u8 fail; ··· 1710 1663 vmcs_writel(HOST_GS_BASE, segment_base(gs_sel)); 1711 1664 #endif 1712 1665 1713 - if (vcpu->irq_summary && 1714 - !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) 1715 - kvm_try_inject_irq(vcpu); 1666 + do_interrupt_requests(vcpu, kvm_run); 1716 1667 1717 1668 if (vcpu->guest_debug.enabled) 1718 1669 kvm_guest_debug_pre(vcpu); ··· 1847 1802 1848 1803 fx_save(vcpu->guest_fx_image); 1849 1804 fx_restore(vcpu->host_fx_image); 1805 + vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; 1850 1806 1851 1807 #ifndef CONFIG_X86_64 1852 1808 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); ··· 1880 1834 /* Give scheduler a change to reschedule. */ 1881 1835 if (signal_pending(current)) { 1882 1836 ++kvm_stat.signal_exits; 1837 + post_kvm_run_save(vcpu, kvm_run); 1883 1838 return -EINTR; 1884 1839 } 1840 + 1841 + if (dm_request_for_irq_injection(vcpu, kvm_run)) { 1842 + ++kvm_stat.request_irq_exits; 1843 + post_kvm_run_save(vcpu, kvm_run); 1844 + return -EINTR; 1845 + } 1846 + 1885 1847 kvm_resched(vcpu); 1886 1848 goto again; 1887 1849 } 1888 1850 } 1851 + 1852 + post_kvm_run_save(vcpu, kvm_run); 1889 1853 return 0; 1890 1854 } 1891 1855
+10 -1
include/linux/kvm.h
··· 11 11 #include <asm/types.h> 12 12 #include <linux/ioctl.h> 13 13 14 - #define KVM_API_VERSION 1 14 + #define KVM_API_VERSION 2 15 15 16 16 /* 17 17 * Architectural interrupt line count, and the size of the bitmap needed ··· 45 45 KVM_EXIT_DEBUG = 4, 46 46 KVM_EXIT_HLT = 5, 47 47 KVM_EXIT_MMIO = 6, 48 + KVM_EXIT_IRQ_WINDOW_OPEN = 7, 48 49 }; 49 50 50 51 /* for KVM_RUN */ ··· 54 53 __u32 vcpu; 55 54 __u32 emulated; /* skip current instruction */ 56 55 __u32 mmio_completed; /* mmio request completed */ 56 + __u8 request_interrupt_window; 57 + __u8 padding1[3]; 57 58 58 59 /* out */ 59 60 __u32 exit_type; 60 61 __u32 exit_reason; 61 62 __u32 instruction_length; 63 + __u8 ready_for_interrupt_injection; 64 + __u8 if_flag; 65 + __u16 padding2; 66 + __u64 cr8; 67 + __u64 apic_base; 68 + 62 69 union { 63 70 /* KVM_EXIT_UNKNOWN */ 64 71 struct {