Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.20-rc6 1706 lines 43 kB view raw
1/* 2 * Kernel-based Virtual Machine driver for Linux 3 * 4 * AMD SVM support 5 * 6 * Copyright (C) 2006 Qumranet, Inc. 7 * 8 * Authors: 9 * Yaniv Kamay <yaniv@qumranet.com> 10 * Avi Kivity <avi@qumranet.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2. See 13 * the COPYING file in the top-level directory. 14 * 15 */ 16 17#include <linux/module.h> 18#include <linux/vmalloc.h> 19#include <linux/highmem.h> 20#include <linux/profile.h> 21#include <asm/desc.h> 22 23#include "kvm_svm.h" 24#include "x86_emulate.h" 25 26MODULE_AUTHOR("Qumranet"); 27MODULE_LICENSE("GPL"); 28 29#define IOPM_ALLOC_ORDER 2 30#define MSRPM_ALLOC_ORDER 1 31 32#define DB_VECTOR 1 33#define UD_VECTOR 6 34#define GP_VECTOR 13 35 36#define DR7_GD_MASK (1 << 13) 37#define DR6_BD_MASK (1 << 13) 38#define CR4_DE_MASK (1UL << 3) 39 40#define SEG_TYPE_LDT 2 41#define SEG_TYPE_BUSY_TSS16 3 42 43#define KVM_EFER_LMA (1 << 10) 44#define KVM_EFER_LME (1 << 8) 45 46unsigned long iopm_base; 47unsigned long msrpm_base; 48 49struct kvm_ldttss_desc { 50 u16 limit0; 51 u16 base0; 52 unsigned base1 : 8, type : 5, dpl : 2, p : 1; 53 unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; 54 u32 base3; 55 u32 zero1; 56} __attribute__((packed)); 57 58struct svm_cpu_data { 59 int cpu; 60 61 uint64_t asid_generation; 62 uint32_t max_asid; 63 uint32_t next_asid; 64 struct kvm_ldttss_desc *tss_desc; 65 66 struct page *save_area; 67}; 68 69static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); 70 71struct svm_init_data { 72 int cpu; 73 int r; 74}; 75 76static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; 77 78#define NUM_MSR_MAPS (sizeof(msrpm_ranges) / sizeof(*msrpm_ranges)) 79#define MSRS_RANGE_SIZE 2048 80#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) 81 82#define MAX_INST_SIZE 15 83 84static unsigned get_addr_size(struct kvm_vcpu *vcpu) 85{ 86 struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; 87 u16 cs_attrib; 88 89 if (!(sa->cr0 & CR0_PE_MASK) || (sa->rflags & X86_EFLAGS_VM)) 90 return 
2; 91 92 cs_attrib = sa->cs.attrib; 93 94 return (cs_attrib & SVM_SELECTOR_L_MASK) ? 8 : 95 (cs_attrib & SVM_SELECTOR_DB_MASK) ? 4 : 2; 96} 97 98static inline u8 pop_irq(struct kvm_vcpu *vcpu) 99{ 100 int word_index = __ffs(vcpu->irq_summary); 101 int bit_index = __ffs(vcpu->irq_pending[word_index]); 102 int irq = word_index * BITS_PER_LONG + bit_index; 103 104 clear_bit(bit_index, &vcpu->irq_pending[word_index]); 105 if (!vcpu->irq_pending[word_index]) 106 clear_bit(word_index, &vcpu->irq_summary); 107 return irq; 108} 109 110static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) 111{ 112 set_bit(irq, vcpu->irq_pending); 113 set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); 114} 115 116static inline void clgi(void) 117{ 118 asm volatile (SVM_CLGI); 119} 120 121static inline void stgi(void) 122{ 123 asm volatile (SVM_STGI); 124} 125 126static inline void invlpga(unsigned long addr, u32 asid) 127{ 128 asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid)); 129} 130 131static inline unsigned long kvm_read_cr2(void) 132{ 133 unsigned long cr2; 134 135 asm volatile ("mov %%cr2, %0" : "=r" (cr2)); 136 return cr2; 137} 138 139static inline void kvm_write_cr2(unsigned long val) 140{ 141 asm volatile ("mov %0, %%cr2" :: "r" (val)); 142} 143 144static inline unsigned long read_dr6(void) 145{ 146 unsigned long dr6; 147 148 asm volatile ("mov %%dr6, %0" : "=r" (dr6)); 149 return dr6; 150} 151 152static inline void write_dr6(unsigned long val) 153{ 154 asm volatile ("mov %0, %%dr6" :: "r" (val)); 155} 156 157static inline unsigned long read_dr7(void) 158{ 159 unsigned long dr7; 160 161 asm volatile ("mov %%dr7, %0" : "=r" (dr7)); 162 return dr7; 163} 164 165static inline void write_dr7(unsigned long val) 166{ 167 asm volatile ("mov %0, %%dr7" :: "r" (val)); 168} 169 170static inline void force_new_asid(struct kvm_vcpu *vcpu) 171{ 172 vcpu->svm->asid_generation--; 173} 174 175static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) 176{ 177 force_new_asid(vcpu); 178} 179 
180static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) 181{ 182 if (!(efer & KVM_EFER_LMA)) 183 efer &= ~KVM_EFER_LME; 184 185 vcpu->svm->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; 186 vcpu->shadow_efer = efer; 187} 188 189static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) 190{ 191 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | 192 SVM_EVTINJ_VALID_ERR | 193 SVM_EVTINJ_TYPE_EXEPT | 194 GP_VECTOR; 195 vcpu->svm->vmcb->control.event_inj_err = error_code; 196} 197 198static void inject_ud(struct kvm_vcpu *vcpu) 199{ 200 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | 201 SVM_EVTINJ_TYPE_EXEPT | 202 UD_VECTOR; 203} 204 205static void inject_db(struct kvm_vcpu *vcpu) 206{ 207 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | 208 SVM_EVTINJ_TYPE_EXEPT | 209 DB_VECTOR; 210} 211 212static int is_page_fault(uint32_t info) 213{ 214 info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; 215 return info == (PF_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT); 216} 217 218static int is_external_interrupt(u32 info) 219{ 220 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; 221 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); 222} 223 224static void skip_emulated_instruction(struct kvm_vcpu *vcpu) 225{ 226 if (!vcpu->svm->next_rip) { 227 printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); 228 return; 229 } 230 if (vcpu->svm->next_rip - vcpu->svm->vmcb->save.rip > 15) { 231 printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", 232 __FUNCTION__, 233 vcpu->svm->vmcb->save.rip, 234 vcpu->svm->next_rip); 235 } 236 237 vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip; 238 vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 239 240 vcpu->interrupt_window_open = 1; 241} 242 243static int has_svm(void) 244{ 245 uint32_t eax, ebx, ecx, edx; 246 247 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { 248 printk(KERN_INFO "has_svm: not amd\n"); 249 return 0; 250 } 251 252 cpuid(0x80000000, &eax, &ebx, &ecx, 
&edx); 253 if (eax < SVM_CPUID_FUNC) { 254 printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); 255 return 0; 256 } 257 258 cpuid(0x80000001, &eax, &ebx, &ecx, &edx); 259 if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { 260 printk(KERN_DEBUG "has_svm: svm not available\n"); 261 return 0; 262 } 263 return 1; 264} 265 266static void svm_hardware_disable(void *garbage) 267{ 268 struct svm_cpu_data *svm_data 269 = per_cpu(svm_data, raw_smp_processor_id()); 270 271 if (svm_data) { 272 uint64_t efer; 273 274 wrmsrl(MSR_VM_HSAVE_PA, 0); 275 rdmsrl(MSR_EFER, efer); 276 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); 277 per_cpu(svm_data, raw_smp_processor_id()) = 0; 278 __free_page(svm_data->save_area); 279 kfree(svm_data); 280 } 281} 282 283static void svm_hardware_enable(void *garbage) 284{ 285 286 struct svm_cpu_data *svm_data; 287 uint64_t efer; 288#ifdef CONFIG_X86_64 289 struct desc_ptr gdt_descr; 290#else 291 struct Xgt_desc_struct gdt_descr; 292#endif 293 struct desc_struct *gdt; 294 int me = raw_smp_processor_id(); 295 296 if (!has_svm()) { 297 printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); 298 return; 299 } 300 svm_data = per_cpu(svm_data, me); 301 302 if (!svm_data) { 303 printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", 304 me); 305 return; 306 } 307 308 svm_data->asid_generation = 1; 309 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 310 svm_data->next_asid = svm_data->max_asid + 1; 311 312 asm volatile ( "sgdt %0" : "=m"(gdt_descr) ); 313 gdt = (struct desc_struct *)gdt_descr.address; 314 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 315 316 rdmsrl(MSR_EFER, efer); 317 wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK); 318 319 wrmsrl(MSR_VM_HSAVE_PA, 320 page_to_pfn(svm_data->save_area) << PAGE_SHIFT); 321} 322 323static int svm_cpu_init(int cpu) 324{ 325 struct svm_cpu_data *svm_data; 326 int r; 327 328 svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); 329 if (!svm_data) 330 return -ENOMEM; 331 
svm_data->cpu = cpu; 332 svm_data->save_area = alloc_page(GFP_KERNEL); 333 r = -ENOMEM; 334 if (!svm_data->save_area) 335 goto err_1; 336 337 per_cpu(svm_data, cpu) = svm_data; 338 339 return 0; 340 341err_1: 342 kfree(svm_data); 343 return r; 344 345} 346 347static int set_msr_interception(u32 *msrpm, unsigned msr, 348 int read, int write) 349{ 350 int i; 351 352 for (i = 0; i < NUM_MSR_MAPS; i++) { 353 if (msr >= msrpm_ranges[i] && 354 msr < msrpm_ranges[i] + MSRS_IN_RANGE) { 355 u32 msr_offset = (i * MSRS_IN_RANGE + msr - 356 msrpm_ranges[i]) * 2; 357 358 u32 *base = msrpm + (msr_offset / 32); 359 u32 msr_shift = msr_offset % 32; 360 u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); 361 *base = (*base & ~(0x3 << msr_shift)) | 362 (mask << msr_shift); 363 return 1; 364 } 365 } 366 printk(KERN_DEBUG "%s: not found 0x%x\n", __FUNCTION__, msr); 367 return 0; 368} 369 370static __init int svm_hardware_setup(void) 371{ 372 int cpu; 373 struct page *iopm_pages; 374 struct page *msrpm_pages; 375 void *msrpm_va; 376 int r; 377 378 kvm_emulator_want_group7_invlpg(); 379 380 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); 381 382 if (!iopm_pages) 383 return -ENOMEM; 384 memset(page_address(iopm_pages), 0xff, 385 PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); 386 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; 387 388 389 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 390 391 r = -ENOMEM; 392 if (!msrpm_pages) 393 goto err_1; 394 395 msrpm_va = page_address(msrpm_pages); 396 memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); 397 msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT; 398 399#ifdef CONFIG_X86_64 400 set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1); 401 set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1); 402 set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1); 403 set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1); 404 set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1); 405 set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1); 406#endif 
407 set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1); 408 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1); 409 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1); 410 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1); 411 412 for_each_online_cpu(cpu) { 413 r = svm_cpu_init(cpu); 414 if (r) 415 goto err_2; 416 } 417 return 0; 418 419err_2: 420 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); 421 msrpm_base = 0; 422err_1: 423 __free_pages(iopm_pages, IOPM_ALLOC_ORDER); 424 iopm_base = 0; 425 return r; 426} 427 428static __exit void svm_hardware_unsetup(void) 429{ 430 __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER); 431 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); 432 iopm_base = msrpm_base = 0; 433} 434 435static void init_seg(struct vmcb_seg *seg) 436{ 437 seg->selector = 0; 438 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | 439 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ 440 seg->limit = 0xffff; 441 seg->base = 0; 442} 443 444static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) 445{ 446 seg->selector = 0; 447 seg->attrib = SVM_SELECTOR_P_MASK | type; 448 seg->limit = 0xffff; 449 seg->base = 0; 450} 451 452static int svm_vcpu_setup(struct kvm_vcpu *vcpu) 453{ 454 return 0; 455} 456 457static void init_vmcb(struct vmcb *vmcb) 458{ 459 struct vmcb_control_area *control = &vmcb->control; 460 struct vmcb_save_area *save = &vmcb->save; 461 u64 tsc; 462 463 control->intercept_cr_read = INTERCEPT_CR0_MASK | 464 INTERCEPT_CR3_MASK | 465 INTERCEPT_CR4_MASK; 466 467 control->intercept_cr_write = INTERCEPT_CR0_MASK | 468 INTERCEPT_CR3_MASK | 469 INTERCEPT_CR4_MASK; 470 471 control->intercept_dr_read = INTERCEPT_DR0_MASK | 472 INTERCEPT_DR1_MASK | 473 INTERCEPT_DR2_MASK | 474 INTERCEPT_DR3_MASK; 475 476 control->intercept_dr_write = INTERCEPT_DR0_MASK | 477 INTERCEPT_DR1_MASK | 478 INTERCEPT_DR2_MASK | 479 INTERCEPT_DR3_MASK | 480 INTERCEPT_DR5_MASK | 481 
INTERCEPT_DR7_MASK; 482 483 control->intercept_exceptions = 1 << PF_VECTOR; 484 485 486 control->intercept = (1ULL << INTERCEPT_INTR) | 487 (1ULL << INTERCEPT_NMI) | 488 /* 489 * selective cr0 intercept bug? 490 * 0: 0f 22 d8 mov %eax,%cr3 491 * 3: 0f 20 c0 mov %cr0,%eax 492 * 6: 0d 00 00 00 80 or $0x80000000,%eax 493 * b: 0f 22 c0 mov %eax,%cr0 494 * set cr3 ->interception 495 * get cr0 ->interception 496 * set cr0 -> no interception 497 */ 498 /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */ 499 (1ULL << INTERCEPT_CPUID) | 500 (1ULL << INTERCEPT_HLT) | 501 (1ULL << INTERCEPT_INVLPGA) | 502 (1ULL << INTERCEPT_IOIO_PROT) | 503 (1ULL << INTERCEPT_MSR_PROT) | 504 (1ULL << INTERCEPT_TASK_SWITCH) | 505 (1ULL << INTERCEPT_VMRUN) | 506 (1ULL << INTERCEPT_VMMCALL) | 507 (1ULL << INTERCEPT_VMLOAD) | 508 (1ULL << INTERCEPT_VMSAVE) | 509 (1ULL << INTERCEPT_STGI) | 510 (1ULL << INTERCEPT_CLGI) | 511 (1ULL << INTERCEPT_SKINIT); 512 513 control->iopm_base_pa = iopm_base; 514 control->msrpm_base_pa = msrpm_base; 515 rdtscll(tsc); 516 control->tsc_offset = -tsc; 517 control->int_ctl = V_INTR_MASKING_MASK; 518 519 init_seg(&save->es); 520 init_seg(&save->ss); 521 init_seg(&save->ds); 522 init_seg(&save->fs); 523 init_seg(&save->gs); 524 525 save->cs.selector = 0xf000; 526 /* Executable/Readable Code Segment */ 527 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | 528 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; 529 save->cs.limit = 0xffff; 530 save->cs.base = 0xffff0000; 531 532 save->gdtr.limit = 0xffff; 533 save->idtr.limit = 0xffff; 534 535 init_sys_seg(&save->ldtr, SEG_TYPE_LDT); 536 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); 537 538 save->efer = MSR_EFER_SVME_MASK; 539 540 save->dr6 = 0xffff0ff0; 541 save->dr7 = 0x400; 542 save->rflags = 2; 543 save->rip = 0x0000fff0; 544 545 /* 546 * cr0 val on cpu init should be 0x60000010, we enable cpu 547 * cache by default. the orderly way is to enable cache in bios. 
548 */ 549 save->cr0 = 0x00000010 | CR0_PG_MASK; 550 save->cr4 = CR4_PAE_MASK; 551 /* rdx = ?? */ 552} 553 554static int svm_create_vcpu(struct kvm_vcpu *vcpu) 555{ 556 struct page *page; 557 int r; 558 559 r = -ENOMEM; 560 vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL); 561 if (!vcpu->svm) 562 goto out1; 563 page = alloc_page(GFP_KERNEL); 564 if (!page) 565 goto out2; 566 567 vcpu->svm->vmcb = page_address(page); 568 memset(vcpu->svm->vmcb, 0, PAGE_SIZE); 569 vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 570 vcpu->svm->cr0 = 0x00000010; 571 vcpu->svm->asid_generation = 0; 572 memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); 573 init_vmcb(vcpu->svm->vmcb); 574 575 fx_init(vcpu); 576 577 return 0; 578 579out2: 580 kfree(vcpu->svm); 581out1: 582 return r; 583} 584 585static void svm_free_vcpu(struct kvm_vcpu *vcpu) 586{ 587 if (!vcpu->svm) 588 return; 589 if (vcpu->svm->vmcb) 590 __free_page(pfn_to_page(vcpu->svm->vmcb_pa >> PAGE_SHIFT)); 591 kfree(vcpu->svm); 592} 593 594static struct kvm_vcpu *svm_vcpu_load(struct kvm_vcpu *vcpu) 595{ 596 get_cpu(); 597 return vcpu; 598} 599 600static void svm_vcpu_put(struct kvm_vcpu *vcpu) 601{ 602 put_cpu(); 603} 604 605static void svm_cache_regs(struct kvm_vcpu *vcpu) 606{ 607 vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax; 608 vcpu->regs[VCPU_REGS_RSP] = vcpu->svm->vmcb->save.rsp; 609 vcpu->rip = vcpu->svm->vmcb->save.rip; 610} 611 612static void svm_decache_regs(struct kvm_vcpu *vcpu) 613{ 614 vcpu->svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; 615 vcpu->svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; 616 vcpu->svm->vmcb->save.rip = vcpu->rip; 617} 618 619static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) 620{ 621 return vcpu->svm->vmcb->save.rflags; 622} 623 624static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 625{ 626 vcpu->svm->vmcb->save.rflags = rflags; 627} 628 629static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) 630{ 631 struct vmcb_save_area 
*save = &vcpu->svm->vmcb->save; 632 633 switch (seg) { 634 case VCPU_SREG_CS: return &save->cs; 635 case VCPU_SREG_DS: return &save->ds; 636 case VCPU_SREG_ES: return &save->es; 637 case VCPU_SREG_FS: return &save->fs; 638 case VCPU_SREG_GS: return &save->gs; 639 case VCPU_SREG_SS: return &save->ss; 640 case VCPU_SREG_TR: return &save->tr; 641 case VCPU_SREG_LDTR: return &save->ldtr; 642 } 643 BUG(); 644 return 0; 645} 646 647static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg) 648{ 649 struct vmcb_seg *s = svm_seg(vcpu, seg); 650 651 return s->base; 652} 653 654static void svm_get_segment(struct kvm_vcpu *vcpu, 655 struct kvm_segment *var, int seg) 656{ 657 struct vmcb_seg *s = svm_seg(vcpu, seg); 658 659 var->base = s->base; 660 var->limit = s->limit; 661 var->selector = s->selector; 662 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK; 663 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1; 664 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; 665 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1; 666 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; 667 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 668 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 669 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 670 var->unusable = !var->present; 671} 672 673static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 674{ 675 struct vmcb_seg *s = svm_seg(vcpu, VCPU_SREG_CS); 676 677 *db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 678 *l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 679} 680 681static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 682{ 683 dt->limit = vcpu->svm->vmcb->save.ldtr.limit; 684 dt->base = vcpu->svm->vmcb->save.ldtr.base; 685} 686 687static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 688{ 689 vcpu->svm->vmcb->save.ldtr.limit = dt->limit; 690 vcpu->svm->vmcb->save.ldtr.base = dt->base ; 691} 692 693static void svm_get_gdt(struct kvm_vcpu *vcpu, struct 
descriptor_table *dt) 694{ 695 dt->limit = vcpu->svm->vmcb->save.gdtr.limit; 696 dt->base = vcpu->svm->vmcb->save.gdtr.base; 697} 698 699static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 700{ 701 vcpu->svm->vmcb->save.gdtr.limit = dt->limit; 702 vcpu->svm->vmcb->save.gdtr.base = dt->base ; 703} 704 705static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) 706{ 707} 708 709static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 710{ 711#ifdef CONFIG_X86_64 712 if (vcpu->shadow_efer & KVM_EFER_LME) { 713 if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { 714 vcpu->shadow_efer |= KVM_EFER_LMA; 715 vcpu->svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; 716 } 717 718 if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK) ) { 719 vcpu->shadow_efer &= ~KVM_EFER_LMA; 720 vcpu->svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); 721 } 722 } 723#endif 724 vcpu->svm->cr0 = cr0; 725 vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK; 726 vcpu->cr0 = cr0; 727} 728 729static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 730{ 731 vcpu->cr4 = cr4; 732 vcpu->svm->vmcb->save.cr4 = cr4 | CR4_PAE_MASK; 733} 734 735static void svm_set_segment(struct kvm_vcpu *vcpu, 736 struct kvm_segment *var, int seg) 737{ 738 struct vmcb_seg *s = svm_seg(vcpu, seg); 739 740 s->base = var->base; 741 s->limit = var->limit; 742 s->selector = var->selector; 743 if (var->unusable) 744 s->attrib = 0; 745 else { 746 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); 747 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; 748 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; 749 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; 750 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; 751 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; 752 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; 753 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; 754 } 755 if (seg == VCPU_SREG_CS) 756 vcpu->svm->vmcb->save.cpl 757 = (vcpu->svm->vmcb->save.cs.attrib 
758 >> SVM_SELECTOR_DPL_SHIFT) & 3; 759 760} 761 762/* FIXME: 763 764 vcpu->svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 765 vcpu->svm->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); 766 767*/ 768 769static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 770{ 771 return -EOPNOTSUPP; 772} 773 774static void load_host_msrs(struct kvm_vcpu *vcpu) 775{ 776 int i; 777 778 for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) 779 wrmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); 780} 781 782static void save_host_msrs(struct kvm_vcpu *vcpu) 783{ 784 int i; 785 786 for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) 787 rdmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); 788} 789 790static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) 791{ 792 if (svm_data->next_asid > svm_data->max_asid) { 793 ++svm_data->asid_generation; 794 svm_data->next_asid = 1; 795 vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; 796 } 797 798 vcpu->cpu = svm_data->cpu; 799 vcpu->svm->asid_generation = svm_data->asid_generation; 800 vcpu->svm->vmcb->control.asid = svm_data->next_asid++; 801} 802 803static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address) 804{ 805 invlpga(address, vcpu->svm->vmcb->control.asid); // is needed? 806} 807 808static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) 809{ 810 return vcpu->svm->db_regs[dr]; 811} 812 813static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, 814 int *exception) 815{ 816 *exception = 0; 817 818 if (vcpu->svm->vmcb->save.dr7 & DR7_GD_MASK) { 819 vcpu->svm->vmcb->save.dr7 &= ~DR7_GD_MASK; 820 vcpu->svm->vmcb->save.dr6 |= DR6_BD_MASK; 821 *exception = DB_VECTOR; 822 return; 823 } 824 825 switch (dr) { 826 case 0 ... 3: 827 vcpu->svm->db_regs[dr] = value; 828 return; 829 case 4 ... 
5: 830 if (vcpu->cr4 & CR4_DE_MASK) { 831 *exception = UD_VECTOR; 832 return; 833 } 834 case 7: { 835 if (value & ~((1ULL << 32) - 1)) { 836 *exception = GP_VECTOR; 837 return; 838 } 839 vcpu->svm->vmcb->save.dr7 = value; 840 return; 841 } 842 default: 843 printk(KERN_DEBUG "%s: unexpected dr %u\n", 844 __FUNCTION__, dr); 845 *exception = UD_VECTOR; 846 return; 847 } 848} 849 850static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 851{ 852 u32 exit_int_info = vcpu->svm->vmcb->control.exit_int_info; 853 u64 fault_address; 854 u32 error_code; 855 enum emulation_result er; 856 int r; 857 858 if (is_external_interrupt(exit_int_info)) 859 push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); 860 861 spin_lock(&vcpu->kvm->lock); 862 863 fault_address = vcpu->svm->vmcb->control.exit_info_2; 864 error_code = vcpu->svm->vmcb->control.exit_info_1; 865 r = kvm_mmu_page_fault(vcpu, fault_address, error_code); 866 if (r < 0) { 867 spin_unlock(&vcpu->kvm->lock); 868 return r; 869 } 870 if (!r) { 871 spin_unlock(&vcpu->kvm->lock); 872 return 1; 873 } 874 er = emulate_instruction(vcpu, kvm_run, fault_address, error_code); 875 spin_unlock(&vcpu->kvm->lock); 876 877 switch (er) { 878 case EMULATE_DONE: 879 return 1; 880 case EMULATE_DO_MMIO: 881 ++kvm_stat.mmio_exits; 882 kvm_run->exit_reason = KVM_EXIT_MMIO; 883 return 0; 884 case EMULATE_FAIL: 885 vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); 886 break; 887 default: 888 BUG(); 889 } 890 891 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 892 return 0; 893} 894 895static int io_get_override(struct kvm_vcpu *vcpu, 896 struct vmcb_seg **seg, 897 int *addr_override) 898{ 899 u8 inst[MAX_INST_SIZE]; 900 unsigned ins_length; 901 gva_t rip; 902 int i; 903 904 rip = vcpu->svm->vmcb->save.rip; 905 ins_length = vcpu->svm->next_rip - rip; 906 rip += vcpu->svm->vmcb->save.cs.base; 907 908 if (ins_length > MAX_INST_SIZE) 909 printk(KERN_DEBUG 910 "%s: inst length err, cs base 0x%llx rip 0x%llx " 911 "next rip 0x%llx 
ins_length %u\n", 912 __FUNCTION__, 913 vcpu->svm->vmcb->save.cs.base, 914 vcpu->svm->vmcb->save.rip, 915 vcpu->svm->vmcb->control.exit_info_2, 916 ins_length); 917 918 if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length) 919 /* #PF */ 920 return 0; 921 922 *addr_override = 0; 923 *seg = 0; 924 for (i = 0; i < ins_length; i++) 925 switch (inst[i]) { 926 case 0xf0: 927 case 0xf2: 928 case 0xf3: 929 case 0x66: 930 continue; 931 case 0x67: 932 *addr_override = 1; 933 continue; 934 case 0x2e: 935 *seg = &vcpu->svm->vmcb->save.cs; 936 continue; 937 case 0x36: 938 *seg = &vcpu->svm->vmcb->save.ss; 939 continue; 940 case 0x3e: 941 *seg = &vcpu->svm->vmcb->save.ds; 942 continue; 943 case 0x26: 944 *seg = &vcpu->svm->vmcb->save.es; 945 continue; 946 case 0x64: 947 *seg = &vcpu->svm->vmcb->save.fs; 948 continue; 949 case 0x65: 950 *seg = &vcpu->svm->vmcb->save.gs; 951 continue; 952 default: 953 return 1; 954 } 955 printk(KERN_DEBUG "%s: unexpected\n", __FUNCTION__); 956 return 0; 957} 958 959static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) 960{ 961 unsigned long addr_mask; 962 unsigned long *reg; 963 struct vmcb_seg *seg; 964 int addr_override; 965 struct vmcb_save_area *save_area = &vcpu->svm->vmcb->save; 966 u16 cs_attrib = save_area->cs.attrib; 967 unsigned addr_size = get_addr_size(vcpu); 968 969 if (!io_get_override(vcpu, &seg, &addr_override)) 970 return 0; 971 972 if (addr_override) 973 addr_size = (addr_size == 2) ? 4: (addr_size >> 1); 974 975 if (ins) { 976 reg = &vcpu->regs[VCPU_REGS_RDI]; 977 seg = &vcpu->svm->vmcb->save.es; 978 } else { 979 reg = &vcpu->regs[VCPU_REGS_RSI]; 980 seg = (seg) ? 
seg : &vcpu->svm->vmcb->save.ds; 981 } 982 983 addr_mask = ~0ULL >> (64 - (addr_size * 8)); 984 985 if ((cs_attrib & SVM_SELECTOR_L_MASK) && 986 !(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_VM)) { 987 *address = (*reg & addr_mask); 988 return addr_mask; 989 } 990 991 if (!(seg->attrib & SVM_SELECTOR_P_SHIFT)) { 992 svm_inject_gp(vcpu, 0); 993 return 0; 994 } 995 996 *address = (*reg & addr_mask) + seg->base; 997 return addr_mask; 998} 999 1000static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1001{ 1002 u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? 1003 int _in = io_info & SVM_IOIO_TYPE_MASK; 1004 1005 ++kvm_stat.io_exits; 1006 1007 vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; 1008 1009 kvm_run->exit_reason = KVM_EXIT_IO; 1010 kvm_run->io.port = io_info >> 16; 1011 kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 1012 kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); 1013 kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; 1014 kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1015 1016 if (kvm_run->io.string) { 1017 unsigned addr_mask; 1018 1019 addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); 1020 if (!addr_mask) { 1021 printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__); 1022 return 1; 1023 } 1024 1025 if (kvm_run->io.rep) { 1026 kvm_run->io.count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; 1027 kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags 1028 & X86_EFLAGS_DF) != 0; 1029 } 1030 } else { 1031 kvm_run->io.value = vcpu->svm->vmcb->save.rax; 1032 } 1033 return 0; 1034} 1035 1036 1037static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1038{ 1039 return 1; 1040} 1041 1042static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1043{ 1044 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; 1045 skip_emulated_instruction(vcpu); 1046 if (vcpu->irq_summary) 1047 return 1; 1048 1049 
kvm_run->exit_reason = KVM_EXIT_HLT; 1050 ++kvm_stat.halt_exits; 1051 return 0; 1052} 1053 1054static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1055{ 1056 inject_ud(vcpu); 1057 return 1; 1058} 1059 1060static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1061{ 1062 printk(KERN_DEBUG "%s: task swiche is unsupported\n", __FUNCTION__); 1063 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 1064 return 0; 1065} 1066 1067static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1068{ 1069 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; 1070 kvm_run->exit_reason = KVM_EXIT_CPUID; 1071 return 0; 1072} 1073 1074static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1075{ 1076 if (emulate_instruction(vcpu, 0, 0, 0) != EMULATE_DONE) 1077 printk(KERN_ERR "%s: failed\n", __FUNCTION__); 1078 return 1; 1079} 1080 1081static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) 1082{ 1083 switch (ecx) { 1084 case MSR_IA32_TIME_STAMP_COUNTER: { 1085 u64 tsc; 1086 1087 rdtscll(tsc); 1088 *data = vcpu->svm->vmcb->control.tsc_offset + tsc; 1089 break; 1090 } 1091 case MSR_K6_STAR: 1092 *data = vcpu->svm->vmcb->save.star; 1093 break; 1094#ifdef CONFIG_X86_64 1095 case MSR_LSTAR: 1096 *data = vcpu->svm->vmcb->save.lstar; 1097 break; 1098 case MSR_CSTAR: 1099 *data = vcpu->svm->vmcb->save.cstar; 1100 break; 1101 case MSR_KERNEL_GS_BASE: 1102 *data = vcpu->svm->vmcb->save.kernel_gs_base; 1103 break; 1104 case MSR_SYSCALL_MASK: 1105 *data = vcpu->svm->vmcb->save.sfmask; 1106 break; 1107#endif 1108 case MSR_IA32_SYSENTER_CS: 1109 *data = vcpu->svm->vmcb->save.sysenter_cs; 1110 break; 1111 case MSR_IA32_SYSENTER_EIP: 1112 *data = vcpu->svm->vmcb->save.sysenter_eip; 1113 break; 1114 case MSR_IA32_SYSENTER_ESP: 1115 *data = vcpu->svm->vmcb->save.sysenter_esp; 1116 break; 1117 default: 1118 return kvm_get_msr_common(vcpu, ecx, data); 1119 } 1120 return 0; 1121} 1122 1123static 
int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1124{ 1125 u32 ecx = vcpu->regs[VCPU_REGS_RCX]; 1126 u64 data; 1127 1128 if (svm_get_msr(vcpu, ecx, &data)) 1129 svm_inject_gp(vcpu, 0); 1130 else { 1131 vcpu->svm->vmcb->save.rax = data & 0xffffffff; 1132 vcpu->regs[VCPU_REGS_RDX] = data >> 32; 1133 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; 1134 skip_emulated_instruction(vcpu); 1135 } 1136 return 1; 1137} 1138 1139static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) 1140{ 1141 switch (ecx) { 1142 case MSR_IA32_TIME_STAMP_COUNTER: { 1143 u64 tsc; 1144 1145 rdtscll(tsc); 1146 vcpu->svm->vmcb->control.tsc_offset = data - tsc; 1147 break; 1148 } 1149 case MSR_K6_STAR: 1150 vcpu->svm->vmcb->save.star = data; 1151 break; 1152#ifdef CONFIG_X86_64_ 1153 case MSR_LSTAR: 1154 vcpu->svm->vmcb->save.lstar = data; 1155 break; 1156 case MSR_CSTAR: 1157 vcpu->svm->vmcb->save.cstar = data; 1158 break; 1159 case MSR_KERNEL_GS_BASE: 1160 vcpu->svm->vmcb->save.kernel_gs_base = data; 1161 break; 1162 case MSR_SYSCALL_MASK: 1163 vcpu->svm->vmcb->save.sfmask = data; 1164 break; 1165#endif 1166 case MSR_IA32_SYSENTER_CS: 1167 vcpu->svm->vmcb->save.sysenter_cs = data; 1168 break; 1169 case MSR_IA32_SYSENTER_EIP: 1170 vcpu->svm->vmcb->save.sysenter_eip = data; 1171 break; 1172 case MSR_IA32_SYSENTER_ESP: 1173 vcpu->svm->vmcb->save.sysenter_esp = data; 1174 break; 1175 default: 1176 return kvm_set_msr_common(vcpu, ecx, data); 1177 } 1178 return 0; 1179} 1180 1181static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1182{ 1183 u32 ecx = vcpu->regs[VCPU_REGS_RCX]; 1184 u64 data = (vcpu->svm->vmcb->save.rax & -1u) 1185 | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); 1186 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; 1187 if (svm_set_msr(vcpu, ecx, data)) 1188 svm_inject_gp(vcpu, 0); 1189 else 1190 skip_emulated_instruction(vcpu); 1191 return 1; 1192} 1193 1194static int msr_interception(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run) 1195{ 1196 if (vcpu->svm->vmcb->control.exit_info_1) 1197 return wrmsr_interception(vcpu, kvm_run); 1198 else 1199 return rdmsr_interception(vcpu, kvm_run); 1200} 1201 1202static int interrupt_window_interception(struct kvm_vcpu *vcpu, 1203 struct kvm_run *kvm_run) 1204{ 1205 /* 1206 * If the user space waits to inject interrupts, exit as soon as 1207 * possible 1208 */ 1209 if (kvm_run->request_interrupt_window && 1210 !vcpu->irq_summary) { 1211 ++kvm_stat.irq_window_exits; 1212 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 1213 return 0; 1214 } 1215 1216 return 1; 1217} 1218 1219static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, 1220 struct kvm_run *kvm_run) = { 1221 [SVM_EXIT_READ_CR0] = emulate_on_interception, 1222 [SVM_EXIT_READ_CR3] = emulate_on_interception, 1223 [SVM_EXIT_READ_CR4] = emulate_on_interception, 1224 /* for now: */ 1225 [SVM_EXIT_WRITE_CR0] = emulate_on_interception, 1226 [SVM_EXIT_WRITE_CR3] = emulate_on_interception, 1227 [SVM_EXIT_WRITE_CR4] = emulate_on_interception, 1228 [SVM_EXIT_READ_DR0] = emulate_on_interception, 1229 [SVM_EXIT_READ_DR1] = emulate_on_interception, 1230 [SVM_EXIT_READ_DR2] = emulate_on_interception, 1231 [SVM_EXIT_READ_DR3] = emulate_on_interception, 1232 [SVM_EXIT_WRITE_DR0] = emulate_on_interception, 1233 [SVM_EXIT_WRITE_DR1] = emulate_on_interception, 1234 [SVM_EXIT_WRITE_DR2] = emulate_on_interception, 1235 [SVM_EXIT_WRITE_DR3] = emulate_on_interception, 1236 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 1237 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 1238 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 1239 [SVM_EXIT_INTR] = nop_on_interception, 1240 [SVM_EXIT_NMI] = nop_on_interception, 1241 [SVM_EXIT_SMI] = nop_on_interception, 1242 [SVM_EXIT_INIT] = nop_on_interception, 1243 [SVM_EXIT_VINTR] = interrupt_window_interception, 1244 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ 1245 [SVM_EXIT_CPUID] = cpuid_interception, 1246 [SVM_EXIT_HLT] = 
halt_interception, 1247 [SVM_EXIT_INVLPG] = emulate_on_interception, 1248 [SVM_EXIT_INVLPGA] = invalid_op_interception, 1249 [SVM_EXIT_IOIO] = io_interception, 1250 [SVM_EXIT_MSR] = msr_interception, 1251 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 1252 [SVM_EXIT_VMRUN] = invalid_op_interception, 1253 [SVM_EXIT_VMMCALL] = invalid_op_interception, 1254 [SVM_EXIT_VMLOAD] = invalid_op_interception, 1255 [SVM_EXIT_VMSAVE] = invalid_op_interception, 1256 [SVM_EXIT_STGI] = invalid_op_interception, 1257 [SVM_EXIT_CLGI] = invalid_op_interception, 1258 [SVM_EXIT_SKINIT] = invalid_op_interception, 1259}; 1260 1261 1262static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1263{ 1264 u32 exit_code = vcpu->svm->vmcb->control.exit_code; 1265 1266 kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; 1267 1268 if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && 1269 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) 1270 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " 1271 "exit_code 0x%x\n", 1272 __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info, 1273 exit_code); 1274 1275 if (exit_code >= sizeof(svm_exit_handlers) / sizeof(*svm_exit_handlers) 1276 || svm_exit_handlers[exit_code] == 0) { 1277 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 1278 printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n", 1279 __FUNCTION__, 1280 exit_code, 1281 vcpu->svm->vmcb->save.rip, 1282 vcpu->cr0, 1283 vcpu->svm->vmcb->save.rflags); 1284 return 0; 1285 } 1286 1287 return svm_exit_handlers[exit_code](vcpu, kvm_run); 1288} 1289 1290static void reload_tss(struct kvm_vcpu *vcpu) 1291{ 1292 int cpu = raw_smp_processor_id(); 1293 1294 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 1295 svm_data->tss_desc->type = 9; //available 32/64-bit TSS 1296 load_TR_desc(); 1297} 1298 1299static void pre_svm_run(struct kvm_vcpu *vcpu) 1300{ 1301 int cpu = raw_smp_processor_id(); 1302 1303 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 1304 1305 
	vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
	if (vcpu->cpu != cpu ||
	       vcpu->svm->asid_generation != svm_data->asid_generation)
		new_asid(vcpu, svm_data);
}


/* Pop the highest-priority pending irq and program it into the VMCB
 * virtual-interrupt (V_IRQ) fields for injection on the next VMRUN. */
static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
	struct vmcb_control_area *control;

	control = &vcpu->svm->vmcb->control;
	control->int_vector = pop_irq(vcpu);
	control->int_ctl &= ~V_INTR_PRIO_MASK;
	/* Priority hardwired to 0xf; the vector-derived value is disabled. */
	control->int_ctl |= V_IRQ_MASK |
		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
}

/* If a virtual interrupt was left undelivered at #VMEXIT, push it back
 * onto the pending queue; also recompute the interrupt window state. */
static void kvm_reput_irq(struct kvm_vcpu *vcpu)
{
	struct vmcb_control_area *control = &vcpu->svm->vmcb->control;

	if (control->int_ctl & V_IRQ_MASK) {
		control->int_ctl &= ~V_IRQ_MASK;
		push_irq(vcpu, control->int_vector);
	}

	vcpu->interrupt_window_open =
		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
}

/* Inject a pending irq if the window is open, otherwise arm the VINTR
 * intercept so we exit as soon as the guest can take interrupts. */
static void do_interrupt_requests(struct kvm_vcpu *vcpu,
				       struct kvm_run *kvm_run)
{
	struct vmcb_control_area *control = &vcpu->svm->vmcb->control;

	/* Window open = no interrupt shadow and guest EFLAGS.IF set. */
	vcpu->interrupt_window_open =
		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
		 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));

	if (vcpu->interrupt_window_open && vcpu->irq_summary)
		/*
		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
		 */
		kvm_do_inject_irq(vcpu);

	/*
	 * Interrupts blocked.  Wait for unblock.
	 */
	if (!vcpu->interrupt_window_open &&
	    (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
		control->intercept |= 1ULL << INTERCEPT_VINTR;
	} else
		control->intercept &= ~(1ULL << INTERCEPT_VINTR);
}

/* Mirror interrupt/flag state into kvm_run for userspace to inspect. */
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
			      struct kvm_run *kvm_run)
{
	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
						  vcpu->irq_summary == 0);
	kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
	kvm_run->cr8 = vcpu->cr8;
	kvm_run->apic_base = vcpu->apic_base;
}

/*
 * Check if userspace requested an interrupt window, and that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
					  struct kvm_run *kvm_run)
{
	return (!vcpu->irq_summary &&
		kvm_run->request_interrupt_window &&
		vcpu->interrupt_window_open &&
		(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
}

/* Save host DR0-DR3 so they can be restored after running the guest. */
static void save_db_regs(unsigned long *db_regs)
{
	asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
	asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
	asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
	asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
}

/* Load DR0-DR3 from the given array (guest or saved host values). */
static void load_db_regs(unsigned long *db_regs)
{
	asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
	asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
	asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
	asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
}

/*
 * Main guest-entry loop: set up interrupt injection, enter the guest via
 * VMRUN, then dispatch the resulting #VMEXIT.  Loops ("again") until a
 * handler or a pending signal forces a return to userspace.
 */
static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	u16 fs_selector;
	u16 gs_selector;
	u16 ldt_selector;
	int r;

again:
	/* Don't touch irq state while a partially-handled mmio read is
	 * outstanding — the emulator will be re-entered first. */
	if (!vcpu->mmio_read_completed)
		do_interrupt_requests(vcpu, kvm_run);

	clgi();

	pre_svm_run(vcpu);

	/* Save host state that the hardware will not restore for us. */
	save_host_msrs(vcpu);
	fs_selector = read_fs();
	gs_selector = read_gs();
	ldt_selector = read_ldt();
	vcpu->svm->host_cr2 = kvm_read_cr2();
	vcpu->svm->host_dr6 = read_dr6();
	vcpu->svm->host_dr7 = read_dr7();
	vcpu->svm->vmcb->save.cr2 = vcpu->cr2;

	/* Swap in guest DR0-DR3 only when the guest has DR7 breakpoints armed. */
	if (vcpu->svm->vmcb->save.dr7 & 0xff) {
		write_dr7(0);
		save_db_regs(vcpu->svm->host_db_regs);
		load_db_regs(vcpu->svm->db_regs);
	}

	fx_save(vcpu->host_fx_image);
	fx_restore(vcpu->guest_fx_image);

	/*
	 * Load guest GPRs from vcpu->regs, execute VMLOAD/VMRUN/VMSAVE with
	 * rax/eax pointing at the VMCB physical address, then store the
	 * guest GPRs back.  Host callee-visible registers are preserved by
	 * the surrounding push/pop pairs.
	 */
	asm volatile (
#ifdef CONFIG_X86_64
		"push %%rbx; push %%rcx; push %%rdx;"
		"push %%rsi; push %%rdi; push %%rbp;"
		"push %%r8; push %%r9; push %%r10; push %%r11;"
		"push %%r12; push %%r13; push %%r14; push %%r15;"
#else
		"push %%ebx; push %%ecx; push %%edx;"
		"push %%esi; push %%edi; push %%ebp;"
#endif

#ifdef CONFIG_X86_64
		"mov %c[rbx](%[vcpu]), %%rbx \n\t"
		"mov %c[rcx](%[vcpu]), %%rcx \n\t"
		"mov %c[rdx](%[vcpu]), %%rdx \n\t"
		"mov %c[rsi](%[vcpu]), %%rsi \n\t"
		"mov %c[rdi](%[vcpu]), %%rdi \n\t"
		"mov %c[rbp](%[vcpu]), %%rbp \n\t"
		"mov %c[r8](%[vcpu]),  %%r8  \n\t"
		"mov %c[r9](%[vcpu]),  %%r9  \n\t"
		"mov %c[r10](%[vcpu]), %%r10 \n\t"
		"mov %c[r11](%[vcpu]), %%r11 \n\t"
		"mov %c[r12](%[vcpu]), %%r12 \n\t"
		"mov %c[r13](%[vcpu]), %%r13 \n\t"
		"mov %c[r14](%[vcpu]), %%r14 \n\t"
		"mov %c[r15](%[vcpu]), %%r15 \n\t"
#else
		"mov %c[rbx](%[vcpu]), %%ebx \n\t"
		"mov %c[rcx](%[vcpu]), %%ecx \n\t"
		"mov %c[rdx](%[vcpu]), %%edx \n\t"
		"mov %c[rsi](%[vcpu]), %%esi \n\t"
		"mov %c[rdi](%[vcpu]), %%edi \n\t"
		"mov %c[rbp](%[vcpu]), %%ebp \n\t"
#endif

#ifdef CONFIG_X86_64
		/* Enter guest mode */
		"push %%rax \n\t"
		"mov %c[svm](%[vcpu]), %%rax \n\t"
		"mov %c[vmcb](%%rax), %%rax \n\t"
		SVM_VMLOAD "\n\t"
		SVM_VMRUN "\n\t"
		SVM_VMSAVE "\n\t"
		"pop %%rax \n\t"
#else
		/* Enter guest mode */
		"push %%eax \n\t"
		"mov %c[svm](%[vcpu]), %%eax \n\t"
		"mov %c[vmcb](%%eax), %%eax \n\t"
		SVM_VMLOAD "\n\t"
		SVM_VMRUN "\n\t"
		SVM_VMSAVE "\n\t"
		"pop %%eax \n\t"
#endif

		/* Save guest registers, load host registers */
#ifdef CONFIG_X86_64
		"mov %%rbx, %c[rbx](%[vcpu]) \n\t"
		"mov %%rcx, %c[rcx](%[vcpu]) \n\t"
		"mov %%rdx, %c[rdx](%[vcpu]) \n\t"
		"mov %%rsi, %c[rsi](%[vcpu]) \n\t"
		"mov %%rdi, %c[rdi](%[vcpu]) \n\t"
		"mov %%rbp, %c[rbp](%[vcpu]) \n\t"
		"mov %%r8,  %c[r8](%[vcpu]) \n\t"
		"mov %%r9,  %c[r9](%[vcpu]) \n\t"
		"mov %%r10, %c[r10](%[vcpu]) \n\t"
		"mov %%r11, %c[r11](%[vcpu]) \n\t"
		"mov %%r12, %c[r12](%[vcpu]) \n\t"
		"mov %%r13, %c[r13](%[vcpu]) \n\t"
		"mov %%r14, %c[r14](%[vcpu]) \n\t"
		"mov %%r15, %c[r15](%[vcpu]) \n\t"

		"pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
		"pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
		"pop  %%rbp; pop  %%rdi; pop  %%rsi;"
		"pop  %%rdx; pop  %%rcx; pop  %%rbx; \n\t"
#else
		"mov %%ebx, %c[rbx](%[vcpu]) \n\t"
		"mov %%ecx, %c[rcx](%[vcpu]) \n\t"
		"mov %%edx, %c[rdx](%[vcpu]) \n\t"
		"mov %%esi, %c[rsi](%[vcpu]) \n\t"
		"mov %%edi, %c[rdi](%[vcpu]) \n\t"
		"mov %%ebp, %c[rbp](%[vcpu]) \n\t"

		"pop  %%ebp; pop  %%edi; pop  %%esi;"
		"pop  %%edx; pop  %%ecx; pop  %%ebx; \n\t"
#endif
		:
		: [vcpu]"a"(vcpu),
		  [svm]"i"(offsetof(struct kvm_vcpu, svm)),
		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
		  [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
		  [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
		  [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
		  [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])),
		  [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])),
		  [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP]))
#ifdef CONFIG_X86_64
		  ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])),
		  [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])),
		  [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])),
		  [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])),
		  [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])),
		  [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])),
		  [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])),
		  [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15]))
#endif
		: "cc", "memory" );

	/* Back on the host: restore FPU, debug registers and segment state. */
	fx_save(vcpu->guest_fx_image);
	fx_restore(vcpu->host_fx_image);

	if ((vcpu->svm->vmcb->save.dr7 & 0xff))
		load_db_regs(vcpu->svm->host_db_regs);

	vcpu->cr2 = vcpu->svm->vmcb->save.cr2;

	write_dr6(vcpu->svm->host_dr6);
	write_dr7(vcpu->svm->host_dr7);
	kvm_write_cr2(vcpu->svm->host_cr2);

	load_fs(fs_selector);
	load_gs(gs_selector);
	load_ldt(ldt_selector);
	load_host_msrs(vcpu);

	reload_tss(vcpu);

	/*
	 * Profile KVM exit RIPs:
	 */
	if (unlikely(prof_on == KVM_PROFILING))
		profile_hit(KVM_PROFILING,
			(void *)(unsigned long)vcpu->svm->vmcb->save.rip);

	stgi();

	/* Requeue any interrupt the hardware did not deliver. */
	kvm_reput_irq(vcpu);

	vcpu->svm->next_rip = 0;

	/* Hardware rejected the entry itself: report a failed entry. */
	if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
		kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
		kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code;
		post_kvm_run_save(vcpu, kvm_run);
		return 0;
	}

	r = handle_exit(vcpu, kvm_run);
	if (r > 0) {
		/* Handler says keep going; break out only for signals or a
		 * userspace-requested interrupt window. */
		if (signal_pending(current)) {
			++kvm_stat.signal_exits;
			post_kvm_run_save(vcpu, kvm_run);
			return -EINTR;
		}

		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
			++kvm_stat.request_irq_exits;
			post_kvm_run_save(vcpu, kvm_run);
			return -EINTR;
		}
		kvm_resched(vcpu);
		goto again;
	}
	post_kvm_run_save(vcpu, kvm_run);
	return r;
}

/* Force a TLB flush for this vcpu by allocating a new ASID. */
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
	force_new_asid(vcpu);
}

/* Point the guest page-table root at 'root' and invalidate cached
 * translations via a new ASID. */
static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
	vcpu->svm->vmcb->save.cr3 = root;
	force_new_asid(vcpu);
}

/*
 * Inject a page fault into the guest.  If the exit already happened
 * while a page fault was being delivered, escalate to a double fault
 * (#DF, error code 0) instead, per x86 exception-contributory rules.
 */
static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
				  unsigned long addr,
				  uint32_t err_code)
{
	uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info;

	++kvm_stat.pf_guest;

	if (is_page_fault(exit_int_info)) {

		vcpu->svm->vmcb->control.event_inj_err = 0;
		vcpu->svm->vmcb->control.event_inj = 	SVM_EVTINJ_VALID |
						SVM_EVTINJ_VALID_ERR |
						SVM_EVTINJ_TYPE_EXEPT |
						DF_VECTOR;
		return;
	}
	vcpu->cr2 = addr;
	vcpu->svm->vmcb->save.cr2 = addr;
	vcpu->svm->vmcb->control.event_inj = 	SVM_EVTINJ_VALID |
					SVM_EVTINJ_VALID_ERR |
					SVM_EVTINJ_TYPE_EXEPT |
					PF_VECTOR;
	vcpu->svm->vmcb->control.event_inj_err = err_code;
}


/* SVM is never reported as disabled by the BIOS here; always 0. */
static int is_disabled(void)
{
	return 0;
}

/* Architecture hooks handed to the generic KVM core at module init. */
static struct kvm_arch_ops svm_arch_ops = {
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,
	.hardware_setup = svm_hardware_setup,
	.hardware_unsetup = svm_hardware_unsetup,
	.hardware_enable = svm_hardware_enable,
	.hardware_disable = svm_hardware_disable,

	.vcpu_create = svm_create_vcpu,
	.vcpu_free = svm_free_vcpu,

	.vcpu_load = svm_vcpu_load,
	.vcpu_put = svm_vcpu_put,

	.set_guest_debug = svm_guest_debug,
	.get_msr = svm_get_msr,
	.set_msr = svm_set_msr,
	.get_segment_base = svm_get_segment_base,
	.get_segment = svm_get_segment,
	.set_segment = svm_set_segment,
	.get_cs_db_l_bits = svm_get_cs_db_l_bits,
	.decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits,
	.set_cr0 = svm_set_cr0,
	/* SVM needs no special no-modeswitch variant; reuse svm_set_cr0. */
	.set_cr0_no_modeswitch = svm_set_cr0,
	.set_cr3 = svm_set_cr3,
	.set_cr4 = svm_set_cr4,
	.set_efer = svm_set_efer,
	.get_idt = svm_get_idt,
	.set_idt = svm_set_idt,
	.get_gdt = svm_get_gdt,
	.set_gdt = svm_set_gdt,
	.get_dr = svm_get_dr,
	.set_dr = svm_set_dr,
	.cache_regs = svm_cache_regs,
	.decache_regs = svm_decache_regs,
	.get_rflags = svm_get_rflags,
	.set_rflags = svm_set_rflags,

	.invlpg = svm_invlpg,
	.tlb_flush = svm_flush_tlb,
	.inject_page_fault = svm_inject_page_fault,

	.inject_gp = svm_inject_gp,

	.run = svm_vcpu_run,
	.skip_emulated_instruction = skip_emulated_instruction,
	.vcpu_setup = svm_vcpu_setup,
};

/* Register the SVM backend with the generic KVM core. */
static int __init svm_init(void)
{
	return kvm_init_arch(&svm_arch_ops, THIS_MODULE);
}

static void __exit svm_exit(void)
{
	kvm_exit_arch();
}

module_init(svm_init)
module_exit(svm_exit)