Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (269 commits)
KVM: x86: Add missing locking to arch specific vcpu ioctls
KVM: PPC: Add missing vcpu_load()/vcpu_put() in vcpu ioctls
KVM: MMU: Segregate shadow pages with different cr0.wp
KVM: x86: Check LMA bit before set_efer
KVM: Don't allow lmsw to clear cr0.pe
KVM: Add cpuid.txt file
KVM: x86: Tell the guest we'll warn it about tsc stability
x86, paravirt: don't compute pvclock adjustments if we trust the tsc
x86: KVM guest: Try using new kvm clock msrs
KVM: x86: export paravirtual cpuid flags in KVM_GET_SUPPORTED_CPUID
KVM: x86: add new KVMCLOCK cpuid feature
KVM: x86: change msr numbers for kvmclock
x86, paravirt: Add a global synchronization point for pvclock
x86, paravirt: Enable pvclock flags in vcpu_time_info structure
KVM: x86: Inject #GP with the right rip on efer writes
KVM: SVM: Don't allow nested guest to VMMCALL into host
KVM: x86: Fix exception reinjection forced to true
KVM: Fix wallclock version writing race
KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots
KVM: VMX: enable VMXON check with SMX enabled (Intel TXT)
...

+7844 -2829
+206 -2
Documentation/kvm/api.txt
··· 656 656 4.29 KVM_GET_VCPU_EVENTS 657 657 658 658 Capability: KVM_CAP_VCPU_EVENTS 659 + Extended by: KVM_CAP_INTR_SHADOW 659 660 Architectures: x86 660 661 Type: vm ioctl 661 662 Parameters: struct kvm_vcpu_event (out) ··· 677 676 __u8 injected; 678 677 __u8 nr; 679 678 __u8 soft; 680 - __u8 pad; 679 + __u8 shadow; 681 680 } interrupt; 682 681 struct { 683 682 __u8 injected; ··· 689 688 __u32 flags; 690 689 }; 691 690 691 + KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that 692 + interrupt.shadow contains a valid state. Otherwise, this field is undefined. 693 + 692 694 4.30 KVM_SET_VCPU_EVENTS 693 695 694 696 Capability: KVM_CAP_VCPU_EVENTS 697 + Extended by: KVM_CAP_INTR_SHADOW 695 698 Architectures: x86 696 699 Type: vm ioctl 697 700 Parameters: struct kvm_vcpu_event (in) ··· 714 709 KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel 715 710 KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector 716 711 712 + If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in 713 + the flags field to signal that interrupt.shadow contains a valid state and 714 + shall be written into the VCPU. 715 + 716 + 4.32 KVM_GET_DEBUGREGS 717 + 718 + Capability: KVM_CAP_DEBUGREGS 719 + Architectures: x86 720 + Type: vm ioctl 721 + Parameters: struct kvm_debugregs (out) 722 + Returns: 0 on success, -1 on error 723 + 724 + Reads debug registers from the vcpu. 725 + 726 + struct kvm_debugregs { 727 + __u64 db[4]; 728 + __u64 dr6; 729 + __u64 dr7; 730 + __u64 flags; 731 + __u64 reserved[9]; 732 + }; 733 + 734 + 4.33 KVM_SET_DEBUGREGS 735 + 736 + Capability: KVM_CAP_DEBUGREGS 737 + Architectures: x86 738 + Type: vm ioctl 739 + Parameters: struct kvm_debugregs (in) 740 + Returns: 0 on success, -1 on error 741 + 742 + Writes debug registers into the vcpu. 743 + 744 + See KVM_GET_DEBUGREGS for the data structure. The flags field is unused 745 + yet and must be cleared on entry. 
746 + 747 + 4.34 KVM_SET_USER_MEMORY_REGION 748 + 749 + Capability: KVM_CAP_USER_MEM 750 + Architectures: all 751 + Type: vm ioctl 752 + Parameters: struct kvm_userspace_memory_region (in) 753 + Returns: 0 on success, -1 on error 754 + 755 + struct kvm_userspace_memory_region { 756 + __u32 slot; 757 + __u32 flags; 758 + __u64 guest_phys_addr; 759 + __u64 memory_size; /* bytes */ 760 + __u64 userspace_addr; /* start of the userspace allocated memory */ 761 + }; 762 + 763 + /* for kvm_memory_region::flags */ 764 + #define KVM_MEM_LOG_DIRTY_PAGES 1UL 765 + 766 + This ioctl allows the user to create or modify a guest physical memory 767 + slot. When changing an existing slot, it may be moved in the guest 768 + physical memory space, or its flags may be modified. It may not be 769 + resized. Slots may not overlap in guest physical address space. 770 + 771 + Memory for the region is taken starting at the address denoted by the 772 + field userspace_addr, which must point at user addressable memory for 773 + the entire memory slot size. Any object may back this memory, including 774 + anonymous memory, ordinary files, and hugetlbfs. 775 + 776 + It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr 777 + be identical. This allows large pages in the guest to be backed by large 778 + pages in the host. 779 + 780 + The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which 781 + instructs kvm to keep track of writes to memory within the slot. See 782 + the KVM_GET_DIRTY_LOG ioctl. 783 + 784 + When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of the memory 785 + region are automatically reflected into the guest. For example, an mmap() 786 + that affects the region will be made visible immediately. Another example 787 + is madvise(MADV_DROP). 788 + 789 + It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. 
790 + The KVM_SET_MEMORY_REGION does not allow fine grained control over memory 791 + allocation and is deprecated. 792 + 793 + 4.35 KVM_SET_TSS_ADDR 794 + 795 + Capability: KVM_CAP_SET_TSS_ADDR 796 + Architectures: x86 797 + Type: vm ioctl 798 + Parameters: unsigned long tss_address (in) 799 + Returns: 0 on success, -1 on error 800 + 801 + This ioctl defines the physical address of a three-page region in the guest 802 + physical address space. The region must be within the first 4GB of the 803 + guest physical address space and must not conflict with any memory slot 804 + or any mmio address. The guest may malfunction if it accesses this memory 805 + region. 806 + 807 + This ioctl is required on Intel-based hosts. This is needed on Intel hardware 808 + because of a quirk in the virtualization implementation (see the internals 809 + documentation when it pops into existence). 810 + 811 + 4.36 KVM_ENABLE_CAP 812 + 813 + Capability: KVM_CAP_ENABLE_CAP 814 + Architectures: ppc 815 + Type: vcpu ioctl 816 + Parameters: struct kvm_enable_cap (in) 817 + Returns: 0 on success; -1 on error 818 + 819 + Not all extensions are enabled by default. Using this ioctl the application 820 + can enable an extension, making it available to the guest. 821 + 822 + On systems that do not support this ioctl, it always fails. On systems that 823 + do support it, it only works for extensions that are supported for enablement. 824 + 825 + To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should 826 + be used. 827 + 828 + struct kvm_enable_cap { 829 + /* in */ 830 + __u32 cap; 831 + 832 + The capability that is supposed to get enabled. 833 + 834 + __u32 flags; 835 + 836 + A bitfield indicating future enhancements. Has to be 0 for now. 837 + 838 + __u64 args[4]; 839 + 840 + Arguments for enabling a feature. If a feature needs initial values to 841 + function properly, this is the place to put them. 
842 + 843 + __u8 pad[64]; 844 + }; 845 + 846 + 4.37 KVM_GET_MP_STATE 847 + 848 + Capability: KVM_CAP_MP_STATE 849 + Architectures: x86, ia64 850 + Type: vcpu ioctl 851 + Parameters: struct kvm_mp_state (out) 852 + Returns: 0 on success; -1 on error 853 + 854 + struct kvm_mp_state { 855 + __u32 mp_state; 856 + }; 857 + 858 + Returns the vcpu's current "multiprocessing state" (though also valid on 859 + uniprocessor guests). 860 + 861 + Possible values are: 862 + 863 + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running 864 + - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) 865 + which has not yet received an INIT signal 866 + - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is 867 + now ready for a SIPI 868 + - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and 869 + is waiting for an interrupt 870 + - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector 871 + accessible via KVM_GET_VCPU_EVENTS) 872 + 873 + This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel 874 + irqchip, the multiprocessing state must be maintained by userspace. 875 + 876 + 4.38 KVM_SET_MP_STATE 877 + 878 + Capability: KVM_CAP_MP_STATE 879 + Architectures: x86, ia64 880 + Type: vcpu ioctl 881 + Parameters: struct kvm_mp_state (in) 882 + Returns: 0 on success; -1 on error 883 + 884 + Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for 885 + arguments. 886 + 887 + This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel 888 + irqchip, the multiprocessing state must be maintained by userspace. 717 889 718 890 5. The kvm_run structure 719 891 ··· 1002 820 by kvm. The 'data' member contains the written data if 'is_write' is 1003 821 true, and should be filled by application code otherwise. 
1004 822 823 + NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding 824 + operations are complete (and guest state is consistent) only after userspace 825 + has re-entered the kernel with KVM_RUN. The kernel side will first finish 826 + incomplete operations and then check for pending signals. Userspace 827 + can re-enter the guest with an unmasked signal pending to complete 828 + pending operations. 829 + 1005 830 /* KVM_EXIT_HYPERCALL */ 1006 831 struct { 1007 832 __u64 nr; ··· 1018 829 __u32 pad; 1019 830 } hypercall; 1020 831 1021 - Unused. 832 + Unused. This was once used for 'hypercall to userspace'. To implement 833 + such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390). 834 + Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO. 1022 835 1023 836 /* KVM_EXIT_TPR_ACCESS */ 1024 837 struct { ··· 1060 869 } dcr; 1061 870 1062 871 powerpc specific. 872 + 873 + /* KVM_EXIT_OSI */ 874 + struct { 875 + __u64 gprs[32]; 876 + } osi; 877 + 878 + MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch 879 + hypercalls and exit with this exit struct that contains all the guest gprs. 880 + 881 + If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall. 882 + Userspace can now handle the hypercall and when it's done modify the gprs as 883 + necessary. Upon guest entry all guest GPRs will then be replaced by the values 884 + in this struct. 1063 885 1064 886 /* Fix the size of the union. */ 1065 887 char padding[256];
+42
Documentation/kvm/cpuid.txt
··· 1 + KVM CPUID bits 2 + Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 3 + ===================================================== 4 + 5 + A guest running on a kvm host can check some of its features using 6 + cpuid. This is not always guaranteed to work, since userspace can 7 + mask-out some, or even all KVM-related cpuid features before launching 8 + a guest. 9 + 10 + KVM cpuid functions are: 11 + 12 + function: KVM_CPUID_SIGNATURE (0x40000000) 13 + returns : eax = 0, 14 + ebx = 0x4b4d564b, 15 + ecx = 0x564b4d56, 16 + edx = 0x4d. 17 + Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM". 18 + This function queries the presence of KVM cpuid leafs. 19 + 20 + 21 + function: define KVM_CPUID_FEATURES (0x40000001) 22 + returns : ebx, ecx, edx = 0 23 + eax = an OR'ed group of (1 << flag), where each flag is: 24 + 25 + 26 + flag || value || meaning 27 + ============================================================================= 28 + KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs 29 + || || 0x11 and 0x12. 30 + ------------------------------------------------------------------------------ 31 + KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays 32 + || || on PIO operations. 33 + ------------------------------------------------------------------------------ 34 + KVM_FEATURE_MMU_OP || 2 || deprecated. 35 + ------------------------------------------------------------------------------ 36 + KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs 37 + || || 0x4b564d00 and 0x4b564d01 38 + ------------------------------------------------------------------------------ 39 + KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side 40 + || || per-cpu warps are expected in 41 + || || kvmclock. 42 + ------------------------------------------------------------------------------
+304
Documentation/kvm/mmu.txt
··· 1 + The x86 kvm shadow mmu 2 + ====================== 3 + 4 + The mmu (in arch/x86/kvm, files mmu.[ch] and paging_tmpl.h) is responsible 5 + for presenting a standard x86 mmu to the guest, while translating guest 6 + physical addresses to host physical addresses. 7 + 8 + The mmu code attempts to satisfy the following requirements: 9 + 10 + - correctness: the guest should not be able to determine that it is running 11 + on an emulated mmu except for timing (we attempt to comply 12 + with the specification, not emulate the characteristics of 13 + a particular implementation such as tlb size) 14 + - security: the guest must not be able to touch host memory not assigned 15 + to it 16 + - performance: minimize the performance penalty imposed by the mmu 17 + - scaling: need to scale to large memory and large vcpu guests 18 + - hardware: support the full range of x86 virtualization hardware 19 + - integration: Linux memory management code must be in control of guest memory 20 + so that swapping, page migration, page merging, transparent 21 + hugepages, and similar features work without change 22 + - dirty tracking: report writes to guest memory to enable live migration 23 + and framebuffer-based displays 24 + - footprint: keep the amount of pinned kernel memory low (most memory 25 + should be shrinkable) 26 + - reliability: avoid multipage or GFP_ATOMIC allocations 27 + 28 + Acronyms 29 + ======== 30 + 31 + pfn host page frame number 32 + hpa host physical address 33 + hva host virtual address 34 + gfn guest frame number 35 + gpa guest physical address 36 + gva guest virtual address 37 + ngpa nested guest physical address 38 + ngva nested guest virtual address 39 + pte page table entry (used also to refer generically to paging structure 40 + entries) 41 + gpte guest pte (referring to gfns) 42 + spte shadow pte (referring to pfns) 43 + tdp two dimensional paging (vendor neutral term for NPT and EPT) 44 + 45 + Virtual and real hardware supported 46 + 
=================================== 47 + 48 + The mmu supports first-generation mmu hardware, which allows an atomic switch 49 + of the current paging mode and cr3 during guest entry, as well as 50 + two-dimensional paging (AMD's NPT and Intel's EPT). The emulated hardware 51 + it exposes is the traditional 2/3/4 level x86 mmu, with support for global 52 + pages, pae, pse, pse36, cr0.wp, and 1GB pages. Work is in progress to support 53 + exposing NPT capable hardware on NPT capable hosts. 54 + 55 + Translation 56 + =========== 57 + 58 + The primary job of the mmu is to program the processor's mmu to translate 59 + addresses for the guest. Different translations are required at different 60 + times: 61 + 62 + - when guest paging is disabled, we translate guest physical addresses to 63 + host physical addresses (gpa->hpa) 64 + - when guest paging is enabled, we translate guest virtual addresses, to 65 + guest physical addresses, to host physical addresses (gva->gpa->hpa) 66 + - when the guest launches a guest of its own, we translate nested guest 67 + virtual addresses, to nested guest physical addresses, to guest physical 68 + addresses, to host physical addresses (ngva->ngpa->gpa->hpa) 69 + 70 + The primary challenge is to encode between 1 and 3 translations into hardware 71 + that support only 1 (traditional) and 2 (tdp) translations. When the 72 + number of required translations matches the hardware, the mmu operates in 73 + direct mode; otherwise it operates in shadow mode (see below). 74 + 75 + Memory 76 + ====== 77 + 78 + Guest memory (gpa) is part of the user address space of the process that is 79 + using kvm. Userspace defines the translation between guest addresses and user 80 + addresses (gpa->hva); note that two gpas may alias to the same gva, but not 81 + vice versa. 82 + 83 + These gvas may be backed using any method available to the host: anonymous 84 + memory, file backed memory, and device memory. Memory might be paged by the 85 + host at any time. 
86 + 87 + Events 88 + ====== 89 + 90 + The mmu is driven by events, some from the guest, some from the host. 91 + 92 + Guest generated events: 93 + - writes to control registers (especially cr3) 94 + - invlpg/invlpga instruction execution 95 + - access to missing or protected translations 96 + 97 + Host generated events: 98 + - changes in the gpa->hpa translation (either through gpa->hva changes or 99 + through hva->hpa changes) 100 + - memory pressure (the shrinker) 101 + 102 + Shadow pages 103 + ============ 104 + 105 + The principal data structure is the shadow page, 'struct kvm_mmu_page'. A 106 + shadow page contains 512 sptes, which can be either leaf or nonleaf sptes. A 107 + shadow page may contain a mix of leaf and nonleaf sptes. 108 + 109 + A nonleaf spte allows the hardware mmu to reach the leaf pages and 110 + is not related to a translation directly. It points to other shadow pages. 111 + 112 + A leaf spte corresponds to either one or two translations encoded into 113 + one paging structure entry. These are always the lowest level of the 114 + translation stack, with optional higher level translations left to NPT/EPT. 115 + Leaf ptes point at guest pages. 116 + 117 + The following table shows translations encoded by leaf ptes, with higher-level 118 + translations in parentheses: 119 + 120 + Non-nested guests: 121 + nonpaging: gpa->hpa 122 + paging: gva->gpa->hpa 123 + paging, tdp: (gva->)gpa->hpa 124 + Nested guests: 125 + non-tdp: ngva->gpa->hpa (*) 126 + tdp: (ngva->)ngpa->gpa->hpa 127 + 128 + (*) the guest hypervisor will encode the ngva->gpa translation into its page 129 + tables if npt is not present 130 + 131 + Shadow pages contain the following information: 132 + role.level: 133 + The level in the shadow paging hierarchy that this shadow page belongs to. 134 + 1=4k sptes, 2=2M sptes, 3=1G sptes, etc. 135 + role.direct: 136 + If set, leaf sptes reachable from this page are for a linear range. 
137 + Examples include real mode translation, large guest pages backed by small 138 + host pages, and gpa->hpa translations when NPT or EPT is active. 139 + The linear range starts at (gfn << PAGE_SHIFT) and its size is determined 140 + by role.level (2MB for first level, 1GB for second level, 0.5TB for third 141 + level, 256TB for fourth level) 142 + If clear, this page corresponds to a guest page table denoted by the gfn 143 + field. 144 + role.quadrant: 145 + When role.cr4_pae=0, the guest uses 32-bit gptes while the host uses 64-bit 146 + sptes. That means a guest page table contains more ptes than the host, 147 + so multiple shadow pages are needed to shadow one guest page. 148 + For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the 149 + first or second 512-gpte block in the guest page table. For second-level 150 + page tables, each 32-bit gpte is converted to two 64-bit sptes 151 + (since each first-level guest page is shadowed by two first-level 152 + shadow pages) so role.quadrant takes values in the range 0..3. Each 153 + quadrant maps 1GB virtual address space. 154 + role.access: 155 + Inherited guest access permissions in the form uwx. Note execute 156 + permission is positive, not negative. 157 + role.invalid: 158 + The page is invalid and should not be used. It is a root page that is 159 + currently pinned (by a cpu hardware register pointing to it); once it is 160 + unpinned it will be destroyed. 161 + role.cr4_pae: 162 + Contains the value of cr4.pae for which the page is valid (e.g. whether 163 + 32-bit or 64-bit gptes are in use). 164 + role.cr4_nxe: 165 + Contains the value of efer.nxe for which the page is valid. 166 + role.cr0_wp: 167 + Contains the value of cr0.wp for which the page is valid. 168 + gfn: 169 + Either the guest page table containing the translations shadowed by this 170 + page, or the base page frame for linear translations. See role.direct. 
171 + spt: 172 + A pageful of 64-bit sptes containing the translations for this page. 173 + Accessed by both kvm and hardware. 174 + The page pointed to by spt will have its page->private pointing back 175 + at the shadow page structure. 176 + sptes in spt point either at guest pages, or at lower-level shadow pages. 177 + Specifically, if sp1 and sp2 are shadow pages, then sp1->spt[n] may point 178 + at __pa(sp2->spt). sp2 will point back at sp1 through parent_pte. 179 + The spt array forms a DAG structure with the shadow page as a node, and 180 + guest pages as leaves. 181 + gfns: 182 + An array of 512 guest frame numbers, one for each present pte. Used to 183 + perform a reverse map from a pte to a gfn. 184 + slot_bitmap: 185 + A bitmap containing one bit per memory slot. If the page contains a pte 186 + mapping a page from memory slot n, then bit n of slot_bitmap will be set 187 + (if a page is aliased among several slots, then it is not guaranteed that 188 + all slots will be marked). 189 + Used during dirty logging to avoid scanning a shadow page if none of its 190 + pages need tracking. 191 + root_count: 192 + A counter keeping track of how many hardware registers (guest cr3 or 193 + pdptrs) are now pointing at the page. While this counter is nonzero, the 194 + page cannot be destroyed. See role.invalid. 195 + multimapped: 196 + Whether there exist multiple sptes pointing at this page. 197 + parent_pte/parent_ptes: 198 + If multimapped is zero, parent_pte points at the single spte that points at 199 + this page's spt. Otherwise, parent_ptes points at a data structure 200 + with a list of parent_ptes. 201 + unsync: 202 + If true, then the translations in this page may not match the guest's 203 + translation. This is equivalent to the state of the tlb when a pte is 204 + changed but before the tlb entry is flushed. Accordingly, unsync ptes 205 + are synchronized when the guest executes invlpg or flushes its tlb by 206 + other means. Valid for leaf pages. 
207 + unsync_children: 208 + How many sptes in the page point at pages that are unsync (or have 209 + unsynchronized children). 210 + unsync_child_bitmap: 211 + A bitmap indicating which sptes in spt point (directly or indirectly) at 212 + pages that may be unsynchronized. Used to quickly locate all unsynchronized 213 + pages reachable from a given page. 214 + 215 + Reverse map 216 + =========== 217 + 218 + The mmu maintains a reverse mapping whereby all ptes mapping a page can be 219 + reached given its gfn. This is used, for example, when swapping out a page. 220 + 221 + Synchronized and unsynchronized pages 222 + ===================================== 223 + 224 + The guest uses two events to synchronize its tlb and page tables: tlb flushes 225 + and page invalidations (invlpg). 226 + 227 + A tlb flush means that we need to synchronize all sptes reachable from the 228 + guest's cr3. This is expensive, so we keep all guest page tables write 229 + protected, and synchronize sptes to gptes when a gpte is written. 230 + 231 + A special case is when a guest page table is reachable from the current 232 + guest cr3. In this case, the guest is obliged to issue an invlpg instruction 233 + before using the translation. We take advantage of that by removing write 234 + protection from the guest page, and allowing the guest to modify it freely. 235 + We synchronize modified gptes when the guest invokes invlpg. This reduces 236 + the amount of emulation we have to do when the guest modifies multiple gptes, 237 + or when a guest page is no longer used as a page table and is used for 238 + random guest data. 239 + 240 + As a side effect we have to resynchronize all reachable unsynchronized shadow 241 + pages on a tlb flush. 242 + 243 + 244 + Reaction to events 245 + ================== 246 + 247 + - guest page fault (or npt page fault, or ept violation) 248 + 249 + This is the most complicated event. 
The cause of a page fault can be: 250 + 251 + - a true guest fault (the guest translation won't allow the access) (*) 252 + - access to a missing translation 253 + - access to a protected translation 254 + - when logging dirty pages, memory is write protected 255 + - synchronized shadow pages are write protected (*) 256 + - access to untranslatable memory (mmio) 257 + 258 + (*) not applicable in direct mode 259 + 260 + Handling a page fault is performed as follows: 261 + 262 + - if needed, walk the guest page tables to determine the guest translation 263 + (gva->gpa or ngpa->gpa) 264 + - if permissions are insufficient, reflect the fault back to the guest 265 + - determine the host page 266 + - if this is an mmio request, there is no host page; call the emulator 267 + to emulate the instruction instead 268 + - walk the shadow page table to find the spte for the translation, 269 + instantiating missing intermediate page tables as necessary 270 + - try to unsynchronize the page 271 + - if successful, we can let the guest continue and modify the gpte 272 + - emulate the instruction 273 + - if failed, unshadow the page and let the guest continue 274 + - update any translations that were modified by the instruction 275 + 276 + invlpg handling: 277 + 278 + - walk the shadow page hierarchy and drop affected translations 279 + - try to reinstantiate the indicated translation in the hope that the 280 + guest will use it in the near future 281 + 282 + Guest control register updates: 283 + 284 + - mov to cr3 285 + - look up new shadow roots 286 + - synchronize newly reachable shadow pages 287 + 288 + - mov to cr0/cr4/efer 289 + - set up mmu context for new paging mode 290 + - look up new shadow roots 291 + - synchronize newly reachable shadow pages 292 + 293 + Host translation updates: 294 + 295 + - mmu notifier called with updated hva 296 + - look up affected sptes through reverse map 297 + - drop (or update) translations 298 + 299 + Further reading 300 + =============== 301 
+ 302 + - NPT presentation from KVM Forum 2008 303 + http://www.linux-kvm.org/wiki/images/c/c8/KvmForum2008%24kdf2008_21.pdf 304 +
+6 -2
arch/ia64/kvm/kvm-ia64.c
··· 979 979 r = -EFAULT; 980 980 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 981 981 goto out; 982 + r = -ENXIO; 982 983 if (irqchip_in_kernel(kvm)) { 983 984 __s32 status; 984 985 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 985 986 irq_event.irq, irq_event.level); 986 987 if (ioctl == KVM_IRQ_LINE_STATUS) { 988 + r = -EFAULT; 987 989 irq_event.status = status; 988 990 if (copy_to_user(argp, &irq_event, 989 991 sizeof irq_event)) ··· 1381 1379 int i, j; 1382 1380 unsigned long base_gfn; 1383 1381 1384 - slots = rcu_dereference(kvm->memslots); 1382 + slots = kvm_memslots(kvm); 1385 1383 for (i = 0; i < slots->nmemslots; i++) { 1386 1384 memslot = &slots->memslots[i]; 1387 1385 base_gfn = memslot->base_gfn; ··· 1537 1535 goto out; 1538 1536 1539 1537 if (copy_to_user(user_stack, stack, 1540 - sizeof(struct kvm_ia64_vcpu_stack))) 1538 + sizeof(struct kvm_ia64_vcpu_stack))) { 1539 + r = -EFAULT; 1541 1540 goto out; 1541 + } 1542 1542 1543 1543 break; 1544 1544 }
+1 -1
arch/ia64/kvm/vmm.c
··· 51 51 vmm_fpswa_interface = fpswa_interface; 52 52 53 53 /*Register vmm data to kvm side*/ 54 - return kvm_init(&vmm_info, 1024, THIS_MODULE); 54 + return kvm_init(&vmm_info, 1024, 0, THIS_MODULE); 55 55 } 56 56 57 57 static void __exit kvm_vmm_exit(void)
+2
arch/powerpc/include/asm/asm-compat.h
··· 21 21 /* operations for longs and pointers */ 22 22 #define PPC_LL stringify_in_c(ld) 23 23 #define PPC_STL stringify_in_c(std) 24 + #define PPC_STLU stringify_in_c(stdu) 24 25 #define PPC_LCMPI stringify_in_c(cmpdi) 25 26 #define PPC_LONG stringify_in_c(.llong) 26 27 #define PPC_LONG_ALIGN stringify_in_c(.balign 8) ··· 45 44 /* operations for longs and pointers */ 46 45 #define PPC_LL stringify_in_c(lwz) 47 46 #define PPC_STL stringify_in_c(stw) 47 + #define PPC_STLU stringify_in_c(stwu) 48 48 #define PPC_LCMPI stringify_in_c(cmpwi) 49 49 #define PPC_LONG stringify_in_c(.long) 50 50 #define PPC_LONG_ALIGN stringify_in_c(.balign 4)
+10
arch/powerpc/include/asm/kvm.h
··· 77 77 struct kvm_guest_debug_arch { 78 78 }; 79 79 80 + #define KVM_REG_MASK 0x001f 81 + #define KVM_REG_EXT_MASK 0xffe0 82 + #define KVM_REG_GPR 0x0000 83 + #define KVM_REG_FPR 0x0020 84 + #define KVM_REG_QPR 0x0040 85 + #define KVM_REG_FQPR 0x0060 86 + 87 + #define KVM_INTERRUPT_SET -1U 88 + #define KVM_INTERRUPT_UNSET -2U 89 + 80 90 #endif /* __LINUX_KVM_POWERPC_H */
+2
arch/powerpc/include/asm/kvm_asm.h
··· 88 88 89 89 #define BOOK3S_HFLAG_DCBZ32 0x1 90 90 #define BOOK3S_HFLAG_SLB 0x2 91 + #define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 92 + #define BOOK3S_HFLAG_NATIVE_PS 0x8 91 93 92 94 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ 93 95 #define RESUME_FLAG_HOST (1<<1) /* Resume host? */
+132 -25
arch/powerpc/include/asm/kvm_book3s.h
··· 22 22 23 23 #include <linux/types.h> 24 24 #include <linux/kvm_host.h> 25 - #include <asm/kvm_book3s_64_asm.h> 25 + #include <asm/kvm_book3s_asm.h> 26 26 27 27 struct kvmppc_slb { 28 28 u64 esid; 29 29 u64 vsid; 30 30 u64 orige; 31 31 u64 origv; 32 - bool valid; 33 - bool Ks; 34 - bool Kp; 35 - bool nx; 36 - bool large; /* PTEs are 16MB */ 37 - bool tb; /* 1TB segment */ 38 - bool class; 32 + bool valid : 1; 33 + bool Ks : 1; 34 + bool Kp : 1; 35 + bool nx : 1; 36 + bool large : 1; /* PTEs are 16MB */ 37 + bool tb : 1; /* 1TB segment */ 38 + bool class : 1; 39 39 }; 40 40 41 41 struct kvmppc_sr { 42 42 u32 raw; 43 43 u32 vsid; 44 - bool Ks; 45 - bool Kp; 46 - bool nx; 44 + bool Ks : 1; 45 + bool Kp : 1; 46 + bool nx : 1; 47 + bool valid : 1; 47 48 }; 48 49 49 50 struct kvmppc_bat { 50 51 u64 raw; 51 52 u32 bepi; 52 53 u32 bepi_mask; 53 - bool vs; 54 - bool vp; 55 54 u32 brpn; 56 55 u8 wimg; 57 56 u8 pp; 57 + bool vs : 1; 58 + bool vp : 1; 58 59 }; 59 60 60 61 struct kvmppc_sid_map { 61 62 u64 guest_vsid; 62 63 u64 guest_esid; 63 64 u64 host_vsid; 64 - bool valid; 65 + bool valid : 1; 65 66 }; 66 67 67 68 #define SID_MAP_BITS 9 ··· 71 70 72 71 struct kvmppc_vcpu_book3s { 73 72 struct kvm_vcpu vcpu; 74 - struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 73 + struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; 75 74 struct kvmppc_sid_map sid_map[SID_MAP_NUM]; 76 75 struct kvmppc_slb slb[64]; 77 76 struct { ··· 83 82 struct kvmppc_bat ibat[8]; 84 83 struct kvmppc_bat dbat[8]; 85 84 u64 hid[6]; 85 + u64 gqr[8]; 86 86 int slb_nr; 87 + u32 dsisr; 87 88 u64 sdr1; 88 - u64 dsisr; 89 89 u64 hior; 90 90 u64 msr_mask; 91 91 u64 vsid_first; ··· 100 98 #define CONTEXT_GUEST 1 101 99 #define CONTEXT_GUEST_END 2 102 100 103 - #define VSID_REAL 0xfffffffffff00000 104 - #define VSID_REAL_DR 0xffffffffffe00000 105 - #define VSID_REAL_IR 0xffffffffffd00000 106 - #define VSID_BAT 0xffffffffffc00000 107 - #define VSID_PR 0x8000000000000000 101 + #define VSID_REAL 0x1fffffffffc00000ULL 102 + 
#define VSID_BAT 0x1fffffffffb00000ULL 103 + #define VSID_REAL_DR 0x2000000000000000ULL 104 + #define VSID_REAL_IR 0x4000000000000000ULL 105 + #define VSID_PR 0x8000000000000000ULL 108 106 109 - extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask); 107 + extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask); 110 108 extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); 111 - extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end); 109 + extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end); 112 110 extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); 113 111 extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); 114 112 extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); ··· 116 114 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); 117 115 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); 118 116 extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); 119 - extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data); 120 - extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr); 117 + extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); 118 + extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); 121 119 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); 122 120 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, 123 121 bool upper, u32 val); 122 + extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); 123 + extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); 124 124 125 125 extern u32 kvmppc_trampoline_lowmem; 126 126 extern u32 kvmppc_trampoline_enter; ··· 130 126 extern void kvmppc_load_up_fpu(void); 131 127 extern void 
kvmppc_load_up_altivec(void); 132 128 extern void kvmppc_load_up_vsx(void); 129 + extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); 130 + extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); 133 131 134 132 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) 135 133 { ··· 146 140 } 147 141 148 142 extern void kvm_return_point(void); 143 + static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu); 144 + 145 + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 146 + { 147 + if ( num < 14 ) { 148 + to_svcpu(vcpu)->gpr[num] = val; 149 + to_book3s(vcpu)->shadow_vcpu->gpr[num] = val; 150 + } else 151 + vcpu->arch.gpr[num] = val; 152 + } 153 + 154 + static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 155 + { 156 + if ( num < 14 ) 157 + return to_svcpu(vcpu)->gpr[num]; 158 + else 159 + return vcpu->arch.gpr[num]; 160 + } 161 + 162 + static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 163 + { 164 + to_svcpu(vcpu)->cr = val; 165 + to_book3s(vcpu)->shadow_vcpu->cr = val; 166 + } 167 + 168 + static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 169 + { 170 + return to_svcpu(vcpu)->cr; 171 + } 172 + 173 + static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 174 + { 175 + to_svcpu(vcpu)->xer = val; 176 + to_book3s(vcpu)->shadow_vcpu->xer = val; 177 + } 178 + 179 + static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 180 + { 181 + return to_svcpu(vcpu)->xer; 182 + } 183 + 184 + static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) 185 + { 186 + to_svcpu(vcpu)->ctr = val; 187 + } 188 + 189 + static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) 190 + { 191 + return to_svcpu(vcpu)->ctr; 192 + } 193 + 194 + static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) 195 + { 196 + to_svcpu(vcpu)->lr = val; 197 + } 198 + 199 + static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) 200 + { 201 + 
return to_svcpu(vcpu)->lr; 202 + } 203 + 204 + static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) 205 + { 206 + to_svcpu(vcpu)->pc = val; 207 + } 208 + 209 + static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) 210 + { 211 + return to_svcpu(vcpu)->pc; 212 + } 213 + 214 + static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) 215 + { 216 + ulong pc = kvmppc_get_pc(vcpu); 217 + struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); 218 + 219 + /* Load the instruction manually if it failed to do so in the 220 + * exit path */ 221 + if (svcpu->last_inst == KVM_INST_FETCH_FAILED) 222 + kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); 223 + 224 + return svcpu->last_inst; 225 + } 226 + 227 + static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) 228 + { 229 + return to_svcpu(vcpu)->fault_dar; 230 + } 231 + 232 + /* Magic register values loaded into r3 and r4 before the 'sc' assembly 233 + * instruction for the OSI hypercalls */ 234 + #define OSI_SC_MAGIC_R3 0x113724FA 235 + #define OSI_SC_MAGIC_R4 0x77810F9B 149 236 150 237 #define INS_DCBZ 0x7c0007ec 238 + 239 + /* Also add subarch specific defines */ 240 + 241 + #ifdef CONFIG_PPC_BOOK3S_32 242 + #include <asm/kvm_book3s_32.h> 243 + #else 244 + #include <asm/kvm_book3s_64.h> 245 + #endif 151 246 152 247 #endif /* __ASM_KVM_BOOK3S_H__ */
+42
arch/powerpc/include/asm/kvm_book3s_32.h
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright SUSE Linux Products GmbH 2010 16 + * 17 + * Authors: Alexander Graf <agraf@suse.de> 18 + */ 19 + 20 + #ifndef __ASM_KVM_BOOK3S_32_H__ 21 + #define __ASM_KVM_BOOK3S_32_H__ 22 + 23 + static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) 24 + { 25 + return to_book3s(vcpu)->shadow_vcpu; 26 + } 27 + 28 + #define PTE_SIZE 12 29 + #define VSID_ALL 0 30 + #define SR_INVALID 0x00000001 /* VSID 1 should always be unused */ 31 + #define SR_KP 0x20000000 32 + #define PTE_V 0x80000000 33 + #define PTE_SEC 0x00000040 34 + #define PTE_M 0x00000010 35 + #define PTE_R 0x00000100 36 + #define PTE_C 0x00000080 37 + 38 + #define SID_SHIFT 28 39 + #define ESID_MASK 0xf0000000 40 + #define VSID_MASK 0x00fffffff0000000ULL 41 + 42 + #endif /* __ASM_KVM_BOOK3S_32_H__ */
+28
arch/powerpc/include/asm/kvm_book3s_64.h
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright SUSE Linux Products GmbH 2010 16 + * 17 + * Authors: Alexander Graf <agraf@suse.de> 18 + */ 19 + 20 + #ifndef __ASM_KVM_BOOK3S_64_H__ 21 + #define __ASM_KVM_BOOK3S_64_H__ 22 + 23 + static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) 24 + { 25 + return &get_paca()->shadow_vcpu; 26 + } 27 + 28 + #endif /* __ASM_KVM_BOOK3S_64_H__ */
+23 -2
arch/powerpc/include/asm/kvm_book3s_64_asm.h arch/powerpc/include/asm/kvm_book3s_asm.h
··· 22 22 23 23 #ifdef __ASSEMBLY__ 24 24 25 - #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 25 + #ifdef CONFIG_KVM_BOOK3S_HANDLER 26 26 27 27 #include <asm/kvm_asm.h> 28 28 ··· 55 55 .macro DO_KVM intno 56 56 .endm 57 57 58 - #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */ 58 + #endif /* CONFIG_KVM_BOOK3S_HANDLER */ 59 59 60 60 #else /*__ASSEMBLY__ */ 61 61 ··· 63 63 ulong gpr[14]; 64 64 u32 cr; 65 65 u32 xer; 66 + 67 + u32 fault_dsisr; 68 + u32 last_inst; 69 + ulong ctr; 70 + ulong lr; 71 + ulong pc; 72 + ulong shadow_srr1; 73 + ulong fault_dar; 74 + 66 75 ulong host_r1; 67 76 ulong host_r2; 68 77 ulong handler; 69 78 ulong scratch0; 70 79 ulong scratch1; 71 80 ulong vmhandler; 81 + u8 in_guest; 82 + 83 + #ifdef CONFIG_PPC_BOOK3S_32 84 + u32 sr[16]; /* Guest SRs */ 85 + #endif 86 + #ifdef CONFIG_PPC_BOOK3S_64 87 + u8 slb_max; /* highest used guest slb entry */ 88 + struct { 89 + u64 esid; 90 + u64 vsid; 91 + } slb[64]; /* guest SLB */ 92 + #endif 72 93 }; 73 94 74 95 #endif /*__ASSEMBLY__ */
+96
arch/powerpc/include/asm/kvm_booke.h
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright SUSE Linux Products GmbH 2010 16 + * 17 + * Authors: Alexander Graf <agraf@suse.de> 18 + */ 19 + 20 + #ifndef __ASM_KVM_BOOKE_H__ 21 + #define __ASM_KVM_BOOKE_H__ 22 + 23 + #include <linux/types.h> 24 + #include <linux/kvm_host.h> 25 + 26 + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 27 + { 28 + vcpu->arch.gpr[num] = val; 29 + } 30 + 31 + static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 32 + { 33 + return vcpu->arch.gpr[num]; 34 + } 35 + 36 + static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 37 + { 38 + vcpu->arch.cr = val; 39 + } 40 + 41 + static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 42 + { 43 + return vcpu->arch.cr; 44 + } 45 + 46 + static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 47 + { 48 + vcpu->arch.xer = val; 49 + } 50 + 51 + static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 52 + { 53 + return vcpu->arch.xer; 54 + } 55 + 56 + static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) 57 + { 58 + return vcpu->arch.last_inst; 59 + } 60 + 61 + static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) 62 + { 63 + vcpu->arch.ctr = val; 64 + } 65 + 66 + static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) 67 + { 68 + return vcpu->arch.ctr; 
69 + } 70 + 71 + static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) 72 + { 73 + vcpu->arch.lr = val; 74 + } 75 + 76 + static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) 77 + { 78 + return vcpu->arch.lr; 79 + } 80 + 81 + static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) 82 + { 83 + vcpu->arch.pc = val; 84 + } 85 + 86 + static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) 87 + { 88 + return vcpu->arch.pc; 89 + } 90 + 91 + static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) 92 + { 93 + return vcpu->arch.fault_dear; 94 + } 95 + 96 + #endif /* __ASM_KVM_BOOKE_H__ */
+85
arch/powerpc/include/asm/kvm_fpu.h
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright Novell Inc. 2010 16 + * 17 + * Authors: Alexander Graf <agraf@suse.de> 18 + */ 19 + 20 + #ifndef __ASM_KVM_FPU_H__ 21 + #define __ASM_KVM_FPU_H__ 22 + 23 + #include <linux/types.h> 24 + 25 + extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1); 26 + extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1); 27 + extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1); 28 + 29 + extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); 30 + extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); 31 + extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); 32 + extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); 33 + 34 + extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 35 + u32 *src3); 36 + extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 37 + u32 *src3); 38 + extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 39 + u32 *src3); 40 + extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 41 + u32 *src3); 42 + extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 43 + u32 *src3); 44 + 
45 + #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ 46 + u64 *dst, u64 *src1); 47 + #define FPD_TWO_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ 48 + u64 *dst, u64 *src1, u64 *src2); 49 + #define FPD_THREE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ 50 + u64 *dst, u64 *src1, u64 *src2, u64 *src3); 51 + 52 + extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); 53 + extern void fpd_fcmpo(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); 54 + 55 + FPD_ONE_IN(fsqrts) 56 + FPD_ONE_IN(frsqrtes) 57 + FPD_ONE_IN(fres) 58 + FPD_ONE_IN(frsp) 59 + FPD_ONE_IN(fctiw) 60 + FPD_ONE_IN(fctiwz) 61 + FPD_ONE_IN(fsqrt) 62 + FPD_ONE_IN(fre) 63 + FPD_ONE_IN(frsqrte) 64 + FPD_ONE_IN(fneg) 65 + FPD_ONE_IN(fabs) 66 + FPD_TWO_IN(fadds) 67 + FPD_TWO_IN(fsubs) 68 + FPD_TWO_IN(fdivs) 69 + FPD_TWO_IN(fmuls) 70 + FPD_TWO_IN(fcpsgn) 71 + FPD_TWO_IN(fdiv) 72 + FPD_TWO_IN(fadd) 73 + FPD_TWO_IN(fmul) 74 + FPD_TWO_IN(fsub) 75 + FPD_THREE_IN(fmsubs) 76 + FPD_THREE_IN(fmadds) 77 + FPD_THREE_IN(fnmsubs) 78 + FPD_THREE_IN(fnmadds) 79 + FPD_THREE_IN(fsel) 80 + FPD_THREE_IN(fmsub) 81 + FPD_THREE_IN(fmadd) 82 + FPD_THREE_IN(fnmsub) 83 + FPD_THREE_IN(fnmadd) 84 + 85 + #endif
+22 -16
arch/powerpc/include/asm/kvm_host.h
··· 66 66 u32 dec_exits; 67 67 u32 ext_intr_exits; 68 68 u32 halt_wakeup; 69 - #ifdef CONFIG_PPC64 69 + #ifdef CONFIG_PPC_BOOK3S 70 70 u32 pf_storage; 71 71 u32 pf_instruc; 72 72 u32 sp_storage; ··· 124 124 }; 125 125 126 126 struct kvmppc_pte { 127 - u64 eaddr; 127 + ulong eaddr; 128 128 u64 vpage; 129 - u64 raddr; 130 - bool may_read; 131 - bool may_write; 132 - bool may_execute; 129 + ulong raddr; 130 + bool may_read : 1; 131 + bool may_write : 1; 132 + bool may_execute : 1; 133 133 }; 134 134 135 135 struct kvmppc_mmu { ··· 145 145 int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); 146 146 void (*reset_msr)(struct kvm_vcpu *vcpu); 147 147 void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); 148 - int (*esid_to_vsid)(struct kvm_vcpu *vcpu, u64 esid, u64 *vsid); 148 + int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); 149 149 u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); 150 150 bool (*is_dcbz32)(struct kvm_vcpu *vcpu); 151 151 }; ··· 160 160 struct kvm_vcpu_arch { 161 161 ulong host_stack; 162 162 u32 host_pid; 163 - #ifdef CONFIG_PPC64 163 + #ifdef CONFIG_PPC_BOOK3S 164 164 ulong host_msr; 165 165 ulong host_r2; 166 166 void *host_retip; ··· 175 175 ulong gpr[32]; 176 176 177 177 u64 fpr[32]; 178 - u32 fpscr; 178 + u64 fpscr; 179 179 180 180 #ifdef CONFIG_ALTIVEC 181 181 vector128 vr[32]; ··· 186 186 u64 vsr[32]; 187 187 #endif 188 188 189 + #ifdef CONFIG_PPC_BOOK3S 190 + /* For Gekko paired singles */ 191 + u32 qpr[32]; 192 + #endif 193 + 194 + #ifdef CONFIG_BOOKE 189 195 ulong pc; 190 196 ulong ctr; 191 197 ulong lr; 192 198 193 - #ifdef CONFIG_BOOKE 194 199 ulong xer; 195 200 u32 cr; 196 201 #endif 197 202 198 203 ulong msr; 199 - #ifdef CONFIG_PPC64 204 + #ifdef CONFIG_PPC_BOOK3S 200 205 ulong shadow_msr; 201 - ulong shadow_srr1; 202 206 ulong hflags; 203 207 ulong guest_owned_ext; 204 208 #endif ··· 257 253 struct dentry *debugfs_exit_timing; 258 254 #endif 259 255 256 + #ifdef 
CONFIG_BOOKE 260 257 u32 last_inst; 261 - #ifdef CONFIG_PPC64 262 - ulong fault_dsisr; 263 - #endif 264 258 ulong fault_dear; 265 259 ulong fault_esr; 266 260 ulong queued_dear; 267 261 ulong queued_esr; 262 + #endif 268 263 gpa_t paddr_accessed; 269 264 270 265 u8 io_gpr; /* GPR used as IO source/target */ 271 266 u8 mmio_is_bigendian; 267 + u8 mmio_sign_extend; 272 268 u8 dcr_needed; 273 269 u8 dcr_is_write; 270 + u8 osi_needed; 271 + u8 osi_enabled; 274 272 275 273 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ 276 274 ··· 281 275 u64 dec_jiffies; 282 276 unsigned long pending_exceptions; 283 277 284 - #ifdef CONFIG_PPC64 278 + #ifdef CONFIG_PPC_BOOK3S 285 279 struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; 286 280 int hpte_cache_offset; 287 281 #endif
+37 -72
arch/powerpc/include/asm/kvm_ppc.h
··· 30 30 #include <linux/kvm_host.h> 31 31 #ifdef CONFIG_PPC_BOOK3S 32 32 #include <asm/kvm_book3s.h> 33 + #else 34 + #include <asm/kvm_booke.h> 33 35 #endif 34 36 35 37 enum emulation_result { ··· 39 37 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ 40 38 EMULATE_DO_DCR, /* kvm_run filled with DCR request */ 41 39 EMULATE_FAIL, /* can't emulate this instruction */ 40 + EMULATE_AGAIN, /* something went wrong. go again */ 42 41 }; 43 42 44 43 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); ··· 51 48 extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 52 49 unsigned int rt, unsigned int bytes, 53 50 int is_bigendian); 51 + extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, 52 + unsigned int rt, unsigned int bytes, 53 + int is_bigendian); 54 54 extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 55 - u32 val, unsigned int bytes, int is_bigendian); 55 + u64 val, unsigned int bytes, int is_bigendian); 56 56 57 57 extern int kvmppc_emulate_instruction(struct kvm_run *run, 58 58 struct kvm_vcpu *vcpu); ··· 69 63 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); 70 64 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); 71 65 extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); 66 + extern int kvmppc_mmu_init(struct kvm_vcpu *vcpu); 72 67 extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); 73 68 extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); 74 69 extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, ··· 95 88 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); 96 89 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 97 90 struct kvm_interrupt *irq); 91 + extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, 92 + struct kvm_interrupt *irq); 98 93 99 94 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 100 95 
unsigned int op, int *advance); ··· 108 99 109 100 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 110 101 111 - #ifdef CONFIG_PPC_BOOK3S 112 - 113 - /* We assume we're always acting on the current vcpu */ 114 - 115 - static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 102 + /* 103 + * Cuts out inst bits with ordering according to spec. 104 + * That means the leftmost bit is zero. All given bits are included. 105 + */ 106 + static inline u32 kvmppc_get_field(u64 inst, int msb, int lsb) 116 107 { 117 - if ( num < 14 ) { 118 - get_paca()->shadow_vcpu.gpr[num] = val; 119 - to_book3s(vcpu)->shadow_vcpu.gpr[num] = val; 120 - } else 121 - vcpu->arch.gpr[num] = val; 108 + u32 r; 109 + u32 mask; 110 + 111 + BUG_ON(msb > lsb); 112 + 113 + mask = (1 << (lsb - msb + 1)) - 1; 114 + r = (inst >> (63 - lsb)) & mask; 115 + 116 + return r; 122 117 } 123 118 124 - static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 119 + /* 120 + * Replaces inst bits with ordering according to spec. 
121 + */ 122 + static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) 125 123 { 126 - if ( num < 14 ) 127 - return get_paca()->shadow_vcpu.gpr[num]; 128 - else 129 - return vcpu->arch.gpr[num]; 124 + u32 r; 125 + u32 mask; 126 + 127 + BUG_ON(msb > lsb); 128 + 129 + mask = ((1 << (lsb - msb + 1)) - 1) << (63 - lsb); 130 + r = (inst & ~mask) | ((value << (63 - lsb)) & mask); 131 + 132 + return r; 130 133 } 131 - 132 - static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 133 - { 134 - get_paca()->shadow_vcpu.cr = val; 135 - to_book3s(vcpu)->shadow_vcpu.cr = val; 136 - } 137 - 138 - static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 139 - { 140 - return get_paca()->shadow_vcpu.cr; 141 - } 142 - 143 - static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 144 - { 145 - get_paca()->shadow_vcpu.xer = val; 146 - to_book3s(vcpu)->shadow_vcpu.xer = val; 147 - } 148 - 149 - static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 150 - { 151 - return get_paca()->shadow_vcpu.xer; 152 - } 153 - 154 - #else 155 - 156 - static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 157 - { 158 - vcpu->arch.gpr[num] = val; 159 - } 160 - 161 - static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 162 - { 163 - return vcpu->arch.gpr[num]; 164 - } 165 - 166 - static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 167 - { 168 - vcpu->arch.cr = val; 169 - } 170 - 171 - static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 172 - { 173 - return vcpu->arch.cr; 174 - } 175 - 176 - static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 177 - { 178 - vcpu->arch.xer = val; 179 - } 180 - 181 - static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 182 - { 183 - return vcpu->arch.xer; 184 - } 185 - 186 - #endif 187 134 188 135 #endif /* __POWERPC_KVM_PPC_H__ */
+2
arch/powerpc/include/asm/mmu_context.h
··· 27 27 extern void __destroy_context(int context_id); 28 28 static inline void mmu_context_init(void) { } 29 29 #else 30 + extern unsigned long __init_new_context(void); 31 + extern void __destroy_context(unsigned long context_id); 30 32 extern void mmu_context_init(void); 31 33 #endif 32 34
+2 -8
arch/powerpc/include/asm/paca.h
··· 23 23 #include <asm/page.h> 24 24 #include <asm/exception-64e.h> 25 25 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 26 - #include <asm/kvm_book3s_64_asm.h> 26 + #include <asm/kvm_book3s_asm.h> 27 27 #endif 28 28 29 29 register struct paca_struct *local_paca asm("r13"); ··· 137 137 u64 startpurr; /* PURR/TB value snapshot */ 138 138 u64 startspurr; /* SPURR value snapshot */ 139 139 140 - #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 141 - struct { 142 - u64 esid; 143 - u64 vsid; 144 - } kvm_slb[64]; /* guest SLB */ 140 + #ifdef CONFIG_KVM_BOOK3S_HANDLER 145 141 /* We use this to store guest state in */ 146 142 struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 147 - u8 kvm_slb_max; /* highest used guest slb entry */ 148 - u8 kvm_in_guest; /* are we inside the guest? */ 149 143 #endif 150 144 }; 151 145
+3
arch/powerpc/include/asm/processor.h
··· 229 229 unsigned long spefscr; /* SPE & eFP status */ 230 230 int used_spe; /* set if process has used spe */ 231 231 #endif /* CONFIG_SPE */ 232 + #ifdef CONFIG_KVM_BOOK3S_32_HANDLER 233 + void* kvm_shadow_vcpu; /* KVM internal data */ 234 + #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ 232 235 }; 233 236 234 237 #define ARCH_MIN_TASKALIGN 16
+10
arch/powerpc/include/asm/reg.h
··· 293 293 #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ 294 294 #define HID1_PS (1<<16) /* 750FX PLL selection */ 295 295 #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ 296 + #define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ 296 297 #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ 297 298 #define SPRN_IABR2 0x3FA /* 83xx */ 298 299 #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ 299 300 #define SPRN_HID4 0x3F4 /* 970 HID4 */ 301 + #define SPRN_HID4_GEKKO 0x3F3 /* Gekko HID4 */ 300 302 #define SPRN_HID5 0x3F6 /* 970 HID5 */ 301 303 #define SPRN_HID6 0x3F9 /* BE HID 6 */ 302 304 #define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */ ··· 466 464 #define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ 467 465 #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ 468 466 #define SPRN_XER 0x001 /* Fixed Point Exception Register */ 467 + 468 + #define SPRN_MMCR0_GEKKO 0x3B8 /* Gekko Monitor Mode Control Register 0 */ 469 + #define SPRN_MMCR1_GEKKO 0x3BC /* Gekko Monitor Mode Control Register 1 */ 470 + #define SPRN_PMC1_GEKKO 0x3B9 /* Gekko Performance Monitor Control 1 */ 471 + #define SPRN_PMC2_GEKKO 0x3BA /* Gekko Performance Monitor Control 2 */ 472 + #define SPRN_PMC3_GEKKO 0x3BD /* Gekko Performance Monitor Control 3 */ 473 + #define SPRN_PMC4_GEKKO 0x3BE /* Gekko Performance Monitor Control 4 */ 474 + #define SPRN_WPAR_GEKKO 0x399 /* Gekko Write Pipe Address Register */ 469 475 470 476 #define SPRN_SCOMC 0x114 /* SCOM Access Control */ 471 477 #define SPRN_SCOMD 0x115 /* SCOM Access DATA */
+61 -41
arch/powerpc/kernel/asm-offsets.c
··· 50 50 #endif 51 51 #ifdef CONFIG_KVM 52 52 #include <linux/kvm_host.h> 53 + #ifndef CONFIG_BOOKE 54 + #include <asm/kvm_book3s.h> 55 + #endif 53 56 #endif 54 57 55 58 #ifdef CONFIG_PPC32 ··· 108 105 DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); 109 106 #endif /* CONFIG_SPE */ 110 107 #endif /* CONFIG_PPC64 */ 108 + #ifdef CONFIG_KVM_BOOK3S_32_HANDLER 109 + DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); 110 + #endif 111 111 112 112 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); 113 113 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); ··· 197 191 DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); 198 192 DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); 199 193 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 200 - DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); 201 - DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); 202 - DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); 203 - DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr)); 204 - DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer)); 205 - DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0])); 206 - DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1])); 207 - DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2])); 208 - DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3])); 209 - DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4])); 210 - DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5])); 211 - DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6])); 212 - DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7])); 213 - DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8])); 214 - DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9])); 215 - DEFINE(PACA_KVM_R10, offsetof(struct 
paca_struct, shadow_vcpu.gpr[10])); 216 - DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11])); 217 - DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12])); 218 - DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13])); 219 - DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1)); 220 - DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2)); 221 - DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct, 222 - shadow_vcpu.vmhandler)); 223 - DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct, 224 - shadow_vcpu.scratch0)); 225 - DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct, 226 - shadow_vcpu.scratch1)); 194 + DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); 195 + DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb)); 196 + DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max)); 227 197 #endif 228 198 #endif /* CONFIG_PPC64 */ 229 199 ··· 210 228 /* Interrupt register frame */ 211 229 DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); 212 230 DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); 213 - #ifdef CONFIG_PPC64 214 231 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); 232 + #ifdef CONFIG_PPC64 215 233 /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. 
*/ 216 234 DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); 217 235 DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); ··· 394 412 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 395 413 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 396 414 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 397 - DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 398 - DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); 399 - DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 400 415 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); 401 416 DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); 402 417 DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); ··· 401 422 DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); 402 423 DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); 403 424 404 - DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); 405 - DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); 406 - DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); 407 - 408 - /* book3s_64 */ 409 - #ifdef CONFIG_PPC64 410 - DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); 425 + /* book3s */ 426 + #ifdef CONFIG_PPC_BOOK3S 411 427 DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); 412 - DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); 413 428 DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); 414 429 DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); 415 - DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); 416 430 DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); 417 431 DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); 418 432 DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); 419 433 DEFINE(VCPU_RMCALL, 
offsetof(struct kvm_vcpu, arch.rmcall)); 420 434 DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); 435 + DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - 436 + offsetof(struct kvmppc_vcpu_book3s, vcpu)); 437 + DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr)); 438 + DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer)); 439 + DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr)); 440 + DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr)); 441 + DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc)); 442 + DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0])); 443 + DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1])); 444 + DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2])); 445 + DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3])); 446 + DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4])); 447 + DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5])); 448 + DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6])); 449 + DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7])); 450 + DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8])); 451 + DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9])); 452 + DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10])); 453 + DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11])); 454 + DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12])); 455 + DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13])); 456 + DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1)); 457 + DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2)); 458 + DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu, 459 + vmhandler)); 460 + DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu, 
461 + scratch0)); 462 + DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu, 463 + scratch1)); 464 + DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu, 465 + in_guest)); 466 + DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu, 467 + fault_dsisr)); 468 + DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu, 469 + fault_dar)); 470 + DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu, 471 + last_inst)); 472 + DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu, 473 + shadow_srr1)); 474 + #ifdef CONFIG_PPC_BOOK3S_32 475 + DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr)); 476 + #endif 421 477 #else 422 478 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 423 479 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); 424 - #endif /* CONFIG_PPC64 */ 480 + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 481 + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); 482 + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 483 + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); 484 + DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); 485 + DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); 486 + #endif /* CONFIG_PPC_BOOK3S */ 425 487 #endif 426 488 #ifdef CONFIG_44x 427 489 DEFINE(PGD_T_LOG2, PGD_T_LOG2);
+14
arch/powerpc/kernel/head_32.S
··· 33 33 #include <asm/asm-offsets.h> 34 34 #include <asm/ptrace.h> 35 35 #include <asm/bug.h> 36 + #include <asm/kvm_book3s_asm.h> 36 37 37 38 /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ 38 39 #define LOAD_BAT(n, reg, RA, RB) \ ··· 304 303 */ 305 304 #define EXCEPTION(n, label, hdlr, xfer) \ 306 305 . = n; \ 306 + DO_KVM n; \ 307 307 label: \ 308 308 EXCEPTION_PROLOG; \ 309 309 addi r3,r1,STACK_FRAME_OVERHEAD; \ ··· 360 358 * -- paulus. 361 359 */ 362 360 . = 0x200 361 + DO_KVM 0x200 363 362 mtspr SPRN_SPRG_SCRATCH0,r10 364 363 mtspr SPRN_SPRG_SCRATCH1,r11 365 364 mfcr r10 ··· 384 381 385 382 /* Data access exception. */ 386 383 . = 0x300 384 + DO_KVM 0x300 387 385 DataAccess: 388 386 EXCEPTION_PROLOG 389 387 mfspr r10,SPRN_DSISR ··· 401 397 402 398 /* Instruction access exception. */ 403 399 . = 0x400 400 + DO_KVM 0x400 404 401 InstructionAccess: 405 402 EXCEPTION_PROLOG 406 403 andis. r0,r9,0x4000 /* no pte found? */ ··· 418 413 419 414 /* Alignment exception */ 420 415 . = 0x600 416 + DO_KVM 0x600 421 417 Alignment: 422 418 EXCEPTION_PROLOG 423 419 mfspr r4,SPRN_DAR ··· 433 427 434 428 /* Floating-point unavailable */ 435 429 . = 0x800 430 + DO_KVM 0x800 436 431 FPUnavailable: 437 432 BEGIN_FTR_SECTION 438 433 /* ··· 457 450 458 451 /* System call */ 459 452 . = 0xc00 453 + DO_KVM 0xc00 460 454 SystemCall: 461 455 EXCEPTION_PROLOG 462 456 EXC_XFER_EE_LITE(0xc00, DoSyscall) ··· 475 467 * by executing an altivec instruction. 476 468 */ 477 469 . = 0xf00 470 + DO_KVM 0xf00 478 471 b PerformanceMonitor 479 472 480 473 . = 0xf20 474 + DO_KVM 0xf20 481 475 b AltiVecUnavailable 482 476 483 477 /* ··· 891 881 SYNC 892 882 RFI 893 883 #endif /* CONFIG_SMP */ 884 + 885 + #ifdef CONFIG_KVM_BOOK3S_HANDLER 886 + #include "../kvm/book3s_rmhandlers.S" 887 + #endif 894 888 895 889 /* 896 890 * Those generic dummy functions are kept for CPUs not
+2 -2
arch/powerpc/kernel/head_64.S
··· 37 37 #include <asm/firmware.h> 38 38 #include <asm/page_64.h> 39 39 #include <asm/irqflags.h> 40 - #include <asm/kvm_book3s_64_asm.h> 40 + #include <asm/kvm_book3s_asm.h> 41 41 42 42 /* The physical memory is layed out such that the secondary processor 43 43 * spin code sits at 0x0000...0x00ff. On server, the vectors follow ··· 169 169 /* KVM trampoline code needs to be close to the interrupt handlers */ 170 170 171 171 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 172 - #include "../kvm/book3s_64_rmhandlers.S" 172 + #include "../kvm/book3s_rmhandlers.S" 173 173 #endif 174 174 175 175 _GLOBAL(generic_secondary_thread_init)
+4
arch/powerpc/kernel/ppc_ksyms.c
··· 101 101 EXPORT_SYMBOL(start_thread); 102 102 EXPORT_SYMBOL(kernel_thread); 103 103 104 + #ifndef CONFIG_BOOKE 105 + EXPORT_SYMBOL_GPL(cvt_df); 106 + EXPORT_SYMBOL_GPL(cvt_fd); 107 + #endif 104 108 EXPORT_SYMBOL(giveup_fpu); 105 109 #ifdef CONFIG_ALTIVEC 106 110 EXPORT_SYMBOL(giveup_altivec);
+1 -1
arch/powerpc/kvm/44x.c
··· 147 147 if (r) 148 148 return r; 149 149 150 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); 150 + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); 151 151 } 152 152 153 153 static void __exit kvmppc_44x_exit(void)
+23 -1
arch/powerpc/kvm/Kconfig
··· 22 22 select ANON_INODES 23 23 select KVM_MMIO 24 24 25 + config KVM_BOOK3S_HANDLER 26 + bool 27 + 28 + config KVM_BOOK3S_32_HANDLER 29 + bool 30 + select KVM_BOOK3S_HANDLER 31 + 25 32 config KVM_BOOK3S_64_HANDLER 26 33 bool 34 + select KVM_BOOK3S_HANDLER 35 + 36 + config KVM_BOOK3S_32 37 + tristate "KVM support for PowerPC book3s_32 processors" 38 + depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT 39 + select KVM 40 + select KVM_BOOK3S_32_HANDLER 41 + ---help--- 42 + Support running unmodified book3s_32 guest kernels 43 + in virtual machines on book3s_32 host processors. 44 + 45 + This module provides access to the hardware capabilities through 46 + a character device node named /dev/kvm. 47 + 48 + If unsure, say N. 27 49 28 50 config KVM_BOOK3S_64 29 51 tristate "KVM support for PowerPC book3s_64 processors" 30 - depends on EXPERIMENTAL && PPC64 52 + depends on EXPERIMENTAL && PPC_BOOK3S_64 31 53 select KVM 32 54 select KVM_BOOK3S_64_HANDLER 33 55 ---help---
+17 -3
arch/powerpc/kvm/Makefile
··· 14 14 15 15 common-objs-y += powerpc.o emulate.o 16 16 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o 17 - obj-$(CONFIG_KVM_BOOK3S_64_HANDLER) += book3s_64_exports.o 17 + obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o 18 18 19 19 AFLAGS_booke_interrupts.o := -I$(obj) 20 20 ··· 40 40 41 41 kvm-book3s_64-objs := \ 42 42 $(common-objs-y) \ 43 + fpu.o \ 44 + book3s_paired_singles.o \ 43 45 book3s.o \ 44 - book3s_64_emulate.o \ 45 - book3s_64_interrupts.o \ 46 + book3s_emulate.o \ 47 + book3s_interrupts.o \ 46 48 book3s_64_mmu_host.o \ 47 49 book3s_64_mmu.o \ 48 50 book3s_32_mmu.o 49 51 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) 52 + 53 + kvm-book3s_32-objs := \ 54 + $(common-objs-y) \ 55 + fpu.o \ 56 + book3s_paired_singles.o \ 57 + book3s.o \ 58 + book3s_emulate.o \ 59 + book3s_interrupts.o \ 60 + book3s_32_mmu_host.o \ 61 + book3s_32_mmu.o 62 + kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) 50 63 51 64 kvm-objs := $(kvm-objs-m) $(kvm-objs-y) 52 65 53 66 obj-$(CONFIG_KVM_440) += kvm.o 54 67 obj-$(CONFIG_KVM_E500) += kvm.o 55 68 obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o 69 + obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o 56 70
+360 -143
arch/powerpc/kvm/book3s.c
··· 16 16 17 17 #include <linux/kvm_host.h> 18 18 #include <linux/err.h> 19 + #include <linux/slab.h> 19 20 20 21 #include <asm/reg.h> 21 22 #include <asm/cputable.h> ··· 30 29 #include <linux/gfp.h> 31 30 #include <linux/sched.h> 32 31 #include <linux/vmalloc.h> 32 + #include <linux/highmem.h> 33 33 34 34 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 35 35 ··· 38 36 /* #define EXIT_DEBUG_SIMPLE */ 39 37 /* #define DEBUG_EXT */ 40 38 41 - static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); 39 + static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 40 + ulong msr); 41 + 42 + /* Some compatibility defines */ 43 + #ifdef CONFIG_PPC_BOOK3S_32 44 + #define MSR_USER32 MSR_USER 45 + #define MSR_USER64 MSR_USER 46 + #define HW_PAGE_SIZE PAGE_SIZE 47 + #endif 42 48 43 49 struct kvm_stats_debugfs_item debugfs_entries[] = { 44 50 { "exits", VCPU_STAT(sum_exits) }, ··· 79 69 80 70 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 81 71 { 82 - memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); 83 - memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu, 72 + #ifdef CONFIG_PPC_BOOK3S_64 73 + memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); 74 + memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, 84 75 sizeof(get_paca()->shadow_vcpu)); 85 - get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; 76 + to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; 77 + #endif 78 + 79 + #ifdef CONFIG_PPC_BOOK3S_32 80 + current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; 81 + #endif 86 82 } 87 83 88 84 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 89 85 { 90 - memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); 91 - memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 86 + #ifdef CONFIG_PPC_BOOK3S_64 87 + memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, 
sizeof(to_svcpu(vcpu)->slb)); 88 + memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 92 89 sizeof(get_paca()->shadow_vcpu)); 93 - to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; 90 + to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; 91 + #endif 94 92 95 93 kvmppc_giveup_ext(vcpu, MSR_FP); 96 94 kvmppc_giveup_ext(vcpu, MSR_VEC); ··· 149 131 } 150 132 } 151 133 152 - if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || 153 - (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { 134 + if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) != 135 + (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { 154 136 kvmppc_mmu_flush_segments(vcpu); 155 - kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); 137 + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 156 138 } 139 + 140 + /* Preload FPU if it's enabled */ 141 + if (vcpu->arch.msr & MSR_FP) 142 + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 157 143 } 158 144 159 145 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) 160 146 { 161 - vcpu->arch.srr0 = vcpu->arch.pc; 147 + vcpu->arch.srr0 = kvmppc_get_pc(vcpu); 162 148 vcpu->arch.srr1 = vcpu->arch.msr | flags; 163 - vcpu->arch.pc = to_book3s(vcpu)->hior + vec; 149 + kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); 164 150 vcpu->arch.mmu.reset_msr(vcpu); 165 151 } 166 152 ··· 238 216 struct kvm_interrupt *irq) 239 217 { 240 218 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 219 + } 220 + 221 + void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, 222 + struct kvm_interrupt *irq) 223 + { 224 + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 241 225 } 242 226 243 227 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) ··· 330 302 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); 331 303 #endif 332 304 priority = __ffs(*pending); 333 - while (priority <= (sizeof(unsigned int) * 8)) { 305 + while (priority < 
BOOK3S_IRQPRIO_MAX) { 334 306 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && 335 307 (priority != BOOK3S_IRQPRIO_DECREMENTER)) { 336 308 /* DEC interrupts get cleared by mtdec */ ··· 346 318 347 319 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 348 320 { 321 + u32 host_pvr; 322 + 349 323 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; 350 324 vcpu->arch.pvr = pvr; 325 + #ifdef CONFIG_PPC_BOOK3S_64 351 326 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { 352 327 kvmppc_mmu_book3s_64_init(vcpu); 353 328 to_book3s(vcpu)->hior = 0xfff00000; 354 329 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; 355 - } else { 330 + } else 331 + #endif 332 + { 356 333 kvmppc_mmu_book3s_32_init(vcpu); 357 334 to_book3s(vcpu)->hior = 0; 358 335 to_book3s(vcpu)->msr_mask = 0xffffffffULL; ··· 370 337 !strcmp(cur_cpu_spec->platform, "ppc970")) 371 338 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 372 339 340 + /* Cell performs badly if MSR_FEx are set. So let's hope nobody 341 + really needs them in a VM on Cell and force disable them. 
*/ 342 + if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) 343 + to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); 344 + 345 + #ifdef CONFIG_PPC_BOOK3S_32 346 + /* 32 bit Book3S always has 32 byte dcbz */ 347 + vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 348 + #endif 349 + 350 + /* On some CPUs we can execute paired single operations natively */ 351 + asm ( "mfpvr %0" : "=r"(host_pvr)); 352 + switch (host_pvr) { 353 + case 0x00080200: /* lonestar 2.0 */ 354 + case 0x00088202: /* lonestar 2.2 */ 355 + case 0x70000100: /* gekko 1.0 */ 356 + case 0x00080100: /* gekko 2.0 */ 357 + case 0x00083203: /* gekko 2.3a */ 358 + case 0x00083213: /* gekko 2.3b */ 359 + case 0x00083204: /* gekko 2.4 */ 360 + case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ 361 + case 0x00087200: /* broadway */ 362 + vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; 363 + /* Enable HID2.PSE - in case we need it later */ 364 + mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); 365 + } 373 366 } 374 367 375 368 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. 
To ··· 409 350 */ 410 351 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 411 352 { 412 - bool touched = false; 413 - hva_t hpage; 353 + struct page *hpage; 354 + u64 hpage_offset; 414 355 u32 *page; 415 356 int i; 416 357 417 - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 418 - if (kvm_is_error_hva(hpage)) 358 + hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 359 + if (is_error_page(hpage)) 419 360 return; 420 361 421 - hpage |= pte->raddr & ~PAGE_MASK; 422 - hpage &= ~0xFFFULL; 362 + hpage_offset = pte->raddr & ~PAGE_MASK; 363 + hpage_offset &= ~0xFFFULL; 364 + hpage_offset /= 4; 423 365 424 - page = vmalloc(HW_PAGE_SIZE); 366 + get_page(hpage); 367 + page = kmap_atomic(hpage, KM_USER0); 425 368 426 - if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) 427 - goto out; 369 + /* patch dcbz into reserved instruction, so we trap */ 370 + for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) 371 + if ((page[i] & 0xff0007ff) == INS_DCBZ) 372 + page[i] &= 0xfffffff7; 428 373 429 - for (i=0; i < HW_PAGE_SIZE / 4; i++) 430 - if ((page[i] & 0xff0007ff) == INS_DCBZ) { 431 - page[i] &= 0xfffffff7; // reserved instruction, so we trap 432 - touched = true; 433 - } 434 - 435 - if (touched) 436 - copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); 437 - 438 - out: 439 - vfree(page); 374 + kunmap_atomic(page, KM_USER0); 375 + put_page(hpage); 440 376 } 441 377 442 378 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, ··· 445 391 } else { 446 392 pte->eaddr = eaddr; 447 393 pte->raddr = eaddr & 0xffffffff; 448 - pte->vpage = eaddr >> 12; 449 - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 450 - case 0: 451 - pte->vpage |= VSID_REAL; 452 - case MSR_DR: 453 - pte->vpage |= VSID_REAL_DR; 454 - case MSR_IR: 455 - pte->vpage |= VSID_REAL_IR; 456 - } 394 + pte->vpage = VSID_REAL | eaddr >> 12; 457 395 pte->may_read = true; 458 396 pte->may_write = true; 459 397 pte->may_execute = true; ··· 480 
434 return kvmppc_bad_hva(); 481 435 } 482 436 483 - int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) 437 + int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 438 + bool data) 484 439 { 485 440 struct kvmppc_pte pte; 486 - hva_t hva = eaddr; 487 441 488 442 vcpu->stat.st++; 489 443 490 - if (kvmppc_xlate(vcpu, eaddr, false, &pte)) 491 - goto err; 444 + if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 445 + return -ENOENT; 492 446 493 - hva = kvmppc_pte_to_hva(vcpu, &pte, false); 494 - if (kvm_is_error_hva(hva)) 495 - goto err; 447 + *eaddr = pte.raddr; 496 448 497 - if (copy_to_user((void __user *)hva, ptr, size)) { 498 - printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); 499 - goto err; 500 - } 449 + if (!pte.may_write) 450 + return -EPERM; 501 451 502 - return 0; 452 + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) 453 + return EMULATE_DO_MMIO; 503 454 504 - err: 505 - return -ENOENT; 455 + return EMULATE_DONE; 506 456 } 507 457 508 - int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, 458 + int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 509 459 bool data) 510 460 { 511 461 struct kvmppc_pte pte; 512 - hva_t hva = eaddr; 462 + hva_t hva = *eaddr; 513 463 514 464 vcpu->stat.ld++; 515 465 516 - if (kvmppc_xlate(vcpu, eaddr, data, &pte)) 517 - goto err; 466 + if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 467 + goto nopte; 468 + 469 + *eaddr = pte.raddr; 518 470 519 471 hva = kvmppc_pte_to_hva(vcpu, &pte, true); 520 472 if (kvm_is_error_hva(hva)) 521 - goto err; 473 + goto mmio; 522 474 523 475 if (copy_from_user(ptr, (void __user *)hva, size)) { 524 476 printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); 525 - goto err; 477 + goto mmio; 526 478 } 527 479 528 - return 0; 480 + return EMULATE_DONE; 529 481 530 - err: 482 + nopte: 531 483 return -ENOENT; 484 + mmio: 485 + return EMULATE_DO_MMIO; 532 486 } 533 487 534 488 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t 
gfn) ··· 545 499 int page_found = 0; 546 500 struct kvmppc_pte pte; 547 501 bool is_mmio = false; 502 + bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; 503 + bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; 504 + u64 vsid; 548 505 549 - if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { 550 - relocated = (vcpu->arch.msr & MSR_DR); 551 - } else { 552 - relocated = (vcpu->arch.msr & MSR_IR); 553 - } 506 + relocated = data ? dr : ir; 554 507 555 508 /* Resolve real address if translation turned on */ 556 509 if (relocated) { ··· 561 516 pte.raddr = eaddr & 0xffffffff; 562 517 pte.eaddr = eaddr; 563 518 pte.vpage = eaddr >> 12; 564 - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 565 - case 0: 566 - pte.vpage |= VSID_REAL; 567 - case MSR_DR: 568 - pte.vpage |= VSID_REAL_DR; 569 - case MSR_IR: 570 - pte.vpage |= VSID_REAL_IR; 571 - } 519 + } 520 + 521 + switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 522 + case 0: 523 + pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); 524 + break; 525 + case MSR_DR: 526 + case MSR_IR: 527 + vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); 528 + 529 + if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR) 530 + pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); 531 + else 532 + pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); 533 + pte.vpage |= vsid; 534 + 535 + if (vsid == -1) 536 + page_found = -EINVAL; 537 + break; 572 538 } 573 539 574 540 if (vcpu->arch.mmu.is_dcbz32(vcpu) && ··· 594 538 595 539 if (page_found == -ENOENT) { 596 540 /* Page not found in guest PTE entries */ 597 - vcpu->arch.dear = vcpu->arch.fault_dear; 598 - to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 599 - vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); 541 + vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); 542 + to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; 543 + vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); 600 544 kvmppc_book3s_queue_irqprio(vcpu, vec); 601 545 } else if (page_found == -EPERM) 
{ 602 546 /* Storage protection */ 603 - vcpu->arch.dear = vcpu->arch.fault_dear; 604 - to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 547 + vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); 548 + to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; 605 549 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; 606 - vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); 550 + vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); 607 551 kvmppc_book3s_queue_irqprio(vcpu, vec); 608 552 } else if (page_found == -EINVAL) { 609 553 /* Page not found in guest SLB */ 610 - vcpu->arch.dear = vcpu->arch.fault_dear; 554 + vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); 611 555 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 612 556 } else if (!is_mmio && 613 557 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { ··· 639 583 } 640 584 641 585 /* Give up external provider (FPU, Altivec, VSX) */ 642 - static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) 586 + void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) 643 587 { 644 588 struct thread_struct *t = &current->thread; 645 589 u64 *vcpu_fpr = vcpu->arch.fpr; 590 + #ifdef CONFIG_VSX 646 591 u64 *vcpu_vsx = vcpu->arch.vsr; 592 + #endif 647 593 u64 *thread_fpr = (u64*)t->fpr; 648 594 int i; 649 595 ··· 687 629 kvmppc_recalc_shadow_msr(vcpu); 688 630 } 689 631 632 + static int kvmppc_read_inst(struct kvm_vcpu *vcpu) 633 + { 634 + ulong srr0 = kvmppc_get_pc(vcpu); 635 + u32 last_inst = kvmppc_get_last_inst(vcpu); 636 + int ret; 637 + 638 + ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); 639 + if (ret == -ENOENT) { 640 + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); 641 + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); 642 + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); 643 + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); 644 + return EMULATE_AGAIN; 645 + } 646 + 647 + return 
EMULATE_DONE; 648 + } 649 + 650 + static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) 651 + { 652 + 653 + /* Need to do paired single emulation? */ 654 + if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) 655 + return EMULATE_DONE; 656 + 657 + /* Read out the instruction */ 658 + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) 659 + /* Need to emulate */ 660 + return EMULATE_FAIL; 661 + 662 + return EMULATE_AGAIN; 663 + } 664 + 690 665 /* Handle external providers (FPU, Altivec, VSX) */ 691 666 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 692 667 ulong msr) 693 668 { 694 669 struct thread_struct *t = &current->thread; 695 670 u64 *vcpu_fpr = vcpu->arch.fpr; 671 + #ifdef CONFIG_VSX 696 672 u64 *vcpu_vsx = vcpu->arch.vsr; 673 + #endif 697 674 u64 *thread_fpr = (u64*)t->fpr; 698 675 int i; 699 676 677 + /* When we have paired singles, we emulate in software */ 678 + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) 679 + return RESUME_GUEST; 680 + 700 681 if (!(vcpu->arch.msr & msr)) { 701 682 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 683 + return RESUME_GUEST; 684 + } 685 + 686 + /* We already own the ext */ 687 + if (vcpu->arch.guest_owned_ext & msr) { 702 688 return RESUME_GUEST; 703 689 } 704 690 ··· 798 696 run->ready_for_interrupt_injection = 1; 799 697 #ifdef EXIT_DEBUG 800 698 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", 801 - exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, 802 - kvmppc_get_dec(vcpu), vcpu->arch.msr); 699 + exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), 700 + kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1); 803 701 #elif defined (EXIT_DEBUG_SIMPLE) 804 702 if ((exit_nr != 0x900) && (exit_nr != 0x500)) 805 703 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", 806 - exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, 704 + exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), 807 705 vcpu->arch.msr); 808 706 #endif 809 707 
kvm_resched(vcpu); 810 708 switch (exit_nr) { 811 709 case BOOK3S_INTERRUPT_INST_STORAGE: 812 710 vcpu->stat.pf_instruc++; 711 + 712 + #ifdef CONFIG_PPC_BOOK3S_32 713 + /* We set segments as unused segments when invalidating them. So 714 + * treat the respective fault as segment fault. */ 715 + if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] 716 + == SR_INVALID) { 717 + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 718 + r = RESUME_GUEST; 719 + break; 720 + } 721 + #endif 722 + 813 723 /* only care about PTEG not found errors, but leave NX alone */ 814 - if (vcpu->arch.shadow_srr1 & 0x40000000) { 815 - r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); 724 + if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { 725 + r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); 816 726 vcpu->stat.sp_instruc++; 817 727 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 818 728 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { ··· 833 719 * so we can't use the NX bit inside the guest. Let's cross our fingers, 834 720 * that no guest that needs the dcbz hack does NX. 835 721 */ 836 - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 722 + kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); 723 + r = RESUME_GUEST; 837 724 } else { 838 - vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; 725 + vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; 839 726 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 840 - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 727 + kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); 841 728 r = RESUME_GUEST; 842 729 } 843 730 break; 844 731 case BOOK3S_INTERRUPT_DATA_STORAGE: 732 + { 733 + ulong dar = kvmppc_get_fault_dar(vcpu); 845 734 vcpu->stat.pf_storage++; 735 + 736 + #ifdef CONFIG_PPC_BOOK3S_32 737 + /* We set segments as unused segments when invalidating them. So 738 + * treat the respective fault as segment fault. 
*/ 739 + if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { 740 + kvmppc_mmu_map_segment(vcpu, dar); 741 + r = RESUME_GUEST; 742 + break; 743 + } 744 + #endif 745 + 846 746 /* The only case we need to handle is missing shadow PTEs */ 847 - if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) { 848 - r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr); 747 + if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { 748 + r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 849 749 } else { 850 - vcpu->arch.dear = vcpu->arch.fault_dear; 851 - to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 750 + vcpu->arch.dear = dar; 751 + to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; 852 752 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 853 - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL); 753 + kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); 854 754 r = RESUME_GUEST; 855 755 } 856 756 break; 757 + } 857 758 case BOOK3S_INTERRUPT_DATA_SEGMENT: 858 - if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) { 859 - vcpu->arch.dear = vcpu->arch.fault_dear; 759 + if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { 760 + vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); 860 761 kvmppc_book3s_queue_irqprio(vcpu, 861 762 BOOK3S_INTERRUPT_DATA_SEGMENT); 862 763 } 863 764 r = RESUME_GUEST; 864 765 break; 865 766 case BOOK3S_INTERRUPT_INST_SEGMENT: 866 - if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) { 767 + if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { 867 768 kvmppc_book3s_queue_irqprio(vcpu, 868 769 BOOK3S_INTERRUPT_INST_SEGMENT); 869 770 } ··· 893 764 vcpu->stat.ext_intr_exits++; 894 765 r = RESUME_GUEST; 895 766 break; 767 + case BOOK3S_INTERRUPT_PERFMON: 768 + r = RESUME_GUEST; 769 + break; 896 770 case BOOK3S_INTERRUPT_PROGRAM: 897 771 { 898 772 enum emulation_result er; 899 773 ulong flags; 900 774 901 - flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; 775 + program_interrupt: 776 + flags = to_svcpu(vcpu)->shadow_srr1 
& 0x1f0000ull; 902 777 903 778 if (vcpu->arch.msr & MSR_PR) { 904 779 #ifdef EXIT_DEBUG 905 - printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst); 780 + printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); 906 781 #endif 907 - if ((vcpu->arch.last_inst & 0xff0007ff) != 782 + if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != 908 783 (INS_DCBZ & 0xfffffff7)) { 909 784 kvmppc_core_queue_program(vcpu, flags); 910 785 r = RESUME_GUEST; ··· 922 789 case EMULATE_DONE: 923 790 r = RESUME_GUEST_NV; 924 791 break; 792 + case EMULATE_AGAIN: 793 + r = RESUME_GUEST; 794 + break; 925 795 case EMULATE_FAIL: 926 796 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 927 - __func__, vcpu->arch.pc, vcpu->arch.last_inst); 797 + __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); 928 798 kvmppc_core_queue_program(vcpu, flags); 929 799 r = RESUME_GUEST; 800 + break; 801 + case EMULATE_DO_MMIO: 802 + run->exit_reason = KVM_EXIT_MMIO; 803 + r = RESUME_HOST_NV; 930 804 break; 931 805 default: 932 806 BUG(); ··· 941 801 break; 942 802 } 943 803 case BOOK3S_INTERRUPT_SYSCALL: 944 - #ifdef EXIT_DEBUG 945 - printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); 946 - #endif 947 - vcpu->stat.syscall_exits++; 948 - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 949 - r = RESUME_GUEST; 804 + // XXX make user settable 805 + if (vcpu->arch.osi_enabled && 806 + (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && 807 + (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { 808 + u64 *gprs = run->osi.gprs; 809 + int i; 810 + 811 + run->exit_reason = KVM_EXIT_OSI; 812 + for (i = 0; i < 32; i++) 813 + gprs[i] = kvmppc_get_gpr(vcpu, i); 814 + vcpu->arch.osi_needed = 1; 815 + r = RESUME_HOST_NV; 816 + 817 + } else { 818 + vcpu->stat.syscall_exits++; 819 + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 820 + r = RESUME_GUEST; 821 + } 950 822 break; 951 823 
case BOOK3S_INTERRUPT_FP_UNAVAIL: 952 - r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP); 953 - break; 954 824 case BOOK3S_INTERRUPT_ALTIVEC: 955 - r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC); 956 - break; 957 825 case BOOK3S_INTERRUPT_VSX: 958 - r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); 826 + { 827 + int ext_msr = 0; 828 + 829 + switch (exit_nr) { 830 + case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; 831 + case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; 832 + case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; 833 + } 834 + 835 + switch (kvmppc_check_ext(vcpu, exit_nr)) { 836 + case EMULATE_DONE: 837 + /* everything ok - let's enable the ext */ 838 + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); 839 + break; 840 + case EMULATE_FAIL: 841 + /* we need to emulate this instruction */ 842 + goto program_interrupt; 843 + break; 844 + default: 845 + /* nothing to worry about - go again */ 846 + break; 847 + } 848 + break; 849 + } 850 + case BOOK3S_INTERRUPT_ALIGNMENT: 851 + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { 852 + to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, 853 + kvmppc_get_last_inst(vcpu)); 854 + vcpu->arch.dear = kvmppc_alignment_dar(vcpu, 855 + kvmppc_get_last_inst(vcpu)); 856 + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 857 + } 858 + r = RESUME_GUEST; 959 859 break; 960 860 case BOOK3S_INTERRUPT_MACHINE_CHECK: 961 861 case BOOK3S_INTERRUPT_TRACE: ··· 1005 825 default: 1006 826 /* Ugh - bork here! What did we get? 
*/ 1007 827 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 1008 - exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1); 828 + exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); 1009 829 r = RESUME_HOST; 1010 830 BUG(); 1011 831 break; ··· 1032 852 } 1033 853 1034 854 #ifdef EXIT_DEBUG 1035 - printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r); 855 + printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r); 1036 856 #endif 1037 857 1038 858 return r; ··· 1047 867 { 1048 868 int i; 1049 869 1050 - regs->pc = vcpu->arch.pc; 870 + vcpu_load(vcpu); 871 + 872 + regs->pc = kvmppc_get_pc(vcpu); 1051 873 regs->cr = kvmppc_get_cr(vcpu); 1052 - regs->ctr = vcpu->arch.ctr; 1053 - regs->lr = vcpu->arch.lr; 874 + regs->ctr = kvmppc_get_ctr(vcpu); 875 + regs->lr = kvmppc_get_lr(vcpu); 1054 876 regs->xer = kvmppc_get_xer(vcpu); 1055 877 regs->msr = vcpu->arch.msr; 1056 878 regs->srr0 = vcpu->arch.srr0; ··· 1069 887 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1070 888 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 1071 889 890 + vcpu_put(vcpu); 891 + 1072 892 return 0; 1073 893 } 1074 894 ··· 1078 894 { 1079 895 int i; 1080 896 1081 - vcpu->arch.pc = regs->pc; 897 + vcpu_load(vcpu); 898 + 899 + kvmppc_set_pc(vcpu, regs->pc); 1082 900 kvmppc_set_cr(vcpu, regs->cr); 1083 - vcpu->arch.ctr = regs->ctr; 1084 - vcpu->arch.lr = regs->lr; 901 + kvmppc_set_ctr(vcpu, regs->ctr); 902 + kvmppc_set_lr(vcpu, regs->lr); 1085 903 kvmppc_set_xer(vcpu, regs->xer); 1086 904 kvmppc_set_msr(vcpu, regs->msr); 1087 905 vcpu->arch.srr0 = regs->srr0; ··· 1099 913 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1100 914 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 1101 915 916 + vcpu_put(vcpu); 917 + 1102 918 return 0; 1103 919 } 1104 920 ··· 1109 921 { 1110 922 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1111 923 int i; 924 + 925 + vcpu_load(vcpu); 1112 926 1113 927 sregs->pvr = vcpu->arch.pvr; 1114 928 ··· 1130 940 
sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; 1131 941 } 1132 942 } 943 + 944 + vcpu_put(vcpu); 945 + 1133 946 return 0; 1134 947 } 1135 948 ··· 1141 948 { 1142 949 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1143 950 int i; 951 + 952 + vcpu_load(vcpu); 1144 953 1145 954 kvmppc_set_pvr(vcpu, sregs->pvr); 1146 955 ··· 1170 975 1171 976 /* Flush the MMU after messing with the segments */ 1172 977 kvmppc_mmu_pte_flush(vcpu, 0, 0); 978 + 979 + vcpu_put(vcpu); 980 + 1173 981 return 0; 1174 982 } 1175 983 ··· 1240 1042 { 1241 1043 struct kvmppc_vcpu_book3s *vcpu_book3s; 1242 1044 struct kvm_vcpu *vcpu; 1243 - int err; 1045 + int err = -ENOMEM; 1244 1046 1245 - vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, 1246 - get_order(sizeof(struct kvmppc_vcpu_book3s))); 1247 - if (!vcpu_book3s) { 1248 - err = -ENOMEM; 1047 + vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); 1048 + if (!vcpu_book3s) 1249 1049 goto out; 1250 - } 1050 + 1051 + memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s)); 1052 + 1053 + vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *) 1054 + kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); 1055 + if (!vcpu_book3s->shadow_vcpu) 1056 + goto free_vcpu; 1251 1057 1252 1058 vcpu = &vcpu_book3s->vcpu; 1253 1059 err = kvm_vcpu_init(vcpu, kvm, id); 1254 1060 if (err) 1255 - goto free_vcpu; 1061 + goto free_shadow_vcpu; 1256 1062 1257 1063 vcpu->arch.host_retip = kvm_return_point; 1258 1064 vcpu->arch.host_msr = mfmsr(); 1065 + #ifdef CONFIG_PPC_BOOK3S_64 1259 1066 /* default to book3s_64 (970fx) */ 1260 1067 vcpu->arch.pvr = 0x3C0301; 1068 + #else 1069 + /* default to book3s_32 (750) */ 1070 + vcpu->arch.pvr = 0x84202; 1071 + #endif 1261 1072 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 1262 1073 vcpu_book3s->slb_nr = 64; 1263 1074 ··· 1274 1067 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; 1275 1068 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; 1276 1069 
vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; 1070 + #ifdef CONFIG_PPC_BOOK3S_64 1277 1071 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; 1072 + #else 1073 + vcpu->arch.rmcall = (ulong)kvmppc_rmcall; 1074 + #endif 1278 1075 1279 1076 vcpu->arch.shadow_msr = MSR_USER64; 1280 1077 1281 - err = __init_new_context(); 1078 + err = kvmppc_mmu_init(vcpu); 1282 1079 if (err < 0) 1283 - goto free_vcpu; 1284 - vcpu_book3s->context_id = err; 1285 - 1286 - vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1; 1287 - vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS; 1288 - vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; 1080 + goto free_shadow_vcpu; 1289 1081 1290 1082 return vcpu; 1291 1083 1084 + free_shadow_vcpu: 1085 + kfree(vcpu_book3s->shadow_vcpu); 1292 1086 free_vcpu: 1293 - free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); 1087 + vfree(vcpu_book3s); 1294 1088 out: 1295 1089 return ERR_PTR(err); 1296 1090 } ··· 1300 1092 { 1301 1093 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 1302 1094 1303 - __destroy_context(vcpu_book3s->context_id); 1304 1095 kvm_vcpu_uninit(vcpu); 1305 - free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); 1096 + kfree(vcpu_book3s->shadow_vcpu); 1097 + vfree(vcpu_book3s); 1306 1098 } 1307 1099 1308 1100 extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); ··· 1310 1102 { 1311 1103 int ret; 1312 1104 struct thread_struct ext_bkp; 1105 + #ifdef CONFIG_ALTIVEC 1313 1106 bool save_vec = current->thread.used_vr; 1107 + #endif 1108 + #ifdef CONFIG_VSX 1314 1109 bool save_vsx = current->thread.used_vsr; 1110 + #endif 1315 1111 ulong ext_msr; 1316 1112 1317 1113 /* No need to go into the guest when all we do is going out */ ··· 1356 1144 /* XXX we get called with irq disabled - change that! 
*/ 1357 1145 local_irq_enable(); 1358 1146 1147 + /* Preload FPU if it's enabled */ 1148 + if (vcpu->arch.msr & MSR_FP) 1149 + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 1150 + 1359 1151 ret = __kvmppc_vcpu_entry(kvm_run, vcpu); 1360 1152 1361 1153 local_irq_disable(); ··· 1395 1179 1396 1180 static int kvmppc_book3s_init(void) 1397 1181 { 1398 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE); 1182 + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, 1183 + THIS_MODULE); 1399 1184 } 1400 1185 1401 1186 static void kvmppc_book3s_exit(void)
+39 -15
arch/powerpc/kvm/book3s_32_mmu.c
··· 37 37 #define dprintk(X...) do { } while(0) 38 38 #endif 39 39 40 - #ifdef DEBUG_PTE 40 + #ifdef DEBUG_MMU_PTE 41 41 #define dprintk_pte(X...) printk(KERN_INFO X) 42 42 #else 43 43 #define dprintk_pte(X...) do { } while(0) ··· 45 45 46 46 #define PTEG_FLAG_ACCESSED 0x00000100 47 47 #define PTEG_FLAG_DIRTY 0x00000080 48 + #ifndef SID_SHIFT 49 + #define SID_SHIFT 28 50 + #endif 48 51 49 52 static inline bool check_debug_ip(struct kvm_vcpu *vcpu) 50 53 { ··· 60 57 61 58 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 62 59 struct kvmppc_pte *pte, bool data); 60 + static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 61 + u64 *vsid); 63 62 64 63 static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) 65 64 { ··· 71 66 static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, 72 67 bool data) 73 68 { 74 - struct kvmppc_sr *sre = find_sr(to_book3s(vcpu), eaddr); 69 + u64 vsid; 75 70 struct kvmppc_pte pte; 76 71 77 72 if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) 78 73 return pte.vpage; 79 74 80 - return (((u64)eaddr >> 12) & 0xffff) | (((u64)sre->vsid) << 16); 75 + kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); 76 + return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); 81 77 } 82 78 83 79 static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) ··· 148 142 bat->bepi_mask); 149 143 } 150 144 if ((eaddr & bat->bepi_mask) == bat->bepi) { 145 + u64 vsid; 146 + kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, 147 + eaddr >> SID_SHIFT, &vsid); 148 + vsid <<= 16; 149 + pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid; 150 + 151 151 pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); 152 - pte->vpage = (eaddr >> 12) | VSID_BAT; 153 152 pte->may_read = bat->pp; 154 153 pte->may_write = bat->pp > 1; 155 154 pte->may_execute = true; ··· 183 172 struct kvmppc_sr *sre; 184 173 hva_t ptegp; 185 174 u32 pteg[16]; 186 - u64 ptem = 0; 175 + u32 
ptem = 0; 187 176 int i; 188 177 int found = 0; 189 178 ··· 313 302 /* And then put in the new SR */ 314 303 sre->raw = value; 315 304 sre->vsid = (value & 0x0fffffff); 305 + sre->valid = (value & 0x80000000) ? false : true; 316 306 sre->Ks = (value & 0x40000000) ? true : false; 317 307 sre->Kp = (value & 0x20000000) ? true : false; 318 308 sre->nx = (value & 0x10000000) ? true : false; ··· 324 312 325 313 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) 326 314 { 327 - kvmppc_mmu_pte_flush(vcpu, ea, ~0xFFFULL); 315 + kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); 328 316 } 329 317 330 - static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, 318 + static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 331 319 u64 *vsid) 332 320 { 321 + ulong ea = esid << SID_SHIFT; 322 + struct kvmppc_sr *sr; 323 + u64 gvsid = esid; 324 + 325 + if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 326 + sr = find_sr(to_book3s(vcpu), ea); 327 + if (sr->valid) 328 + gvsid = sr->vsid; 329 + } 330 + 333 331 /* In case we only have one of MSR_IR or MSR_DR set, let's put 334 332 that in the real-mode context (and hope RM doesn't access 335 333 high memory) */ 336 334 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 337 335 case 0: 338 - *vsid = (VSID_REAL >> 16) | esid; 336 + *vsid = VSID_REAL | esid; 339 337 break; 340 338 case MSR_IR: 341 - *vsid = (VSID_REAL_IR >> 16) | esid; 339 + *vsid = VSID_REAL_IR | gvsid; 342 340 break; 343 341 case MSR_DR: 344 - *vsid = (VSID_REAL_DR >> 16) | esid; 342 + *vsid = VSID_REAL_DR | gvsid; 345 343 break; 346 344 case MSR_DR|MSR_IR: 347 - { 348 - ulong ea; 349 - ea = esid << SID_SHIFT; 350 - *vsid = find_sr(to_book3s(vcpu), ea)->vsid; 345 + if (!sr->valid) 346 + return -1; 347 + 348 + *vsid = sr->vsid; 351 349 break; 352 - } 353 350 default: 354 351 BUG(); 355 352 } 353 + 354 + if (vcpu->arch.msr & MSR_PR) 355 + *vsid |= VSID_PR; 356 356 357 357 return 0; 358 358 }
+483
arch/powerpc/kvm/book3s_32_mmu_host.c
··· 1 + /* 2 + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. 3 + * 4 + * Authors: 5 + * Alexander Graf <agraf@suse.de> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License, version 2, as 9 + * published by the Free Software Foundation. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write to the Free Software 18 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + 21 + #include <linux/kvm_host.h> 22 + 23 + #include <asm/kvm_ppc.h> 24 + #include <asm/kvm_book3s.h> 25 + #include <asm/mmu-hash32.h> 26 + #include <asm/machdep.h> 27 + #include <asm/mmu_context.h> 28 + #include <asm/hw_irq.h> 29 + 30 + /* #define DEBUG_MMU */ 31 + /* #define DEBUG_SR */ 32 + 33 + #ifdef DEBUG_MMU 34 + #define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) 35 + #else 36 + #define dprintk_mmu(a, ...) do { } while(0) 37 + #endif 38 + 39 + #ifdef DEBUG_SR 40 + #define dprintk_sr(a, ...) printk(KERN_INFO a, __VA_ARGS__) 41 + #else 42 + #define dprintk_sr(a, ...) 
do { } while(0) 43 + #endif 44 + 45 + #if PAGE_SHIFT != 12 46 + #error Unknown page size 47 + #endif 48 + 49 + #ifdef CONFIG_SMP 50 + #error XXX need to grab mmu_hash_lock 51 + #endif 52 + 53 + #ifdef CONFIG_PTE_64BIT 54 + #error Only 32 bit pages are supported for now 55 + #endif 56 + 57 + static ulong htab; 58 + static u32 htabmask; 59 + 60 + static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 61 + { 62 + volatile u32 *pteg; 63 + 64 + dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n", 65 + pte->pte.eaddr, pte->pte.vpage, pte->host_va); 66 + 67 + pteg = (u32*)pte->slot; 68 + 69 + pteg[0] = 0; 70 + asm volatile ("sync"); 71 + asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); 72 + asm volatile ("sync"); 73 + asm volatile ("tlbsync"); 74 + 75 + pte->host_va = 0; 76 + 77 + if (pte->pte.may_write) 78 + kvm_release_pfn_dirty(pte->pfn); 79 + else 80 + kvm_release_pfn_clean(pte->pfn); 81 + } 82 + 83 + void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) 84 + { 85 + int i; 86 + 87 + dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n", 88 + vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); 89 + BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 90 + 91 + guest_ea &= ea_mask; 92 + for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 93 + struct hpte_cache *pte; 94 + 95 + pte = &vcpu->arch.hpte_cache[i]; 96 + if (!pte->host_va) 97 + continue; 98 + 99 + if ((pte->pte.eaddr & ea_mask) == guest_ea) { 100 + invalidate_pte(vcpu, pte); 101 + } 102 + } 103 + 104 + /* Doing a complete flush -> start from scratch */ 105 + if (!ea_mask) 106 + vcpu->arch.hpte_cache_offset = 0; 107 + } 108 + 109 + void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) 110 + { 111 + int i; 112 + 113 + dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", 114 + vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); 115 + BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 116 + 117 + guest_vp 
&= vp_mask; 118 + for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 119 + struct hpte_cache *pte; 120 + 121 + pte = &vcpu->arch.hpte_cache[i]; 122 + if (!pte->host_va) 123 + continue; 124 + 125 + if ((pte->pte.vpage & vp_mask) == guest_vp) { 126 + invalidate_pte(vcpu, pte); 127 + } 128 + } 129 + } 130 + 131 + void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 132 + { 133 + int i; 134 + 135 + dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", 136 + vcpu->arch.hpte_cache_offset, pa_start, pa_end); 137 + BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 138 + 139 + for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 140 + struct hpte_cache *pte; 141 + 142 + pte = &vcpu->arch.hpte_cache[i]; 143 + if (!pte->host_va) 144 + continue; 145 + 146 + if ((pte->pte.raddr >= pa_start) && 147 + (pte->pte.raddr < pa_end)) { 148 + invalidate_pte(vcpu, pte); 149 + } 150 + } 151 + } 152 + 153 + struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) 154 + { 155 + int i; 156 + u64 guest_vp; 157 + 158 + guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); 159 + for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { 160 + struct hpte_cache *pte; 161 + 162 + pte = &vcpu->arch.hpte_cache[i]; 163 + if (!pte->host_va) 164 + continue; 165 + 166 + if (pte->pte.vpage == guest_vp) 167 + return &pte->pte; 168 + } 169 + 170 + return NULL; 171 + } 172 + 173 + static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) 174 + { 175 + if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) 176 + kvmppc_mmu_pte_flush(vcpu, 0, 0); 177 + 178 + return vcpu->arch.hpte_cache_offset++; 179 + } 180 + 181 + /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using 182 + * a hash, so we don't waste cycles on looping */ 183 + static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) 184 + { 185 + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ 186 + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ 187 + 
((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ 188 + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ 189 + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ 190 + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ 191 + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ 192 + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); 193 + } 194 + 195 + 196 + static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) 197 + { 198 + struct kvmppc_sid_map *map; 199 + u16 sid_map_mask; 200 + 201 + if (vcpu->arch.msr & MSR_PR) 202 + gvsid |= VSID_PR; 203 + 204 + sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); 205 + map = &to_book3s(vcpu)->sid_map[sid_map_mask]; 206 + if (map->guest_vsid == gvsid) { 207 + dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n", 208 + gvsid, map->host_vsid); 209 + return map; 210 + } 211 + 212 + map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; 213 + if (map->guest_vsid == gvsid) { 214 + dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n", 215 + gvsid, map->host_vsid); 216 + return map; 217 + } 218 + 219 + dprintk_sr("SR: Searching 0x%llx -> not found\n", gvsid); 220 + return NULL; 221 + } 222 + 223 + static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr, 224 + bool primary) 225 + { 226 + u32 page, hash; 227 + ulong pteg = htab; 228 + 229 + page = (eaddr & ~ESID_MASK) >> 12; 230 + 231 + hash = ((vsid ^ page) << 6); 232 + if (!primary) 233 + hash = ~hash; 234 + 235 + hash &= htabmask; 236 + 237 + pteg |= hash; 238 + 239 + dprintk_mmu("htab: %lx | hash: %x | htabmask: %x | pteg: %lx\n", 240 + htab, hash, htabmask, pteg); 241 + 242 + return (u32*)pteg; 243 + } 244 + 245 + extern char etext[]; 246 + 247 + int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 248 + { 249 + pfn_t hpaddr; 250 + u64 va; 251 + u64 vsid; 252 + struct kvmppc_sid_map *map; 253 + volatile u32 *pteg; 254 + u32 eaddr = orig_pte->eaddr; 255 + u32 pteg0, pteg1; 256 + register int rr = 0; 257 + bool primary = false; 258 + bool 
evict = false; 259 + int hpte_id; 260 + struct hpte_cache *pte; 261 + 262 + /* Get host physical address for gpa */ 263 + hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 264 + if (kvm_is_error_hva(hpaddr)) { 265 + printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", 266 + orig_pte->eaddr); 267 + return -EINVAL; 268 + } 269 + hpaddr <<= PAGE_SHIFT; 270 + 271 + /* and write the mapping ea -> hpa into the pt */ 272 + vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); 273 + map = find_sid_vsid(vcpu, vsid); 274 + if (!map) { 275 + kvmppc_mmu_map_segment(vcpu, eaddr); 276 + map = find_sid_vsid(vcpu, vsid); 277 + } 278 + BUG_ON(!map); 279 + 280 + vsid = map->host_vsid; 281 + va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK); 282 + 283 + next_pteg: 284 + if (rr == 16) { 285 + primary = !primary; 286 + evict = true; 287 + rr = 0; 288 + } 289 + 290 + pteg = kvmppc_mmu_get_pteg(vcpu, vsid, eaddr, primary); 291 + 292 + /* not evicting yet */ 293 + if (!evict && (pteg[rr] & PTE_V)) { 294 + rr += 2; 295 + goto next_pteg; 296 + } 297 + 298 + dprintk_mmu("KVM: old PTEG: %p (%d)\n", pteg, rr); 299 + dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]); 300 + dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]); 301 + dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]); 302 + dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]); 303 + dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]); 304 + dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]); 305 + dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]); 306 + dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]); 307 + 308 + pteg0 = ((eaddr & 0x0fffffff) >> 22) | (vsid << 7) | PTE_V | 309 + (primary ? 
0 : PTE_SEC); 310 + pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; 311 + 312 + if (orig_pte->may_write) { 313 + pteg1 |= PP_RWRW; 314 + mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 315 + } else { 316 + pteg1 |= PP_RWRX; 317 + } 318 + 319 + local_irq_disable(); 320 + 321 + if (pteg[rr]) { 322 + pteg[rr] = 0; 323 + asm volatile ("sync"); 324 + } 325 + pteg[rr + 1] = pteg1; 326 + pteg[rr] = pteg0; 327 + asm volatile ("sync"); 328 + 329 + local_irq_enable(); 330 + 331 + dprintk_mmu("KVM: new PTEG: %p\n", pteg); 332 + dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]); 333 + dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]); 334 + dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]); 335 + dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]); 336 + dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]); 337 + dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]); 338 + dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]); 339 + dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]); 340 + 341 + 342 + /* Now tell our Shadow PTE code about the new page */ 343 + 344 + hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); 345 + pte = &vcpu->arch.hpte_cache[hpte_id]; 346 + 347 + dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", 348 + orig_pte->may_write ? 'w' : '-', 349 + orig_pte->may_execute ? 
'x' : '-', 350 + orig_pte->eaddr, (ulong)pteg, va, 351 + orig_pte->vpage, hpaddr); 352 + 353 + pte->slot = (ulong)&pteg[rr]; 354 + pte->host_va = va; 355 + pte->pte = *orig_pte; 356 + pte->pfn = hpaddr >> PAGE_SHIFT; 357 + 358 + return 0; 359 + } 360 + 361 + static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) 362 + { 363 + struct kvmppc_sid_map *map; 364 + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 365 + u16 sid_map_mask; 366 + static int backwards_map = 0; 367 + 368 + if (vcpu->arch.msr & MSR_PR) 369 + gvsid |= VSID_PR; 370 + 371 + /* We might get collisions that trap in preceding order, so let's 372 + map them differently */ 373 + 374 + sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); 375 + if (backwards_map) 376 + sid_map_mask = SID_MAP_MASK - sid_map_mask; 377 + 378 + map = &to_book3s(vcpu)->sid_map[sid_map_mask]; 379 + 380 + /* Make sure we're taking the other map next time */ 381 + backwards_map = !backwards_map; 382 + 383 + /* Uh-oh ... out of mappings. Let's flush! */ 384 + if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) { 385 + vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; 386 + memset(vcpu_book3s->sid_map, 0, 387 + sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); 388 + kvmppc_mmu_pte_flush(vcpu, 0, 0); 389 + kvmppc_mmu_flush_segments(vcpu); 390 + } 391 + map->host_vsid = vcpu_book3s->vsid_next; 392 + 393 + /* Would have to be 111 to be completely aligned with the rest of 394 + Linux, but that is just way too little space! 
*/ 395 + vcpu_book3s->vsid_next+=1; 396 + 397 + map->guest_vsid = gvsid; 398 + map->valid = true; 399 + 400 + return map; 401 + } 402 + 403 + int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) 404 + { 405 + u32 esid = eaddr >> SID_SHIFT; 406 + u64 gvsid; 407 + u32 sr; 408 + struct kvmppc_sid_map *map; 409 + struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); 410 + 411 + if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { 412 + /* Invalidate an entry */ 413 + svcpu->sr[esid] = SR_INVALID; 414 + return -ENOENT; 415 + } 416 + 417 + map = find_sid_vsid(vcpu, gvsid); 418 + if (!map) 419 + map = create_sid_map(vcpu, gvsid); 420 + 421 + map->guest_esid = esid; 422 + sr = map->host_vsid | SR_KP; 423 + svcpu->sr[esid] = sr; 424 + 425 + dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr); 426 + 427 + return 0; 428 + } 429 + 430 + void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 431 + { 432 + int i; 433 + struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); 434 + 435 + dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr)); 436 + for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++) 437 + svcpu->sr[i] = SR_INVALID; 438 + } 439 + 440 + void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 441 + { 442 + kvmppc_mmu_pte_flush(vcpu, 0, 0); 443 + preempt_disable(); 444 + __destroy_context(to_book3s(vcpu)->context_id); 445 + preempt_enable(); 446 + } 447 + 448 + /* From mm/mmu_context_hash32.c */ 449 + #define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff) 450 + 451 + int kvmppc_mmu_init(struct kvm_vcpu *vcpu) 452 + { 453 + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 454 + int err; 455 + ulong sdr1; 456 + 457 + err = __init_new_context(); 458 + if (err < 0) 459 + return -1; 460 + vcpu3s->context_id = err; 461 + 462 + vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1; 463 + vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id); 464 + 465 + #if 0 /* XXX still doesn't guarantee uniqueness */ 466 + /* We could collide with the Linux vsid space 
because the vsid 467 + * wraps around at 24 bits. We're safe if we do our own space 468 + * though, so let's always set the highest bit. */ 469 + 470 + vcpu3s->vsid_max |= 0x00800000; 471 + vcpu3s->vsid_first |= 0x00800000; 472 + #endif 473 + BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first); 474 + 475 + vcpu3s->vsid_next = vcpu3s->vsid_first; 476 + 477 + /* Remember where the HTAB is */ 478 + asm ( "mfsdr1 %0" : "=r"(sdr1) ); 479 + htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; 480 + htab = (ulong)__va(sdr1 & 0xffff0000); 481 + 482 + return 0; 483 + }
+143
arch/powerpc/kvm/book3s_32_sr.S
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright SUSE Linux Products GmbH 2009 16 + * 17 + * Authors: Alexander Graf <agraf@suse.de> 18 + */ 19 + 20 + /****************************************************************************** 21 + * * 22 + * Entry code * 23 + * * 24 + *****************************************************************************/ 25 + 26 + .macro LOAD_GUEST_SEGMENTS 27 + 28 + /* Required state: 29 + * 30 + * MSR = ~IR|DR 31 + * R1 = host R1 32 + * R2 = host R2 33 + * R3 = shadow vcpu 34 + * all other volatile GPRS = free 35 + * SVCPU[CR] = guest CR 36 + * SVCPU[XER] = guest XER 37 + * SVCPU[CTR] = guest CTR 38 + * SVCPU[LR] = guest LR 39 + */ 40 + 41 + #define XCHG_SR(n) lwz r9, (SVCPU_SR+(n*4))(r3); \ 42 + mtsr n, r9 43 + 44 + XCHG_SR(0) 45 + XCHG_SR(1) 46 + XCHG_SR(2) 47 + XCHG_SR(3) 48 + XCHG_SR(4) 49 + XCHG_SR(5) 50 + XCHG_SR(6) 51 + XCHG_SR(7) 52 + XCHG_SR(8) 53 + XCHG_SR(9) 54 + XCHG_SR(10) 55 + XCHG_SR(11) 56 + XCHG_SR(12) 57 + XCHG_SR(13) 58 + XCHG_SR(14) 59 + XCHG_SR(15) 60 + 61 + /* Clear BATs. 
*/ 62 + 63 + #define KVM_KILL_BAT(n, reg) \ 64 + mtspr SPRN_IBAT##n##U,reg; \ 65 + mtspr SPRN_IBAT##n##L,reg; \ 66 + mtspr SPRN_DBAT##n##U,reg; \ 67 + mtspr SPRN_DBAT##n##L,reg; \ 68 + 69 + li r9, 0 70 + KVM_KILL_BAT(0, r9) 71 + KVM_KILL_BAT(1, r9) 72 + KVM_KILL_BAT(2, r9) 73 + KVM_KILL_BAT(3, r9) 74 + 75 + .endm 76 + 77 + /****************************************************************************** 78 + * * 79 + * Exit code * 80 + * * 81 + *****************************************************************************/ 82 + 83 + .macro LOAD_HOST_SEGMENTS 84 + 85 + /* Register usage at this point: 86 + * 87 + * R1 = host R1 88 + * R2 = host R2 89 + * R12 = exit handler id 90 + * R13 = shadow vcpu - SHADOW_VCPU_OFF 91 + * SVCPU.* = guest * 92 + * SVCPU[CR] = guest CR 93 + * SVCPU[XER] = guest XER 94 + * SVCPU[CTR] = guest CTR 95 + * SVCPU[LR] = guest LR 96 + * 97 + */ 98 + 99 + /* Restore BATs */ 100 + 101 + /* We only overwrite the upper part, so we only restoree 102 + the upper part. */ 103 + #define KVM_LOAD_BAT(n, reg, RA, RB) \ 104 + lwz RA,(n*16)+0(reg); \ 105 + lwz RB,(n*16)+4(reg); \ 106 + mtspr SPRN_IBAT##n##U,RA; \ 107 + mtspr SPRN_IBAT##n##L,RB; \ 108 + lwz RA,(n*16)+8(reg); \ 109 + lwz RB,(n*16)+12(reg); \ 110 + mtspr SPRN_DBAT##n##U,RA; \ 111 + mtspr SPRN_DBAT##n##L,RB; \ 112 + 113 + lis r9, BATS@ha 114 + addi r9, r9, BATS@l 115 + tophys(r9, r9) 116 + KVM_LOAD_BAT(0, r9, r10, r11) 117 + KVM_LOAD_BAT(1, r9, r10, r11) 118 + KVM_LOAD_BAT(2, r9, r10, r11) 119 + KVM_LOAD_BAT(3, r9, r10, r11) 120 + 121 + /* Restore Segment Registers */ 122 + 123 + /* 0xc - 0xf */ 124 + 125 + li r0, 4 126 + mtctr r0 127 + LOAD_REG_IMMEDIATE(r3, 0x20000000 | (0x111 * 0xc)) 128 + lis r4, 0xc000 129 + 3: mtsrin r3, r4 130 + addi r3, r3, 0x111 /* increment VSID */ 131 + addis r4, r4, 0x1000 /* address of next segment */ 132 + bdnz 3b 133 + 134 + /* 0x0 - 0xb */ 135 + 136 + /* 'current->mm' needs to be in r4 */ 137 + tophys(r4, r2) 138 + lwz r4, MM(r4) 139 + tophys(r4, r4) 140 + /* 
This only clobbers r0, r3, r4 and r5 */ 141 + bl switch_mmu_context 142 + 143 + .endm
+235 -10
arch/powerpc/kvm/book3s_64_emulate.c arch/powerpc/kvm/book3s_emulate.c
··· 28 28 #define OP_31_XOP_MFMSR 83 29 29 #define OP_31_XOP_MTMSR 146 30 30 #define OP_31_XOP_MTMSRD 178 31 + #define OP_31_XOP_MTSR 210 31 32 #define OP_31_XOP_MTSRIN 242 32 33 #define OP_31_XOP_TLBIEL 274 33 34 #define OP_31_XOP_TLBIE 306 34 35 #define OP_31_XOP_SLBMTE 402 35 36 #define OP_31_XOP_SLBIE 434 36 37 #define OP_31_XOP_SLBIA 498 38 + #define OP_31_XOP_MFSR 595 37 39 #define OP_31_XOP_MFSRIN 659 40 + #define OP_31_XOP_DCBA 758 38 41 #define OP_31_XOP_SLBMFEV 851 39 42 #define OP_31_XOP_EIOIO 854 40 43 #define OP_31_XOP_SLBMFEE 915 41 44 42 45 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ 43 46 #define OP_31_XOP_DCBZ 1010 47 + 48 + #define OP_LFS 48 49 + #define OP_LFD 50 50 + #define OP_STFS 52 51 + #define OP_STFD 54 52 + 53 + #define SPRN_GQR0 912 54 + #define SPRN_GQR1 913 55 + #define SPRN_GQR2 914 56 + #define SPRN_GQR3 915 57 + #define SPRN_GQR4 916 58 + #define SPRN_GQR5 917 59 + #define SPRN_GQR6 918 60 + #define SPRN_GQR7 919 61 + 62 + /* Book3S_32 defines mfsrin(v) - but that messes up our abstract 63 + * function pointers, so let's just disable the define. 
*/ 64 + #undef mfsrin 44 65 45 66 int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 46 67 unsigned int inst, int *advance) ··· 73 52 switch (get_xop(inst)) { 74 53 case OP_19_XOP_RFID: 75 54 case OP_19_XOP_RFI: 76 - vcpu->arch.pc = vcpu->arch.srr0; 55 + kvmppc_set_pc(vcpu, vcpu->arch.srr0); 77 56 kvmppc_set_msr(vcpu, vcpu->arch.srr1); 78 57 *advance = 0; 79 58 break; ··· 101 80 case OP_31_XOP_MTMSR: 102 81 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); 103 82 break; 83 + case OP_31_XOP_MFSR: 84 + { 85 + int srnum; 86 + 87 + srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32); 88 + if (vcpu->arch.mmu.mfsrin) { 89 + u32 sr; 90 + sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); 91 + kvmppc_set_gpr(vcpu, get_rt(inst), sr); 92 + } 93 + break; 94 + } 104 95 case OP_31_XOP_MFSRIN: 105 96 { 106 97 int srnum; ··· 125 92 } 126 93 break; 127 94 } 95 + case OP_31_XOP_MTSR: 96 + vcpu->arch.mmu.mtsrin(vcpu, 97 + (inst >> 16) & 0xf, 98 + kvmppc_get_gpr(vcpu, get_rs(inst))); 99 + break; 128 100 case OP_31_XOP_MTSRIN: 129 101 vcpu->arch.mmu.mtsrin(vcpu, 130 102 (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, ··· 188 150 kvmppc_set_gpr(vcpu, get_rt(inst), t); 189 151 } 190 152 break; 153 + case OP_31_XOP_DCBA: 154 + /* Gets treated as NOP */ 155 + break; 191 156 case OP_31_XOP_DCBZ: 192 157 { 193 158 ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); 194 159 ulong ra = 0; 195 - ulong addr; 160 + ulong addr, vaddr; 196 161 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; 162 + u32 dsisr; 163 + int r; 197 164 198 165 if (get_ra(inst)) 199 166 ra = kvmppc_get_gpr(vcpu, get_ra(inst)); ··· 206 163 addr = (ra + rb) & ~31ULL; 207 164 if (!(vcpu->arch.msr & MSR_SF)) 208 165 addr &= 0xffffffff; 166 + vaddr = addr; 209 167 210 - if (kvmppc_st(vcpu, addr, 32, zeros)) { 211 - vcpu->arch.dear = addr; 212 - vcpu->arch.fault_dear = addr; 213 - to_book3s(vcpu)->dsisr = DSISR_PROTFAULT | 214 - DSISR_ISSTORE; 168 + r = kvmppc_st(vcpu, &addr, 32, zeros, true); 169 + if ((r == -ENOENT) || 
(r == -EPERM)) { 170 + *advance = 0; 171 + vcpu->arch.dear = vaddr; 172 + to_svcpu(vcpu)->fault_dar = vaddr; 173 + 174 + dsisr = DSISR_ISSTORE; 175 + if (r == -ENOENT) 176 + dsisr |= DSISR_NOHPTE; 177 + else if (r == -EPERM) 178 + dsisr |= DSISR_PROTFAULT; 179 + 180 + to_book3s(vcpu)->dsisr = dsisr; 181 + to_svcpu(vcpu)->fault_dsisr = dsisr; 182 + 215 183 kvmppc_book3s_queue_irqprio(vcpu, 216 184 BOOK3S_INTERRUPT_DATA_STORAGE); 217 - kvmppc_mmu_pte_flush(vcpu, addr, ~0xFFFULL); 218 185 } 219 186 220 187 break; ··· 236 183 default: 237 184 emulated = EMULATE_FAIL; 238 185 } 186 + 187 + if (emulated == EMULATE_FAIL) 188 + emulated = kvmppc_emulate_paired_single(run, vcpu); 239 189 240 190 return emulated; 241 191 } ··· 263 207 } 264 208 } 265 209 210 + static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) 211 + { 212 + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 213 + struct kvmppc_bat *bat; 214 + 215 + switch (sprn) { 216 + case SPRN_IBAT0U ... SPRN_IBAT3L: 217 + bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; 218 + break; 219 + case SPRN_IBAT4U ... SPRN_IBAT7L: 220 + bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; 221 + break; 222 + case SPRN_DBAT0U ... SPRN_DBAT3L: 223 + bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; 224 + break; 225 + case SPRN_DBAT4U ... SPRN_DBAT7L: 226 + bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; 227 + break; 228 + default: 229 + BUG(); 230 + } 231 + 232 + if (sprn % 2) 233 + return bat->raw >> 32; 234 + else 235 + return bat->raw; 236 + } 237 + 266 238 static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) 267 239 { 268 240 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); ··· 301 217 bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; 302 218 break; 303 219 case SPRN_IBAT4U ... SPRN_IBAT7L: 304 - bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT4U) / 2]; 220 + bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; 305 221 break; 306 222 case SPRN_DBAT0U ... 
SPRN_DBAT3L: 307 223 bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; 308 224 break; 309 225 case SPRN_DBAT4U ... SPRN_DBAT7L: 310 - bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT4U) / 2]; 226 + bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; 311 227 break; 312 228 default: 313 229 BUG(); ··· 342 258 /* BAT writes happen so rarely that we're ok to flush 343 259 * everything here */ 344 260 kvmppc_mmu_pte_flush(vcpu, 0, 0); 261 + kvmppc_mmu_flush_segments(vcpu); 345 262 break; 346 263 case SPRN_HID0: 347 264 to_book3s(vcpu)->hid[0] = spr_val; ··· 353 268 case SPRN_HID2: 354 269 to_book3s(vcpu)->hid[2] = spr_val; 355 270 break; 271 + case SPRN_HID2_GEKKO: 272 + to_book3s(vcpu)->hid[2] = spr_val; 273 + /* HID2.PSE controls paired single on gekko */ 274 + switch (vcpu->arch.pvr) { 275 + case 0x00080200: /* lonestar 2.0 */ 276 + case 0x00088202: /* lonestar 2.2 */ 277 + case 0x70000100: /* gekko 1.0 */ 278 + case 0x00080100: /* gekko 2.0 */ 279 + case 0x00083203: /* gekko 2.3a */ 280 + case 0x00083213: /* gekko 2.3b */ 281 + case 0x00083204: /* gekko 2.4 */ 282 + case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ 283 + case 0x00087200: /* broadway */ 284 + if (vcpu->arch.hflags & BOOK3S_HFLAG_NATIVE_PS) { 285 + /* Native paired singles */ 286 + } else if (spr_val & (1 << 29)) { /* HID2.PSE */ 287 + vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE; 288 + kvmppc_giveup_ext(vcpu, MSR_FP); 289 + } else { 290 + vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE; 291 + } 292 + break; 293 + } 294 + break; 356 295 case SPRN_HID4: 296 + case SPRN_HID4_GEKKO: 357 297 to_book3s(vcpu)->hid[4] = spr_val; 358 298 break; 359 299 case SPRN_HID5: ··· 388 278 (mfmsr() & MSR_HV)) 389 279 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 390 280 break; 281 + case SPRN_GQR0: 282 + case SPRN_GQR1: 283 + case SPRN_GQR2: 284 + case SPRN_GQR3: 285 + case SPRN_GQR4: 286 + case SPRN_GQR5: 287 + case SPRN_GQR6: 288 + case SPRN_GQR7: 289 + to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; 290 + 
break; 391 291 case SPRN_ICTC: 392 292 case SPRN_THRM1: 393 293 case SPRN_THRM2: 394 294 case SPRN_THRM3: 395 295 case SPRN_CTRLF: 396 296 case SPRN_CTRLT: 297 + case SPRN_L2CR: 298 + case SPRN_MMCR0_GEKKO: 299 + case SPRN_MMCR1_GEKKO: 300 + case SPRN_PMC1_GEKKO: 301 + case SPRN_PMC2_GEKKO: 302 + case SPRN_PMC3_GEKKO: 303 + case SPRN_PMC4_GEKKO: 304 + case SPRN_WPAR_GEKKO: 397 305 break; 398 306 default: 399 307 printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); ··· 429 301 int emulated = EMULATE_DONE; 430 302 431 303 switch (sprn) { 304 + case SPRN_IBAT0U ... SPRN_IBAT3L: 305 + case SPRN_IBAT4U ... SPRN_IBAT7L: 306 + case SPRN_DBAT0U ... SPRN_DBAT3L: 307 + case SPRN_DBAT4U ... SPRN_DBAT7L: 308 + kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn)); 309 + break; 432 310 case SPRN_SDR1: 433 311 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); 434 312 break; ··· 454 320 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); 455 321 break; 456 322 case SPRN_HID2: 323 + case SPRN_HID2_GEKKO: 457 324 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); 458 325 break; 459 326 case SPRN_HID4: 327 + case SPRN_HID4_GEKKO: 460 328 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); 461 329 break; 462 330 case SPRN_HID5: 463 331 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); 332 + break; 333 + case SPRN_GQR0: 334 + case SPRN_GQR1: 335 + case SPRN_GQR2: 336 + case SPRN_GQR3: 337 + case SPRN_GQR4: 338 + case SPRN_GQR5: 339 + case SPRN_GQR6: 340 + case SPRN_GQR7: 341 + kvmppc_set_gpr(vcpu, rt, 342 + to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]); 464 343 break; 465 344 case SPRN_THRM1: 466 345 case SPRN_THRM2: 467 346 case SPRN_THRM3: 468 347 case SPRN_CTRLF: 469 348 case SPRN_CTRLT: 349 + case SPRN_L2CR: 350 + case SPRN_MMCR0_GEKKO: 351 + case SPRN_MMCR1_GEKKO: 352 + case SPRN_PMC1_GEKKO: 353 + case SPRN_PMC2_GEKKO: 354 + case SPRN_PMC3_GEKKO: 355 + case SPRN_PMC4_GEKKO: 356 + case SPRN_WPAR_GEKKO: 470 357 kvmppc_set_gpr(vcpu, rt, 0); 471 358 break; 472 359 default: ··· 501 346 
return emulated; 502 347 } 503 348 349 + u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) 350 + { 351 + u32 dsisr = 0; 352 + 353 + /* 354 + * This is what the spec says about DSISR bits (not mentioned = 0): 355 + * 356 + * 12:13 [DS] Set to bits 30:31 357 + * 15:16 [X] Set to bits 29:30 358 + * 17 [X] Set to bit 25 359 + * [D/DS] Set to bit 5 360 + * 18:21 [X] Set to bits 21:24 361 + * [D/DS] Set to bits 1:4 362 + * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS) 363 + * 27:31 Set to bits 11:15 (RA) 364 + */ 365 + 366 + switch (get_op(inst)) { 367 + /* D-form */ 368 + case OP_LFS: 369 + case OP_LFD: 370 + case OP_STFD: 371 + case OP_STFS: 372 + dsisr |= (inst >> 12) & 0x4000; /* bit 17 */ 373 + dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */ 374 + break; 375 + /* X-form */ 376 + case 31: 377 + dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */ 378 + dsisr |= (inst << 8) & 0x04000; /* bit 17 */ 379 + dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */ 380 + break; 381 + default: 382 + printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); 383 + break; 384 + } 385 + 386 + dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */ 387 + 388 + return dsisr; 389 + } 390 + 391 + ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) 392 + { 393 + ulong dar = 0; 394 + ulong ra; 395 + 396 + switch (get_op(inst)) { 397 + case OP_LFS: 398 + case OP_LFD: 399 + case OP_STFD: 400 + case OP_STFS: 401 + ra = get_ra(inst); 402 + if (ra) 403 + dar = kvmppc_get_gpr(vcpu, ra); 404 + dar += (s32)((s16)inst); 405 + break; 406 + case 31: 407 + ra = get_ra(inst); 408 + if (ra) 409 + dar = kvmppc_get_gpr(vcpu, ra); 410 + dar += kvmppc_get_gpr(vcpu, get_rb(inst)); 411 + break; 412 + default: 413 + printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); 414 + break; 415 + } 416 + 417 + return dar; 418 + }
arch/powerpc/kvm/book3s_64_exports.c arch/powerpc/kvm/book3s_exports.c
+102 -100
arch/powerpc/kvm/book3s_64_interrupts.S arch/powerpc/kvm/book3s_interrupts.S
··· 24 24 #include <asm/asm-offsets.h> 25 25 #include <asm/exception-64s.h> 26 26 27 - #define KVMPPC_HANDLE_EXIT .kvmppc_handle_exit 28 - #define ULONG_SIZE 8 29 - #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) 27 + #if defined(CONFIG_PPC_BOOK3S_64) 30 28 31 - .macro DISABLE_INTERRUPTS 32 - mfmsr r0 33 - rldicl r0,r0,48,1 34 - rotldi r0,r0,16 35 - mtmsrd r0,1 36 - .endm 29 + #define ULONG_SIZE 8 30 + #define FUNC(name) GLUE(.,name) 37 31 32 + #define GET_SHADOW_VCPU(reg) \ 33 + addi reg, r13, PACA_KVM_SVCPU 34 + 35 + #define DISABLE_INTERRUPTS \ 36 + mfmsr r0; \ 37 + rldicl r0,r0,48,1; \ 38 + rotldi r0,r0,16; \ 39 + mtmsrd r0,1; \ 40 + 41 + #elif defined(CONFIG_PPC_BOOK3S_32) 42 + 43 + #define ULONG_SIZE 4 44 + #define FUNC(name) name 45 + 46 + #define GET_SHADOW_VCPU(reg) \ 47 + lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) 48 + 49 + #define DISABLE_INTERRUPTS \ 50 + mfmsr r0; \ 51 + rlwinm r0,r0,0,17,15; \ 52 + mtmsr r0; \ 53 + 54 + #endif /* CONFIG_PPC_BOOK3S_XX */ 55 + 56 + 57 + #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) 38 58 #define VCPU_LOAD_NVGPRS(vcpu) \ 39 - ld r14, VCPU_GPR(r14)(vcpu); \ 40 - ld r15, VCPU_GPR(r15)(vcpu); \ 41 - ld r16, VCPU_GPR(r16)(vcpu); \ 42 - ld r17, VCPU_GPR(r17)(vcpu); \ 43 - ld r18, VCPU_GPR(r18)(vcpu); \ 44 - ld r19, VCPU_GPR(r19)(vcpu); \ 45 - ld r20, VCPU_GPR(r20)(vcpu); \ 46 - ld r21, VCPU_GPR(r21)(vcpu); \ 47 - ld r22, VCPU_GPR(r22)(vcpu); \ 48 - ld r23, VCPU_GPR(r23)(vcpu); \ 49 - ld r24, VCPU_GPR(r24)(vcpu); \ 50 - ld r25, VCPU_GPR(r25)(vcpu); \ 51 - ld r26, VCPU_GPR(r26)(vcpu); \ 52 - ld r27, VCPU_GPR(r27)(vcpu); \ 53 - ld r28, VCPU_GPR(r28)(vcpu); \ 54 - ld r29, VCPU_GPR(r29)(vcpu); \ 55 - ld r30, VCPU_GPR(r30)(vcpu); \ 56 - ld r31, VCPU_GPR(r31)(vcpu); \ 59 + PPC_LL r14, VCPU_GPR(r14)(vcpu); \ 60 + PPC_LL r15, VCPU_GPR(r15)(vcpu); \ 61 + PPC_LL r16, VCPU_GPR(r16)(vcpu); \ 62 + PPC_LL r17, VCPU_GPR(r17)(vcpu); \ 63 + PPC_LL r18, VCPU_GPR(r18)(vcpu); \ 64 + PPC_LL r19, VCPU_GPR(r19)(vcpu); \ 65 + PPC_LL r20, 
VCPU_GPR(r20)(vcpu); \ 66 + PPC_LL r21, VCPU_GPR(r21)(vcpu); \ 67 + PPC_LL r22, VCPU_GPR(r22)(vcpu); \ 68 + PPC_LL r23, VCPU_GPR(r23)(vcpu); \ 69 + PPC_LL r24, VCPU_GPR(r24)(vcpu); \ 70 + PPC_LL r25, VCPU_GPR(r25)(vcpu); \ 71 + PPC_LL r26, VCPU_GPR(r26)(vcpu); \ 72 + PPC_LL r27, VCPU_GPR(r27)(vcpu); \ 73 + PPC_LL r28, VCPU_GPR(r28)(vcpu); \ 74 + PPC_LL r29, VCPU_GPR(r29)(vcpu); \ 75 + PPC_LL r30, VCPU_GPR(r30)(vcpu); \ 76 + PPC_LL r31, VCPU_GPR(r31)(vcpu); \ 57 77 58 78 /***************************************************************************** 59 79 * * ··· 89 69 90 70 kvm_start_entry: 91 71 /* Write correct stack frame */ 92 - mflr r0 93 - std r0,16(r1) 72 + mflr r0 73 + PPC_STL r0,PPC_LR_STKOFF(r1) 94 74 95 75 /* Save host state to the stack */ 96 - stdu r1, -SWITCH_FRAME_SIZE(r1) 76 + PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) 97 77 98 78 /* Save r3 (kvm_run) and r4 (vcpu) */ 99 79 SAVE_2GPRS(3, r1) ··· 102 82 SAVE_NVGPRS(r1) 103 83 104 84 /* Save LR */ 105 - std r0, _LINK(r1) 85 + PPC_STL r0, _LINK(r1) 106 86 107 87 /* Load non-volatile guest state from the vcpu */ 108 88 VCPU_LOAD_NVGPRS(r4) 109 89 90 + GET_SHADOW_VCPU(r5) 91 + 110 92 /* Save R1/R2 in the PACA */ 111 - std r1, PACA_KVM_HOST_R1(r13) 112 - std r2, PACA_KVM_HOST_R2(r13) 93 + PPC_STL r1, SVCPU_HOST_R1(r5) 94 + PPC_STL r2, SVCPU_HOST_R2(r5) 113 95 114 96 /* XXX swap in/out on load? 
*/ 115 - ld r3, VCPU_HIGHMEM_HANDLER(r4) 116 - std r3, PACA_KVM_VMHANDLER(r13) 97 + PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4) 98 + PPC_STL r3, SVCPU_VMHANDLER(r5) 117 99 118 100 kvm_start_lightweight: 119 101 120 - ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ 121 - ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ 122 - 123 - /* Load some guest state in the respective registers */ 124 - ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */ 125 - /* will be swapped in by rmcall */ 126 - 127 - ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */ 128 - mtlr r3 /* LR = r3 */ 102 + PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ 129 103 130 104 DISABLE_INTERRUPTS 131 105 106 + #ifdef CONFIG_PPC_BOOK3S_64 132 107 /* Some guests may need to have dcbz set to 32 byte length. 133 108 * 134 109 * Usually we ensure that by patching the guest's instructions ··· 133 118 * because that's a lot faster. 134 119 */ 135 120 136 - ld r3, VCPU_HFLAGS(r4) 121 + PPC_LL r3, VCPU_HFLAGS(r4) 137 122 rldicl. 
r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */ 138 123 beq no_dcbz32_on 139 124 ··· 143 128 144 129 no_dcbz32_on: 145 130 146 - ld r6, VCPU_RMCALL(r4) 131 + #endif /* CONFIG_PPC_BOOK3S_64 */ 132 + 133 + PPC_LL r6, VCPU_RMCALL(r4) 147 134 mtctr r6 148 135 149 - ld r3, VCPU_TRAMPOLINE_ENTER(r4) 136 + PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4) 150 137 LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 151 138 152 - /* Jump to SLB patching handlder and into our guest */ 139 + /* Jump to segment patching handler and into our guest */ 153 140 bctr 154 141 155 142 /* ··· 166 149 /* 167 150 * Register usage at this point: 168 151 * 169 - * R0 = guest last inst 170 - * R1 = host R1 171 - * R2 = host R2 172 - * R3 = guest PC 173 - * R4 = guest MSR 174 - * R5 = guest DAR 175 - * R6 = guest DSISR 176 - * R13 = PACA 177 - * PACA.KVM.* = guest * 152 + * R1 = host R1 153 + * R2 = host R2 154 + * R12 = exit handler id 155 + * R13 = PACA 156 + * SVCPU.* = guest * 178 157 * 179 158 */ 180 159 181 160 /* R7 = vcpu */ 182 - ld r7, GPR4(r1) 161 + PPC_LL r7, GPR4(r1) 183 162 184 - /* Now save the guest state */ 163 + #ifdef CONFIG_PPC_BOOK3S_64 185 164 186 - stw r0, VCPU_LAST_INST(r7) 187 - 188 - std r3, VCPU_PC(r7) 189 - std r4, VCPU_SHADOW_SRR1(r7) 190 - std r5, VCPU_FAULT_DEAR(r7) 191 - std r6, VCPU_FAULT_DSISR(r7) 192 - 193 - ld r5, VCPU_HFLAGS(r7) 165 + PPC_LL r5, VCPU_HFLAGS(r7) 194 166 rldicl. 
r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ 195 167 beq no_dcbz32_off 196 168 ··· 190 184 191 185 no_dcbz32_off: 192 186 193 - std r14, VCPU_GPR(r14)(r7) 194 - std r15, VCPU_GPR(r15)(r7) 195 - std r16, VCPU_GPR(r16)(r7) 196 - std r17, VCPU_GPR(r17)(r7) 197 - std r18, VCPU_GPR(r18)(r7) 198 - std r19, VCPU_GPR(r19)(r7) 199 - std r20, VCPU_GPR(r20)(r7) 200 - std r21, VCPU_GPR(r21)(r7) 201 - std r22, VCPU_GPR(r22)(r7) 202 - std r23, VCPU_GPR(r23)(r7) 203 - std r24, VCPU_GPR(r24)(r7) 204 - std r25, VCPU_GPR(r25)(r7) 205 - std r26, VCPU_GPR(r26)(r7) 206 - std r27, VCPU_GPR(r27)(r7) 207 - std r28, VCPU_GPR(r28)(r7) 208 - std r29, VCPU_GPR(r29)(r7) 209 - std r30, VCPU_GPR(r30)(r7) 210 - std r31, VCPU_GPR(r31)(r7) 187 + #endif /* CONFIG_PPC_BOOK3S_64 */ 211 188 212 - /* Save guest CTR */ 213 - mfctr r5 214 - std r5, VCPU_CTR(r7) 215 - 216 - /* Save guest LR */ 217 - mflr r5 218 - std r5, VCPU_LR(r7) 189 + PPC_STL r14, VCPU_GPR(r14)(r7) 190 + PPC_STL r15, VCPU_GPR(r15)(r7) 191 + PPC_STL r16, VCPU_GPR(r16)(r7) 192 + PPC_STL r17, VCPU_GPR(r17)(r7) 193 + PPC_STL r18, VCPU_GPR(r18)(r7) 194 + PPC_STL r19, VCPU_GPR(r19)(r7) 195 + PPC_STL r20, VCPU_GPR(r20)(r7) 196 + PPC_STL r21, VCPU_GPR(r21)(r7) 197 + PPC_STL r22, VCPU_GPR(r22)(r7) 198 + PPC_STL r23, VCPU_GPR(r23)(r7) 199 + PPC_STL r24, VCPU_GPR(r24)(r7) 200 + PPC_STL r25, VCPU_GPR(r25)(r7) 201 + PPC_STL r26, VCPU_GPR(r26)(r7) 202 + PPC_STL r27, VCPU_GPR(r27)(r7) 203 + PPC_STL r28, VCPU_GPR(r28)(r7) 204 + PPC_STL r29, VCPU_GPR(r29)(r7) 205 + PPC_STL r30, VCPU_GPR(r30)(r7) 206 + PPC_STL r31, VCPU_GPR(r31)(r7) 219 207 220 208 /* Restore host msr -> SRR1 */ 221 - ld r6, VCPU_HOST_MSR(r7) 209 + PPC_LL r6, VCPU_HOST_MSR(r7) 222 210 223 211 /* 224 212 * For some interrupts, we need to call the real Linux ··· 228 228 beq call_linux_handler 229 229 cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER 230 230 beq call_linux_handler 231 + cmpwi r12, BOOK3S_INTERRUPT_PERFMON 232 + beq call_linux_handler 231 233 232 234 /* Back to EE=1 */ 233 235 mtmsr r6 
236 + sync 234 237 b kvm_return_point 235 238 236 239 call_linux_handler: ··· 252 249 */ 253 250 254 251 /* Restore host IP -> SRR0 */ 255 - ld r5, VCPU_HOST_RETIP(r7) 252 + PPC_LL r5, VCPU_HOST_RETIP(r7) 256 253 257 254 /* XXX Better move to a safe function? 258 255 * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ 259 256 260 257 mtlr r12 261 258 262 - ld r4, VCPU_TRAMPOLINE_LOWMEM(r7) 259 + PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7) 263 260 mtsrr0 r4 264 261 LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 265 262 mtsrr1 r3 ··· 277 274 278 275 /* Restore r3 (kvm_run) and r4 (vcpu) */ 279 276 REST_2GPRS(3, r1) 280 - bl KVMPPC_HANDLE_EXIT 277 + bl FUNC(kvmppc_handle_exit) 281 278 282 279 /* If RESUME_GUEST, get back in the loop */ 283 280 cmpwi r3, RESUME_GUEST ··· 288 285 289 286 kvm_exit_loop: 290 287 291 - ld r4, _LINK(r1) 288 + PPC_LL r4, _LINK(r1) 292 289 mtlr r4 293 290 294 291 /* Restore non-volatile host registers (r14 - r31) */ ··· 299 296 300 297 kvm_loop_heavyweight: 301 298 302 - ld r4, _LINK(r1) 303 - std r4, (16 + SWITCH_FRAME_SIZE)(r1) 299 + PPC_LL r4, _LINK(r1) 300 + PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) 304 301 305 302 /* Load vcpu and cpu_run */ 306 303 REST_2GPRS(3, r1) ··· 318 315 319 316 /* Jump back into the beginning of this function */ 320 317 b kvm_start_lightweight 321 -
+27 -21
arch/powerpc/kvm/book3s_64_mmu.c
··· 232 232 } 233 233 234 234 dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " 235 - "-> 0x%llx\n", 235 + "-> 0x%lx\n", 236 236 eaddr, avpn, gpte->vpage, gpte->raddr); 237 237 found = true; 238 238 break; ··· 383 383 384 384 if (vcpu->arch.msr & MSR_IR) { 385 385 kvmppc_mmu_flush_segments(vcpu); 386 - kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); 386 + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 387 387 } 388 388 } 389 389 ··· 439 439 kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); 440 440 } 441 441 442 - static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, 442 + static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 443 443 u64 *vsid) 444 444 { 445 - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 446 - case 0: 447 - *vsid = (VSID_REAL >> 16) | esid; 448 - break; 449 - case MSR_IR: 450 - *vsid = (VSID_REAL_IR >> 16) | esid; 451 - break; 452 - case MSR_DR: 453 - *vsid = (VSID_REAL_DR >> 16) | esid; 454 - break; 455 - case MSR_DR|MSR_IR: 456 - { 457 - ulong ea; 458 - struct kvmppc_slb *slb; 459 - ea = esid << SID_SHIFT; 445 + ulong ea = esid << SID_SHIFT; 446 + struct kvmppc_slb *slb; 447 + u64 gvsid = esid; 448 + 449 + if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 460 450 slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); 461 451 if (slb) 462 - *vsid = slb->vsid; 463 - else 452 + gvsid = slb->vsid; 453 + } 454 + 455 + switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 456 + case 0: 457 + *vsid = VSID_REAL | esid; 458 + break; 459 + case MSR_IR: 460 + *vsid = VSID_REAL_IR | gvsid; 461 + break; 462 + case MSR_DR: 463 + *vsid = VSID_REAL_DR | gvsid; 464 + break; 465 + case MSR_DR|MSR_IR: 466 + if (!slb) 464 467 return -ENOENT; 465 468 469 + *vsid = gvsid; 466 470 break; 467 - } 468 471 default: 469 472 BUG(); 470 473 break; 471 474 } 475 + 476 + if (vcpu->arch.msr & MSR_PR) 477 + *vsid |= VSID_PR; 472 478 473 479 return 0; 474 480 }
+67 -35
arch/powerpc/kvm/book3s_64_mmu_host.c
··· 48 48 49 49 static void invalidate_pte(struct hpte_cache *pte) 50 50 { 51 - dprintk_mmu("KVM: Flushing SPT %d: 0x%llx (0x%llx) -> 0x%llx\n", 52 - i, pte->pte.eaddr, pte->pte.vpage, pte->host_va); 51 + dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", 52 + pte->pte.eaddr, pte->pte.vpage, pte->host_va); 53 53 54 54 ppc_md.hpte_invalidate(pte->slot, pte->host_va, 55 55 MMU_PAGE_4K, MMU_SEGSIZE_256M, 56 56 false); 57 57 pte->host_va = 0; 58 - kvm_release_pfn_dirty(pte->pfn); 58 + 59 + if (pte->pte.may_write) 60 + kvm_release_pfn_dirty(pte->pfn); 61 + else 62 + kvm_release_pfn_clean(pte->pfn); 59 63 } 60 64 61 - void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 guest_ea, u64 ea_mask) 65 + void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) 62 66 { 63 67 int i; 64 68 65 - dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%llx & 0x%llx\n", 69 + dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", 66 70 vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); 67 71 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 68 72 ··· 110 106 } 111 107 } 112 108 113 - void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end) 109 + void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 114 110 { 115 111 int i; 116 112 117 - dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", 118 - vcpu->arch.hpte_cache_offset, guest_pa, pa_mask); 113 + dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx & 0x%lx\n", 114 + vcpu->arch.hpte_cache_offset, pa_start, pa_end); 119 115 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 120 116 121 117 for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { ··· 186 182 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); 187 183 map = &to_book3s(vcpu)->sid_map[sid_map_mask]; 188 184 if (map->guest_vsid == gvsid) { 189 - dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n", 185 + dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n", 190 186 gvsid, map->host_vsid); 191 187 
return map; 192 188 } ··· 198 194 return map; 199 195 } 200 196 201 - dprintk_slb("SLB: Searching 0x%llx -> not found\n", gvsid); 197 + dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n", 198 + sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid); 202 199 return NULL; 203 200 } 204 201 ··· 217 212 /* Get host physical address for gpa */ 218 213 hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 219 214 if (kvm_is_error_hva(hpaddr)) { 220 - printk(KERN_INFO "Couldn't get guest page for gfn %llx!\n", orig_pte->eaddr); 215 + printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); 221 216 return -EINVAL; 222 217 } 223 218 hpaddr <<= PAGE_SHIFT; ··· 232 227 vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); 233 228 map = find_sid_vsid(vcpu, vsid); 234 229 if (!map) { 235 - kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr); 230 + ret = kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr); 231 + WARN_ON(ret < 0); 236 232 map = find_sid_vsid(vcpu, vsid); 237 233 } 238 - BUG_ON(!map); 234 + if (!map) { 235 + printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n", 236 + vsid, orig_pte->eaddr); 237 + WARN_ON(true); 238 + return -EINVAL; 239 + } 239 240 240 241 vsid = map->host_vsid; 241 242 va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); ··· 268 257 269 258 if (ret < 0) { 270 259 /* If we couldn't map a primary PTE, try a secondary */ 271 - #ifdef USE_SECONDARY 272 260 hash = ~hash; 261 + vflags ^= HPTE_V_SECONDARY; 273 262 attempt++; 274 - if (attempt % 2) 275 - vflags = HPTE_V_SECONDARY; 276 - else 277 - vflags = 0; 278 - #else 279 - attempt = 2; 280 - #endif 281 263 goto map_again; 282 264 } else { 283 265 int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); 284 266 struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; 285 267 286 - dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%lx (0x%llx) -> %lx\n", 268 + dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", 287 269 ((rflags & HPTE_R_PP) == 3) ? 
'-' : 'w', 288 270 (rflags & HPTE_R_N) ? '-' : 'x', 289 271 orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); 272 + 273 + /* The ppc_md code may give us a secondary entry even though we 274 + asked for a primary. Fix up. */ 275 + if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) { 276 + hash = ~hash; 277 + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); 278 + } 290 279 291 280 pte->slot = hpteg + (ret & 7); 292 281 pte->host_va = va; ··· 332 321 map->guest_vsid = gvsid; 333 322 map->valid = true; 334 323 324 + dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n", 325 + sid_map_mask, gvsid, map->host_vsid); 326 + 335 327 return map; 336 328 } 337 329 ··· 345 331 int found_inval = -1; 346 332 int r; 347 333 348 - if (!get_paca()->kvm_slb_max) 349 - get_paca()->kvm_slb_max = 1; 334 + if (!to_svcpu(vcpu)->slb_max) 335 + to_svcpu(vcpu)->slb_max = 1; 350 336 351 337 /* Are we overwriting? */ 352 - for (i = 1; i < get_paca()->kvm_slb_max; i++) { 353 - if (!(get_paca()->kvm_slb[i].esid & SLB_ESID_V)) 338 + for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) { 339 + if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V)) 354 340 found_inval = i; 355 - else if ((get_paca()->kvm_slb[i].esid & ESID_MASK) == esid) 341 + else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid) 356 342 return i; 357 343 } 358 344 ··· 366 352 max_slb_size = mmu_slb_size; 367 353 368 354 /* Overflowing -> purge */ 369 - if ((get_paca()->kvm_slb_max) == max_slb_size) 355 + if ((to_svcpu(vcpu)->slb_max) == max_slb_size) 370 356 kvmppc_mmu_flush_segments(vcpu); 371 357 372 - r = get_paca()->kvm_slb_max; 373 - get_paca()->kvm_slb_max++; 358 + r = to_svcpu(vcpu)->slb_max; 359 + to_svcpu(vcpu)->slb_max++; 374 360 375 361 return r; 376 362 } ··· 388 374 389 375 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { 390 376 /* Invalidate an entry */ 391 - get_paca()->kvm_slb[slb_index].esid = 0; 377 + to_svcpu(vcpu)->slb[slb_index].esid = 0; 392 378 return -ENOENT; 393 379 } 394 380 ··· 402 
388 slb_vsid &= ~SLB_VSID_KP; 403 389 slb_esid |= slb_index; 404 390 405 - get_paca()->kvm_slb[slb_index].esid = slb_esid; 406 - get_paca()->kvm_slb[slb_index].vsid = slb_vsid; 391 + to_svcpu(vcpu)->slb[slb_index].esid = slb_esid; 392 + to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid; 407 393 408 394 dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); 409 395 ··· 412 398 413 399 void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 414 400 { 415 - get_paca()->kvm_slb_max = 1; 416 - get_paca()->kvm_slb[0].esid = 0; 401 + to_svcpu(vcpu)->slb_max = 1; 402 + to_svcpu(vcpu)->slb[0].esid = 0; 417 403 } 418 404 419 405 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 420 406 { 421 407 kvmppc_mmu_pte_flush(vcpu, 0, 0); 408 + __destroy_context(to_book3s(vcpu)->context_id); 409 + } 410 + 411 + int kvmppc_mmu_init(struct kvm_vcpu *vcpu) 412 + { 413 + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 414 + int err; 415 + 416 + err = __init_new_context(); 417 + if (err < 0) 418 + return -1; 419 + vcpu3s->context_id = err; 420 + 421 + vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1; 422 + vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; 423 + vcpu3s->vsid_next = vcpu3s->vsid_first; 424 + 425 + return 0; 422 426 }
+91 -44
arch/powerpc/kvm/book3s_64_rmhandlers.S arch/powerpc/kvm/book3s_rmhandlers.S
··· 22 22 #include <asm/reg.h> 23 23 #include <asm/page.h> 24 24 #include <asm/asm-offsets.h> 25 + 26 + #ifdef CONFIG_PPC_BOOK3S_64 25 27 #include <asm/exception-64s.h> 28 + #endif 26 29 27 30 /***************************************************************************** 28 31 * * ··· 33 30 * * 34 31 ****************************************************************************/ 35 32 33 + #if defined(CONFIG_PPC_BOOK3S_64) 34 + 35 + #define LOAD_SHADOW_VCPU(reg) \ 36 + mfspr reg, SPRN_SPRG_PACA 37 + 38 + #define SHADOW_VCPU_OFF PACA_KVM_SVCPU 39 + #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) 40 + #define FUNC(name) GLUE(.,name) 41 + 42 + #elif defined(CONFIG_PPC_BOOK3S_32) 43 + 44 + #define LOAD_SHADOW_VCPU(reg) \ 45 + mfspr reg, SPRN_SPRG_THREAD; \ 46 + lwz reg, THREAD_KVM_SVCPU(reg); \ 47 + /* PPC32 can have a NULL pointer - let's check for that */ \ 48 + mtspr SPRN_SPRG_SCRATCH1, r12; /* Save r12 */ \ 49 + mfcr r12; \ 50 + cmpwi reg, 0; \ 51 + bne 1f; \ 52 + mfspr reg, SPRN_SPRG_SCRATCH0; \ 53 + mtcr r12; \ 54 + mfspr r12, SPRN_SPRG_SCRATCH1; \ 55 + b kvmppc_resume_\intno; \ 56 + 1:; \ 57 + mtcr r12; \ 58 + mfspr r12, SPRN_SPRG_SCRATCH1; \ 59 + tophys(reg, reg) 60 + 61 + #define SHADOW_VCPU_OFF 0 62 + #define MSR_NOIRQ MSR_KERNEL 63 + #define FUNC(name) name 64 + 65 + #endif 36 66 37 67 .macro INTERRUPT_TRAMPOLINE intno 38 68 ··· 78 42 * First thing to do is to find out if we're coming 79 43 * from a KVM guest or a Linux process. 
80 44 * 81 - * To distinguish, we check a magic byte in the PACA 45 + * To distinguish, we check a magic byte in the PACA/current 82 46 */ 83 - mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ 84 - std r12, PACA_KVM_SCRATCH0(r13) 47 + LOAD_SHADOW_VCPU(r13) 48 + PPC_STL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) 85 49 mfcr r12 86 - stw r12, PACA_KVM_SCRATCH1(r13) 87 - lbz r12, PACA_KVM_IN_GUEST(r13) 50 + stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) 51 + lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) 88 52 cmpwi r12, KVM_GUEST_MODE_NONE 89 53 bne ..kvmppc_handler_hasmagic_\intno 90 54 /* No KVM guest? Then jump back to the Linux handler! */ 91 - lwz r12, PACA_KVM_SCRATCH1(r13) 55 + lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) 92 56 mtcr r12 93 - ld r12, PACA_KVM_SCRATCH0(r13) 57 + PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) 94 58 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ 95 59 b kvmppc_resume_\intno /* Get back original handler */ 96 60 ··· 112 76 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET 113 77 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK 114 78 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE 115 - INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT 116 79 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE 117 - INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT 118 80 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL 119 81 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT 120 82 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM ··· 122 88 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE 123 89 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON 124 90 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC 91 + 92 + /* Those are only available on 64 bit machines */ 93 + 94 + #ifdef CONFIG_PPC_BOOK3S_64 95 + INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT 96 + INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT 125 97 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX 98 + #endif 126 99 127 100 /* 128 101 * Bring us back to the faulting code, but skip the 
··· 140 99 * 141 100 * Input Registers: 142 101 * 143 - * R12 = free 144 - * R13 = PACA 145 - * PACA.KVM.SCRATCH0 = guest R12 146 - * PACA.KVM.SCRATCH1 = guest CR 147 - * SPRG_SCRATCH0 = guest R13 102 + * R12 = free 103 + * R13 = Shadow VCPU (PACA) 104 + * SVCPU.SCRATCH0 = guest R12 105 + * SVCPU.SCRATCH1 = guest CR 106 + * SPRG_SCRATCH0 = guest R13 148 107 * 149 108 */ 150 109 kvmppc_handler_skip_ins: ··· 155 114 mtsrr0 r12 156 115 157 116 /* Clean up all state */ 158 - lwz r12, PACA_KVM_SCRATCH1(r13) 117 + lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) 159 118 mtcr r12 160 - ld r12, PACA_KVM_SCRATCH0(r13) 119 + PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) 161 120 mfspr r13, SPRN_SPRG_SCRATCH0 162 121 163 122 /* And get back into the code */ ··· 188 147 * 189 148 * R3 = function 190 149 * R4 = MSR 191 - * R5 = CTR 150 + * R5 = scratch register 192 151 * 193 152 */ 194 153 _GLOBAL(kvmppc_rmcall) 195 - mtmsr r4 /* Disable relocation, so mtsrr 154 + LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ) 155 + mtmsr r5 /* Disable relocation and interrupts, so mtsrr 196 156 doesn't get interrupted */ 197 - mtctr r5 157 + sync 198 158 mtsrr0 r3 199 159 mtsrr1 r4 200 160 RFI 201 161 162 + #if defined(CONFIG_PPC_BOOK3S_32) 163 + #define STACK_LR INT_FRAME_SIZE+4 164 + #elif defined(CONFIG_PPC_BOOK3S_64) 165 + #define STACK_LR _LINK 166 + #endif 167 + 202 168 /* 203 169 * Activate current's external feature (FPU/Altivec/VSX) 204 170 */ 205 - #define define_load_up(what) \ 206 - \ 207 - _GLOBAL(kvmppc_load_up_ ## what); \ 208 - subi r1, r1, INT_FRAME_SIZE; \ 209 - mflr r3; \ 210 - std r3, _LINK(r1); \ 211 - mfmsr r4; \ 212 - std r31, GPR3(r1); \ 213 - mr r31, r4; \ 214 - li r5, MSR_DR; \ 215 - oris r5, r5, MSR_EE@h; \ 216 - andc r4, r4, r5; \ 217 - mtmsr r4; \ 218 - \ 219 - bl .load_up_ ## what; \ 220 - \ 221 - mtmsr r31; \ 222 - ld r3, _LINK(r1); \ 223 - ld r31, GPR3(r1); \ 224 - addi r1, r1, INT_FRAME_SIZE; \ 225 - mtlr r3; \ 171 + #define define_load_up(what) \ 172 + \ 173 + 
_GLOBAL(kvmppc_load_up_ ## what); \ 174 + PPC_STLU r1, -INT_FRAME_SIZE(r1); \ 175 + mflr r3; \ 176 + PPC_STL r3, STACK_LR(r1); \ 177 + PPC_STL r20, _NIP(r1); \ 178 + mfmsr r20; \ 179 + LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ 180 + andc r3,r20,r3; /* Disable DR,EE */ \ 181 + mtmsr r3; \ 182 + sync; \ 183 + \ 184 + bl FUNC(load_up_ ## what); \ 185 + \ 186 + mtmsr r20; /* Enable DR,EE */ \ 187 + sync; \ 188 + PPC_LL r3, STACK_LR(r1); \ 189 + PPC_LL r20, _NIP(r1); \ 190 + mtlr r3; \ 191 + addi r1, r1, INT_FRAME_SIZE; \ 226 192 blr 227 193 228 194 define_load_up(fpu) ··· 242 194 243 195 .global kvmppc_trampoline_lowmem 244 196 kvmppc_trampoline_lowmem: 245 - .long kvmppc_handler_lowmem_trampoline - _stext 197 + .long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START 246 198 247 199 .global kvmppc_trampoline_enter 248 200 kvmppc_trampoline_enter: 249 - .long kvmppc_handler_trampoline_enter - _stext 201 + .long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START 250 202 251 - #include "book3s_64_slb.S" 252 - 203 + #include "book3s_segment.S"
+24 -159
arch/powerpc/kvm/book3s_64_slb.S
··· 44 44 * * 45 45 *****************************************************************************/ 46 46 47 - .global kvmppc_handler_trampoline_enter 48 - kvmppc_handler_trampoline_enter: 47 + .macro LOAD_GUEST_SEGMENTS 49 48 50 49 /* Required state: 51 50 * ··· 52 53 * R13 = PACA 53 54 * R1 = host R1 54 55 * R2 = host R2 55 - * R9 = guest IP 56 - * R10 = guest MSR 57 - * all other GPRS = free 58 - * PACA[KVM_CR] = guest CR 59 - * PACA[KVM_XER] = guest XER 56 + * R3 = shadow vcpu 57 + * all other volatile GPRS = free 58 + * SVCPU[CR] = guest CR 59 + * SVCPU[XER] = guest XER 60 + * SVCPU[CTR] = guest CTR 61 + * SVCPU[LR] = guest LR 60 62 */ 61 - 62 - mtsrr0 r9 63 - mtsrr1 r10 64 - 65 - /* Activate guest mode, so faults get handled by KVM */ 66 - li r11, KVM_GUEST_MODE_GUEST 67 - stb r11, PACA_KVM_IN_GUEST(r13) 68 63 69 64 /* Remove LPAR shadow entries */ 70 65 ··· 94 101 95 102 /* Fill SLB with our shadow */ 96 103 97 - lbz r12, PACA_KVM_SLB_MAX(r13) 104 + lbz r12, SVCPU_SLB_MAX(r3) 98 105 mulli r12, r12, 16 99 - addi r12, r12, PACA_KVM_SLB 100 - add r12, r12, r13 106 + addi r12, r12, SVCPU_SLB 107 + add r12, r12, r3 101 108 102 109 /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */ 103 - li r11, PACA_KVM_SLB 104 - add r11, r11, r13 110 + li r11, SVCPU_SLB 111 + add r11, r11, r3 105 112 106 113 slb_loop_enter: 107 114 ··· 120 127 121 128 slb_do_enter: 122 129 123 - /* Enter guest */ 124 - 125 - ld r0, (PACA_KVM_R0)(r13) 126 - ld r1, (PACA_KVM_R1)(r13) 127 - ld r2, (PACA_KVM_R2)(r13) 128 - ld r3, (PACA_KVM_R3)(r13) 129 - ld r4, (PACA_KVM_R4)(r13) 130 - ld r5, (PACA_KVM_R5)(r13) 131 - ld r6, (PACA_KVM_R6)(r13) 132 - ld r7, (PACA_KVM_R7)(r13) 133 - ld r8, (PACA_KVM_R8)(r13) 134 - ld r9, (PACA_KVM_R9)(r13) 135 - ld r10, (PACA_KVM_R10)(r13) 136 - ld r12, (PACA_KVM_R12)(r13) 137 - 138 - lwz r11, (PACA_KVM_CR)(r13) 139 - mtcr r11 140 - 141 - ld r11, (PACA_KVM_XER)(r13) 142 - mtxer r11 143 - 144 - ld r11, (PACA_KVM_R11)(r13) 145 - ld r13, 
(PACA_KVM_R13)(r13) 146 - 147 - RFI 148 - kvmppc_handler_trampoline_enter_end: 149 - 150 - 130 + .endm 151 131 152 132 /****************************************************************************** 153 133 * * ··· 128 162 * * 129 163 *****************************************************************************/ 130 164 131 - .global kvmppc_handler_trampoline_exit 132 - kvmppc_handler_trampoline_exit: 165 + .macro LOAD_HOST_SEGMENTS 133 166 134 167 /* Register usage at this point: 135 168 * 136 - * SPRG_SCRATCH0 = guest R13 137 - * R12 = exit handler id 138 - * R13 = PACA 139 - * PACA.KVM.SCRATCH0 = guest R12 140 - * PACA.KVM.SCRATCH1 = guest CR 169 + * R1 = host R1 170 + * R2 = host R2 171 + * R12 = exit handler id 172 + * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] 173 + * SVCPU.* = guest * 174 + * SVCPU[CR] = guest CR 175 + * SVCPU[XER] = guest XER 176 + * SVCPU[CTR] = guest CTR 177 + * SVCPU[LR] = guest LR 141 178 * 142 179 */ 143 - 144 - /* Save registers */ 145 - 146 - std r0, PACA_KVM_R0(r13) 147 - std r1, PACA_KVM_R1(r13) 148 - std r2, PACA_KVM_R2(r13) 149 - std r3, PACA_KVM_R3(r13) 150 - std r4, PACA_KVM_R4(r13) 151 - std r5, PACA_KVM_R5(r13) 152 - std r6, PACA_KVM_R6(r13) 153 - std r7, PACA_KVM_R7(r13) 154 - std r8, PACA_KVM_R8(r13) 155 - std r9, PACA_KVM_R9(r13) 156 - std r10, PACA_KVM_R10(r13) 157 - std r11, PACA_KVM_R11(r13) 158 - 159 - /* Restore R1/R2 so we can handle faults */ 160 - ld r1, PACA_KVM_HOST_R1(r13) 161 - ld r2, PACA_KVM_HOST_R2(r13) 162 - 163 - /* Save guest PC and MSR in GPRs */ 164 - mfsrr0 r3 165 - mfsrr1 r4 166 - 167 - /* Get scratch'ed off registers */ 168 - mfspr r9, SPRN_SPRG_SCRATCH0 169 - std r9, PACA_KVM_R13(r13) 170 - 171 - ld r8, PACA_KVM_SCRATCH0(r13) 172 - std r8, PACA_KVM_R12(r13) 173 - 174 - lwz r7, PACA_KVM_SCRATCH1(r13) 175 - stw r7, PACA_KVM_CR(r13) 176 - 177 - /* Save more register state */ 178 - 179 - mfxer r6 180 - stw r6, PACA_KVM_XER(r13) 181 - 182 - mfdar r5 183 - mfdsisr r6 184 - 185 - /* 186 - * In 
order for us to easily get the last instruction, 187 - * we got the #vmexit at, we exploit the fact that the 188 - * virtual layout is still the same here, so we can just 189 - * ld from the guest's PC address 190 - */ 191 - 192 - /* We only load the last instruction when it's safe */ 193 - cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE 194 - beq ld_last_inst 195 - cmpwi r12, BOOK3S_INTERRUPT_PROGRAM 196 - beq ld_last_inst 197 - 198 - b no_ld_last_inst 199 - 200 - ld_last_inst: 201 - /* Save off the guest instruction we're at */ 202 - 203 - /* Set guest mode to 'jump over instruction' so if lwz faults 204 - * we'll just continue at the next IP. */ 205 - li r9, KVM_GUEST_MODE_SKIP 206 - stb r9, PACA_KVM_IN_GUEST(r13) 207 - 208 - /* 1) enable paging for data */ 209 - mfmsr r9 210 - ori r11, r9, MSR_DR /* Enable paging for data */ 211 - mtmsr r11 212 - /* 2) fetch the instruction */ 213 - li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */ 214 - lwz r0, 0(r3) 215 - /* 3) disable paging again */ 216 - mtmsr r9 217 - 218 - no_ld_last_inst: 219 - 220 - /* Unset guest mode */ 221 - li r9, KVM_GUEST_MODE_NONE 222 - stb r9, PACA_KVM_IN_GUEST(r13) 223 180 224 181 /* Restore bolted entries from the shadow and fix it along the way */ 225 182 ··· 164 275 165 276 slb_do_exit: 166 277 167 - /* Register usage at this point: 168 - * 169 - * R0 = guest last inst 170 - * R1 = host R1 171 - * R2 = host R2 172 - * R3 = guest PC 173 - * R4 = guest MSR 174 - * R5 = guest DAR 175 - * R6 = guest DSISR 176 - * R12 = exit handler id 177 - * R13 = PACA 178 - * PACA.KVM.* = guest * 179 - * 180 - */ 181 - 182 - /* RFI into the highmem handler */ 183 - mfmsr r7 184 - ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ 185 - mtsrr1 r7 186 - ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */ 187 - mtsrr0 r8 188 - 189 - RFI 190 - kvmppc_handler_trampoline_exit_end: 191 - 278 + .endm
+1289
arch/powerpc/kvm/book3s_paired_singles.c
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright Novell Inc 2010 16 + * 17 + * Authors: Alexander Graf <agraf@suse.de> 18 + */ 19 + 20 + #include <asm/kvm.h> 21 + #include <asm/kvm_ppc.h> 22 + #include <asm/disassemble.h> 23 + #include <asm/kvm_book3s.h> 24 + #include <asm/kvm_fpu.h> 25 + #include <asm/reg.h> 26 + #include <asm/cacheflush.h> 27 + #include <linux/vmalloc.h> 28 + 29 + /* #define DEBUG */ 30 + 31 + #ifdef DEBUG 32 + #define dprintk printk 33 + #else 34 + #define dprintk(...) 
do { } while(0); 35 + #endif 36 + 37 + #define OP_LFS 48 38 + #define OP_LFSU 49 39 + #define OP_LFD 50 40 + #define OP_LFDU 51 41 + #define OP_STFS 52 42 + #define OP_STFSU 53 43 + #define OP_STFD 54 44 + #define OP_STFDU 55 45 + #define OP_PSQ_L 56 46 + #define OP_PSQ_LU 57 47 + #define OP_PSQ_ST 60 48 + #define OP_PSQ_STU 61 49 + 50 + #define OP_31_LFSX 535 51 + #define OP_31_LFSUX 567 52 + #define OP_31_LFDX 599 53 + #define OP_31_LFDUX 631 54 + #define OP_31_STFSX 663 55 + #define OP_31_STFSUX 695 56 + #define OP_31_STFX 727 57 + #define OP_31_STFUX 759 58 + #define OP_31_LWIZX 887 59 + #define OP_31_STFIWX 983 60 + 61 + #define OP_59_FADDS 21 62 + #define OP_59_FSUBS 20 63 + #define OP_59_FSQRTS 22 64 + #define OP_59_FDIVS 18 65 + #define OP_59_FRES 24 66 + #define OP_59_FMULS 25 67 + #define OP_59_FRSQRTES 26 68 + #define OP_59_FMSUBS 28 69 + #define OP_59_FMADDS 29 70 + #define OP_59_FNMSUBS 30 71 + #define OP_59_FNMADDS 31 72 + 73 + #define OP_63_FCMPU 0 74 + #define OP_63_FCPSGN 8 75 + #define OP_63_FRSP 12 76 + #define OP_63_FCTIW 14 77 + #define OP_63_FCTIWZ 15 78 + #define OP_63_FDIV 18 79 + #define OP_63_FADD 21 80 + #define OP_63_FSQRT 22 81 + #define OP_63_FSEL 23 82 + #define OP_63_FRE 24 83 + #define OP_63_FMUL 25 84 + #define OP_63_FRSQRTE 26 85 + #define OP_63_FMSUB 28 86 + #define OP_63_FMADD 29 87 + #define OP_63_FNMSUB 30 88 + #define OP_63_FNMADD 31 89 + #define OP_63_FCMPO 32 90 + #define OP_63_MTFSB1 38 // XXX 91 + #define OP_63_FSUB 20 92 + #define OP_63_FNEG 40 93 + #define OP_63_MCRFS 64 94 + #define OP_63_MTFSB0 70 95 + #define OP_63_FMR 72 96 + #define OP_63_MTFSFI 134 97 + #define OP_63_FABS 264 98 + #define OP_63_MFFS 583 99 + #define OP_63_MTFSF 711 100 + 101 + #define OP_4X_PS_CMPU0 0 102 + #define OP_4X_PSQ_LX 6 103 + #define OP_4XW_PSQ_STX 7 104 + #define OP_4A_PS_SUM0 10 105 + #define OP_4A_PS_SUM1 11 106 + #define OP_4A_PS_MULS0 12 107 + #define OP_4A_PS_MULS1 13 108 + #define OP_4A_PS_MADDS0 14 109 + #define OP_4A_PS_MADDS1 
15 110 + #define OP_4A_PS_DIV 18 111 + #define OP_4A_PS_SUB 20 112 + #define OP_4A_PS_ADD 21 113 + #define OP_4A_PS_SEL 23 114 + #define OP_4A_PS_RES 24 115 + #define OP_4A_PS_MUL 25 116 + #define OP_4A_PS_RSQRTE 26 117 + #define OP_4A_PS_MSUB 28 118 + #define OP_4A_PS_MADD 29 119 + #define OP_4A_PS_NMSUB 30 120 + #define OP_4A_PS_NMADD 31 121 + #define OP_4X_PS_CMPO0 32 122 + #define OP_4X_PSQ_LUX 38 123 + #define OP_4XW_PSQ_STUX 39 124 + #define OP_4X_PS_NEG 40 125 + #define OP_4X_PS_CMPU1 64 126 + #define OP_4X_PS_MR 72 127 + #define OP_4X_PS_CMPO1 96 128 + #define OP_4X_PS_NABS 136 129 + #define OP_4X_PS_ABS 264 130 + #define OP_4X_PS_MERGE00 528 131 + #define OP_4X_PS_MERGE01 560 132 + #define OP_4X_PS_MERGE10 592 133 + #define OP_4X_PS_MERGE11 624 134 + 135 + #define SCALAR_NONE 0 136 + #define SCALAR_HIGH (1 << 0) 137 + #define SCALAR_LOW (1 << 1) 138 + #define SCALAR_NO_PS0 (1 << 2) 139 + #define SCALAR_NO_PS1 (1 << 3) 140 + 141 + #define GQR_ST_TYPE_MASK 0x00000007 142 + #define GQR_ST_TYPE_SHIFT 0 143 + #define GQR_ST_SCALE_MASK 0x00003f00 144 + #define GQR_ST_SCALE_SHIFT 8 145 + #define GQR_LD_TYPE_MASK 0x00070000 146 + #define GQR_LD_TYPE_SHIFT 16 147 + #define GQR_LD_SCALE_MASK 0x3f000000 148 + #define GQR_LD_SCALE_SHIFT 24 149 + 150 + #define GQR_QUANTIZE_FLOAT 0 151 + #define GQR_QUANTIZE_U8 4 152 + #define GQR_QUANTIZE_U16 5 153 + #define GQR_QUANTIZE_S8 6 154 + #define GQR_QUANTIZE_S16 7 155 + 156 + #define FPU_LS_SINGLE 0 157 + #define FPU_LS_DOUBLE 1 158 + #define FPU_LS_SINGLE_LOW 2 159 + 160 + static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) 161 + { 162 + struct thread_struct t; 163 + 164 + t.fpscr.val = vcpu->arch.fpscr; 165 + cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t); 166 + } 167 + 168 + static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) 169 + { 170 + u64 dsisr; 171 + 172 + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0); 173 + vcpu->arch.msr = 
kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); 174 + vcpu->arch.dear = eaddr; 175 + /* Page Fault */ 176 + dsisr = kvmppc_set_field(0, 33, 33, 1); 177 + if (is_store) 178 + to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); 179 + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); 180 + } 181 + 182 + static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 183 + int rs, ulong addr, int ls_type) 184 + { 185 + int emulated = EMULATE_FAIL; 186 + struct thread_struct t; 187 + int r; 188 + char tmp[8]; 189 + int len = sizeof(u32); 190 + 191 + if (ls_type == FPU_LS_DOUBLE) 192 + len = sizeof(u64); 193 + 194 + t.fpscr.val = vcpu->arch.fpscr; 195 + 196 + /* read from memory */ 197 + r = kvmppc_ld(vcpu, &addr, len, tmp, true); 198 + vcpu->arch.paddr_accessed = addr; 199 + 200 + if (r < 0) { 201 + kvmppc_inject_pf(vcpu, addr, false); 202 + goto done_load; 203 + } else if (r == EMULATE_DO_MMIO) { 204 + emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1); 205 + goto done_load; 206 + } 207 + 208 + emulated = EMULATE_DONE; 209 + 210 + /* put in registers */ 211 + switch (ls_type) { 212 + case FPU_LS_SINGLE: 213 + cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t); 214 + vcpu->arch.qpr[rs] = *((u32*)tmp); 215 + break; 216 + case FPU_LS_DOUBLE: 217 + vcpu->arch.fpr[rs] = *((u64*)tmp); 218 + break; 219 + } 220 + 221 + dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)tmp, 222 + addr, len); 223 + 224 + done_load: 225 + return emulated; 226 + } 227 + 228 + static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 229 + int rs, ulong addr, int ls_type) 230 + { 231 + int emulated = EMULATE_FAIL; 232 + struct thread_struct t; 233 + int r; 234 + char tmp[8]; 235 + u64 val; 236 + int len; 237 + 238 + t.fpscr.val = vcpu->arch.fpscr; 239 + 240 + switch (ls_type) { 241 + case FPU_LS_SINGLE: 242 + cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t); 243 + val = *((u32*)tmp); 244 + len = 
sizeof(u32); 245 + break; 246 + case FPU_LS_SINGLE_LOW: 247 + *((u32*)tmp) = vcpu->arch.fpr[rs]; 248 + val = vcpu->arch.fpr[rs] & 0xffffffff; 249 + len = sizeof(u32); 250 + break; 251 + case FPU_LS_DOUBLE: 252 + *((u64*)tmp) = vcpu->arch.fpr[rs]; 253 + val = vcpu->arch.fpr[rs]; 254 + len = sizeof(u64); 255 + break; 256 + default: 257 + val = 0; 258 + len = 0; 259 + } 260 + 261 + r = kvmppc_st(vcpu, &addr, len, tmp, true); 262 + vcpu->arch.paddr_accessed = addr; 263 + if (r < 0) { 264 + kvmppc_inject_pf(vcpu, addr, true); 265 + } else if (r == EMULATE_DO_MMIO) { 266 + emulated = kvmppc_handle_store(run, vcpu, val, len, 1); 267 + } else { 268 + emulated = EMULATE_DONE; 269 + } 270 + 271 + dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n", 272 + val, addr, len); 273 + 274 + return emulated; 275 + } 276 + 277 + static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 278 + int rs, ulong addr, bool w, int i) 279 + { 280 + int emulated = EMULATE_FAIL; 281 + struct thread_struct t; 282 + int r; 283 + float one = 1.0; 284 + u32 tmp[2]; 285 + 286 + t.fpscr.val = vcpu->arch.fpscr; 287 + 288 + /* read from memory */ 289 + if (w) { 290 + r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); 291 + memcpy(&tmp[1], &one, sizeof(u32)); 292 + } else { 293 + r = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, tmp, true); 294 + } 295 + vcpu->arch.paddr_accessed = addr; 296 + if (r < 0) { 297 + kvmppc_inject_pf(vcpu, addr, false); 298 + goto done_load; 299 + } else if ((r == EMULATE_DO_MMIO) && w) { 300 + emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1); 301 + vcpu->arch.qpr[rs] = tmp[1]; 302 + goto done_load; 303 + } else if (r == EMULATE_DO_MMIO) { 304 + emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1); 305 + goto done_load; 306 + } 307 + 308 + emulated = EMULATE_DONE; 309 + 310 + /* put in registers */ 311 + cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t); 312 + vcpu->arch.qpr[rs] = tmp[1]; 313 + 314 + dprintk(KERN_INFO 
"KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], 315 + tmp[1], addr, w ? 4 : 8); 316 + 317 + done_load: 318 + return emulated; 319 + } 320 + 321 + static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 322 + int rs, ulong addr, bool w, int i) 323 + { 324 + int emulated = EMULATE_FAIL; 325 + struct thread_struct t; 326 + int r; 327 + u32 tmp[2]; 328 + int len = w ? sizeof(u32) : sizeof(u64); 329 + 330 + t.fpscr.val = vcpu->arch.fpscr; 331 + 332 + cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); 333 + tmp[1] = vcpu->arch.qpr[rs]; 334 + 335 + r = kvmppc_st(vcpu, &addr, len, tmp, true); 336 + vcpu->arch.paddr_accessed = addr; 337 + if (r < 0) { 338 + kvmppc_inject_pf(vcpu, addr, true); 339 + } else if ((r == EMULATE_DO_MMIO) && w) { 340 + emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1); 341 + } else if (r == EMULATE_DO_MMIO) { 342 + u64 val = ((u64)tmp[0] << 32) | tmp[1]; 343 + emulated = kvmppc_handle_store(run, vcpu, val, 8, 1); 344 + } else { 345 + emulated = EMULATE_DONE; 346 + } 347 + 348 + dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n", 349 + tmp[0], tmp[1], addr, len); 350 + 351 + return emulated; 352 + } 353 + 354 + /* 355 + * Cuts out inst bits with ordering according to spec. 356 + * That means the leftmost bit is zero. All given bits are included. 357 + */ 358 + static inline u32 inst_get_field(u32 inst, int msb, int lsb) 359 + { 360 + return kvmppc_get_field(inst, msb + 32, lsb + 32); 361 + } 362 + 363 + /* 364 + * Replaces inst bits with ordering according to spec. 
365 + */ 366 + static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) 367 + { 368 + return kvmppc_set_field(inst, msb + 32, lsb + 32, value); 369 + } 370 + 371 + bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) 372 + { 373 + if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) 374 + return false; 375 + 376 + switch (get_op(inst)) { 377 + case OP_PSQ_L: 378 + case OP_PSQ_LU: 379 + case OP_PSQ_ST: 380 + case OP_PSQ_STU: 381 + case OP_LFS: 382 + case OP_LFSU: 383 + case OP_LFD: 384 + case OP_LFDU: 385 + case OP_STFS: 386 + case OP_STFSU: 387 + case OP_STFD: 388 + case OP_STFDU: 389 + return true; 390 + case 4: 391 + /* X form */ 392 + switch (inst_get_field(inst, 21, 30)) { 393 + case OP_4X_PS_CMPU0: 394 + case OP_4X_PSQ_LX: 395 + case OP_4X_PS_CMPO0: 396 + case OP_4X_PSQ_LUX: 397 + case OP_4X_PS_NEG: 398 + case OP_4X_PS_CMPU1: 399 + case OP_4X_PS_MR: 400 + case OP_4X_PS_CMPO1: 401 + case OP_4X_PS_NABS: 402 + case OP_4X_PS_ABS: 403 + case OP_4X_PS_MERGE00: 404 + case OP_4X_PS_MERGE01: 405 + case OP_4X_PS_MERGE10: 406 + case OP_4X_PS_MERGE11: 407 + return true; 408 + } 409 + /* XW form */ 410 + switch (inst_get_field(inst, 25, 30)) { 411 + case OP_4XW_PSQ_STX: 412 + case OP_4XW_PSQ_STUX: 413 + return true; 414 + } 415 + /* A form */ 416 + switch (inst_get_field(inst, 26, 30)) { 417 + case OP_4A_PS_SUM1: 418 + case OP_4A_PS_SUM0: 419 + case OP_4A_PS_MULS0: 420 + case OP_4A_PS_MULS1: 421 + case OP_4A_PS_MADDS0: 422 + case OP_4A_PS_MADDS1: 423 + case OP_4A_PS_DIV: 424 + case OP_4A_PS_SUB: 425 + case OP_4A_PS_ADD: 426 + case OP_4A_PS_SEL: 427 + case OP_4A_PS_RES: 428 + case OP_4A_PS_MUL: 429 + case OP_4A_PS_RSQRTE: 430 + case OP_4A_PS_MSUB: 431 + case OP_4A_PS_MADD: 432 + case OP_4A_PS_NMSUB: 433 + case OP_4A_PS_NMADD: 434 + return true; 435 + } 436 + break; 437 + case 59: 438 + switch (inst_get_field(inst, 21, 30)) { 439 + case OP_59_FADDS: 440 + case OP_59_FSUBS: 441 + case OP_59_FDIVS: 442 + case OP_59_FRES: 443 + case 
OP_59_FRSQRTES: 444 + return true; 445 + } 446 + switch (inst_get_field(inst, 26, 30)) { 447 + case OP_59_FMULS: 448 + case OP_59_FMSUBS: 449 + case OP_59_FMADDS: 450 + case OP_59_FNMSUBS: 451 + case OP_59_FNMADDS: 452 + return true; 453 + } 454 + break; 455 + case 63: 456 + switch (inst_get_field(inst, 21, 30)) { 457 + case OP_63_MTFSB0: 458 + case OP_63_MTFSB1: 459 + case OP_63_MTFSF: 460 + case OP_63_MTFSFI: 461 + case OP_63_MCRFS: 462 + case OP_63_MFFS: 463 + case OP_63_FCMPU: 464 + case OP_63_FCMPO: 465 + case OP_63_FNEG: 466 + case OP_63_FMR: 467 + case OP_63_FABS: 468 + case OP_63_FRSP: 469 + case OP_63_FDIV: 470 + case OP_63_FADD: 471 + case OP_63_FSUB: 472 + case OP_63_FCTIW: 473 + case OP_63_FCTIWZ: 474 + case OP_63_FRSQRTE: 475 + case OP_63_FCPSGN: 476 + return true; 477 + } 478 + switch (inst_get_field(inst, 26, 30)) { 479 + case OP_63_FMUL: 480 + case OP_63_FSEL: 481 + case OP_63_FMSUB: 482 + case OP_63_FMADD: 483 + case OP_63_FNMSUB: 484 + case OP_63_FNMADD: 485 + return true; 486 + } 487 + break; 488 + case 31: 489 + switch (inst_get_field(inst, 21, 30)) { 490 + case OP_31_LFSX: 491 + case OP_31_LFSUX: 492 + case OP_31_LFDX: 493 + case OP_31_LFDUX: 494 + case OP_31_STFSX: 495 + case OP_31_STFSUX: 496 + case OP_31_STFX: 497 + case OP_31_STFUX: 498 + case OP_31_STFIWX: 499 + return true; 500 + } 501 + break; 502 + } 503 + 504 + return false; 505 + } 506 + 507 + static int get_d_signext(u32 inst) 508 + { 509 + int d = inst & 0x8ff; 510 + 511 + if (d & 0x800) 512 + return -(d & 0x7ff); 513 + 514 + return (d & 0x7ff); 515 + } 516 + 517 + static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, 518 + int reg_out, int reg_in1, int reg_in2, 519 + int reg_in3, int scalar, 520 + void (*func)(struct thread_struct *t, 521 + u32 *dst, u32 *src1, 522 + u32 *src2, u32 *src3)) 523 + { 524 + u32 *qpr = vcpu->arch.qpr; 525 + u64 *fpr = vcpu->arch.fpr; 526 + u32 ps0_out; 527 + u32 ps0_in1, ps0_in2, ps0_in3; 528 + u32 ps1_in1, ps1_in2, ps1_in3; 529 + struct 
thread_struct t; 530 + t.fpscr.val = vcpu->arch.fpscr; 531 + 532 + /* RC */ 533 + WARN_ON(rc); 534 + 535 + /* PS0 */ 536 + cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); 537 + cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); 538 + cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); 539 + 540 + if (scalar & SCALAR_LOW) 541 + ps0_in2 = qpr[reg_in2]; 542 + 543 + func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); 544 + 545 + dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", 546 + ps0_in1, ps0_in2, ps0_in3, ps0_out); 547 + 548 + if (!(scalar & SCALAR_NO_PS0)) 549 + cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); 550 + 551 + /* PS1 */ 552 + ps1_in1 = qpr[reg_in1]; 553 + ps1_in2 = qpr[reg_in2]; 554 + ps1_in3 = qpr[reg_in3]; 555 + 556 + if (scalar & SCALAR_HIGH) 557 + ps1_in2 = ps0_in2; 558 + 559 + if (!(scalar & SCALAR_NO_PS1)) 560 + func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); 561 + 562 + dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", 563 + ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); 564 + 565 + return EMULATE_DONE; 566 + } 567 + 568 + static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, 569 + int reg_out, int reg_in1, int reg_in2, 570 + int scalar, 571 + void (*func)(struct thread_struct *t, 572 + u32 *dst, u32 *src1, 573 + u32 *src2)) 574 + { 575 + u32 *qpr = vcpu->arch.qpr; 576 + u64 *fpr = vcpu->arch.fpr; 577 + u32 ps0_out; 578 + u32 ps0_in1, ps0_in2; 579 + u32 ps1_out; 580 + u32 ps1_in1, ps1_in2; 581 + struct thread_struct t; 582 + t.fpscr.val = vcpu->arch.fpscr; 583 + 584 + /* RC */ 585 + WARN_ON(rc); 586 + 587 + /* PS0 */ 588 + cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); 589 + 590 + if (scalar & SCALAR_LOW) 591 + ps0_in2 = qpr[reg_in2]; 592 + else 593 + cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); 594 + 595 + func(&t, &ps0_out, &ps0_in1, &ps0_in2); 596 + 597 + if (!(scalar & SCALAR_NO_PS0)) { 598 + dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", 599 + ps0_in1, ps0_in2, ps0_out); 
600 + 601 + cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); 602 + } 603 + 604 + /* PS1 */ 605 + ps1_in1 = qpr[reg_in1]; 606 + ps1_in2 = qpr[reg_in2]; 607 + 608 + if (scalar & SCALAR_HIGH) 609 + ps1_in2 = ps0_in2; 610 + 611 + func(&t, &ps1_out, &ps1_in1, &ps1_in2); 612 + 613 + if (!(scalar & SCALAR_NO_PS1)) { 614 + qpr[reg_out] = ps1_out; 615 + 616 + dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n", 617 + ps1_in1, ps1_in2, qpr[reg_out]); 618 + } 619 + 620 + return EMULATE_DONE; 621 + } 622 + 623 + static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, 624 + int reg_out, int reg_in, 625 + void (*func)(struct thread_struct *t, 626 + u32 *dst, u32 *src1)) 627 + { 628 + u32 *qpr = vcpu->arch.qpr; 629 + u64 *fpr = vcpu->arch.fpr; 630 + u32 ps0_out, ps0_in; 631 + u32 ps1_in; 632 + struct thread_struct t; 633 + t.fpscr.val = vcpu->arch.fpscr; 634 + 635 + /* RC */ 636 + WARN_ON(rc); 637 + 638 + /* PS0 */ 639 + cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); 640 + func(&t, &ps0_out, &ps0_in); 641 + 642 + dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", 643 + ps0_in, ps0_out); 644 + 645 + cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); 646 + 647 + /* PS1 */ 648 + ps1_in = qpr[reg_in]; 649 + func(&t, &qpr[reg_out], &ps1_in); 650 + 651 + dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", 652 + ps1_in, qpr[reg_out]); 653 + 654 + return EMULATE_DONE; 655 + } 656 + 657 + int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) 658 + { 659 + u32 inst = kvmppc_get_last_inst(vcpu); 660 + enum emulation_result emulated = EMULATE_DONE; 661 + 662 + int ax_rd = inst_get_field(inst, 6, 10); 663 + int ax_ra = inst_get_field(inst, 11, 15); 664 + int ax_rb = inst_get_field(inst, 16, 20); 665 + int ax_rc = inst_get_field(inst, 21, 25); 666 + short full_d = inst_get_field(inst, 16, 31); 667 + 668 + u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; 669 + u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; 670 + u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; 671 + u64 *fpr_c = 
&vcpu->arch.fpr[ax_rc]; 672 + 673 + bool rcomp = (inst & 1) ? true : false; 674 + u32 cr = kvmppc_get_cr(vcpu); 675 + struct thread_struct t; 676 + #ifdef DEBUG 677 + int i; 678 + #endif 679 + 680 + t.fpscr.val = vcpu->arch.fpscr; 681 + 682 + if (!kvmppc_inst_is_paired_single(vcpu, inst)) 683 + return EMULATE_FAIL; 684 + 685 + if (!(vcpu->arch.msr & MSR_FP)) { 686 + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); 687 + return EMULATE_AGAIN; 688 + } 689 + 690 + kvmppc_giveup_ext(vcpu, MSR_FP); 691 + preempt_disable(); 692 + enable_kernel_fp(); 693 + /* Do we need to clear FE0 / FE1 here? Don't think so. */ 694 + 695 + #ifdef DEBUG 696 + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { 697 + u32 f; 698 + cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); 699 + dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", 700 + i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); 701 + } 702 + #endif 703 + 704 + switch (get_op(inst)) { 705 + case OP_PSQ_L: 706 + { 707 + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; 708 + bool w = inst_get_field(inst, 16, 16) ? true : false; 709 + int i = inst_get_field(inst, 17, 19); 710 + 711 + addr += get_d_signext(inst); 712 + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); 713 + break; 714 + } 715 + case OP_PSQ_LU: 716 + { 717 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); 718 + bool w = inst_get_field(inst, 16, 16) ? true : false; 719 + int i = inst_get_field(inst, 17, 19); 720 + 721 + addr += get_d_signext(inst); 722 + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); 723 + 724 + if (emulated == EMULATE_DONE) 725 + kvmppc_set_gpr(vcpu, ax_ra, addr); 726 + break; 727 + } 728 + case OP_PSQ_ST: 729 + { 730 + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; 731 + bool w = inst_get_field(inst, 16, 16) ? 
true : false; 732 + int i = inst_get_field(inst, 17, 19); 733 + 734 + addr += get_d_signext(inst); 735 + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); 736 + break; 737 + } 738 + case OP_PSQ_STU: 739 + { 740 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); 741 + bool w = inst_get_field(inst, 16, 16) ? true : false; 742 + int i = inst_get_field(inst, 17, 19); 743 + 744 + addr += get_d_signext(inst); 745 + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); 746 + 747 + if (emulated == EMULATE_DONE) 748 + kvmppc_set_gpr(vcpu, ax_ra, addr); 749 + break; 750 + } 751 + case 4: 752 + /* X form */ 753 + switch (inst_get_field(inst, 21, 30)) { 754 + case OP_4X_PS_CMPU0: 755 + /* XXX */ 756 + emulated = EMULATE_FAIL; 757 + break; 758 + case OP_4X_PSQ_LX: 759 + { 760 + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; 761 + bool w = inst_get_field(inst, 21, 21) ? true : false; 762 + int i = inst_get_field(inst, 22, 24); 763 + 764 + addr += kvmppc_get_gpr(vcpu, ax_rb); 765 + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); 766 + break; 767 + } 768 + case OP_4X_PS_CMPO0: 769 + /* XXX */ 770 + emulated = EMULATE_FAIL; 771 + break; 772 + case OP_4X_PSQ_LUX: 773 + { 774 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); 775 + bool w = inst_get_field(inst, 21, 21) ? 
true : false; 776 + int i = inst_get_field(inst, 22, 24); 777 + 778 + addr += kvmppc_get_gpr(vcpu, ax_rb); 779 + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); 780 + 781 + if (emulated == EMULATE_DONE) 782 + kvmppc_set_gpr(vcpu, ax_ra, addr); 783 + break; 784 + } 785 + case OP_4X_PS_NEG: 786 + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; 787 + vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; 788 + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; 789 + vcpu->arch.qpr[ax_rd] ^= 0x80000000; 790 + break; 791 + case OP_4X_PS_CMPU1: 792 + /* XXX */ 793 + emulated = EMULATE_FAIL; 794 + break; 795 + case OP_4X_PS_MR: 796 + WARN_ON(rcomp); 797 + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; 798 + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; 799 + break; 800 + case OP_4X_PS_CMPO1: 801 + /* XXX */ 802 + emulated = EMULATE_FAIL; 803 + break; 804 + case OP_4X_PS_NABS: 805 + WARN_ON(rcomp); 806 + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; 807 + vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; 808 + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; 809 + vcpu->arch.qpr[ax_rd] |= 0x80000000; 810 + break; 811 + case OP_4X_PS_ABS: 812 + WARN_ON(rcomp); 813 + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; 814 + vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; 815 + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; 816 + vcpu->arch.qpr[ax_rd] &= ~0x80000000; 817 + break; 818 + case OP_4X_PS_MERGE00: 819 + WARN_ON(rcomp); 820 + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; 821 + /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ 822 + cvt_df((double*)&vcpu->arch.fpr[ax_rb], 823 + (float*)&vcpu->arch.qpr[ax_rd], &t); 824 + break; 825 + case OP_4X_PS_MERGE01: 826 + WARN_ON(rcomp); 827 + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; 828 + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; 829 + break; 830 + case OP_4X_PS_MERGE10: 831 + WARN_ON(rcomp); 832 + /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ 833 + cvt_fd((float*)&vcpu->arch.qpr[ax_ra], 834 + 
(double*)&vcpu->arch.fpr[ax_rd], &t); 835 + /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ 836 + cvt_df((double*)&vcpu->arch.fpr[ax_rb], 837 + (float*)&vcpu->arch.qpr[ax_rd], &t); 838 + break; 839 + case OP_4X_PS_MERGE11: 840 + WARN_ON(rcomp); 841 + /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ 842 + cvt_fd((float*)&vcpu->arch.qpr[ax_ra], 843 + (double*)&vcpu->arch.fpr[ax_rd], &t); 844 + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; 845 + break; 846 + } 847 + /* XW form */ 848 + switch (inst_get_field(inst, 25, 30)) { 849 + case OP_4XW_PSQ_STX: 850 + { 851 + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; 852 + bool w = inst_get_field(inst, 21, 21) ? true : false; 853 + int i = inst_get_field(inst, 22, 24); 854 + 855 + addr += kvmppc_get_gpr(vcpu, ax_rb); 856 + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); 857 + break; 858 + } 859 + case OP_4XW_PSQ_STUX: 860 + { 861 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); 862 + bool w = inst_get_field(inst, 21, 21) ? 
true : false; 863 + int i = inst_get_field(inst, 22, 24); 864 + 865 + addr += kvmppc_get_gpr(vcpu, ax_rb); 866 + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); 867 + 868 + if (emulated == EMULATE_DONE) 869 + kvmppc_set_gpr(vcpu, ax_ra, addr); 870 + break; 871 + } 872 + } 873 + /* A form */ 874 + switch (inst_get_field(inst, 26, 30)) { 875 + case OP_4A_PS_SUM1: 876 + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, 877 + ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); 878 + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; 879 + break; 880 + case OP_4A_PS_SUM0: 881 + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, 882 + ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds); 883 + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc]; 884 + break; 885 + case OP_4A_PS_MULS0: 886 + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, 887 + ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls); 888 + break; 889 + case OP_4A_PS_MULS1: 890 + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, 891 + ax_ra, ax_rc, SCALAR_LOW, fps_fmuls); 892 + break; 893 + case OP_4A_PS_MADDS0: 894 + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, 895 + ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds); 896 + break; 897 + case OP_4A_PS_MADDS1: 898 + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, 899 + ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds); 900 + break; 901 + case OP_4A_PS_DIV: 902 + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, 903 + ax_ra, ax_rb, SCALAR_NONE, fps_fdivs); 904 + break; 905 + case OP_4A_PS_SUB: 906 + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, 907 + ax_ra, ax_rb, SCALAR_NONE, fps_fsubs); 908 + break; 909 + case OP_4A_PS_ADD: 910 + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, 911 + ax_ra, ax_rb, SCALAR_NONE, fps_fadds); 912 + break; 913 + case OP_4A_PS_SEL: 914 + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, 915 + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel); 916 + break; 917 + case OP_4A_PS_RES: 918 + emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, 919 + ax_rb, 
fps_fres); 920 + break; 921 + case OP_4A_PS_MUL: 922 + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, 923 + ax_ra, ax_rc, SCALAR_NONE, fps_fmuls); 924 + break; 925 + case OP_4A_PS_RSQRTE: 926 + emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, 927 + ax_rb, fps_frsqrte); 928 + break; 929 + case OP_4A_PS_MSUB: 930 + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, 931 + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs); 932 + break; 933 + case OP_4A_PS_MADD: 934 + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, 935 + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds); 936 + break; 937 + case OP_4A_PS_NMSUB: 938 + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, 939 + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs); 940 + break; 941 + case OP_4A_PS_NMADD: 942 + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, 943 + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds); 944 + break; 945 + } 946 + break; 947 + 948 + /* Real FPU operations */ 949 + 950 + case OP_LFS: 951 + { 952 + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; 953 + 954 + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, 955 + FPU_LS_SINGLE); 956 + break; 957 + } 958 + case OP_LFSU: 959 + { 960 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; 961 + 962 + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, 963 + FPU_LS_SINGLE); 964 + 965 + if (emulated == EMULATE_DONE) 966 + kvmppc_set_gpr(vcpu, ax_ra, addr); 967 + break; 968 + } 969 + case OP_LFD: 970 + { 971 + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; 972 + 973 + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, 974 + FPU_LS_DOUBLE); 975 + break; 976 + } 977 + case OP_LFDU: 978 + { 979 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; 980 + 981 + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, 982 + FPU_LS_DOUBLE); 983 + 984 + if (emulated == EMULATE_DONE) 985 + kvmppc_set_gpr(vcpu, ax_ra, addr); 986 + break; 987 + } 988 + case OP_STFS: 989 + { 990 + ulong addr = (ax_ra ? 
kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; 991 + 992 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, 993 + FPU_LS_SINGLE); 994 + break; 995 + } 996 + case OP_STFSU: 997 + { 998 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; 999 + 1000 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, 1001 + FPU_LS_SINGLE); 1002 + 1003 + if (emulated == EMULATE_DONE) 1004 + kvmppc_set_gpr(vcpu, ax_ra, addr); 1005 + break; 1006 + } 1007 + case OP_STFD: 1008 + { 1009 + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; 1010 + 1011 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, 1012 + FPU_LS_DOUBLE); 1013 + break; 1014 + } 1015 + case OP_STFDU: 1016 + { 1017 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; 1018 + 1019 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, 1020 + FPU_LS_DOUBLE); 1021 + 1022 + if (emulated == EMULATE_DONE) 1023 + kvmppc_set_gpr(vcpu, ax_ra, addr); 1024 + break; 1025 + } 1026 + case 31: 1027 + switch (inst_get_field(inst, 21, 30)) { 1028 + case OP_31_LFSX: 1029 + { 1030 + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; 1031 + 1032 + addr += kvmppc_get_gpr(vcpu, ax_rb); 1033 + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, 1034 + addr, FPU_LS_SINGLE); 1035 + break; 1036 + } 1037 + case OP_31_LFSUX: 1038 + { 1039 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + 1040 + kvmppc_get_gpr(vcpu, ax_rb); 1041 + 1042 + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, 1043 + addr, FPU_LS_SINGLE); 1044 + 1045 + if (emulated == EMULATE_DONE) 1046 + kvmppc_set_gpr(vcpu, ax_ra, addr); 1047 + break; 1048 + } 1049 + case OP_31_LFDX: 1050 + { 1051 + ulong addr = (ax_ra ? 
kvmppc_get_gpr(vcpu, ax_ra) : 0) + 1052 + kvmppc_get_gpr(vcpu, ax_rb); 1053 + 1054 + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, 1055 + addr, FPU_LS_DOUBLE); 1056 + break; 1057 + } 1058 + case OP_31_LFDUX: 1059 + { 1060 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + 1061 + kvmppc_get_gpr(vcpu, ax_rb); 1062 + 1063 + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, 1064 + addr, FPU_LS_DOUBLE); 1065 + 1066 + if (emulated == EMULATE_DONE) 1067 + kvmppc_set_gpr(vcpu, ax_ra, addr); 1068 + break; 1069 + } 1070 + case OP_31_STFSX: 1071 + { 1072 + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + 1073 + kvmppc_get_gpr(vcpu, ax_rb); 1074 + 1075 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, 1076 + addr, FPU_LS_SINGLE); 1077 + break; 1078 + } 1079 + case OP_31_STFSUX: 1080 + { 1081 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + 1082 + kvmppc_get_gpr(vcpu, ax_rb); 1083 + 1084 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, 1085 + addr, FPU_LS_SINGLE); 1086 + 1087 + if (emulated == EMULATE_DONE) 1088 + kvmppc_set_gpr(vcpu, ax_ra, addr); 1089 + break; 1090 + } 1091 + case OP_31_STFX: 1092 + { 1093 + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + 1094 + kvmppc_get_gpr(vcpu, ax_rb); 1095 + 1096 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, 1097 + addr, FPU_LS_DOUBLE); 1098 + break; 1099 + } 1100 + case OP_31_STFUX: 1101 + { 1102 + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + 1103 + kvmppc_get_gpr(vcpu, ax_rb); 1104 + 1105 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, 1106 + addr, FPU_LS_DOUBLE); 1107 + 1108 + if (emulated == EMULATE_DONE) 1109 + kvmppc_set_gpr(vcpu, ax_ra, addr); 1110 + break; 1111 + } 1112 + case OP_31_STFIWX: 1113 + { 1114 + ulong addr = (ax_ra ? 
kvmppc_get_gpr(vcpu, ax_ra) : 0) + 1115 + kvmppc_get_gpr(vcpu, ax_rb); 1116 + 1117 + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, 1118 + addr, 1119 + FPU_LS_SINGLE_LOW); 1120 + break; 1121 + } 1122 + break; 1123 + } 1124 + break; 1125 + case 59: 1126 + switch (inst_get_field(inst, 21, 30)) { 1127 + case OP_59_FADDS: 1128 + fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); 1129 + kvmppc_sync_qpr(vcpu, ax_rd); 1130 + break; 1131 + case OP_59_FSUBS: 1132 + fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); 1133 + kvmppc_sync_qpr(vcpu, ax_rd); 1134 + break; 1135 + case OP_59_FDIVS: 1136 + fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); 1137 + kvmppc_sync_qpr(vcpu, ax_rd); 1138 + break; 1139 + case OP_59_FRES: 1140 + fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); 1141 + kvmppc_sync_qpr(vcpu, ax_rd); 1142 + break; 1143 + case OP_59_FRSQRTES: 1144 + fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); 1145 + kvmppc_sync_qpr(vcpu, ax_rd); 1146 + break; 1147 + } 1148 + switch (inst_get_field(inst, 26, 30)) { 1149 + case OP_59_FMULS: 1150 + fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); 1151 + kvmppc_sync_qpr(vcpu, ax_rd); 1152 + break; 1153 + case OP_59_FMSUBS: 1154 + fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1155 + kvmppc_sync_qpr(vcpu, ax_rd); 1156 + break; 1157 + case OP_59_FMADDS: 1158 + fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1159 + kvmppc_sync_qpr(vcpu, ax_rd); 1160 + break; 1161 + case OP_59_FNMSUBS: 1162 + fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1163 + kvmppc_sync_qpr(vcpu, ax_rd); 1164 + break; 1165 + case OP_59_FNMADDS: 1166 + fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1167 + kvmppc_sync_qpr(vcpu, ax_rd); 1168 + break; 1169 + } 1170 + break; 1171 + case 63: 1172 + switch (inst_get_field(inst, 21, 30)) { 1173 + case OP_63_MTFSB0: 1174 + case OP_63_MTFSB1: 1175 + case OP_63_MCRFS: 1176 + case OP_63_MTFSFI: 1177 + /* XXX need to implement 
*/ 1178 + break; 1179 + case OP_63_MFFS: 1180 + /* XXX missing CR */ 1181 + *fpr_d = vcpu->arch.fpscr; 1182 + break; 1183 + case OP_63_MTFSF: 1184 + /* XXX missing fm bits */ 1185 + /* XXX missing CR */ 1186 + vcpu->arch.fpscr = *fpr_b; 1187 + break; 1188 + case OP_63_FCMPU: 1189 + { 1190 + u32 tmp_cr; 1191 + u32 cr0_mask = 0xf0000000; 1192 + u32 cr_shift = inst_get_field(inst, 6, 8) * 4; 1193 + 1194 + fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); 1195 + cr &= ~(cr0_mask >> cr_shift); 1196 + cr |= (cr & cr0_mask) >> cr_shift; 1197 + break; 1198 + } 1199 + case OP_63_FCMPO: 1200 + { 1201 + u32 tmp_cr; 1202 + u32 cr0_mask = 0xf0000000; 1203 + u32 cr_shift = inst_get_field(inst, 6, 8) * 4; 1204 + 1205 + fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); 1206 + cr &= ~(cr0_mask >> cr_shift); 1207 + cr |= (cr & cr0_mask) >> cr_shift; 1208 + break; 1209 + } 1210 + case OP_63_FNEG: 1211 + fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); 1212 + break; 1213 + case OP_63_FMR: 1214 + *fpr_d = *fpr_b; 1215 + break; 1216 + case OP_63_FABS: 1217 + fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); 1218 + break; 1219 + case OP_63_FCPSGN: 1220 + fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); 1221 + break; 1222 + case OP_63_FDIV: 1223 + fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); 1224 + break; 1225 + case OP_63_FADD: 1226 + fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); 1227 + break; 1228 + case OP_63_FSUB: 1229 + fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); 1230 + break; 1231 + case OP_63_FCTIW: 1232 + fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); 1233 + break; 1234 + case OP_63_FCTIWZ: 1235 + fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); 1236 + break; 1237 + case OP_63_FRSP: 1238 + fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); 1239 + kvmppc_sync_qpr(vcpu, ax_rd); 1240 + break; 1241 + case OP_63_FRSQRTE: 1242 + { 1243 + double one = 1.0f; 1244 + 1245 + /* fD = sqrt(fB) */ 1246 + fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); 
1247 + /* fD = 1.0f / fD */ 1248 + fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); 1249 + break; 1250 + } 1251 + } 1252 + switch (inst_get_field(inst, 26, 30)) { 1253 + case OP_63_FMUL: 1254 + fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); 1255 + break; 1256 + case OP_63_FSEL: 1257 + fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1258 + break; 1259 + case OP_63_FMSUB: 1260 + fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1261 + break; 1262 + case OP_63_FMADD: 1263 + fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1264 + break; 1265 + case OP_63_FNMSUB: 1266 + fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1267 + break; 1268 + case OP_63_FNMADD: 1269 + fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); 1270 + break; 1271 + } 1272 + break; 1273 + } 1274 + 1275 + #ifdef DEBUG 1276 + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { 1277 + u32 f; 1278 + cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); 1279 + dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); 1280 + } 1281 + #endif 1282 + 1283 + if (rcomp) 1284 + kvmppc_set_cr(vcpu, cr); 1285 + 1286 + preempt_enable(); 1287 + 1288 + return emulated; 1289 + }
+259
arch/powerpc/kvm/book3s_segment.S
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright SUSE Linux Products GmbH 2010 16 + * 17 + * Authors: Alexander Graf <agraf@suse.de> 18 + */ 19 + 20 + /* Real mode helpers */ 21 + 22 + #if defined(CONFIG_PPC_BOOK3S_64) 23 + 24 + #define GET_SHADOW_VCPU(reg) \ 25 + addi reg, r13, PACA_KVM_SVCPU 26 + 27 + #elif defined(CONFIG_PPC_BOOK3S_32) 28 + 29 + #define GET_SHADOW_VCPU(reg) \ 30 + tophys(reg, r2); \ 31 + lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \ 32 + tophys(reg, reg) 33 + 34 + #endif 35 + 36 + /* Disable for nested KVM */ 37 + #define USE_QUICK_LAST_INST 38 + 39 + 40 + /* Get helper functions for subarch specific functionality */ 41 + 42 + #if defined(CONFIG_PPC_BOOK3S_64) 43 + #include "book3s_64_slb.S" 44 + #elif defined(CONFIG_PPC_BOOK3S_32) 45 + #include "book3s_32_sr.S" 46 + #endif 47 + 48 + /****************************************************************************** 49 + * * 50 + * Entry code * 51 + * * 52 + *****************************************************************************/ 53 + 54 + .global kvmppc_handler_trampoline_enter 55 + kvmppc_handler_trampoline_enter: 56 + 57 + /* Required state: 58 + * 59 + * MSR = ~IR|DR 60 + * R13 = PACA 61 + * R1 = host R1 62 + * R2 = host R2 63 + * R10 = guest MSR 64 + * all other volatile GPRS = free 65 + * SVCPU[CR] = guest CR 66 + * SVCPU[XER] 
= guest XER 67 + * SVCPU[CTR] = guest CTR 68 + * SVCPU[LR] = guest LR 69 + */ 70 + 71 + /* r3 = shadow vcpu */ 72 + GET_SHADOW_VCPU(r3) 73 + 74 + /* Move SRR0 and SRR1 into the respective regs */ 75 + PPC_LL r9, SVCPU_PC(r3) 76 + mtsrr0 r9 77 + mtsrr1 r10 78 + 79 + /* Activate guest mode, so faults get handled by KVM */ 80 + li r11, KVM_GUEST_MODE_GUEST 81 + stb r11, SVCPU_IN_GUEST(r3) 82 + 83 + /* Switch to guest segment. This is subarch specific. */ 84 + LOAD_GUEST_SEGMENTS 85 + 86 + /* Enter guest */ 87 + 88 + PPC_LL r4, (SVCPU_CTR)(r3) 89 + PPC_LL r5, (SVCPU_LR)(r3) 90 + lwz r6, (SVCPU_CR)(r3) 91 + lwz r7, (SVCPU_XER)(r3) 92 + 93 + mtctr r4 94 + mtlr r5 95 + mtcr r6 96 + mtxer r7 97 + 98 + PPC_LL r0, (SVCPU_R0)(r3) 99 + PPC_LL r1, (SVCPU_R1)(r3) 100 + PPC_LL r2, (SVCPU_R2)(r3) 101 + PPC_LL r4, (SVCPU_R4)(r3) 102 + PPC_LL r5, (SVCPU_R5)(r3) 103 + PPC_LL r6, (SVCPU_R6)(r3) 104 + PPC_LL r7, (SVCPU_R7)(r3) 105 + PPC_LL r8, (SVCPU_R8)(r3) 106 + PPC_LL r9, (SVCPU_R9)(r3) 107 + PPC_LL r10, (SVCPU_R10)(r3) 108 + PPC_LL r11, (SVCPU_R11)(r3) 109 + PPC_LL r12, (SVCPU_R12)(r3) 110 + PPC_LL r13, (SVCPU_R13)(r3) 111 + 112 + PPC_LL r3, (SVCPU_R3)(r3) 113 + 114 + RFI 115 + kvmppc_handler_trampoline_enter_end: 116 + 117 + 118 + 119 + /****************************************************************************** 120 + * * 121 + * Exit code * 122 + * * 123 + *****************************************************************************/ 124 + 125 + .global kvmppc_handler_trampoline_exit 126 + kvmppc_handler_trampoline_exit: 127 + 128 + /* Register usage at this point: 129 + * 130 + * SPRG_SCRATCH0 = guest R13 131 + * R12 = exit handler id 132 + * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] 133 + * SVCPU.SCRATCH0 = guest R12 134 + * SVCPU.SCRATCH1 = guest CR 135 + * 136 + */ 137 + 138 + /* Save registers */ 139 + 140 + PPC_STL r0, (SHADOW_VCPU_OFF + SVCPU_R0)(r13) 141 + PPC_STL r1, (SHADOW_VCPU_OFF + SVCPU_R1)(r13) 142 + PPC_STL r2, (SHADOW_VCPU_OFF + SVCPU_R2)(r13) 143 + 
PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_R3)(r13) 144 + PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_R4)(r13) 145 + PPC_STL r5, (SHADOW_VCPU_OFF + SVCPU_R5)(r13) 146 + PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_R6)(r13) 147 + PPC_STL r7, (SHADOW_VCPU_OFF + SVCPU_R7)(r13) 148 + PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R8)(r13) 149 + PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R9)(r13) 150 + PPC_STL r10, (SHADOW_VCPU_OFF + SVCPU_R10)(r13) 151 + PPC_STL r11, (SHADOW_VCPU_OFF + SVCPU_R11)(r13) 152 + 153 + /* Restore R1/R2 so we can handle faults */ 154 + PPC_LL r1, (SHADOW_VCPU_OFF + SVCPU_HOST_R1)(r13) 155 + PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13) 156 + 157 + /* Save guest PC and MSR */ 158 + mfsrr0 r3 159 + mfsrr1 r4 160 + 161 + PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13) 162 + PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13) 163 + 164 + /* Get scratch'ed off registers */ 165 + mfspr r9, SPRN_SPRG_SCRATCH0 166 + PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) 167 + lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) 168 + 169 + PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R13)(r13) 170 + PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R12)(r13) 171 + stw r7, (SHADOW_VCPU_OFF + SVCPU_CR)(r13) 172 + 173 + /* Save more register state */ 174 + 175 + mfxer r5 176 + mfdar r6 177 + mfdsisr r7 178 + mfctr r8 179 + mflr r9 180 + 181 + stw r5, (SHADOW_VCPU_OFF + SVCPU_XER)(r13) 182 + PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_FAULT_DAR)(r13) 183 + stw r7, (SHADOW_VCPU_OFF + SVCPU_FAULT_DSISR)(r13) 184 + PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_CTR)(r13) 185 + PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_LR)(r13) 186 + 187 + /* 188 + * In order for us to easily get the last instruction, 189 + * we got the #vmexit at, we exploit the fact that the 190 + * virtual layout is still the same here, so we can just 191 + * ld from the guest's PC address 192 + */ 193 + 194 + /* We only load the last instruction when it's safe */ 195 + cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE 196 + beq ld_last_inst 197 + cmpwi r12, 
BOOK3S_INTERRUPT_PROGRAM 198 + beq ld_last_inst 199 + cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT 200 + beq- ld_last_inst 201 + 202 + b no_ld_last_inst 203 + 204 + ld_last_inst: 205 + /* Save off the guest instruction we're at */ 206 + 207 + /* In case lwz faults */ 208 + li r0, KVM_INST_FETCH_FAILED 209 + 210 + #ifdef USE_QUICK_LAST_INST 211 + 212 + /* Set guest mode to 'jump over instruction' so if lwz faults 213 + * we'll just continue at the next IP. */ 214 + li r9, KVM_GUEST_MODE_SKIP 215 + stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) 216 + 217 + /* 1) enable paging for data */ 218 + mfmsr r9 219 + ori r11, r9, MSR_DR /* Enable paging for data */ 220 + mtmsr r11 221 + sync 222 + /* 2) fetch the instruction */ 223 + lwz r0, 0(r3) 224 + /* 3) disable paging again */ 225 + mtmsr r9 226 + sync 227 + 228 + #endif 229 + stw r0, (SHADOW_VCPU_OFF + SVCPU_LAST_INST)(r13) 230 + 231 + no_ld_last_inst: 232 + 233 + /* Unset guest mode */ 234 + li r9, KVM_GUEST_MODE_NONE 235 + stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) 236 + 237 + /* Switch back to host MMU */ 238 + LOAD_HOST_SEGMENTS 239 + 240 + /* Register usage at this point: 241 + * 242 + * R1 = host R1 243 + * R2 = host R2 244 + * R12 = exit handler id 245 + * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] 246 + * SVCPU.* = guest * 247 + * 248 + */ 249 + 250 + /* RFI into the highmem handler */ 251 + mfmsr r7 252 + ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */ 253 + mtsrr1 r7 254 + /* Load highmem handler address */ 255 + PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_VMHANDLER)(r13) 256 + mtsrr0 r8 257 + 258 + RFI 259 + kvmppc_handler_trampoline_exit_end:
+20 -1
arch/powerpc/kvm/booke.c
··· 133 133 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); 134 134 } 135 135 136 + void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, 137 + struct kvm_interrupt *irq) 138 + { 139 + clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); 140 + } 141 + 136 142 /* Deliver the interrupt of the corresponding priority, if possible. */ 137 143 static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, 138 144 unsigned int priority) ··· 485 479 { 486 480 int i; 487 481 482 + vcpu_load(vcpu); 483 + 488 484 regs->pc = vcpu->arch.pc; 489 485 regs->cr = kvmppc_get_cr(vcpu); 490 486 regs->ctr = vcpu->arch.ctr; ··· 507 499 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 508 500 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 509 501 502 + vcpu_put(vcpu); 503 + 510 504 return 0; 511 505 } 512 506 513 507 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 514 508 { 515 509 int i; 510 + 511 + vcpu_load(vcpu); 516 512 517 513 vcpu->arch.pc = regs->pc; 518 514 kvmppc_set_cr(vcpu, regs->cr); ··· 536 524 537 525 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 538 526 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 527 + 528 + vcpu_put(vcpu); 539 529 540 530 return 0; 541 531 } ··· 567 553 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 568 554 struct kvm_translation *tr) 569 555 { 570 - return kvmppc_core_vcpu_translate(vcpu, tr); 556 + int r; 557 + 558 + vcpu_load(vcpu); 559 + r = kvmppc_core_vcpu_translate(vcpu, tr); 560 + vcpu_put(vcpu); 561 + return r; 571 562 } 572 563 573 564 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+1 -1
arch/powerpc/kvm/e500.c
··· 161 161 flush_icache_range(kvmppc_booke_handlers, 162 162 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); 163 163 164 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE); 164 + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 165 165 } 166 166 167 167 static void __init kvmppc_e500_exit(void)
+44 -11
arch/powerpc/kvm/emulate.c
··· 38 38 #define OP_31_XOP_LBZX 87 39 39 #define OP_31_XOP_STWX 151 40 40 #define OP_31_XOP_STBX 215 41 + #define OP_31_XOP_LBZUX 119 41 42 #define OP_31_XOP_STBUX 247 42 43 #define OP_31_XOP_LHZX 279 43 44 #define OP_31_XOP_LHZUX 311 44 45 #define OP_31_XOP_MFSPR 339 46 + #define OP_31_XOP_LHAX 343 45 47 #define OP_31_XOP_STHX 407 46 48 #define OP_31_XOP_STHUX 439 47 49 #define OP_31_XOP_MTSPR 467 ··· 64 62 #define OP_STBU 39 65 63 #define OP_LHZ 40 66 64 #define OP_LHZU 41 65 + #define OP_LHA 42 66 + #define OP_LHAU 43 67 67 #define OP_STH 44 68 68 #define OP_STHU 45 69 69 70 - #ifdef CONFIG_PPC64 70 + #ifdef CONFIG_PPC_BOOK3S 71 71 static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) 72 72 { 73 73 return 1; ··· 86 82 unsigned long dec_nsec; 87 83 88 84 pr_debug("mtDEC: %x\n", vcpu->arch.dec); 89 - #ifdef CONFIG_PPC64 85 + #ifdef CONFIG_PPC_BOOK3S 90 86 /* mtdec lowers the interrupt line when positive. */ 91 87 kvmppc_core_dequeue_dec(vcpu); 92 88 ··· 132 128 * from opcode tables in the future. 
*/ 133 129 int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 134 130 { 135 - u32 inst = vcpu->arch.last_inst; 131 + u32 inst = kvmppc_get_last_inst(vcpu); 136 132 u32 ea; 137 133 int ra; 138 134 int rb; ··· 147 143 148 144 pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); 149 145 150 - /* Try again next time */ 151 - if (inst == KVM_INST_FETCH_FAILED) 152 - return EMULATE_DONE; 153 - 154 146 switch (get_op(inst)) { 155 147 case OP_TRAP: 156 - #ifdef CONFIG_PPC64 148 + #ifdef CONFIG_PPC_BOOK3S 157 149 case OP_TRAP_64: 158 150 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); 159 151 #else ··· 169 169 case OP_31_XOP_LBZX: 170 170 rt = get_rt(inst); 171 171 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 172 + break; 173 + 174 + case OP_31_XOP_LBZUX: 175 + rt = get_rt(inst); 176 + ra = get_ra(inst); 177 + rb = get_rb(inst); 178 + 179 + ea = kvmppc_get_gpr(vcpu, rb); 180 + if (ra) 181 + ea += kvmppc_get_gpr(vcpu, ra); 182 + 183 + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 184 + kvmppc_set_gpr(vcpu, ra, ea); 172 185 break; 173 186 174 187 case OP_31_XOP_STWX: ··· 211 198 kvmppc_get_gpr(vcpu, rs), 212 199 1, 1); 213 200 kvmppc_set_gpr(vcpu, rs, ea); 201 + break; 202 + 203 + case OP_31_XOP_LHAX: 204 + rt = get_rt(inst); 205 + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 214 206 break; 215 207 216 208 case OP_31_XOP_LHZX: ··· 468 450 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 469 451 break; 470 452 453 + case OP_LHA: 454 + rt = get_rt(inst); 455 + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 456 + break; 457 + 458 + case OP_LHAU: 459 + ra = get_ra(inst); 460 + rt = get_rt(inst); 461 + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 462 + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 463 + break; 464 + 471 465 case OP_STH: 472 466 rs = get_rs(inst); 473 467 emulated = kvmppc_handle_store(run, vcpu, ··· 502 472 503 473 if (emulated == EMULATE_FAIL) { 504 474 emulated = 
kvmppc_core_emulate_op(run, vcpu, inst, &advance); 505 - if (emulated == EMULATE_FAIL) { 475 + if (emulated == EMULATE_AGAIN) { 476 + advance = 0; 477 + } else if (emulated == EMULATE_FAIL) { 506 478 advance = 0; 507 479 printk(KERN_ERR "Couldn't emulate instruction 0x%08x " 508 480 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); ··· 512 480 } 513 481 } 514 482 515 - trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated); 483 + trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); 516 484 485 + /* Advance past emulated instruction. */ 517 486 if (advance) 518 - vcpu->arch.pc += 4; /* Advance past emulated instruction. */ 487 + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); 519 488 520 489 return emulated; 521 490 }
+273
arch/powerpc/kvm/fpu.S
··· 1 + /* 2 + * FPU helper code to use FPU operations from inside the kernel 3 + * 4 + * Copyright (C) 2010 Alexander Graf (agraf@suse.de) 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 9 + * 2 of the License, or (at your option) any later version. 10 + * 11 + */ 12 + 13 + #include <asm/reg.h> 14 + #include <asm/page.h> 15 + #include <asm/mmu.h> 16 + #include <asm/pgtable.h> 17 + #include <asm/cputable.h> 18 + #include <asm/cache.h> 19 + #include <asm/thread_info.h> 20 + #include <asm/ppc_asm.h> 21 + #include <asm/asm-offsets.h> 22 + 23 + /* Instructions operating on single parameters */ 24 + 25 + /* 26 + * Single operation with one input operand 27 + * 28 + * R3 = (double*)&fpscr 29 + * R4 = (short*)&result 30 + * R5 = (short*)&param1 31 + */ 32 + #define FPS_ONE_IN(name) \ 33 + _GLOBAL(fps_ ## name); \ 34 + lfd 0,0(r3); /* load up fpscr value */ \ 35 + MTFSF_L(0); \ 36 + lfs 0,0(r5); \ 37 + \ 38 + name 0,0; \ 39 + \ 40 + stfs 0,0(r4); \ 41 + mffs 0; \ 42 + stfd 0,0(r3); /* save new fpscr value */ \ 43 + blr 44 + 45 + /* 46 + * Single operation with two input operands 47 + * 48 + * R3 = (double*)&fpscr 49 + * R4 = (short*)&result 50 + * R5 = (short*)&param1 51 + * R6 = (short*)&param2 52 + */ 53 + #define FPS_TWO_IN(name) \ 54 + _GLOBAL(fps_ ## name); \ 55 + lfd 0,0(r3); /* load up fpscr value */ \ 56 + MTFSF_L(0); \ 57 + lfs 0,0(r5); \ 58 + lfs 1,0(r6); \ 59 + \ 60 + name 0,0,1; \ 61 + \ 62 + stfs 0,0(r4); \ 63 + mffs 0; \ 64 + stfd 0,0(r3); /* save new fpscr value */ \ 65 + blr 66 + 67 + /* 68 + * Single operation with three input operands 69 + * 70 + * R3 = (double*)&fpscr 71 + * R4 = (short*)&result 72 + * R5 = (short*)&param1 73 + * R6 = (short*)&param2 74 + * R7 = (short*)&param3 75 + */ 76 + #define FPS_THREE_IN(name) \ 77 + _GLOBAL(fps_ ## name); \ 78 + lfd 0,0(r3); /* load up fpscr value */ \ 79 + 
MTFSF_L(0); \ 80 + lfs 0,0(r5); \ 81 + lfs 1,0(r6); \ 82 + lfs 2,0(r7); \ 83 + \ 84 + name 0,0,1,2; \ 85 + \ 86 + stfs 0,0(r4); \ 87 + mffs 0; \ 88 + stfd 0,0(r3); /* save new fpscr value */ \ 89 + blr 90 + 91 + FPS_ONE_IN(fres) 92 + FPS_ONE_IN(frsqrte) 93 + FPS_ONE_IN(fsqrts) 94 + FPS_TWO_IN(fadds) 95 + FPS_TWO_IN(fdivs) 96 + FPS_TWO_IN(fmuls) 97 + FPS_TWO_IN(fsubs) 98 + FPS_THREE_IN(fmadds) 99 + FPS_THREE_IN(fmsubs) 100 + FPS_THREE_IN(fnmadds) 101 + FPS_THREE_IN(fnmsubs) 102 + FPS_THREE_IN(fsel) 103 + 104 + 105 + /* Instructions operating on double parameters */ 106 + 107 + /* 108 + * Beginning of double instruction processing 109 + * 110 + * R3 = (double*)&fpscr 111 + * R4 = (u32*)&cr 112 + * R5 = (double*)&result 113 + * R6 = (double*)&param1 114 + * R7 = (double*)&param2 [load_two] 115 + * R8 = (double*)&param3 [load_three] 116 + * LR = instruction call function 117 + */ 118 + fpd_load_three: 119 + lfd 2,0(r8) /* load param3 */ 120 + fpd_load_two: 121 + lfd 1,0(r7) /* load param2 */ 122 + fpd_load_one: 123 + lfd 0,0(r6) /* load param1 */ 124 + fpd_load_none: 125 + lfd 3,0(r3) /* load up fpscr value */ 126 + MTFSF_L(3) 127 + lwz r6, 0(r4) /* load cr */ 128 + mtcr r6 129 + blr 130 + 131 + /* 132 + * End of double instruction processing 133 + * 134 + * R3 = (double*)&fpscr 135 + * R4 = (u32*)&cr 136 + * R5 = (double*)&result 137 + * LR = caller of instruction call function 138 + */ 139 + fpd_return: 140 + mfcr r6 141 + stfd 0,0(r5) /* save result */ 142 + mffs 0 143 + stfd 0,0(r3) /* save new fpscr value */ 144 + stw r6,0(r4) /* save new cr value */ 145 + blr 146 + 147 + /* 148 + * Double operation with no input operand 149 + * 150 + * R3 = (double*)&fpscr 151 + * R4 = (u32*)&cr 152 + * R5 = (double*)&result 153 + */ 154 + #define FPD_NONE_IN(name) \ 155 + _GLOBAL(fpd_ ## name); \ 156 + mflr r12; \ 157 + bl fpd_load_none; \ 158 + mtlr r12; \ 159 + \ 160 + name. 
0; /* call instruction */ \ 161 + b fpd_return 162 + 163 + /* 164 + * Double operation with one input operand 165 + * 166 + * R3 = (double*)&fpscr 167 + * R4 = (u32*)&cr 168 + * R5 = (double*)&result 169 + * R6 = (double*)&param1 170 + */ 171 + #define FPD_ONE_IN(name) \ 172 + _GLOBAL(fpd_ ## name); \ 173 + mflr r12; \ 174 + bl fpd_load_one; \ 175 + mtlr r12; \ 176 + \ 177 + name. 0,0; /* call instruction */ \ 178 + b fpd_return 179 + 180 + /* 181 + * Double operation with two input operands 182 + * 183 + * R3 = (double*)&fpscr 184 + * R4 = (u32*)&cr 185 + * R5 = (double*)&result 186 + * R6 = (double*)&param1 187 + * R7 = (double*)&param2 188 + * R8 = (double*)&param3 189 + */ 190 + #define FPD_TWO_IN(name) \ 191 + _GLOBAL(fpd_ ## name); \ 192 + mflr r12; \ 193 + bl fpd_load_two; \ 194 + mtlr r12; \ 195 + \ 196 + name. 0,0,1; /* call instruction */ \ 197 + b fpd_return 198 + 199 + /* 200 + * CR Double operation with two input operands 201 + * 202 + * R3 = (double*)&fpscr 203 + * R4 = (u32*)&cr 204 + * R5 = (double*)&param1 205 + * R6 = (double*)&param2 206 + * R7 = (double*)&param3 207 + */ 208 + #define FPD_TWO_IN_CR(name) \ 209 + _GLOBAL(fpd_ ## name); \ 210 + lfd 1,0(r6); /* load param2 */ \ 211 + lfd 0,0(r5); /* load param1 */ \ 212 + lfd 3,0(r3); /* load up fpscr value */ \ 213 + MTFSF_L(3); \ 214 + lwz r6, 0(r4); /* load cr */ \ 215 + mtcr r6; \ 216 + \ 217 + name 0,0,1; /* call instruction */ \ 218 + mfcr r6; \ 219 + mffs 0; \ 220 + stfd 0,0(r3); /* save new fpscr value */ \ 221 + stw r6,0(r4); /* save new cr value */ \ 222 + blr 223 + 224 + /* 225 + * Double operation with three input operands 226 + * 227 + * R3 = (double*)&fpscr 228 + * R4 = (u32*)&cr 229 + * R5 = (double*)&result 230 + * R6 = (double*)&param1 231 + * R7 = (double*)&param2 232 + * R8 = (double*)&param3 233 + */ 234 + #define FPD_THREE_IN(name) \ 235 + _GLOBAL(fpd_ ## name); \ 236 + mflr r12; \ 237 + bl fpd_load_three; \ 238 + mtlr r12; \ 239 + \ 240 + name. 
0,0,1,2; /* call instruction */ \ 241 + b fpd_return 242 + 243 + FPD_ONE_IN(fsqrts) 244 + FPD_ONE_IN(frsqrtes) 245 + FPD_ONE_IN(fres) 246 + FPD_ONE_IN(frsp) 247 + FPD_ONE_IN(fctiw) 248 + FPD_ONE_IN(fctiwz) 249 + FPD_ONE_IN(fsqrt) 250 + FPD_ONE_IN(fre) 251 + FPD_ONE_IN(frsqrte) 252 + FPD_ONE_IN(fneg) 253 + FPD_ONE_IN(fabs) 254 + FPD_TWO_IN(fadds) 255 + FPD_TWO_IN(fsubs) 256 + FPD_TWO_IN(fdivs) 257 + FPD_TWO_IN(fmuls) 258 + FPD_TWO_IN_CR(fcmpu) 259 + FPD_TWO_IN(fcpsgn) 260 + FPD_TWO_IN(fdiv) 261 + FPD_TWO_IN(fadd) 262 + FPD_TWO_IN(fmul) 263 + FPD_TWO_IN_CR(fcmpo) 264 + FPD_TWO_IN(fsub) 265 + FPD_THREE_IN(fmsubs) 266 + FPD_THREE_IN(fmadds) 267 + FPD_THREE_IN(fnmsubs) 268 + FPD_THREE_IN(fnmadds) 269 + FPD_THREE_IN(fsel) 270 + FPD_THREE_IN(fmsub) 271 + FPD_THREE_IN(fmadd) 272 + FPD_THREE_IN(fnmsub) 273 + FPD_THREE_IN(fnmadd)
+105 -5
arch/powerpc/kvm/powerpc.c
··· 70 70 case EMULATE_FAIL: 71 71 /* XXX Deliver Program interrupt to guest. */ 72 72 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, 73 - vcpu->arch.last_inst); 73 + kvmppc_get_last_inst(vcpu)); 74 74 r = RESUME_HOST; 75 75 break; 76 76 default: ··· 148 148 149 149 switch (ext) { 150 150 case KVM_CAP_PPC_SEGSTATE: 151 + case KVM_CAP_PPC_PAIRED_SINGLES: 152 + case KVM_CAP_PPC_UNSET_IRQ: 153 + case KVM_CAP_ENABLE_CAP: 154 + case KVM_CAP_PPC_OSI: 151 155 r = 1; 152 156 break; 153 157 case KVM_CAP_COALESCED_MMIO: ··· 197 193 { 198 194 struct kvm_vcpu *vcpu; 199 195 vcpu = kvmppc_core_vcpu_create(kvm, id); 200 - kvmppc_create_vcpu_debugfs(vcpu, id); 196 + if (!IS_ERR(vcpu)) 197 + kvmppc_create_vcpu_debugfs(vcpu, id); 201 198 return vcpu; 202 199 } 203 200 204 201 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 205 202 { 203 + /* Make sure we're not using the vcpu anymore */ 204 + hrtimer_cancel(&vcpu->arch.dec_timer); 205 + tasklet_kill(&vcpu->arch.tasklet); 206 + 206 207 kvmppc_remove_vcpu_debugfs(vcpu); 207 208 kvmppc_core_vcpu_free(vcpu); 208 209 } ··· 287 278 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 288 279 struct kvm_run *run) 289 280 { 290 - ulong gpr; 281 + u64 gpr; 291 282 292 283 if (run->mmio.len > sizeof(gpr)) { 293 284 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); ··· 296 287 297 288 if (vcpu->arch.mmio_is_bigendian) { 298 289 switch (run->mmio.len) { 290 + case 8: gpr = *(u64 *)run->mmio.data; break; 299 291 case 4: gpr = *(u32 *)run->mmio.data; break; 300 292 case 2: gpr = *(u16 *)run->mmio.data; break; 301 293 case 1: gpr = *(u8 *)run->mmio.data; break; ··· 310 300 } 311 301 } 312 302 303 + if (vcpu->arch.mmio_sign_extend) { 304 + switch (run->mmio.len) { 305 + #ifdef CONFIG_PPC64 306 + case 4: 307 + gpr = (s64)(s32)gpr; 308 + break; 309 + #endif 310 + case 2: 311 + gpr = (s64)(s16)gpr; 312 + break; 313 + case 1: 314 + gpr = (s64)(s8)gpr; 315 + break; 316 + } 317 + } 318 + 313 319 kvmppc_set_gpr(vcpu, 
vcpu->arch.io_gpr, gpr); 320 + 321 + switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) { 322 + case KVM_REG_GPR: 323 + kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); 324 + break; 325 + case KVM_REG_FPR: 326 + vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; 327 + break; 328 + #ifdef CONFIG_PPC_BOOK3S 329 + case KVM_REG_QPR: 330 + vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; 331 + break; 332 + case KVM_REG_FQPR: 333 + vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; 334 + vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; 335 + break; 336 + #endif 337 + default: 338 + BUG(); 339 + } 314 340 } 315 341 316 342 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, ··· 365 319 vcpu->arch.mmio_is_bigendian = is_bigendian; 366 320 vcpu->mmio_needed = 1; 367 321 vcpu->mmio_is_write = 0; 322 + vcpu->arch.mmio_sign_extend = 0; 368 323 369 324 return EMULATE_DO_MMIO; 370 325 } 371 326 327 + /* Same as above, but sign extends */ 328 + int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, 329 + unsigned int rt, unsigned int bytes, int is_bigendian) 330 + { 331 + int r; 332 + 333 + r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); 334 + vcpu->arch.mmio_sign_extend = 1; 335 + 336 + return r; 337 + } 338 + 372 339 int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 373 - u32 val, unsigned int bytes, int is_bigendian) 340 + u64 val, unsigned int bytes, int is_bigendian) 374 341 { 375 342 void *data = run->mmio.data; 376 343 ··· 401 342 /* Store the value at the lowest bytes in 'data'. 
*/ 402 343 if (is_bigendian) { 403 344 switch (bytes) { 345 + case 8: *(u64 *)data = val; break; 404 346 case 4: *(u32 *)data = val; break; 405 347 case 2: *(u16 *)data = val; break; 406 348 case 1: *(u8 *)data = val; break; ··· 436 376 if (!vcpu->arch.dcr_is_write) 437 377 kvmppc_complete_dcr_load(vcpu, run); 438 378 vcpu->arch.dcr_needed = 0; 379 + } else if (vcpu->arch.osi_needed) { 380 + u64 *gprs = run->osi.gprs; 381 + int i; 382 + 383 + for (i = 0; i < 32; i++) 384 + kvmppc_set_gpr(vcpu, i, gprs[i]); 385 + vcpu->arch.osi_needed = 0; 439 386 } 440 387 441 388 kvmppc_core_deliver_interrupts(vcpu); ··· 463 396 464 397 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 465 398 { 466 - kvmppc_core_queue_external(vcpu, irq); 399 + if (irq->irq == KVM_INTERRUPT_UNSET) 400 + kvmppc_core_dequeue_external(vcpu, irq); 401 + else 402 + kvmppc_core_queue_external(vcpu, irq); 467 403 468 404 if (waitqueue_active(&vcpu->wq)) { 469 405 wake_up_interruptible(&vcpu->wq); ··· 474 404 } 475 405 476 406 return 0; 407 + } 408 + 409 + static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 410 + struct kvm_enable_cap *cap) 411 + { 412 + int r; 413 + 414 + if (cap->flags) 415 + return -EINVAL; 416 + 417 + switch (cap->cap) { 418 + case KVM_CAP_PPC_OSI: 419 + r = 0; 420 + vcpu->arch.osi_enabled = true; 421 + break; 422 + default: 423 + r = -EINVAL; 424 + break; 425 + } 426 + 427 + return r; 477 428 } 478 429 479 430 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, ··· 523 432 if (copy_from_user(&irq, argp, sizeof(irq))) 524 433 goto out; 525 434 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 435 + break; 436 + } 437 + case KVM_ENABLE_CAP: 438 + { 439 + struct kvm_enable_cap cap; 440 + r = -EFAULT; 441 + if (copy_from_user(&cap, argp, sizeof(cap))) 442 + goto out; 443 + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 526 444 break; 527 445 } 528 446 default:
+22 -7
arch/powerpc/mm/mmu_context_hash32.c
··· 60 60 static unsigned long next_mmu_context; 61 61 static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; 62 62 63 - 64 - /* 65 - * Set up the context for a new address space. 66 - */ 67 - int init_new_context(struct task_struct *t, struct mm_struct *mm) 63 + unsigned long __init_new_context(void) 68 64 { 69 65 unsigned long ctx = next_mmu_context; 70 66 ··· 70 74 ctx = 0; 71 75 } 72 76 next_mmu_context = (ctx + 1) & LAST_CONTEXT; 73 - mm->context.id = ctx; 77 + 78 + return ctx; 79 + } 80 + EXPORT_SYMBOL_GPL(__init_new_context); 81 + 82 + /* 83 + * Set up the context for a new address space. 84 + */ 85 + int init_new_context(struct task_struct *t, struct mm_struct *mm) 86 + { 87 + mm->context.id = __init_new_context(); 74 88 75 89 return 0; 76 90 } 91 + 92 + /* 93 + * Free a context ID. Make sure to call this with preempt disabled! 94 + */ 95 + void __destroy_context(unsigned long ctx) 96 + { 97 + clear_bit(ctx, context_map); 98 + } 99 + EXPORT_SYMBOL_GPL(__destroy_context); 77 100 78 101 /* 79 102 * We're finished using the context for an address space. ··· 101 86 { 102 87 preempt_disable(); 103 88 if (mm->context.id != NO_CONTEXT) { 104 - clear_bit(mm->context.id, context_map); 89 + __destroy_context(mm->context.id); 105 90 mm->context.id = NO_CONTEXT; 106 91 } 107 92 preempt_enable();
+4 -2
arch/s390/kvm/kvm-s390.c
··· 341 341 342 342 rc = kvm_vcpu_init(vcpu, kvm, id); 343 343 if (rc) 344 - goto out_free_cpu; 344 + goto out_free_sie_block; 345 345 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 346 346 vcpu->arch.sie_block); 347 347 348 348 return vcpu; 349 + out_free_sie_block: 350 + free_page((unsigned long)(vcpu->arch.sie_block)); 349 351 out_free_cpu: 350 352 kfree(vcpu); 351 353 out_nomem: ··· 752 750 static int __init kvm_s390_init(void) 753 751 { 754 752 int ret; 755 - ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); 753 + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 756 754 if (ret) 757 755 return ret; 758 756
+1 -1
arch/s390/kvm/kvm-s390.h
··· 72 72 struct kvm_memslots *memslots; 73 73 74 74 idx = srcu_read_lock(&vcpu->kvm->srcu); 75 - memslots = rcu_dereference(vcpu->kvm->memslots); 75 + memslots = kvm_memslots(vcpu->kvm); 76 76 77 77 mem = &memslots->memslots[0]; 78 78
+16 -1
arch/x86/include/asm/kvm.h
··· 21 21 #define __KVM_HAVE_PIT_STATE2 22 22 #define __KVM_HAVE_XEN_HVM 23 23 #define __KVM_HAVE_VCPU_EVENTS 24 + #define __KVM_HAVE_DEBUGREGS 24 25 25 26 /* Architectural interrupt line count. */ 26 27 #define KVM_NR_INTERRUPTS 256 ··· 258 257 /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ 259 258 #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 260 259 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 260 + #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 261 + 262 + /* Interrupt shadow states */ 263 + #define KVM_X86_SHADOW_INT_MOV_SS 0x01 264 + #define KVM_X86_SHADOW_INT_STI 0x02 261 265 262 266 /* for KVM_GET/SET_VCPU_EVENTS */ 263 267 struct kvm_vcpu_events { ··· 277 271 __u8 injected; 278 272 __u8 nr; 279 273 __u8 soft; 280 - __u8 pad; 274 + __u8 shadow; 281 275 } interrupt; 282 276 struct { 283 277 __u8 injected; ··· 288 282 __u32 sipi_vector; 289 283 __u32 flags; 290 284 __u32 reserved[10]; 285 + }; 286 + 287 + /* for KVM_GET/SET_DEBUGREGS */ 288 + struct kvm_debugregs { 289 + __u64 db[4]; 290 + __u64 dr6; 291 + __u64 dr7; 292 + __u64 flags; 293 + __u64 reserved[9]; 291 294 }; 292 295 293 296 #endif /* _ASM_X86_KVM_H */
+42 -4
arch/x86/include/asm/kvm_emulate.h
··· 11 11 #ifndef _ASM_X86_KVM_X86_EMULATE_H 12 12 #define _ASM_X86_KVM_X86_EMULATE_H 13 13 14 + #include <asm/desc_defs.h> 15 + 14 16 struct x86_emulate_ctxt; 15 17 16 18 /* ··· 65 63 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); 66 64 67 65 /* 66 + * write_std: Write bytes of standard (non-emulated/special) memory. 67 + * Used for descriptor writing. 68 + * @addr: [IN ] Linear address to which to write. 69 + * @val: [OUT] Value write to memory, zero-extended to 'u_long'. 70 + * @bytes: [IN ] Number of bytes to write to memory. 71 + */ 72 + int (*write_std)(unsigned long addr, void *val, 73 + unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); 74 + /* 68 75 * fetch: Read bytes of standard (non-emulated/special) memory. 69 76 * Used for instruction fetch. 70 77 * @addr: [IN ] Linear address from which to read. ··· 120 109 unsigned int bytes, 121 110 struct kvm_vcpu *vcpu); 122 111 112 + int (*pio_in_emulated)(int size, unsigned short port, void *val, 113 + unsigned int count, struct kvm_vcpu *vcpu); 114 + 115 + int (*pio_out_emulated)(int size, unsigned short port, const void *val, 116 + unsigned int count, struct kvm_vcpu *vcpu); 117 + 118 + bool (*get_cached_descriptor)(struct desc_struct *desc, 119 + int seg, struct kvm_vcpu *vcpu); 120 + void (*set_cached_descriptor)(struct desc_struct *desc, 121 + int seg, struct kvm_vcpu *vcpu); 122 + u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); 123 + void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); 124 + void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); 125 + ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); 126 + void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); 127 + int (*cpl)(struct kvm_vcpu *vcpu); 128 + void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 123 129 }; 124 130 125 131 /* Type, address-of, and value of an instruction's operand. 
*/ ··· 149 121 struct fetch_cache { 150 122 u8 data[15]; 151 123 unsigned long start; 124 + unsigned long end; 125 + }; 126 + 127 + struct read_cache { 128 + u8 data[1024]; 129 + unsigned long pos; 152 130 unsigned long end; 153 131 }; 154 132 ··· 173 139 u8 seg_override; 174 140 unsigned int d; 175 141 unsigned long regs[NR_VCPU_REGS]; 176 - unsigned long eip, eip_orig; 142 + unsigned long eip; 177 143 /* modrm */ 178 144 u8 modrm; 179 145 u8 modrm_mod; ··· 185 151 void *modrm_ptr; 186 152 unsigned long modrm_val; 187 153 struct fetch_cache fetch; 154 + struct read_cache io_read; 188 155 }; 189 - 190 - #define X86_SHADOW_INT_MOV_SS 1 191 - #define X86_SHADOW_INT_STI 2 192 156 193 157 struct x86_emulate_ctxt { 194 158 /* Register state before/after emulation. */ 195 159 struct kvm_vcpu *vcpu; 196 160 197 161 unsigned long eflags; 162 + unsigned long eip; /* eip before instruction emulation */ 198 163 /* Emulated execution mode, represented by an X86EMUL_MODE value. */ 199 164 int mode; 200 165 u32 cs_base; ··· 201 168 /* interruptibility state, as a result of execution of STI or MOV SS */ 202 169 int interruptibility; 203 170 171 + bool restart; /* restart string instruction after writeback */ 204 172 /* decode cache */ 205 173 struct decode_cache decode; 206 174 }; ··· 228 194 struct x86_emulate_ops *ops); 229 195 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, 230 196 struct x86_emulate_ops *ops); 197 + int emulator_task_switch(struct x86_emulate_ctxt *ctxt, 198 + struct x86_emulate_ops *ops, 199 + u16 tss_selector, int reason, 200 + bool has_error_code, u32 error_code); 231 201 232 202 #endif /* _ASM_X86_KVM_X86_EMULATE_H */
+25 -55
arch/x86/include/asm/kvm_host.h
··· 171 171 union kvm_mmu_page_role { 172 172 unsigned word; 173 173 struct { 174 - unsigned glevels:4; 175 174 unsigned level:4; 175 + unsigned cr4_pae:1; 176 176 unsigned quadrant:2; 177 177 unsigned pad_for_nice_hex_output:6; 178 178 unsigned direct:1; 179 179 unsigned access:3; 180 180 unsigned invalid:1; 181 - unsigned cr4_pge:1; 182 181 unsigned nxe:1; 182 + unsigned cr0_wp:1; 183 183 }; 184 184 }; 185 185 186 186 struct kvm_mmu_page { 187 187 struct list_head link; 188 188 struct hlist_node hash_link; 189 - 190 - struct list_head oos_link; 191 189 192 190 /* 193 191 * The following two entries are used to key the shadow page in the ··· 202 204 * in this shadow page. 203 205 */ 204 206 DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 205 - int multimapped; /* More than one parent_pte? */ 206 - int root_count; /* Currently serving as active root */ 207 + bool multimapped; /* More than one parent_pte? */ 207 208 bool unsync; 209 + int root_count; /* Currently serving as active root */ 208 210 unsigned int unsync_children; 209 211 union { 210 212 u64 *parent_pte; /* !multimapped */ ··· 222 224 223 225 struct kvm_pio_request { 224 226 unsigned long count; 225 - int cur_count; 226 - gva_t guest_gva; 227 227 int in; 228 228 int port; 229 229 int size; 230 - int string; 231 - int down; 232 - int rep; 233 230 }; 234 231 235 232 /* ··· 313 320 struct kvm_queued_exception { 314 321 bool pending; 315 322 bool has_error_code; 323 + bool reinject; 316 324 u8 nr; 317 325 u32 error_code; 318 326 } exception; ··· 356 362 u64 *mce_banks; 357 363 358 364 /* used for guest single stepping over the given code position */ 359 - u16 singlestep_cs; 360 365 unsigned long singlestep_rip; 366 + 361 367 /* fields used by HYPER-V emulation */ 362 368 u64 hv_vapic; 363 369 }; ··· 383 389 unsigned int n_free_mmu_pages; 384 390 unsigned int n_requested_mmu_pages; 385 391 unsigned int n_alloc_mmu_pages; 392 + atomic_t invlpg_counter; 386 393 struct hlist_head 
mmu_page_hash[KVM_NUM_MMU_PAGES]; 387 394 /* 388 395 * Hash table of struct kvm_mmu_page. ··· 456 461 u32 nmi_injections; 457 462 }; 458 463 459 - struct descriptor_table { 460 - u16 limit; 461 - unsigned long base; 462 - } __attribute__((packed)); 463 - 464 464 struct kvm_x86_ops { 465 465 int (*cpu_has_kvm_support)(void); /* __init */ 466 466 int (*disabled_by_bios)(void); /* __init */ ··· 493 503 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); 494 504 void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); 495 505 void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); 496 - void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 497 - void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 498 - void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 499 - void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 500 - int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); 501 - int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); 506 + void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 507 + void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 508 + void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 509 + void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 510 + void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); 502 511 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 503 512 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 504 513 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); ··· 516 527 void (*set_irq)(struct kvm_vcpu *vcpu); 517 528 void (*set_nmi)(struct kvm_vcpu *vcpu); 518 529 void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, 519 - bool has_error_code, u32 error_code); 530 + bool has_error_code, u32 error_code, 531 + bool reinject); 520 532 int (*interrupt_allowed)(struct kvm_vcpu *vcpu); 521 533 int (*nmi_allowed)(struct kvm_vcpu *vcpu); 522 534 bool (*get_nmi_mask)(struct kvm_vcpu 
*vcpu); ··· 530 540 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 531 541 int (*get_lpage_level)(void); 532 542 bool (*rdtscp_supported)(void); 543 + 544 + void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); 533 545 534 546 const struct trace_print_flags *exit_reasons_str; 535 547 }; ··· 579 587 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); 580 588 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 581 589 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 582 - void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, 583 - unsigned long *rflags); 584 590 585 - unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); 586 - void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, 587 - unsigned long *rflags); 588 591 void kvm_enable_efer_bits(u64); 589 592 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); 590 593 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 591 594 592 595 struct x86_emulate_ctxt; 593 596 594 - int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, 595 - int size, unsigned port); 596 - int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, 597 - int size, unsigned long count, int down, 598 - gva_t address, int rep, unsigned port); 597 + int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); 599 598 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); 600 599 int kvm_emulate_halt(struct kvm_vcpu *vcpu); 601 600 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); ··· 599 616 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 600 617 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); 601 618 602 - int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); 619 + int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 620 + bool has_error_code, u32 error_code); 603 
621 604 622 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 605 623 void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); 606 624 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); 607 625 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); 626 + int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); 627 + int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); 608 628 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); 609 629 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); 610 630 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); ··· 620 634 621 635 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); 622 636 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); 637 + void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); 638 + void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); 623 639 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, 624 640 u32 error_code); 625 641 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); ··· 636 648 const void *val, 637 649 unsigned int bytes, 638 650 struct kvm_vcpu *vcpu); 639 - 640 - unsigned long segment_base(u16 selector); 641 651 642 652 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 643 653 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ··· 661 675 void kvm_enable_tdp(void); 662 676 void kvm_disable_tdp(void); 663 677 664 - int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 665 678 int complete_pio(struct kvm_vcpu *vcpu); 666 679 bool kvm_check_iopl(struct kvm_vcpu *vcpu); 667 680 ··· 707 722 static inline void kvm_load_ldt(u16 sel) 708 723 { 709 724 asm("lldt %0" : : "rm"(sel)); 710 - } 711 - 712 - static inline void kvm_get_idt(struct descriptor_table *table) 713 - { 714 - asm("sidt %0" : "=m"(*table)); 715 - } 716 - 717 - static inline void kvm_get_gdt(struct descriptor_table *table) 718 - { 719 - asm("sgdt %0" : 
"=m"(*table)); 720 - } 721 - 722 - static inline unsigned long kvm_read_tr_base(void) 723 - { 724 - u16 tr; 725 - asm("str %0" : "=g"(tr)); 726 - return segment_base(tr); 727 725 } 728 726 729 727 #ifdef CONFIG_X86_64 ··· 793 825 794 826 void kvm_define_shared_msr(unsigned index, u32 msr); 795 827 void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); 828 + 829 + bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); 796 830 797 831 #endif /* _ASM_X86_KVM_HOST_H */
+13
arch/x86/include/asm/kvm_para.h
··· 16 16 #define KVM_FEATURE_CLOCKSOURCE 0 17 17 #define KVM_FEATURE_NOP_IO_DELAY 1 18 18 #define KVM_FEATURE_MMU_OP 2 19 + /* This indicates that the new set of kvmclock msrs 20 + * are available. The use of 0x11 and 0x12 is deprecated 21 + */ 22 + #define KVM_FEATURE_CLOCKSOURCE2 3 23 + 24 + /* The last 8 bits are used to indicate how to interpret the flags field 25 + * in pvclock structure. If no bits are set, all flags are ignored. 26 + */ 27 + #define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 19 28 20 29 #define MSR_KVM_WALL_CLOCK 0x11 21 30 #define MSR_KVM_SYSTEM_TIME 0x12 31 + 32 + /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ 33 + #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 34 + #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 22 35 23 36 #define KVM_MAX_MMU_OP_BATCH 32 24 37
+3 -2
arch/x86/include/asm/msr-index.h
··· 202 202 #define MSR_IA32_EBL_CR_POWERON 0x0000002a 203 203 #define MSR_IA32_FEATURE_CONTROL 0x0000003a 204 204 205 - #define FEATURE_CONTROL_LOCKED (1<<0) 206 - #define FEATURE_CONTROL_VMXON_ENABLED (1<<2) 205 + #define FEATURE_CONTROL_LOCKED (1<<0) 206 + #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) 207 + #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) 207 208 208 209 #define MSR_IA32_APICBASE 0x0000001b 209 210 #define MSR_IA32_APICBASE_BSP (1<<8)
+3 -1
arch/x86/include/asm/pvclock-abi.h
··· 29 29 u64 system_time; 30 30 u32 tsc_to_system_mul; 31 31 s8 tsc_shift; 32 - u8 pad[3]; 32 + u8 flags; 33 + u8 pad[2]; 33 34 } __attribute__((__packed__)); /* 32 bytes */ 34 35 35 36 struct pvclock_wall_clock { ··· 39 38 u32 nsec; 40 39 } __attribute__((__packed__)); 41 40 41 + #define PVCLOCK_TSC_STABLE_BIT (1 << 0) 42 42 #endif /* __ASSEMBLY__ */ 43 43 #endif /* _ASM_X86_PVCLOCK_ABI_H */
+1
arch/x86/include/asm/pvclock.h
··· 6 6 7 7 /* some helper functions for xen and kvm pv clock sources */ 8 8 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); 9 + void pvclock_set_flags(u8 flags); 9 10 unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); 10 11 void pvclock_read_wallclock(struct pvclock_wall_clock *wall, 11 12 struct pvclock_vcpu_time_info *vcpu,
+8 -1
arch/x86/include/asm/svm.h
··· 81 81 u32 event_inj_err; 82 82 u64 nested_cr3; 83 83 u64 lbr_ctl; 84 - u8 reserved_5[832]; 84 + u64 reserved_5; 85 + u64 next_rip; 86 + u8 reserved_6[816]; 85 87 }; 86 88 87 89 ··· 116 114 #define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) 117 115 #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) 118 116 #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) 117 + 118 + #define SVM_VM_CR_VALID_MASK 0x001fULL 119 + #define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL 120 + #define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL 119 121 120 122 struct __attribute__ ((__packed__)) vmcb_seg { 121 123 u16 selector; ··· 244 238 245 239 #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 246 240 #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 241 + #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 247 242 248 243 #define SVM_EXIT_READ_CR0 0x000 249 244 #define SVM_EXIT_READ_CR3 0x003
+11 -1
arch/x86/include/asm/vmx.h
··· 25 25 * 26 26 */ 27 27 28 + #include <linux/types.h> 29 + 28 30 /* 29 31 * Definitions of Primary Processor-Based VM-Execution Controls. 30 32 */ ··· 122 120 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 123 121 GUEST_IA32_PAT = 0x00002804, 124 122 GUEST_IA32_PAT_HIGH = 0x00002805, 123 + GUEST_IA32_EFER = 0x00002806, 124 + GUEST_IA32_EFER_HIGH = 0x00002807, 125 125 GUEST_PDPTR0 = 0x0000280a, 126 126 GUEST_PDPTR0_HIGH = 0x0000280b, 127 127 GUEST_PDPTR1 = 0x0000280c, ··· 134 130 GUEST_PDPTR3_HIGH = 0x00002811, 135 131 HOST_IA32_PAT = 0x00002c00, 136 132 HOST_IA32_PAT_HIGH = 0x00002c01, 133 + HOST_IA32_EFER = 0x00002c02, 134 + HOST_IA32_EFER_HIGH = 0x00002c03, 137 135 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 138 136 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 139 137 EXCEPTION_BITMAP = 0x00004004, ··· 400 394 #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" 401 395 #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" 402 396 403 - 397 + struct vmx_msr_entry { 398 + u32 index; 399 + u32 reserved; 400 + u64 value; 401 + } __aligned(16); 404 402 405 403 #endif
+35 -21
arch/x86/kernel/kvmclock.c
··· 29 29 #define KVM_SCALE 22 30 30 31 31 static int kvmclock = 1; 32 + static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; 33 + static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; 32 34 33 35 static int parse_no_kvmclock(char *arg) 34 36 { ··· 56 54 57 55 low = (int)__pa_symbol(&wall_clock); 58 56 high = ((u64)__pa_symbol(&wall_clock) >> 32); 59 - native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 57 + 58 + native_write_msr(msr_kvm_wall_clock, low, high); 60 59 61 60 vcpu_time = &get_cpu_var(hv_clock); 62 61 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); ··· 133 130 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 134 131 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 135 132 cpu, high, low, txt); 136 - return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 133 + 134 + return native_write_msr_safe(msr_kvm_system_time, low, high); 137 135 } 138 136 139 137 #ifdef CONFIG_X86_LOCAL_APIC ··· 169 165 #ifdef CONFIG_KEXEC 170 166 static void kvm_crash_shutdown(struct pt_regs *regs) 171 167 { 172 - native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); 168 + native_write_msr(msr_kvm_system_time, 0, 0); 173 169 native_machine_crash_shutdown(regs); 174 170 } 175 171 #endif 176 172 177 173 static void kvm_shutdown(void) 178 174 { 179 - native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); 175 + native_write_msr(msr_kvm_system_time, 0, 0); 180 176 native_machine_shutdown(); 181 177 } 182 178 ··· 185 181 if (!kvm_para_available()) 186 182 return; 187 183 188 - if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { 189 - if (kvm_register_clock("boot clock")) 190 - return; 191 - pv_time_ops.sched_clock = kvm_clock_read; 192 - x86_platform.calibrate_tsc = kvm_get_tsc_khz; 193 - x86_platform.get_wallclock = kvm_get_wallclock; 194 - x86_platform.set_wallclock = kvm_set_wallclock; 184 + if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { 185 + msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; 186 + msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; 187 
+ } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) 188 + return; 189 + 190 + printk(KERN_INFO "kvm-clock: Using msrs %x and %x", 191 + msr_kvm_system_time, msr_kvm_wall_clock); 192 + 193 + if (kvm_register_clock("boot clock")) 194 + return; 195 + pv_time_ops.sched_clock = kvm_clock_read; 196 + x86_platform.calibrate_tsc = kvm_get_tsc_khz; 197 + x86_platform.get_wallclock = kvm_get_wallclock; 198 + x86_platform.set_wallclock = kvm_set_wallclock; 195 199 #ifdef CONFIG_X86_LOCAL_APIC 196 - x86_cpuinit.setup_percpu_clockev = 197 - kvm_setup_secondary_clock; 200 + x86_cpuinit.setup_percpu_clockev = 201 + kvm_setup_secondary_clock; 198 202 #endif 199 203 #ifdef CONFIG_SMP 200 - smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; 204 + smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; 201 205 #endif 202 - machine_ops.shutdown = kvm_shutdown; 206 + machine_ops.shutdown = kvm_shutdown; 203 207 #ifdef CONFIG_KEXEC 204 - machine_ops.crash_shutdown = kvm_crash_shutdown; 208 + machine_ops.crash_shutdown = kvm_crash_shutdown; 205 209 #endif 206 - kvm_get_preset_lpj(); 207 - clocksource_register(&kvm_clock); 208 - pv_info.paravirt_enabled = 1; 209 - pv_info.name = "KVM"; 210 - } 210 + kvm_get_preset_lpj(); 211 + clocksource_register(&kvm_clock); 212 + pv_info.paravirt_enabled = 1; 213 + pv_info.name = "KVM"; 214 + 215 + if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) 216 + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); 211 217 }
+37
arch/x86/kernel/pvclock.c
··· 31 31 u32 tsc_to_nsec_mul; 32 32 int tsc_shift; 33 33 u32 version; 34 + u8 flags; 34 35 }; 36 + 37 + static u8 valid_flags __read_mostly = 0; 38 + 39 + void pvclock_set_flags(u8 flags) 40 + { 41 + valid_flags = flags; 42 + } 35 43 36 44 /* 37 45 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, ··· 99 91 dst->system_timestamp = src->system_time; 100 92 dst->tsc_to_nsec_mul = src->tsc_to_system_mul; 101 93 dst->tsc_shift = src->tsc_shift; 94 + dst->flags = src->flags; 102 95 rmb(); /* test version after fetching data */ 103 96 } while ((src->version & 1) || (dst->version != src->version)); 104 97 ··· 118 109 return pv_tsc_khz; 119 110 } 120 111 112 + static atomic64_t last_value = ATOMIC64_INIT(0); 113 + 121 114 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 122 115 { 123 116 struct pvclock_shadow_time shadow; 124 117 unsigned version; 125 118 cycle_t ret, offset; 119 + u64 last; 126 120 127 121 do { 128 122 version = pvclock_get_time_values(&shadow, src); ··· 134 122 ret = shadow.system_timestamp + offset; 135 123 barrier(); 136 124 } while (version != src->version); 125 + 126 + if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && 127 + (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) 128 + return ret; 129 + 130 + /* 131 + * Assumption here is that last_value, a global accumulator, always goes 132 + * forward. If we are less than that, we should not be much smaller. 133 + * We assume there is an error marging we're inside, and then the correction 134 + * does not sacrifice accuracy. 135 + * 136 + * For reads: global may have changed between test and return, 137 + * but this means someone else updated poked the clock at a later time. 138 + * We just need to make sure we are not seeing a backwards event. 
139 + * 140 + * For updates: last_value = ret is not enough, since two vcpus could be 141 + * updating at the same time, and one of them could be slightly behind, 142 + * making the assumption that last_value always go forward fail to hold. 143 + */ 144 + last = atomic64_read(&last_value); 145 + do { 146 + if (ret < last) 147 + return last; 148 + last = atomic64_cmpxchg(&last_value, last, ret); 149 + } while (unlikely(last != ret)); 137 150 138 151 return ret; 139 152 }
+1
arch/x86/kernel/tboot.c
··· 46 46 47 47 /* Global pointer to shared data; NULL means no measured launch. */ 48 48 struct tboot *tboot __read_mostly; 49 + EXPORT_SYMBOL(tboot); 49 50 50 51 /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ 51 52 #define AP_WAIT_TIMEOUT 1
+917 -332
arch/x86/kvm/emulate.c
··· 33 33 #include <asm/kvm_emulate.h> 34 34 35 35 #include "x86.h" 36 + #include "tss.h" 36 37 37 38 /* 38 39 * Opcode effective-address decode tables. ··· 51 50 #define DstReg (2<<1) /* Register operand. */ 52 51 #define DstMem (3<<1) /* Memory operand. */ 53 52 #define DstAcc (4<<1) /* Destination Accumulator */ 53 + #define DstDI (5<<1) /* Destination is in ES:(E)DI */ 54 + #define DstMem64 (6<<1) /* 64bit memory operand */ 54 55 #define DstMask (7<<1) 55 56 /* Source operand type. */ 56 57 #define SrcNone (0<<4) /* No source operand. */ ··· 66 63 #define SrcOne (7<<4) /* Implied '1' */ 67 64 #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ 68 65 #define SrcImmU (9<<4) /* Immediate operand, unsigned */ 66 + #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ 69 67 #define SrcMask (0xf<<4) 70 68 /* Generic ModRM decode. */ 71 69 #define ModRM (1<<8) ··· 89 85 #define Src2ImmByte (2<<29) 90 86 #define Src2One (3<<29) 91 87 #define Src2Imm16 (4<<29) 88 + #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be 89 + in memory and second argument is located 90 + immediately after the first one in memory. 
*/ 92 91 #define Src2Mask (7<<29) 93 92 94 93 enum { ··· 154 147 0, 0, 0, 0, 155 148 /* 0x68 - 0x6F */ 156 149 SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, 157 - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ 158 - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ 150 + DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */ 151 + SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */ 159 152 /* 0x70 - 0x77 */ 160 153 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 161 154 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, ··· 180 173 /* 0xA0 - 0xA7 */ 181 174 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, 182 175 ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, 183 - ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, 184 - ByteOp | ImplicitOps | String, ImplicitOps | String, 176 + ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, 177 + ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, 185 178 /* 0xA8 - 0xAF */ 186 - 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, 187 - ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, 188 - ByteOp | ImplicitOps | String, ImplicitOps | String, 179 + 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, 180 + ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, 181 + ByteOp | DstDI | String, DstDI | String, 189 182 /* 0xB0 - 0xB7 */ 190 183 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, 191 184 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, ··· 211 204 0, 0, 0, 0, 0, 0, 0, 0, 212 205 /* 0xE0 - 0xE7 */ 213 206 0, 0, 0, 0, 214 - ByteOp | SrcImmUByte, SrcImmUByte, 215 - ByteOp | SrcImmUByte, SrcImmUByte, 207 + ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, 208 + ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, 216 209 /* 
0xE8 - 0xEF */ 217 210 SrcImm | Stack, SrcImm | ImplicitOps, 218 211 SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, 219 - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 220 - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 212 + SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, 213 + SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, 221 214 /* 0xF0 - 0xF7 */ 222 215 0, 0, 0, 0, 223 216 ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, ··· 350 343 [Group5*8] = 351 344 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 352 345 SrcMem | ModRM | Stack, 0, 353 - SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, 346 + SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, 347 + SrcMem | ModRM | Stack, 0, 354 348 [Group7*8] = 355 349 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, 356 350 SrcNone | ModRM | DstMem | Mov, 0, ··· 361 353 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, 362 354 DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, 363 355 [Group9*8] = 364 - 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, 356 + 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0, 365 357 }; 366 358 367 359 static u32 group2_table[] = { 368 360 [Group7*8] = 369 - SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, 361 + SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, 370 362 SrcNone | ModRM | DstMem | Mov, 0, 371 - SrcMem16 | ModRM | Mov, 0, 363 + SrcMem16 | ModRM | Mov | Priv, 0, 372 364 [Group9*8] = 373 365 0, 0, 0, 0, 0, 0, 0, 0, 374 366 }; ··· 570 562 #define insn_fetch(_type, _size, _eip) \ 571 563 ({ unsigned long _x; \ 572 564 rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ 573 - if (rc != 0) \ 565 + if (rc != X86EMUL_CONTINUE) \ 574 566 goto done; \ 575 567 (_eip) += (_size); \ 576 568 (_type)_x; \ ··· 646 638 647 639 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, 648 640 struct x86_emulate_ops *ops, 649 - unsigned long linear, u8 *dest) 641 + unsigned long eip, u8 
*dest) 650 642 { 651 643 struct fetch_cache *fc = &ctxt->decode.fetch; 652 644 int rc; 653 - int size; 645 + int size, cur_size; 654 646 655 - if (linear < fc->start || linear >= fc->end) { 656 - size = min(15UL, PAGE_SIZE - offset_in_page(linear)); 657 - rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); 658 - if (rc) 647 + if (eip == fc->end) { 648 + cur_size = fc->end - fc->start; 649 + size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); 650 + rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, 651 + size, ctxt->vcpu, NULL); 652 + if (rc != X86EMUL_CONTINUE) 659 653 return rc; 660 - fc->start = linear; 661 - fc->end = linear + size; 654 + fc->end += size; 662 655 } 663 - *dest = fc->data[linear - fc->start]; 664 - return 0; 656 + *dest = fc->data[eip - fc->start]; 657 + return X86EMUL_CONTINUE; 665 658 } 666 659 667 660 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, 668 661 struct x86_emulate_ops *ops, 669 662 unsigned long eip, void *dest, unsigned size) 670 663 { 671 - int rc = 0; 664 + int rc; 672 665 673 666 /* x86 instructions are limited to 15 bytes. 
*/ 674 - if (eip + size - ctxt->decode.eip_orig > 15) 667 + if (eip + size - ctxt->eip > 15) 675 668 return X86EMUL_UNHANDLEABLE; 676 - eip += ctxt->cs_base; 677 669 while (size--) { 678 670 rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); 679 - if (rc) 671 + if (rc != X86EMUL_CONTINUE) 680 672 return rc; 681 673 } 682 - return 0; 674 + return X86EMUL_CONTINUE; 683 675 } 684 676 685 677 /* ··· 710 702 *address = 0; 711 703 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, 712 704 ctxt->vcpu, NULL); 713 - if (rc) 705 + if (rc != X86EMUL_CONTINUE) 714 706 return rc; 715 707 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, 716 708 ctxt->vcpu, NULL); ··· 790 782 struct decode_cache *c = &ctxt->decode; 791 783 u8 sib; 792 784 int index_reg = 0, base_reg = 0, scale; 793 - int rc = 0; 785 + int rc = X86EMUL_CONTINUE; 794 786 795 787 if (c->rex_prefix) { 796 788 c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ ··· 903 895 struct x86_emulate_ops *ops) 904 896 { 905 897 struct decode_cache *c = &ctxt->decode; 906 - int rc = 0; 898 + int rc = X86EMUL_CONTINUE; 907 899 908 900 switch (c->ad_bytes) { 909 901 case 2: ··· 924 916 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 925 917 { 926 918 struct decode_cache *c = &ctxt->decode; 927 - int rc = 0; 919 + int rc = X86EMUL_CONTINUE; 928 920 int mode = ctxt->mode; 929 921 int def_op_bytes, def_ad_bytes, group; 930 922 931 - /* Shadow copy of register state. Committed on successful emulation. */ 932 923 924 + /* we cannot decode insn before we complete previous rep insn */ 925 + WARN_ON(ctxt->restart); 926 + 927 + /* Shadow copy of register state. Committed on successful emulation. 
*/ 933 928 memset(c, 0, sizeof(struct decode_cache)); 934 - c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); 929 + c->eip = ctxt->eip; 930 + c->fetch.start = c->fetch.end = c->eip; 935 931 ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); 936 932 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 937 933 ··· 1027 1015 } 1028 1016 } 1029 1017 1030 - if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { 1031 - kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); 1032 - return -1; 1033 - } 1034 - 1035 1018 if (c->d & Group) { 1036 1019 group = c->d & GroupMask; 1037 1020 c->modrm = insn_fetch(u8, 1, c->eip); ··· 1053 1046 rc = decode_modrm(ctxt, ops); 1054 1047 else if (c->d & MemAbs) 1055 1048 rc = decode_abs(ctxt, ops); 1056 - if (rc) 1049 + if (rc != X86EMUL_CONTINUE) 1057 1050 goto done; 1058 1051 1059 1052 if (!c->has_seg_override) ··· 1064 1057 1065 1058 if (c->ad_bytes != 8) 1066 1059 c->modrm_ea = (u32)c->modrm_ea; 1060 + 1061 + if (c->rip_relative) 1062 + c->modrm_ea += c->eip; 1063 + 1067 1064 /* 1068 1065 * Decode and fetch the source operand: register, memory 1069 1066 * or immediate. ··· 1102 1091 break; 1103 1092 } 1104 1093 c->src.type = OP_MEM; 1094 + c->src.ptr = (unsigned long *)c->modrm_ea; 1095 + c->src.val = 0; 1105 1096 break; 1106 1097 case SrcImm: 1107 1098 case SrcImmU: ··· 1152 1139 c->src.bytes = 1; 1153 1140 c->src.val = 1; 1154 1141 break; 1142 + case SrcSI: 1143 + c->src.type = OP_MEM; 1144 + c->src.bytes = (c->d & ByteOp) ? 
1 : c->op_bytes; 1145 + c->src.ptr = (unsigned long *) 1146 + register_address(c, seg_override_base(ctxt, c), 1147 + c->regs[VCPU_REGS_RSI]); 1148 + c->src.val = 0; 1149 + break; 1155 1150 } 1156 1151 1157 1152 /* ··· 1189 1168 c->src2.bytes = 1; 1190 1169 c->src2.val = 1; 1191 1170 break; 1171 + case Src2Mem16: 1172 + c->src2.type = OP_MEM; 1173 + c->src2.bytes = 2; 1174 + c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); 1175 + c->src2.val = 0; 1176 + break; 1192 1177 } 1193 1178 1194 1179 /* Decode and fetch the destination operand: register or memory. */ ··· 1207 1180 c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); 1208 1181 break; 1209 1182 case DstMem: 1183 + case DstMem64: 1210 1184 if ((c->d & ModRM) && c->modrm_mod == 3) { 1211 1185 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1212 1186 c->dst.type = OP_REG; ··· 1216 1188 break; 1217 1189 } 1218 1190 c->dst.type = OP_MEM; 1191 + c->dst.ptr = (unsigned long *)c->modrm_ea; 1192 + if ((c->d & DstMask) == DstMem64) 1193 + c->dst.bytes = 8; 1194 + else 1195 + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1196 + c->dst.val = 0; 1197 + if (c->d & BitOp) { 1198 + unsigned long mask = ~(c->dst.bytes * 8 - 1); 1199 + 1200 + c->dst.ptr = (void *)c->dst.ptr + 1201 + (c->src.val & mask) / 8; 1202 + } 1219 1203 break; 1220 1204 case DstAcc: 1221 1205 c->dst.type = OP_REG; 1222 - c->dst.bytes = c->op_bytes; 1206 + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1223 1207 c->dst.ptr = &c->regs[VCPU_REGS_RAX]; 1224 - switch (c->op_bytes) { 1208 + switch (c->dst.bytes) { 1225 1209 case 1: 1226 1210 c->dst.val = *(u8 *)c->dst.ptr; 1227 1211 break; ··· 1243 1203 case 4: 1244 1204 c->dst.val = *(u32 *)c->dst.ptr; 1245 1205 break; 1206 + case 8: 1207 + c->dst.val = *(u64 *)c->dst.ptr; 1208 + break; 1246 1209 } 1247 1210 c->dst.orig_val = c->dst.val; 1248 1211 break; 1212 + case DstDI: 1213 + c->dst.type = OP_MEM; 1214 + c->dst.bytes = (c->d & ByteOp) ? 
1 : c->op_bytes; 1215 + c->dst.ptr = (unsigned long *) 1216 + register_address(c, es_base(ctxt), 1217 + c->regs[VCPU_REGS_RDI]); 1218 + c->dst.val = 0; 1219 + break; 1249 1220 } 1250 - 1251 - if (c->rip_relative) 1252 - c->modrm_ea += c->eip; 1253 1221 1254 1222 done: 1255 1223 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 1224 + } 1225 + 1226 + static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, 1227 + struct x86_emulate_ops *ops, 1228 + unsigned int size, unsigned short port, 1229 + void *dest) 1230 + { 1231 + struct read_cache *rc = &ctxt->decode.io_read; 1232 + 1233 + if (rc->pos == rc->end) { /* refill pio read ahead */ 1234 + struct decode_cache *c = &ctxt->decode; 1235 + unsigned int in_page, n; 1236 + unsigned int count = c->rep_prefix ? 1237 + address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; 1238 + in_page = (ctxt->eflags & EFLG_DF) ? 1239 + offset_in_page(c->regs[VCPU_REGS_RDI]) : 1240 + PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); 1241 + n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, 1242 + count); 1243 + if (n == 0) 1244 + n = 1; 1245 + rc->pos = rc->end = 0; 1246 + if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) 1247 + return 0; 1248 + rc->end = n * size; 1249 + } 1250 + 1251 + memcpy(dest, rc->data + rc->pos, size); 1252 + rc->pos += size; 1253 + return 1; 1254 + } 1255 + 1256 + static u32 desc_limit_scaled(struct desc_struct *desc) 1257 + { 1258 + u32 limit = get_desc_limit(desc); 1259 + 1260 + return desc->g ? (limit << 12) | 0xfff : limit; 1261 + } 1262 + 1263 + static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, 1264 + struct x86_emulate_ops *ops, 1265 + u16 selector, struct desc_ptr *dt) 1266 + { 1267 + if (selector & 1 << 2) { 1268 + struct desc_struct desc; 1269 + memset (dt, 0, sizeof *dt); 1270 + if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) 1271 + return; 1272 + 1273 + dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? 
*/ 1274 + dt->address = get_desc_base(&desc); 1275 + } else 1276 + ops->get_gdt(dt, ctxt->vcpu); 1277 + } 1278 + 1279 + /* allowed just for 8 bytes segments */ 1280 + static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, 1281 + struct x86_emulate_ops *ops, 1282 + u16 selector, struct desc_struct *desc) 1283 + { 1284 + struct desc_ptr dt; 1285 + u16 index = selector >> 3; 1286 + int ret; 1287 + u32 err; 1288 + ulong addr; 1289 + 1290 + get_descriptor_table_ptr(ctxt, ops, selector, &dt); 1291 + 1292 + if (dt.size < index * 8 + 7) { 1293 + kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); 1294 + return X86EMUL_PROPAGATE_FAULT; 1295 + } 1296 + addr = dt.address + index * 8; 1297 + ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); 1298 + if (ret == X86EMUL_PROPAGATE_FAULT) 1299 + kvm_inject_page_fault(ctxt->vcpu, addr, err); 1300 + 1301 + return ret; 1302 + } 1303 + 1304 + /* allowed just for 8 bytes segments */ 1305 + static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, 1306 + struct x86_emulate_ops *ops, 1307 + u16 selector, struct desc_struct *desc) 1308 + { 1309 + struct desc_ptr dt; 1310 + u16 index = selector >> 3; 1311 + u32 err; 1312 + ulong addr; 1313 + int ret; 1314 + 1315 + get_descriptor_table_ptr(ctxt, ops, selector, &dt); 1316 + 1317 + if (dt.size < index * 8 + 7) { 1318 + kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); 1319 + return X86EMUL_PROPAGATE_FAULT; 1320 + } 1321 + 1322 + addr = dt.address + index * 8; 1323 + ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); 1324 + if (ret == X86EMUL_PROPAGATE_FAULT) 1325 + kvm_inject_page_fault(ctxt->vcpu, addr, err); 1326 + 1327 + return ret; 1328 + } 1329 + 1330 + static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, 1331 + struct x86_emulate_ops *ops, 1332 + u16 selector, int seg) 1333 + { 1334 + struct desc_struct seg_desc; 1335 + u8 dpl, rpl, cpl; 1336 + unsigned err_vec = GP_VECTOR; 1337 + u32 err_code = 0; 1338 + bool null_selector = !(selector 
& ~0x3); /* 0000-0003 are null */ 1339 + int ret; 1340 + 1341 + memset(&seg_desc, 0, sizeof seg_desc); 1342 + 1343 + if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) 1344 + || ctxt->mode == X86EMUL_MODE_REAL) { 1345 + /* set real mode segment descriptor */ 1346 + set_desc_base(&seg_desc, selector << 4); 1347 + set_desc_limit(&seg_desc, 0xffff); 1348 + seg_desc.type = 3; 1349 + seg_desc.p = 1; 1350 + seg_desc.s = 1; 1351 + goto load; 1352 + } 1353 + 1354 + /* NULL selector is not valid for TR, CS and SS */ 1355 + if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) 1356 + && null_selector) 1357 + goto exception; 1358 + 1359 + /* TR should be in GDT only */ 1360 + if (seg == VCPU_SREG_TR && (selector & (1 << 2))) 1361 + goto exception; 1362 + 1363 + if (null_selector) /* for NULL selector skip all following checks */ 1364 + goto load; 1365 + 1366 + ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); 1367 + if (ret != X86EMUL_CONTINUE) 1368 + return ret; 1369 + 1370 + err_code = selector & 0xfffc; 1371 + err_vec = GP_VECTOR; 1372 + 1373 + /* can't load system descriptor into segment selecor */ 1374 + if (seg <= VCPU_SREG_GS && !seg_desc.s) 1375 + goto exception; 1376 + 1377 + if (!seg_desc.p) { 1378 + err_vec = (seg == VCPU_SREG_SS) ? 
SS_VECTOR : NP_VECTOR; 1379 + goto exception; 1380 + } 1381 + 1382 + rpl = selector & 3; 1383 + dpl = seg_desc.dpl; 1384 + cpl = ops->cpl(ctxt->vcpu); 1385 + 1386 + switch (seg) { 1387 + case VCPU_SREG_SS: 1388 + /* 1389 + * segment is not a writable data segment or segment 1390 + * selector's RPL != CPL or segment selector's RPL != CPL 1391 + */ 1392 + if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) 1393 + goto exception; 1394 + break; 1395 + case VCPU_SREG_CS: 1396 + if (!(seg_desc.type & 8)) 1397 + goto exception; 1398 + 1399 + if (seg_desc.type & 4) { 1400 + /* conforming */ 1401 + if (dpl > cpl) 1402 + goto exception; 1403 + } else { 1404 + /* nonconforming */ 1405 + if (rpl > cpl || dpl != cpl) 1406 + goto exception; 1407 + } 1408 + /* CS(RPL) <- CPL */ 1409 + selector = (selector & 0xfffc) | cpl; 1410 + break; 1411 + case VCPU_SREG_TR: 1412 + if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) 1413 + goto exception; 1414 + break; 1415 + case VCPU_SREG_LDTR: 1416 + if (seg_desc.s || seg_desc.type != 2) 1417 + goto exception; 1418 + break; 1419 + default: /* DS, ES, FS, or GS */ 1420 + /* 1421 + * segment is not a data or readable code segment or 1422 + * ((segment is a data or nonconforming code segment) 1423 + * and (both RPL and CPL > DPL)) 1424 + */ 1425 + if ((seg_desc.type & 0xa) == 0x8 || 1426 + (((seg_desc.type & 0xc) != 0xc) && 1427 + (rpl > dpl && cpl > dpl))) 1428 + goto exception; 1429 + break; 1430 + } 1431 + 1432 + if (seg_desc.s) { 1433 + /* mark segment as accessed */ 1434 + seg_desc.type |= 1; 1435 + ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); 1436 + if (ret != X86EMUL_CONTINUE) 1437 + return ret; 1438 + } 1439 + load: 1440 + ops->set_segment_selector(selector, seg, ctxt->vcpu); 1441 + ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); 1442 + return X86EMUL_CONTINUE; 1443 + exception: 1444 + kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); 1445 + return X86EMUL_PROPAGATE_FAULT; 1256 1446 } 
1257 1447 1258 1448 static inline void emulate_push(struct x86_emulate_ctxt *ctxt) ··· 1521 1251 int rc; 1522 1252 unsigned long val, change_mask; 1523 1253 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 1524 - int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); 1254 + int cpl = ops->cpl(ctxt->vcpu); 1525 1255 1526 1256 rc = emulate_pop(ctxt, ops, &val, len); 1527 1257 if (rc != X86EMUL_CONTINUE) ··· 1576 1306 int rc; 1577 1307 1578 1308 rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); 1579 - if (rc != 0) 1309 + if (rc != X86EMUL_CONTINUE) 1580 1310 return rc; 1581 1311 1582 - rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); 1312 + rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); 1583 1313 return rc; 1584 1314 } 1585 1315 ··· 1602 1332 struct x86_emulate_ops *ops) 1603 1333 { 1604 1334 struct decode_cache *c = &ctxt->decode; 1605 - int rc = 0; 1335 + int rc = X86EMUL_CONTINUE; 1606 1336 int reg = VCPU_REGS_RDI; 1607 1337 1608 1338 while (reg >= VCPU_REGS_RAX) { ··· 1613 1343 } 1614 1344 1615 1345 rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); 1616 - if (rc != 0) 1346 + if (rc != X86EMUL_CONTINUE) 1617 1347 break; 1618 1348 --reg; 1619 1349 } ··· 1624 1354 struct x86_emulate_ops *ops) 1625 1355 { 1626 1356 struct decode_cache *c = &ctxt->decode; 1627 - int rc; 1628 1357 1629 - rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); 1630 - if (rc != 0) 1631 - return rc; 1632 - return 0; 1358 + return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); 1633 1359 } 1634 1360 1635 1361 static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) ··· 1661 1395 struct x86_emulate_ops *ops) 1662 1396 { 1663 1397 struct decode_cache *c = &ctxt->decode; 1664 - int rc = 0; 1665 1398 1666 1399 switch (c->modrm_reg) { 1667 1400 case 0 ... 
1: /* test */ ··· 1673 1408 emulate_1op("neg", c->dst, ctxt->eflags); 1674 1409 break; 1675 1410 default: 1676 - DPRINTF("Cannot emulate %02x\n", c->b); 1677 - rc = X86EMUL_UNHANDLEABLE; 1678 - break; 1411 + return 0; 1679 1412 } 1680 - return rc; 1413 + return 1; 1681 1414 } 1682 1415 1683 1416 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, ··· 1705 1442 emulate_push(ctxt); 1706 1443 break; 1707 1444 } 1708 - return 0; 1445 + return X86EMUL_CONTINUE; 1709 1446 } 1710 1447 1711 1448 static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, 1712 - struct x86_emulate_ops *ops, 1713 - unsigned long memop) 1449 + struct x86_emulate_ops *ops) 1714 1450 { 1715 1451 struct decode_cache *c = &ctxt->decode; 1716 - u64 old, new; 1717 - int rc; 1718 - 1719 - rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); 1720 - if (rc != X86EMUL_CONTINUE) 1721 - return rc; 1452 + u64 old = c->dst.orig_val; 1722 1453 1723 1454 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || 1724 1455 ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { ··· 1720 1463 c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); 1721 1464 c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); 1722 1465 ctxt->eflags &= ~EFLG_ZF; 1723 - 1724 1466 } else { 1725 - new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | 1467 + c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | 1726 1468 (u32) c->regs[VCPU_REGS_RBX]; 1727 1469 1728 - rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); 1729 - if (rc != X86EMUL_CONTINUE) 1730 - return rc; 1731 1470 ctxt->eflags |= EFLG_ZF; 1732 1471 } 1733 - return 0; 1472 + return X86EMUL_CONTINUE; 1734 1473 } 1735 1474 1736 1475 static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, ··· 1737 1484 unsigned long cs; 1738 1485 1739 1486 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); 1740 - if (rc) 1487 + if (rc != X86EMUL_CONTINUE) 1741 1488 return rc; 1742 1489 if (c->op_bytes == 4) 1743 1490 c->eip = (u32)c->eip; 1744 1491 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 
1745 - if (rc) 1492 + if (rc != X86EMUL_CONTINUE) 1746 1493 return rc; 1747 - rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); 1494 + rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); 1748 1495 return rc; 1749 1496 } 1750 1497 ··· 1797 1544 default: 1798 1545 break; 1799 1546 } 1800 - return 0; 1547 + return X86EMUL_CONTINUE; 1801 1548 } 1802 1549 1803 1550 static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) ··· 1851 1598 u64 msr_data; 1852 1599 1853 1600 /* syscall is not available in real mode */ 1854 - if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) 1855 - return X86EMUL_UNHANDLEABLE; 1601 + if (ctxt->mode == X86EMUL_MODE_REAL || 1602 + ctxt->mode == X86EMUL_MODE_VM86) { 1603 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 1604 + return X86EMUL_PROPAGATE_FAULT; 1605 + } 1856 1606 1857 1607 setup_syscalls_segments(ctxt, &cs, &ss); 1858 1608 ··· 1905 1649 /* inject #GP if in real mode */ 1906 1650 if (ctxt->mode == X86EMUL_MODE_REAL) { 1907 1651 kvm_inject_gp(ctxt->vcpu, 0); 1908 - return X86EMUL_UNHANDLEABLE; 1652 + return X86EMUL_PROPAGATE_FAULT; 1909 1653 } 1910 1654 1911 1655 /* XXX sysenter/sysexit have not been tested in 64bit mode. 1912 1656 * Therefore, we inject an #UD. 
1913 1657 */ 1914 - if (ctxt->mode == X86EMUL_MODE_PROT64) 1915 - return X86EMUL_UNHANDLEABLE; 1658 + if (ctxt->mode == X86EMUL_MODE_PROT64) { 1659 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 1660 + return X86EMUL_PROPAGATE_FAULT; 1661 + } 1916 1662 1917 1663 setup_syscalls_segments(ctxt, &cs, &ss); 1918 1664 ··· 1969 1711 if (ctxt->mode == X86EMUL_MODE_REAL || 1970 1712 ctxt->mode == X86EMUL_MODE_VM86) { 1971 1713 kvm_inject_gp(ctxt->vcpu, 0); 1972 - return X86EMUL_UNHANDLEABLE; 1714 + return X86EMUL_PROPAGATE_FAULT; 1973 1715 } 1974 1716 1975 1717 setup_syscalls_segments(ctxt, &cs, &ss); ··· 2014 1756 return X86EMUL_CONTINUE; 2015 1757 } 2016 1758 2017 - static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) 1759 + static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, 1760 + struct x86_emulate_ops *ops) 2018 1761 { 2019 1762 int iopl; 2020 1763 if (ctxt->mode == X86EMUL_MODE_REAL) ··· 2023 1764 if (ctxt->mode == X86EMUL_MODE_VM86) 2024 1765 return true; 2025 1766 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 2026 - return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; 1767 + return ops->cpl(ctxt->vcpu) > iopl; 2027 1768 } 2028 1769 2029 1770 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, ··· 2060 1801 struct x86_emulate_ops *ops, 2061 1802 u16 port, u16 len) 2062 1803 { 2063 - if (emulator_bad_iopl(ctxt)) 1804 + if (emulator_bad_iopl(ctxt, ops)) 2064 1805 if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) 2065 1806 return false; 2066 1807 return true; 2067 1808 } 2068 1809 1810 + static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, 1811 + struct x86_emulate_ops *ops, 1812 + int seg) 1813 + { 1814 + struct desc_struct desc; 1815 + if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) 1816 + return get_desc_base(&desc); 1817 + else 1818 + return ~0; 1819 + } 1820 + 1821 + static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, 1822 + struct x86_emulate_ops *ops, 1823 + struct 
tss_segment_16 *tss) 1824 + { 1825 + struct decode_cache *c = &ctxt->decode; 1826 + 1827 + tss->ip = c->eip; 1828 + tss->flag = ctxt->eflags; 1829 + tss->ax = c->regs[VCPU_REGS_RAX]; 1830 + tss->cx = c->regs[VCPU_REGS_RCX]; 1831 + tss->dx = c->regs[VCPU_REGS_RDX]; 1832 + tss->bx = c->regs[VCPU_REGS_RBX]; 1833 + tss->sp = c->regs[VCPU_REGS_RSP]; 1834 + tss->bp = c->regs[VCPU_REGS_RBP]; 1835 + tss->si = c->regs[VCPU_REGS_RSI]; 1836 + tss->di = c->regs[VCPU_REGS_RDI]; 1837 + 1838 + tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); 1839 + tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 1840 + tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); 1841 + tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); 1842 + tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); 1843 + } 1844 + 1845 + static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, 1846 + struct x86_emulate_ops *ops, 1847 + struct tss_segment_16 *tss) 1848 + { 1849 + struct decode_cache *c = &ctxt->decode; 1850 + int ret; 1851 + 1852 + c->eip = tss->ip; 1853 + ctxt->eflags = tss->flag | 2; 1854 + c->regs[VCPU_REGS_RAX] = tss->ax; 1855 + c->regs[VCPU_REGS_RCX] = tss->cx; 1856 + c->regs[VCPU_REGS_RDX] = tss->dx; 1857 + c->regs[VCPU_REGS_RBX] = tss->bx; 1858 + c->regs[VCPU_REGS_RSP] = tss->sp; 1859 + c->regs[VCPU_REGS_RBP] = tss->bp; 1860 + c->regs[VCPU_REGS_RSI] = tss->si; 1861 + c->regs[VCPU_REGS_RDI] = tss->di; 1862 + 1863 + /* 1864 + * SDM says that segment selectors are loaded before segment 1865 + * descriptors 1866 + */ 1867 + ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); 1868 + ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); 1869 + ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); 1870 + ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); 1871 + ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); 1872 + 1873 + /* 1874 + * Now load segment descriptors. 
If fault happenes at this stage 1875 + * it is handled in a context of new task 1876 + */ 1877 + ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); 1878 + if (ret != X86EMUL_CONTINUE) 1879 + return ret; 1880 + ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); 1881 + if (ret != X86EMUL_CONTINUE) 1882 + return ret; 1883 + ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); 1884 + if (ret != X86EMUL_CONTINUE) 1885 + return ret; 1886 + ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); 1887 + if (ret != X86EMUL_CONTINUE) 1888 + return ret; 1889 + ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); 1890 + if (ret != X86EMUL_CONTINUE) 1891 + return ret; 1892 + 1893 + return X86EMUL_CONTINUE; 1894 + } 1895 + 1896 + static int task_switch_16(struct x86_emulate_ctxt *ctxt, 1897 + struct x86_emulate_ops *ops, 1898 + u16 tss_selector, u16 old_tss_sel, 1899 + ulong old_tss_base, struct desc_struct *new_desc) 1900 + { 1901 + struct tss_segment_16 tss_seg; 1902 + int ret; 1903 + u32 err, new_tss_base = get_desc_base(new_desc); 1904 + 1905 + ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 1906 + &err); 1907 + if (ret == X86EMUL_PROPAGATE_FAULT) { 1908 + /* FIXME: need to provide precise fault address */ 1909 + kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); 1910 + return ret; 1911 + } 1912 + 1913 + save_state_to_tss16(ctxt, ops, &tss_seg); 1914 + 1915 + ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 1916 + &err); 1917 + if (ret == X86EMUL_PROPAGATE_FAULT) { 1918 + /* FIXME: need to provide precise fault address */ 1919 + kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); 1920 + return ret; 1921 + } 1922 + 1923 + ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 1924 + &err); 1925 + if (ret == X86EMUL_PROPAGATE_FAULT) { 1926 + /* FIXME: need to provide precise fault address */ 1927 + kvm_inject_page_fault(ctxt->vcpu, new_tss_base, 
err); 1928 + return ret; 1929 + } 1930 + 1931 + if (old_tss_sel != 0xffff) { 1932 + tss_seg.prev_task_link = old_tss_sel; 1933 + 1934 + ret = ops->write_std(new_tss_base, 1935 + &tss_seg.prev_task_link, 1936 + sizeof tss_seg.prev_task_link, 1937 + ctxt->vcpu, &err); 1938 + if (ret == X86EMUL_PROPAGATE_FAULT) { 1939 + /* FIXME: need to provide precise fault address */ 1940 + kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); 1941 + return ret; 1942 + } 1943 + } 1944 + 1945 + return load_state_from_tss16(ctxt, ops, &tss_seg); 1946 + } 1947 + 1948 + static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, 1949 + struct x86_emulate_ops *ops, 1950 + struct tss_segment_32 *tss) 1951 + { 1952 + struct decode_cache *c = &ctxt->decode; 1953 + 1954 + tss->cr3 = ops->get_cr(3, ctxt->vcpu); 1955 + tss->eip = c->eip; 1956 + tss->eflags = ctxt->eflags; 1957 + tss->eax = c->regs[VCPU_REGS_RAX]; 1958 + tss->ecx = c->regs[VCPU_REGS_RCX]; 1959 + tss->edx = c->regs[VCPU_REGS_RDX]; 1960 + tss->ebx = c->regs[VCPU_REGS_RBX]; 1961 + tss->esp = c->regs[VCPU_REGS_RSP]; 1962 + tss->ebp = c->regs[VCPU_REGS_RBP]; 1963 + tss->esi = c->regs[VCPU_REGS_RSI]; 1964 + tss->edi = c->regs[VCPU_REGS_RDI]; 1965 + 1966 + tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); 1967 + tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 1968 + tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); 1969 + tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); 1970 + tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); 1971 + tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); 1972 + tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); 1973 + } 1974 + 1975 + static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, 1976 + struct x86_emulate_ops *ops, 1977 + struct tss_segment_32 *tss) 1978 + { 1979 + struct decode_cache *c = &ctxt->decode; 1980 + int ret; 1981 + 1982 + ops->set_cr(3, tss->cr3, ctxt->vcpu); 1983 + 
c->eip = tss->eip; 1984 + ctxt->eflags = tss->eflags | 2; 1985 + c->regs[VCPU_REGS_RAX] = tss->eax; 1986 + c->regs[VCPU_REGS_RCX] = tss->ecx; 1987 + c->regs[VCPU_REGS_RDX] = tss->edx; 1988 + c->regs[VCPU_REGS_RBX] = tss->ebx; 1989 + c->regs[VCPU_REGS_RSP] = tss->esp; 1990 + c->regs[VCPU_REGS_RBP] = tss->ebp; 1991 + c->regs[VCPU_REGS_RSI] = tss->esi; 1992 + c->regs[VCPU_REGS_RDI] = tss->edi; 1993 + 1994 + /* 1995 + * SDM says that segment selectors are loaded before segment 1996 + * descriptors 1997 + */ 1998 + ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); 1999 + ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); 2000 + ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); 2001 + ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); 2002 + ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); 2003 + ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); 2004 + ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); 2005 + 2006 + /* 2007 + * Now load segment descriptors. 
If fault happenes at this stage 2008 + * it is handled in a context of new task 2009 + */ 2010 + ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); 2011 + if (ret != X86EMUL_CONTINUE) 2012 + return ret; 2013 + ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); 2014 + if (ret != X86EMUL_CONTINUE) 2015 + return ret; 2016 + ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); 2017 + if (ret != X86EMUL_CONTINUE) 2018 + return ret; 2019 + ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); 2020 + if (ret != X86EMUL_CONTINUE) 2021 + return ret; 2022 + ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); 2023 + if (ret != X86EMUL_CONTINUE) 2024 + return ret; 2025 + ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); 2026 + if (ret != X86EMUL_CONTINUE) 2027 + return ret; 2028 + ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); 2029 + if (ret != X86EMUL_CONTINUE) 2030 + return ret; 2031 + 2032 + return X86EMUL_CONTINUE; 2033 + } 2034 + 2035 + static int task_switch_32(struct x86_emulate_ctxt *ctxt, 2036 + struct x86_emulate_ops *ops, 2037 + u16 tss_selector, u16 old_tss_sel, 2038 + ulong old_tss_base, struct desc_struct *new_desc) 2039 + { 2040 + struct tss_segment_32 tss_seg; 2041 + int ret; 2042 + u32 err, new_tss_base = get_desc_base(new_desc); 2043 + 2044 + ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2045 + &err); 2046 + if (ret == X86EMUL_PROPAGATE_FAULT) { 2047 + /* FIXME: need to provide precise fault address */ 2048 + kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); 2049 + return ret; 2050 + } 2051 + 2052 + save_state_to_tss32(ctxt, ops, &tss_seg); 2053 + 2054 + ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2055 + &err); 2056 + if (ret == X86EMUL_PROPAGATE_FAULT) { 2057 + /* FIXME: need to provide precise fault address */ 2058 + kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); 2059 + return ret; 
2060 + } 2061 + 2062 + ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2063 + &err); 2064 + if (ret == X86EMUL_PROPAGATE_FAULT) { 2065 + /* FIXME: need to provide precise fault address */ 2066 + kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); 2067 + return ret; 2068 + } 2069 + 2070 + if (old_tss_sel != 0xffff) { 2071 + tss_seg.prev_task_link = old_tss_sel; 2072 + 2073 + ret = ops->write_std(new_tss_base, 2074 + &tss_seg.prev_task_link, 2075 + sizeof tss_seg.prev_task_link, 2076 + ctxt->vcpu, &err); 2077 + if (ret == X86EMUL_PROPAGATE_FAULT) { 2078 + /* FIXME: need to provide precise fault address */ 2079 + kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); 2080 + return ret; 2081 + } 2082 + } 2083 + 2084 + return load_state_from_tss32(ctxt, ops, &tss_seg); 2085 + } 2086 + 2087 + static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, 2088 + struct x86_emulate_ops *ops, 2089 + u16 tss_selector, int reason, 2090 + bool has_error_code, u32 error_code) 2091 + { 2092 + struct desc_struct curr_tss_desc, next_tss_desc; 2093 + int ret; 2094 + u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); 2095 + ulong old_tss_base = 2096 + get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); 2097 + u32 desc_limit; 2098 + 2099 + /* FIXME: old_tss_base == ~0 ? 
*/ 2100 + 2101 + ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); 2102 + if (ret != X86EMUL_CONTINUE) 2103 + return ret; 2104 + ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); 2105 + if (ret != X86EMUL_CONTINUE) 2106 + return ret; 2107 + 2108 + /* FIXME: check that next_tss_desc is tss */ 2109 + 2110 + if (reason != TASK_SWITCH_IRET) { 2111 + if ((tss_selector & 3) > next_tss_desc.dpl || 2112 + ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { 2113 + kvm_inject_gp(ctxt->vcpu, 0); 2114 + return X86EMUL_PROPAGATE_FAULT; 2115 + } 2116 + } 2117 + 2118 + desc_limit = desc_limit_scaled(&next_tss_desc); 2119 + if (!next_tss_desc.p || 2120 + ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || 2121 + desc_limit < 0x2b)) { 2122 + kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, 2123 + tss_selector & 0xfffc); 2124 + return X86EMUL_PROPAGATE_FAULT; 2125 + } 2126 + 2127 + if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { 2128 + curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ 2129 + write_segment_descriptor(ctxt, ops, old_tss_sel, 2130 + &curr_tss_desc); 2131 + } 2132 + 2133 + if (reason == TASK_SWITCH_IRET) 2134 + ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; 2135 + 2136 + /* set back link to prev task only if NT bit is set in eflags 2137 + note that old_tss_sel is not used afetr this point */ 2138 + if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) 2139 + old_tss_sel = 0xffff; 2140 + 2141 + if (next_tss_desc.type & 8) 2142 + ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, 2143 + old_tss_base, &next_tss_desc); 2144 + else 2145 + ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, 2146 + old_tss_base, &next_tss_desc); 2147 + if (ret != X86EMUL_CONTINUE) 2148 + return ret; 2149 + 2150 + if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) 2151 + ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; 2152 + 2153 + if (reason != TASK_SWITCH_IRET) { 2154 + next_tss_desc.type |= (1 << 1); /* set busy 
flag */ 2155 + write_segment_descriptor(ctxt, ops, tss_selector, 2156 + &next_tss_desc); 2157 + } 2158 + 2159 + ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); 2160 + ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); 2161 + ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); 2162 + 2163 + if (has_error_code) { 2164 + struct decode_cache *c = &ctxt->decode; 2165 + 2166 + c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; 2167 + c->lock_prefix = 0; 2168 + c->src.val = (unsigned long) error_code; 2169 + emulate_push(ctxt); 2170 + } 2171 + 2172 + return ret; 2173 + } 2174 + 2175 + int emulator_task_switch(struct x86_emulate_ctxt *ctxt, 2176 + struct x86_emulate_ops *ops, 2177 + u16 tss_selector, int reason, 2178 + bool has_error_code, u32 error_code) 2179 + { 2180 + struct decode_cache *c = &ctxt->decode; 2181 + int rc; 2182 + 2183 + memset(c, 0, sizeof(struct decode_cache)); 2184 + c->eip = ctxt->eip; 2185 + memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 2186 + c->dst.type = OP_NONE; 2187 + 2188 + rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, 2189 + has_error_code, error_code); 2190 + 2191 + if (rc == X86EMUL_CONTINUE) { 2192 + memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); 2193 + kvm_rip_write(ctxt->vcpu, c->eip); 2194 + rc = writeback(ctxt, ops); 2195 + } 2196 + 2197 + return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 2198 + } 2199 + 2200 + static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, 2201 + int reg, struct operand *op) 2202 + { 2203 + struct decode_cache *c = &ctxt->decode; 2204 + int df = (ctxt->eflags & EFLG_DF) ? 
-1 : 1; 2205 + 2206 + register_address_increment(c, &c->regs[reg], df * op->bytes); 2207 + op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); 2208 + } 2209 + 2069 2210 int 2070 2211 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 2071 2212 { 2072 - unsigned long memop = 0; 2073 2213 u64 msr_data; 2074 - unsigned long saved_eip = 0; 2075 2214 struct decode_cache *c = &ctxt->decode; 2076 - unsigned int port; 2077 - int io_dir_in; 2078 - int rc = 0; 2215 + int rc = X86EMUL_CONTINUE; 2216 + int saved_dst_type = c->dst.type; 2079 2217 2080 2218 ctxt->interruptibility = 0; 2081 2219 ··· 2482 1826 */ 2483 1827 2484 1828 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 2485 - saved_eip = c->eip; 1829 + 1830 + if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { 1831 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 1832 + goto done; 1833 + } 2486 1834 2487 1835 /* LOCK prefix is allowed only with some instructions */ 2488 - if (c->lock_prefix && !(c->d & Lock)) { 1836 + if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { 2489 1837 kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2490 1838 goto done; 2491 1839 } 2492 1840 2493 1841 /* Privileged instruction can be executed only in CPL=0 */ 2494 - if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { 1842 + if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { 2495 1843 kvm_inject_gp(ctxt->vcpu, 0); 2496 1844 goto done; 2497 1845 } 2498 1846 2499 - if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) 2500 - memop = c->modrm_ea; 2501 - 2502 1847 if (c->rep_prefix && (c->d & String)) { 1848 + ctxt->restart = true; 2503 1849 /* All REP prefixes have the same first termination condition */ 2504 - if (c->regs[VCPU_REGS_RCX] == 0) { 1850 + if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { 1851 + string_done: 1852 + ctxt->restart = false; 2505 1853 kvm_rip_write(ctxt->vcpu, c->eip); 2506 1854 goto done; 2507 1855 } ··· 2517 1857 * - if REPNE/REPNZ and ZF = 1 
then done 2518 1858 */ 2519 1859 if ((c->b == 0xa6) || (c->b == 0xa7) || 2520 - (c->b == 0xae) || (c->b == 0xaf)) { 1860 + (c->b == 0xae) || (c->b == 0xaf)) { 2521 1861 if ((c->rep_prefix == REPE_PREFIX) && 2522 - ((ctxt->eflags & EFLG_ZF) == 0)) { 2523 - kvm_rip_write(ctxt->vcpu, c->eip); 2524 - goto done; 2525 - } 1862 + ((ctxt->eflags & EFLG_ZF) == 0)) 1863 + goto string_done; 2526 1864 if ((c->rep_prefix == REPNE_PREFIX) && 2527 - ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { 2528 - kvm_rip_write(ctxt->vcpu, c->eip); 2529 - goto done; 2530 - } 1865 + ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) 1866 + goto string_done; 2531 1867 } 2532 - c->regs[VCPU_REGS_RCX]--; 2533 - c->eip = kvm_rip_read(ctxt->vcpu); 1868 + c->eip = ctxt->eip; 2534 1869 } 2535 1870 2536 1871 if (c->src.type == OP_MEM) { 2537 - c->src.ptr = (unsigned long *)memop; 2538 - c->src.val = 0; 2539 1872 rc = ops->read_emulated((unsigned long)c->src.ptr, 2540 1873 &c->src.val, 2541 1874 c->src.bytes, ··· 2538 1885 c->src.orig_val = c->src.val; 2539 1886 } 2540 1887 1888 + if (c->src2.type == OP_MEM) { 1889 + rc = ops->read_emulated((unsigned long)c->src2.ptr, 1890 + &c->src2.val, 1891 + c->src2.bytes, 1892 + ctxt->vcpu); 1893 + if (rc != X86EMUL_CONTINUE) 1894 + goto done; 1895 + } 1896 + 2541 1897 if ((c->d & DstMask) == ImplicitOps) 2542 1898 goto special_insn; 2543 1899 2544 1900 2545 - if (c->dst.type == OP_MEM) { 2546 - c->dst.ptr = (unsigned long *)memop; 2547 - c->dst.bytes = (c->d & ByteOp) ? 
1 : c->op_bytes; 2548 - c->dst.val = 0; 2549 - if (c->d & BitOp) { 2550 - unsigned long mask = ~(c->dst.bytes * 8 - 1); 2551 - 2552 - c->dst.ptr = (void *)c->dst.ptr + 2553 - (c->src.val & mask) / 8; 2554 - } 2555 - if (!(c->d & Mov)) { 2556 - /* optimisation - avoid slow emulated read */ 2557 - rc = ops->read_emulated((unsigned long)c->dst.ptr, 2558 - &c->dst.val, 2559 - c->dst.bytes, 2560 - ctxt->vcpu); 2561 - if (rc != X86EMUL_CONTINUE) 2562 - goto done; 2563 - } 1901 + if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { 1902 + /* optimisation - avoid slow emulated read if Mov */ 1903 + rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, 1904 + c->dst.bytes, ctxt->vcpu); 1905 + if (rc != X86EMUL_CONTINUE) 1906 + goto done; 2564 1907 } 2565 1908 c->dst.orig_val = c->dst.val; 2566 1909 ··· 2575 1926 break; 2576 1927 case 0x07: /* pop es */ 2577 1928 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); 2578 - if (rc != 0) 1929 + if (rc != X86EMUL_CONTINUE) 2579 1930 goto done; 2580 1931 break; 2581 1932 case 0x08 ... 0x0d: ··· 2594 1945 break; 2595 1946 case 0x17: /* pop ss */ 2596 1947 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); 2597 - if (rc != 0) 1948 + if (rc != X86EMUL_CONTINUE) 2598 1949 goto done; 2599 1950 break; 2600 1951 case 0x18 ... 0x1d: ··· 2606 1957 break; 2607 1958 case 0x1f: /* pop ds */ 2608 1959 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); 2609 - if (rc != 0) 1960 + if (rc != X86EMUL_CONTINUE) 2610 1961 goto done; 2611 1962 break; 2612 1963 case 0x20 ... 0x25: ··· 2637 1988 case 0x58 ... 
0x5f: /* pop reg */ 2638 1989 pop_instruction: 2639 1990 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); 2640 - if (rc != 0) 1991 + if (rc != X86EMUL_CONTINUE) 2641 1992 goto done; 2642 1993 break; 2643 1994 case 0x60: /* pusha */ ··· 2645 1996 break; 2646 1997 case 0x61: /* popa */ 2647 1998 rc = emulate_popa(ctxt, ops); 2648 - if (rc != 0) 1999 + if (rc != X86EMUL_CONTINUE) 2649 2000 goto done; 2650 2001 break; 2651 2002 case 0x63: /* movsxd */ ··· 2659 2010 break; 2660 2011 case 0x6c: /* insb */ 2661 2012 case 0x6d: /* insw/insd */ 2013 + c->dst.bytes = min(c->dst.bytes, 4u); 2662 2014 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 2663 - (c->d & ByteOp) ? 1 : c->op_bytes)) { 2015 + c->dst.bytes)) { 2664 2016 kvm_inject_gp(ctxt->vcpu, 0); 2665 2017 goto done; 2666 2018 } 2667 - if (kvm_emulate_pio_string(ctxt->vcpu, 2668 - 1, 2669 - (c->d & ByteOp) ? 1 : c->op_bytes, 2670 - c->rep_prefix ? 2671 - address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, 2672 - (ctxt->eflags & EFLG_DF), 2673 - register_address(c, es_base(ctxt), 2674 - c->regs[VCPU_REGS_RDI]), 2675 - c->rep_prefix, 2676 - c->regs[VCPU_REGS_RDX]) == 0) { 2677 - c->eip = saved_eip; 2678 - return -1; 2679 - } 2680 - return 0; 2019 + if (!pio_in_emulated(ctxt, ops, c->dst.bytes, 2020 + c->regs[VCPU_REGS_RDX], &c->dst.val)) 2021 + goto done; /* IO is needed, skip writeback */ 2022 + break; 2681 2023 case 0x6e: /* outsb */ 2682 2024 case 0x6f: /* outsw/outsd */ 2025 + c->src.bytes = min(c->src.bytes, 4u); 2683 2026 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 2684 - (c->d & ByteOp) ? 1 : c->op_bytes)) { 2027 + c->src.bytes)) { 2685 2028 kvm_inject_gp(ctxt->vcpu, 0); 2686 2029 goto done; 2687 2030 } 2688 - if (kvm_emulate_pio_string(ctxt->vcpu, 2689 - 0, 2690 - (c->d & ByteOp) ? 1 : c->op_bytes, 2691 - c->rep_prefix ? 
2692 - address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, 2693 - (ctxt->eflags & EFLG_DF), 2694 - register_address(c, 2695 - seg_override_base(ctxt, c), 2696 - c->regs[VCPU_REGS_RSI]), 2697 - c->rep_prefix, 2698 - c->regs[VCPU_REGS_RDX]) == 0) { 2699 - c->eip = saved_eip; 2700 - return -1; 2701 - } 2702 - return 0; 2031 + ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], 2032 + &c->src.val, 1, ctxt->vcpu); 2033 + 2034 + c->dst.type = OP_NONE; /* nothing to writeback */ 2035 + break; 2703 2036 case 0x70 ... 0x7f: /* jcc (short) */ 2704 2037 if (test_cc(c->b, ctxt->eflags)) 2705 2038 jmp_rel(c, c->src.val); ··· 2738 2107 case 0x8c: { /* mov r/m, sreg */ 2739 2108 struct kvm_segment segreg; 2740 2109 2741 - if (c->modrm_reg <= 5) 2110 + if (c->modrm_reg <= VCPU_SREG_GS) 2742 2111 kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); 2743 2112 else { 2744 - printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", 2745 - c->modrm); 2746 - goto cannot_emulate; 2113 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2114 + goto done; 2747 2115 } 2748 2116 c->dst.val = segreg.selector; 2749 2117 break; ··· 2762 2132 } 2763 2133 2764 2134 if (c->modrm_reg == VCPU_SREG_SS) 2765 - toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); 2135 + toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); 2766 2136 2767 - rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); 2137 + rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); 2768 2138 2769 2139 c->dst.type = OP_NONE; /* Disable writeback. */ 2770 2140 break; 2771 2141 } 2772 2142 case 0x8f: /* pop (sole member of Grp1a) */ 2773 2143 rc = emulate_grp1a(ctxt, ops); 2774 - if (rc != 0) 2144 + if (rc != X86EMUL_CONTINUE) 2775 2145 goto done; 2776 2146 break; 2777 2147 case 0x90: /* nop / xchg r8,rax */ ··· 2805 2175 c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; 2806 2176 break; 2807 2177 case 0xa4 ... 0xa5: /* movs */ 2808 - c->dst.type = OP_MEM; 2809 - c->dst.bytes = (c->d & ByteOp) ? 
1 : c->op_bytes; 2810 - c->dst.ptr = (unsigned long *)register_address(c, 2811 - es_base(ctxt), 2812 - c->regs[VCPU_REGS_RDI]); 2813 - rc = ops->read_emulated(register_address(c, 2814 - seg_override_base(ctxt, c), 2815 - c->regs[VCPU_REGS_RSI]), 2816 - &c->dst.val, 2817 - c->dst.bytes, ctxt->vcpu); 2818 - if (rc != X86EMUL_CONTINUE) 2819 - goto done; 2820 - register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2821 - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2822 - : c->dst.bytes); 2823 - register_address_increment(c, &c->regs[VCPU_REGS_RDI], 2824 - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2825 - : c->dst.bytes); 2826 - break; 2178 + goto mov; 2827 2179 case 0xa6 ... 0xa7: /* cmps */ 2828 - c->src.type = OP_NONE; /* Disable writeback. */ 2829 - c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 2830 - c->src.ptr = (unsigned long *)register_address(c, 2831 - seg_override_base(ctxt, c), 2832 - c->regs[VCPU_REGS_RSI]); 2833 - rc = ops->read_emulated((unsigned long)c->src.ptr, 2834 - &c->src.val, 2835 - c->src.bytes, 2836 - ctxt->vcpu); 2837 - if (rc != X86EMUL_CONTINUE) 2838 - goto done; 2839 - 2840 2180 c->dst.type = OP_NONE; /* Disable writeback. */ 2841 - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 2842 - c->dst.ptr = (unsigned long *)register_address(c, 2843 - es_base(ctxt), 2844 - c->regs[VCPU_REGS_RDI]); 2845 - rc = ops->read_emulated((unsigned long)c->dst.ptr, 2846 - &c->dst.val, 2847 - c->dst.bytes, 2848 - ctxt->vcpu); 2849 - if (rc != X86EMUL_CONTINUE) 2850 - goto done; 2851 - 2852 2181 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); 2853 - 2854 - emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); 2855 - 2856 - register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2857 - (ctxt->eflags & EFLG_DF) ? -c->src.bytes 2858 - : c->src.bytes); 2859 - register_address_increment(c, &c->regs[VCPU_REGS_RDI], 2860 - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2861 - : c->dst.bytes); 2862 - 2863 - break; 2182 + goto cmp; 2864 2183 case 0xaa ... 
0xab: /* stos */ 2865 - c->dst.type = OP_MEM; 2866 - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 2867 - c->dst.ptr = (unsigned long *)register_address(c, 2868 - es_base(ctxt), 2869 - c->regs[VCPU_REGS_RDI]); 2870 2184 c->dst.val = c->regs[VCPU_REGS_RAX]; 2871 - register_address_increment(c, &c->regs[VCPU_REGS_RDI], 2872 - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2873 - : c->dst.bytes); 2874 2185 break; 2875 2186 case 0xac ... 0xad: /* lods */ 2876 - c->dst.type = OP_REG; 2877 - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 2878 - c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 2879 - rc = ops->read_emulated(register_address(c, 2880 - seg_override_base(ctxt, c), 2881 - c->regs[VCPU_REGS_RSI]), 2882 - &c->dst.val, 2883 - c->dst.bytes, 2884 - ctxt->vcpu); 2885 - if (rc != X86EMUL_CONTINUE) 2886 - goto done; 2887 - register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2888 - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2889 - : c->dst.bytes); 2890 - break; 2187 + goto mov; 2891 2188 case 0xae ... 0xaf: /* scas */ 2892 2189 DPRINTF("Urk! I don't handle SCAS.\n"); 2893 2190 goto cannot_emulate; ··· 2834 2277 break; 2835 2278 case 0xcb: /* ret far */ 2836 2279 rc = emulate_ret_far(ctxt, ops); 2837 - if (rc) 2280 + if (rc != X86EMUL_CONTINUE) 2838 2281 goto done; 2839 2282 break; 2840 2283 case 0xd0 ... 
0xd1: /* Grp2 */ ··· 2847 2290 break; 2848 2291 case 0xe4: /* inb */ 2849 2292 case 0xe5: /* in */ 2850 - port = c->src.val; 2851 - io_dir_in = 1; 2852 - goto do_io; 2293 + goto do_io_in; 2853 2294 case 0xe6: /* outb */ 2854 2295 case 0xe7: /* out */ 2855 - port = c->src.val; 2856 - io_dir_in = 0; 2857 - goto do_io; 2296 + goto do_io_out; 2858 2297 case 0xe8: /* call (near) */ { 2859 2298 long int rel = c->src.val; 2860 2299 c->src.val = (unsigned long) c->eip; ··· 2861 2308 case 0xe9: /* jmp rel */ 2862 2309 goto jmp; 2863 2310 case 0xea: /* jmp far */ 2864 - if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 2865 - VCPU_SREG_CS)) 2311 + jump_far: 2312 + if (load_segment_descriptor(ctxt, ops, c->src2.val, 2313 + VCPU_SREG_CS)) 2866 2314 goto done; 2867 2315 2868 2316 c->eip = c->src.val; ··· 2875 2321 break; 2876 2322 case 0xec: /* in al,dx */ 2877 2323 case 0xed: /* in (e/r)ax,dx */ 2878 - port = c->regs[VCPU_REGS_RDX]; 2879 - io_dir_in = 1; 2880 - goto do_io; 2881 - case 0xee: /* out al,dx */ 2882 - case 0xef: /* out (e/r)ax,dx */ 2883 - port = c->regs[VCPU_REGS_RDX]; 2884 - io_dir_in = 0; 2885 - do_io: 2886 - if (!emulator_io_permited(ctxt, ops, port, 2887 - (c->d & ByteOp) ? 1 : c->op_bytes)) { 2324 + c->src.val = c->regs[VCPU_REGS_RDX]; 2325 + do_io_in: 2326 + c->dst.bytes = min(c->dst.bytes, 4u); 2327 + if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { 2888 2328 kvm_inject_gp(ctxt->vcpu, 0); 2889 2329 goto done; 2890 2330 } 2891 - if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, 2892 - (c->d & ByteOp) ? 
1 : c->op_bytes, 2893 - port) != 0) { 2894 - c->eip = saved_eip; 2895 - goto cannot_emulate; 2331 + if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, 2332 + &c->dst.val)) 2333 + goto done; /* IO is needed */ 2334 + break; 2335 + case 0xee: /* out al,dx */ 2336 + case 0xef: /* out (e/r)ax,dx */ 2337 + c->src.val = c->regs[VCPU_REGS_RDX]; 2338 + do_io_out: 2339 + c->dst.bytes = min(c->dst.bytes, 4u); 2340 + if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { 2341 + kvm_inject_gp(ctxt->vcpu, 0); 2342 + goto done; 2896 2343 } 2344 + ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, 2345 + ctxt->vcpu); 2346 + c->dst.type = OP_NONE; /* Disable writeback. */ 2897 2347 break; 2898 2348 case 0xf4: /* hlt */ 2899 2349 ctxt->vcpu->arch.halt_request = 1; ··· 2908 2350 c->dst.type = OP_NONE; /* Disable writeback. */ 2909 2351 break; 2910 2352 case 0xf6 ... 0xf7: /* Grp3 */ 2911 - rc = emulate_grp3(ctxt, ops); 2912 - if (rc != 0) 2913 - goto done; 2353 + if (!emulate_grp3(ctxt, ops)) 2354 + goto cannot_emulate; 2914 2355 break; 2915 2356 case 0xf8: /* clc */ 2916 2357 ctxt->eflags &= ~EFLG_CF; 2917 2358 c->dst.type = OP_NONE; /* Disable writeback. */ 2918 2359 break; 2919 2360 case 0xfa: /* cli */ 2920 - if (emulator_bad_iopl(ctxt)) 2361 + if (emulator_bad_iopl(ctxt, ops)) 2921 2362 kvm_inject_gp(ctxt->vcpu, 0); 2922 2363 else { 2923 2364 ctxt->eflags &= ~X86_EFLAGS_IF; ··· 2924 2367 } 2925 2368 break; 2926 2369 case 0xfb: /* sti */ 2927 - if (emulator_bad_iopl(ctxt)) 2370 + if (emulator_bad_iopl(ctxt, ops)) 2928 2371 kvm_inject_gp(ctxt->vcpu, 0); 2929 2372 else { 2930 - toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); 2373 + toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); 2931 2374 ctxt->eflags |= X86_EFLAGS_IF; 2932 2375 c->dst.type = OP_NONE; /* Disable writeback. */ 2933 2376 } ··· 2940 2383 ctxt->eflags |= EFLG_DF; 2941 2384 c->dst.type = OP_NONE; /* Disable writeback. */ 2942 2385 break; 2943 - case 0xfe ... 
0xff: /* Grp4/Grp5 */ 2386 + case 0xfe: /* Grp4 */ 2387 + grp45: 2944 2388 rc = emulate_grp45(ctxt, ops); 2945 - if (rc != 0) 2389 + if (rc != X86EMUL_CONTINUE) 2946 2390 goto done; 2947 2391 break; 2392 + case 0xff: /* Grp5 */ 2393 + if (c->modrm_reg == 5) 2394 + goto jump_far; 2395 + goto grp45; 2948 2396 } 2949 2397 2950 2398 writeback: 2951 2399 rc = writeback(ctxt, ops); 2952 - if (rc != 0) 2400 + if (rc != X86EMUL_CONTINUE) 2953 2401 goto done; 2402 + 2403 + /* 2404 + * restore dst type in case the decoding will be reused 2405 + * (happens for string instruction ) 2406 + */ 2407 + c->dst.type = saved_dst_type; 2408 + 2409 + if ((c->d & SrcMask) == SrcSI) 2410 + string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, 2411 + &c->src); 2412 + 2413 + if ((c->d & DstMask) == DstDI) 2414 + string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); 2415 + 2416 + if (c->rep_prefix && (c->d & String)) { 2417 + struct read_cache *rc = &ctxt->decode.io_read; 2418 + register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); 2419 + /* 2420 + * Re-enter guest when pio read ahead buffer is empty or, 2421 + * if it is not used, after each 1024 iteration. 2422 + */ 2423 + if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || 2424 + (rc->end != 0 && rc->end == rc->pos)) 2425 + ctxt->restart = false; 2426 + } 2954 2427 2955 2428 /* Commit shadow register state. */ 2956 2429 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); 2957 2430 kvm_rip_write(ctxt->vcpu, c->eip); 2431 + ops->set_rflags(ctxt->vcpu, ctxt->eflags); 2958 2432 2959 2433 done: 2960 - if (rc == X86EMUL_UNHANDLEABLE) { 2961 - c->eip = saved_eip; 2962 - return -1; 2963 - } 2964 - return 0; 2434 + return (rc == X86EMUL_UNHANDLEABLE) ? 
-1 : 0; 2965 2435 2966 2436 twobyte_insn: 2967 2437 switch (c->b) { ··· 3002 2418 goto cannot_emulate; 3003 2419 3004 2420 rc = kvm_fix_hypercall(ctxt->vcpu); 3005 - if (rc) 2421 + if (rc != X86EMUL_CONTINUE) 3006 2422 goto done; 3007 2423 3008 2424 /* Let the processor re-execute the fixed hypercall */ 3009 - c->eip = kvm_rip_read(ctxt->vcpu); 2425 + c->eip = ctxt->eip; 3010 2426 /* Disable writeback. */ 3011 2427 c->dst.type = OP_NONE; 3012 2428 break; 3013 2429 case 2: /* lgdt */ 3014 2430 rc = read_descriptor(ctxt, ops, c->src.ptr, 3015 2431 &size, &address, c->op_bytes); 3016 - if (rc) 2432 + if (rc != X86EMUL_CONTINUE) 3017 2433 goto done; 3018 2434 realmode_lgdt(ctxt->vcpu, size, address); 3019 2435 /* Disable writeback. */ ··· 3024 2440 switch (c->modrm_rm) { 3025 2441 case 1: 3026 2442 rc = kvm_fix_hypercall(ctxt->vcpu); 3027 - if (rc) 2443 + if (rc != X86EMUL_CONTINUE) 3028 2444 goto done; 3029 2445 break; 3030 2446 default: ··· 3034 2450 rc = read_descriptor(ctxt, ops, c->src.ptr, 3035 2451 &size, &address, 3036 2452 c->op_bytes); 3037 - if (rc) 2453 + if (rc != X86EMUL_CONTINUE) 3038 2454 goto done; 3039 2455 realmode_lidt(ctxt->vcpu, size, address); 3040 2456 } ··· 3043 2459 break; 3044 2460 case 4: /* smsw */ 3045 2461 c->dst.bytes = 2; 3046 - c->dst.val = realmode_get_cr(ctxt->vcpu, 0); 2462 + c->dst.val = ops->get_cr(0, ctxt->vcpu); 3047 2463 break; 3048 2464 case 6: /* lmsw */ 3049 - realmode_lmsw(ctxt->vcpu, (u16)c->src.val, 3050 - &ctxt->eflags); 2465 + ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | 2466 + (c->src.val & 0x0f), ctxt->vcpu); 3051 2467 c->dst.type = OP_NONE; 3052 2468 break; 2469 + case 5: /* not defined */ 2470 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2471 + goto done; 3053 2472 case 7: /* invlpg*/ 3054 - emulate_invlpg(ctxt->vcpu, memop); 2473 + emulate_invlpg(ctxt->vcpu, c->modrm_ea); 3055 2474 /* Disable writeback. 
*/ 3056 2475 c->dst.type = OP_NONE; 3057 2476 break; ··· 3080 2493 c->dst.type = OP_NONE; 3081 2494 break; 3082 2495 case 0x20: /* mov cr, reg */ 3083 - if (c->modrm_mod != 3) 3084 - goto cannot_emulate; 3085 - c->regs[c->modrm_rm] = 3086 - realmode_get_cr(ctxt->vcpu, c->modrm_reg); 2496 + switch (c->modrm_reg) { 2497 + case 1: 2498 + case 5 ... 7: 2499 + case 9 ... 15: 2500 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2501 + goto done; 2502 + } 2503 + c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); 3087 2504 c->dst.type = OP_NONE; /* no writeback */ 3088 2505 break; 3089 2506 case 0x21: /* mov from dr to reg */ 3090 - if (c->modrm_mod != 3) 3091 - goto cannot_emulate; 3092 - rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); 3093 - if (rc) 3094 - goto cannot_emulate; 2507 + if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 2508 + (c->modrm_reg == 4 || c->modrm_reg == 5)) { 2509 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2510 + goto done; 2511 + } 2512 + emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); 3095 2513 c->dst.type = OP_NONE; /* no writeback */ 3096 2514 break; 3097 2515 case 0x22: /* mov reg, cr */ 3098 - if (c->modrm_mod != 3) 3099 - goto cannot_emulate; 3100 - realmode_set_cr(ctxt->vcpu, 3101 - c->modrm_reg, c->modrm_val, &ctxt->eflags); 2516 + ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); 3102 2517 c->dst.type = OP_NONE; 3103 2518 break; 3104 2519 case 0x23: /* mov from reg to dr */ 3105 - if (c->modrm_mod != 3) 3106 - goto cannot_emulate; 3107 - rc = emulator_set_dr(ctxt, c->modrm_reg, 3108 - c->regs[c->modrm_rm]); 3109 - if (rc) 3110 - goto cannot_emulate; 2520 + if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 2521 + (c->modrm_reg == 4 || c->modrm_reg == 5)) { 2522 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2523 + goto done; 2524 + } 2525 + emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); 3111 2526 c->dst.type = OP_NONE; /* no writeback */ 3112 2527 break; 3113 2528 case 0x30: 3114 2529 
/* wrmsr */ 3115 2530 msr_data = (u32)c->regs[VCPU_REGS_RAX] 3116 2531 | ((u64)c->regs[VCPU_REGS_RDX] << 32); 3117 - rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); 3118 - if (rc) { 2532 + if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { 3119 2533 kvm_inject_gp(ctxt->vcpu, 0); 3120 - c->eip = kvm_rip_read(ctxt->vcpu); 2534 + goto done; 3121 2535 } 3122 2536 rc = X86EMUL_CONTINUE; 3123 2537 c->dst.type = OP_NONE; 3124 2538 break; 3125 2539 case 0x32: 3126 2540 /* rdmsr */ 3127 - rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); 3128 - if (rc) { 2541 + if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { 3129 2542 kvm_inject_gp(ctxt->vcpu, 0); 3130 - c->eip = kvm_rip_read(ctxt->vcpu); 2543 + goto done; 3131 2544 } else { 3132 2545 c->regs[VCPU_REGS_RAX] = (u32)msr_data; 3133 2546 c->regs[VCPU_REGS_RDX] = msr_data >> 32; ··· 3164 2577 break; 3165 2578 case 0xa1: /* pop fs */ 3166 2579 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); 3167 - if (rc != 0) 2580 + if (rc != X86EMUL_CONTINUE) 3168 2581 goto done; 3169 2582 break; 3170 2583 case 0xa3: ··· 3183 2596 break; 3184 2597 case 0xa9: /* pop gs */ 3185 2598 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); 3186 - if (rc != 0) 2599 + if (rc != X86EMUL_CONTINUE) 3187 2600 goto done; 3188 2601 break; 3189 2602 case 0xab: ··· 3255 2668 (u64) c->src.val; 3256 2669 break; 3257 2670 case 0xc7: /* Grp9 (cmpxchg8b) */ 3258 - rc = emulate_grp9(ctxt, ops, memop); 3259 - if (rc != 0) 2671 + rc = emulate_grp9(ctxt, ops); 2672 + if (rc != X86EMUL_CONTINUE) 3260 2673 goto done; 3261 - c->dst.type = OP_NONE; 3262 2674 break; 3263 2675 } 3264 2676 goto writeback; 3265 2677 3266 2678 cannot_emulate: 3267 2679 DPRINTF("Cannot emulate %02x\n", c->b); 3268 - c->eip = saved_eip; 3269 2680 return -1; 3270 2681 }
+38 -15
arch/x86/kvm/i8259.c
··· 33 33 #include <linux/kvm_host.h> 34 34 #include "trace.h" 35 35 36 + static void pic_lock(struct kvm_pic *s) 37 + __acquires(&s->lock) 38 + { 39 + raw_spin_lock(&s->lock); 40 + } 41 + 42 + static void pic_unlock(struct kvm_pic *s) 43 + __releases(&s->lock) 44 + { 45 + bool wakeup = s->wakeup_needed; 46 + struct kvm_vcpu *vcpu; 47 + 48 + s->wakeup_needed = false; 49 + 50 + raw_spin_unlock(&s->lock); 51 + 52 + if (wakeup) { 53 + vcpu = s->kvm->bsp_vcpu; 54 + if (vcpu) 55 + kvm_vcpu_kick(vcpu); 56 + } 57 + } 58 + 36 59 static void pic_clear_isr(struct kvm_kpic_state *s, int irq) 37 60 { 38 61 s->isr &= ~(1 << irq); ··· 68 45 * Other interrupt may be delivered to PIC while lock is dropped but 69 46 * it should be safe since PIC state is already updated at this stage. 70 47 */ 71 - raw_spin_unlock(&s->pics_state->lock); 48 + pic_unlock(s->pics_state); 72 49 kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); 73 - raw_spin_lock(&s->pics_state->lock); 50 + pic_lock(s->pics_state); 74 51 } 75 52 76 53 void kvm_pic_clear_isr_ack(struct kvm *kvm) 77 54 { 78 55 struct kvm_pic *s = pic_irqchip(kvm); 79 56 80 - raw_spin_lock(&s->lock); 57 + pic_lock(s); 81 58 s->pics[0].isr_ack = 0xff; 82 59 s->pics[1].isr_ack = 0xff; 83 - raw_spin_unlock(&s->lock); 60 + pic_unlock(s); 84 61 } 85 62 86 63 /* ··· 181 158 182 159 void kvm_pic_update_irq(struct kvm_pic *s) 183 160 { 184 - raw_spin_lock(&s->lock); 161 + pic_lock(s); 185 162 pic_update_irq(s); 186 - raw_spin_unlock(&s->lock); 163 + pic_unlock(s); 187 164 } 188 165 189 166 int kvm_pic_set_irq(void *opaque, int irq, int level) ··· 191 168 struct kvm_pic *s = opaque; 192 169 int ret = -1; 193 170 194 - raw_spin_lock(&s->lock); 171 + pic_lock(s); 195 172 if (irq >= 0 && irq < PIC_NUM_PINS) { 196 173 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 197 174 pic_update_irq(s); 198 175 trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, 199 176 s->pics[irq >> 3].imr, ret == 0); 200 177 } 201 - 
raw_spin_unlock(&s->lock); 178 + pic_unlock(s); 202 179 203 180 return ret; 204 181 } ··· 228 205 int irq, irq2, intno; 229 206 struct kvm_pic *s = pic_irqchip(kvm); 230 207 231 - raw_spin_lock(&s->lock); 208 + pic_lock(s); 232 209 irq = pic_get_irq(&s->pics[0]); 233 210 if (irq >= 0) { 234 211 pic_intack(&s->pics[0], irq); ··· 253 230 intno = s->pics[0].irq_base + irq; 254 231 } 255 232 pic_update_irq(s); 256 - raw_spin_unlock(&s->lock); 233 + pic_unlock(s); 257 234 258 235 return intno; 259 236 } ··· 467 444 printk(KERN_ERR "PIC: non byte write\n"); 468 445 return 0; 469 446 } 470 - raw_spin_lock(&s->lock); 447 + pic_lock(s); 471 448 switch (addr) { 472 449 case 0x20: 473 450 case 0x21: ··· 480 457 elcr_ioport_write(&s->pics[addr & 1], addr, data); 481 458 break; 482 459 } 483 - raw_spin_unlock(&s->lock); 460 + pic_unlock(s); 484 461 return 0; 485 462 } 486 463 ··· 497 474 printk(KERN_ERR "PIC: non byte read\n"); 498 475 return 0; 499 476 } 500 - raw_spin_lock(&s->lock); 477 + pic_lock(s); 501 478 switch (addr) { 502 479 case 0x20: 503 480 case 0x21: ··· 511 488 break; 512 489 } 513 490 *(unsigned char *)val = data; 514 - raw_spin_unlock(&s->lock); 491 + pic_unlock(s); 515 492 return 0; 516 493 } 517 494 ··· 528 505 s->output = level; 529 506 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { 530 507 s->pics[0].isr_ack &= ~(1 << irq); 531 - kvm_vcpu_kick(vcpu); 508 + s->wakeup_needed = true; 532 509 } 533 510 } 534 511
+1
arch/x86/kvm/irq.h
··· 63 63 64 64 struct kvm_pic { 65 65 raw_spinlock_t lock; 66 + bool wakeup_needed; 66 67 unsigned pending_acks; 67 68 struct kvm *kvm; 68 69 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
+1 -3
arch/x86/kvm/kvm_timer.h
··· 10 10 }; 11 11 12 12 struct kvm_timer_ops { 13 - bool (*is_periodic)(struct kvm_timer *); 13 + bool (*is_periodic)(struct kvm_timer *); 14 14 }; 15 15 16 - 17 16 enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); 18 -
+114 -111
arch/x86/kvm/mmu.c
··· 148 148 149 149 #include <trace/events/kvm.h> 150 150 151 - #undef TRACE_INCLUDE_FILE 152 151 #define CREATE_TRACE_POINTS 153 152 #include "mmutrace.h" 154 153 ··· 173 174 shadow_walk_okay(&(_walker)); \ 174 175 shadow_walk_next(&(_walker))) 175 176 176 - 177 - struct kvm_unsync_walk { 178 - int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); 179 - }; 180 - 181 - typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); 177 + typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); 182 178 183 179 static struct kmem_cache *pte_chain_cache; 184 180 static struct kmem_cache *rmap_desc_cache; ··· 217 223 } 218 224 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); 219 225 220 - static int is_write_protection(struct kvm_vcpu *vcpu) 226 + static bool is_write_protection(struct kvm_vcpu *vcpu) 221 227 { 222 228 return kvm_read_cr0_bits(vcpu, X86_CR0_WP); 223 229 } ··· 321 327 page = alloc_page(GFP_KERNEL); 322 328 if (!page) 323 329 return -ENOMEM; 324 - set_page_private(page, 0); 325 330 cache->objects[cache->nobjs++] = page_address(page); 326 331 } 327 332 return 0; ··· 431 438 int i; 432 439 433 440 gfn = unalias_gfn(kvm, gfn); 441 + slot = gfn_to_memslot_unaliased(kvm, gfn); 434 442 for (i = PT_DIRECTORY_LEVEL; 435 443 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 436 - slot = gfn_to_memslot_unaliased(kvm, gfn); 437 444 write_count = slot_largepage_idx(gfn, slot, i); 438 445 *write_count -= 1; 439 446 WARN_ON(*write_count < 0); ··· 647 654 static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) 648 655 { 649 656 struct kvm_rmap_desc *desc; 650 - struct kvm_rmap_desc *prev_desc; 651 657 u64 *prev_spte; 652 658 int i; 653 659 ··· 658 666 return NULL; 659 667 } 660 668 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); 661 - prev_desc = NULL; 662 669 prev_spte = NULL; 663 670 while (desc) { 664 671 for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { ··· 785 794 int retval = 0; 786 795 struct kvm_memslots 
*slots; 787 796 788 - slots = rcu_dereference(kvm->memslots); 797 + slots = kvm_memslots(kvm); 789 798 790 799 for (i = 0; i < slots->nmemslots; i++) { 791 800 struct kvm_memory_slot *memslot = &slots->memslots[i]; ··· 916 925 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 917 926 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 918 927 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 919 - INIT_LIST_HEAD(&sp->oos_link); 920 928 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 921 929 sp->multimapped = 0; 922 930 sp->parent_pte = parent_pte; ··· 999 1009 } 1000 1010 1001 1011 1002 - static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 1003 - mmu_parent_walk_fn fn) 1012 + static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) 1004 1013 { 1005 1014 struct kvm_pte_chain *pte_chain; 1006 1015 struct hlist_node *node; ··· 1008 1019 1009 1020 if (!sp->multimapped && sp->parent_pte) { 1010 1021 parent_sp = page_header(__pa(sp->parent_pte)); 1011 - fn(vcpu, parent_sp); 1012 - mmu_parent_walk(vcpu, parent_sp, fn); 1022 + fn(parent_sp); 1023 + mmu_parent_walk(parent_sp, fn); 1013 1024 return; 1014 1025 } 1015 1026 hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) ··· 1017 1028 if (!pte_chain->parent_ptes[i]) 1018 1029 break; 1019 1030 parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); 1020 - fn(vcpu, parent_sp); 1021 - mmu_parent_walk(vcpu, parent_sp, fn); 1031 + fn(parent_sp); 1032 + mmu_parent_walk(parent_sp, fn); 1022 1033 } 1023 1034 } 1024 1035 ··· 1055 1066 } 1056 1067 } 1057 1068 1058 - static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1069 + static int unsync_walk_fn(struct kvm_mmu_page *sp) 1059 1070 { 1060 1071 kvm_mmu_update_parents_unsync(sp); 1061 1072 return 1; 1062 1073 } 1063 1074 1064 - static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, 1065 - struct kvm_mmu_page *sp) 1075 + static void 
kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) 1066 1076 { 1067 - mmu_parent_walk(vcpu, sp, unsync_walk_fn); 1077 + mmu_parent_walk(sp, unsync_walk_fn); 1068 1078 kvm_mmu_update_parents_unsync(sp); 1069 1079 } 1070 1080 ··· 1189 1201 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1190 1202 { 1191 1203 WARN_ON(!sp->unsync); 1204 + trace_kvm_mmu_sync_page(sp); 1192 1205 sp->unsync = 0; 1193 1206 --kvm->stat.mmu_unsync; 1194 1207 } ··· 1198 1209 1199 1210 static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1200 1211 { 1201 - if (sp->role.glevels != vcpu->arch.mmu.root_level) { 1212 + if (sp->role.cr4_pae != !!is_pae(vcpu)) { 1202 1213 kvm_mmu_zap_page(vcpu->kvm, sp); 1203 1214 return 1; 1204 1215 } 1205 1216 1206 - trace_kvm_mmu_sync_page(sp); 1207 1217 if (rmap_write_protect(vcpu->kvm, sp->gfn)) 1208 1218 kvm_flush_remote_tlbs(vcpu->kvm); 1209 1219 kvm_unlink_unsync_page(vcpu->kvm, sp); ··· 1319 1331 role = vcpu->arch.mmu.base_role; 1320 1332 role.level = level; 1321 1333 role.direct = direct; 1334 + if (role.direct) 1335 + role.cr4_pae = 0; 1322 1336 role.access = access; 1323 1337 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { 1324 1338 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); ··· 1341 1351 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1342 1352 if (sp->unsync_children) { 1343 1353 set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); 1344 - kvm_mmu_mark_parents_unsync(vcpu, sp); 1354 + kvm_mmu_mark_parents_unsync(sp); 1345 1355 } 1346 1356 trace_kvm_mmu_get_page(sp, false); 1347 1357 return sp; ··· 1563 1573 r = 0; 1564 1574 index = kvm_page_table_hashfn(gfn); 1565 1575 bucket = &kvm->arch.mmu_page_hash[index]; 1576 + restart: 1566 1577 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 1567 1578 if (sp->gfn == gfn && !sp->role.direct) { 1568 1579 pgprintk("%s: gfn %lx role %x\n", __func__, gfn, 1569 1580 sp->role.word); 1570 1581 r = 1; 1571 1582 if (kvm_mmu_zap_page(kvm, sp)) 1572 - n 
= bucket->first; 1583 + goto restart; 1573 1584 } 1574 1585 return r; 1575 1586 } ··· 1584 1593 1585 1594 index = kvm_page_table_hashfn(gfn); 1586 1595 bucket = &kvm->arch.mmu_page_hash[index]; 1596 + restart: 1587 1597 hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { 1588 1598 if (sp->gfn == gfn && !sp->role.direct 1589 1599 && !sp->role.invalid) { 1590 1600 pgprintk("%s: zap %lx %x\n", 1591 1601 __func__, gfn, sp->role.word); 1592 1602 if (kvm_mmu_zap_page(kvm, sp)) 1593 - nn = bucket->first; 1603 + goto restart; 1594 1604 } 1595 1605 } 1596 1606 } ··· 1616 1624 if (pt[i] == shadow_notrap_nonpresent_pte) 1617 1625 __set_spte(&pt[i], shadow_trap_nonpresent_pte); 1618 1626 } 1619 - } 1620 - 1621 - struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) 1622 - { 1623 - struct page *page; 1624 - 1625 - gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); 1626 - 1627 - if (gpa == UNMAPPED_GVA) 1628 - return NULL; 1629 - 1630 - page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 1631 - 1632 - return page; 1633 1627 } 1634 1628 1635 1629 /* ··· 1730 1752 struct kvm_mmu_page *s; 1731 1753 struct hlist_node *node, *n; 1732 1754 1733 - trace_kvm_mmu_unsync_page(sp); 1734 1755 index = kvm_page_table_hashfn(sp->gfn); 1735 1756 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1736 1757 /* don't unsync if pagetable is shadowed with multiple roles */ ··· 1739 1762 if (s->role.word != sp->role.word) 1740 1763 return 1; 1741 1764 } 1765 + trace_kvm_mmu_unsync_page(sp); 1742 1766 ++vcpu->kvm->stat.mmu_unsync; 1743 1767 sp->unsync = 1; 1744 1768 1745 - kvm_mmu_mark_parents_unsync(vcpu, sp); 1769 + kvm_mmu_mark_parents_unsync(sp); 1746 1770 1747 1771 mmu_convert_notrap(sp); 1748 1772 return 0; ··· 2059 2081 hpa_t root = vcpu->arch.mmu.root_hpa; 2060 2082 2061 2083 ASSERT(!VALID_PAGE(root)); 2062 - if (tdp_enabled) 2063 - direct = 1; 2064 2084 if (mmu_check_root(vcpu, root_gfn)) 2065 2085 return 1; 2086 + if (tdp_enabled) { 2087 + direct = 1; 2088 + root_gfn = 0; 2089 + 
} 2090 + spin_lock(&vcpu->kvm->mmu_lock); 2066 2091 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 2067 2092 PT64_ROOT_LEVEL, direct, 2068 2093 ACC_ALL, NULL); 2069 2094 root = __pa(sp->spt); 2070 2095 ++sp->root_count; 2096 + spin_unlock(&vcpu->kvm->mmu_lock); 2071 2097 vcpu->arch.mmu.root_hpa = root; 2072 2098 return 0; 2073 2099 } 2074 2100 direct = !is_paging(vcpu); 2075 - if (tdp_enabled) 2076 - direct = 1; 2077 2101 for (i = 0; i < 4; ++i) { 2078 2102 hpa_t root = vcpu->arch.mmu.pae_root[i]; 2079 2103 ··· 2091 2111 root_gfn = 0; 2092 2112 if (mmu_check_root(vcpu, root_gfn)) 2093 2113 return 1; 2114 + if (tdp_enabled) { 2115 + direct = 1; 2116 + root_gfn = i << 30; 2117 + } 2118 + spin_lock(&vcpu->kvm->mmu_lock); 2094 2119 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 2095 2120 PT32_ROOT_LEVEL, direct, 2096 2121 ACC_ALL, NULL); 2097 2122 root = __pa(sp->spt); 2098 2123 ++sp->root_count; 2124 + spin_unlock(&vcpu->kvm->mmu_lock); 2125 + 2099 2126 vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; 2100 2127 } 2101 2128 vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); ··· 2286 2299 /* no rsvd bits for 2 level 4K page table entries */ 2287 2300 context->rsvd_bits_mask[0][1] = 0; 2288 2301 context->rsvd_bits_mask[0][0] = 0; 2302 + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; 2303 + 2304 + if (!is_pse(vcpu)) { 2305 + context->rsvd_bits_mask[1][1] = 0; 2306 + break; 2307 + } 2308 + 2289 2309 if (is_cpuid_PSE36()) 2290 2310 /* 36bits PSE 4MB page */ 2291 2311 context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); 2292 2312 else 2293 2313 /* 32 bits PSE 4MB page */ 2294 2314 context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); 2295 - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; 2296 2315 break; 2297 2316 case PT32E_ROOT_LEVEL: 2298 2317 context->rsvd_bits_mask[0][2] = ··· 2311 2318 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 2312 2319 rsvd_bits(maxphyaddr, 62) | 2313 2320 rsvd_bits(13, 20); /* large page */ 2314 - 
context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; 2321 + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; 2315 2322 break; 2316 2323 case PT64_ROOT_LEVEL: 2317 2324 context->rsvd_bits_mask[0][3] = exb_bit_rsvd | ··· 2329 2336 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 2330 2337 rsvd_bits(maxphyaddr, 51) | 2331 2338 rsvd_bits(13, 20); /* large page */ 2332 - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; 2339 + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; 2333 2340 break; 2334 2341 } 2335 2342 } ··· 2431 2438 else 2432 2439 r = paging32_init_context(vcpu); 2433 2440 2434 - vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; 2441 + vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); 2442 + vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); 2435 2443 2436 2444 return r; 2437 2445 } ··· 2472 2478 goto out; 2473 2479 spin_lock(&vcpu->kvm->mmu_lock); 2474 2480 kvm_mmu_free_some_pages(vcpu); 2481 + spin_unlock(&vcpu->kvm->mmu_lock); 2475 2482 r = mmu_alloc_roots(vcpu); 2483 + spin_lock(&vcpu->kvm->mmu_lock); 2476 2484 mmu_sync_roots(vcpu); 2477 2485 spin_unlock(&vcpu->kvm->mmu_lock); 2478 2486 if (r) ··· 2523 2527 } 2524 2528 2525 2529 ++vcpu->kvm->stat.mmu_pte_updated; 2526 - if (sp->role.glevels == PT32_ROOT_LEVEL) 2530 + if (!sp->role.cr4_pae) 2527 2531 paging32_update_pte(vcpu, sp, spte, new); 2528 2532 else 2529 2533 paging64_update_pte(vcpu, sp, spte, new); ··· 2558 2562 } 2559 2563 2560 2564 static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 2561 - const u8 *new, int bytes) 2565 + u64 gpte) 2562 2566 { 2563 2567 gfn_t gfn; 2564 - int r; 2565 - u64 gpte = 0; 2566 2568 pfn_t pfn; 2567 2569 2568 - if (bytes != 4 && bytes != 8) 2569 - return; 2570 - 2571 - /* 2572 - * Assume that the pte write on a page table of the same type 2573 - * as the current vcpu paging mode. This is nearly always true 2574 - * (might be false while changing modes). 
Note it is verified later 2575 - * by update_pte(). 2576 - */ 2577 - if (is_pae(vcpu)) { 2578 - /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ 2579 - if ((bytes == 4) && (gpa % 4 == 0)) { 2580 - r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); 2581 - if (r) 2582 - return; 2583 - memcpy((void *)&gpte + (gpa % 8), new, 4); 2584 - } else if ((bytes == 8) && (gpa % 8 == 0)) { 2585 - memcpy((void *)&gpte, new, 8); 2586 - } 2587 - } else { 2588 - if ((bytes == 4) && (gpa % 4 == 0)) 2589 - memcpy((void *)&gpte, new, 4); 2590 - } 2591 2570 if (!is_present_gpte(gpte)) 2592 2571 return; 2593 2572 gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; ··· 2611 2640 int flooded = 0; 2612 2641 int npte; 2613 2642 int r; 2643 + int invlpg_counter; 2614 2644 2615 2645 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); 2616 - mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); 2646 + 2647 + invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); 2648 + 2649 + /* 2650 + * Assume that the pte write on a page table of the same type 2651 + * as the current vcpu paging mode. This is nearly always true 2652 + * (might be false while changing modes). Note it is verified later 2653 + * by update_pte(). 
2654 + */ 2655 + if ((is_pae(vcpu) && bytes == 4) || !new) { 2656 + /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ 2657 + if (is_pae(vcpu)) { 2658 + gpa &= ~(gpa_t)7; 2659 + bytes = 8; 2660 + } 2661 + r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); 2662 + if (r) 2663 + gentry = 0; 2664 + new = (const u8 *)&gentry; 2665 + } 2666 + 2667 + switch (bytes) { 2668 + case 4: 2669 + gentry = *(const u32 *)new; 2670 + break; 2671 + case 8: 2672 + gentry = *(const u64 *)new; 2673 + break; 2674 + default: 2675 + gentry = 0; 2676 + break; 2677 + } 2678 + 2679 + mmu_guess_page_from_pte_write(vcpu, gpa, gentry); 2617 2680 spin_lock(&vcpu->kvm->mmu_lock); 2681 + if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) 2682 + gentry = 0; 2618 2683 kvm_mmu_access_page(vcpu, gfn); 2619 2684 kvm_mmu_free_some_pages(vcpu); 2620 2685 ++vcpu->kvm->stat.mmu_pte_write; ··· 2669 2662 } 2670 2663 index = kvm_page_table_hashfn(gfn); 2671 2664 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2665 + 2666 + restart: 2672 2667 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 2673 2668 if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) 2674 2669 continue; 2675 - pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; 2670 + pte_size = sp->role.cr4_pae ? 
8 : 4; 2676 2671 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); 2677 2672 misaligned |= bytes < 4; 2678 2673 if (misaligned || flooded) { ··· 2691 2682 pgprintk("misaligned: gpa %llx bytes %d role %x\n", 2692 2683 gpa, bytes, sp->role.word); 2693 2684 if (kvm_mmu_zap_page(vcpu->kvm, sp)) 2694 - n = bucket->first; 2685 + goto restart; 2695 2686 ++vcpu->kvm->stat.mmu_flooded; 2696 2687 continue; 2697 2688 } 2698 2689 page_offset = offset; 2699 2690 level = sp->role.level; 2700 2691 npte = 1; 2701 - if (sp->role.glevels == PT32_ROOT_LEVEL) { 2692 + if (!sp->role.cr4_pae) { 2702 2693 page_offset <<= 1; /* 32->64 */ 2703 2694 /* 2704 2695 * A 32-bit pde maps 4MB while the shadow pdes map ··· 2716 2707 continue; 2717 2708 } 2718 2709 spte = &sp->spt[page_offset / sizeof(*spte)]; 2719 - if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { 2720 - gentry = 0; 2721 - r = kvm_read_guest_atomic(vcpu->kvm, 2722 - gpa & ~(u64)(pte_size - 1), 2723 - &gentry, pte_size); 2724 - new = (const void *)&gentry; 2725 - if (r < 0) 2726 - new = NULL; 2727 - } 2728 2710 while (npte--) { 2729 2711 entry = *spte; 2730 2712 mmu_pte_write_zap_pte(vcpu, sp, spte); 2731 - if (new) 2732 - mmu_pte_write_new_pte(vcpu, sp, spte, new); 2713 + if (gentry) 2714 + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); 2733 2715 mmu_pte_write_flush_tlb(vcpu, entry, *spte); 2734 2716 ++spte; 2735 2717 } ··· 2900 2900 struct kvm_mmu_page *sp, *node; 2901 2901 2902 2902 spin_lock(&kvm->mmu_lock); 2903 + restart: 2903 2904 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) 2904 2905 if (kvm_mmu_zap_page(kvm, sp)) 2905 - node = container_of(kvm->arch.active_mmu_pages.next, 2906 - struct kvm_mmu_page, link); 2906 + goto restart; 2907 + 2907 2908 spin_unlock(&kvm->mmu_lock); 2908 2909 2909 2910 kvm_flush_remote_tlbs(kvm); 2910 2911 } 2911 2912 2912 - static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) 2913 + static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) 
2913 2914 { 2914 2915 struct kvm_mmu_page *page; 2915 2916 2916 2917 page = container_of(kvm->arch.active_mmu_pages.prev, 2917 2918 struct kvm_mmu_page, link); 2918 - kvm_mmu_zap_page(kvm, page); 2919 + return kvm_mmu_zap_page(kvm, page) + 1; 2919 2920 } 2920 2921 2921 2922 static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) ··· 2928 2927 spin_lock(&kvm_lock); 2929 2928 2930 2929 list_for_each_entry(kvm, &vm_list, vm_list) { 2931 - int npages, idx; 2930 + int npages, idx, freed_pages; 2932 2931 2933 2932 idx = srcu_read_lock(&kvm->srcu); 2934 2933 spin_lock(&kvm->mmu_lock); ··· 2936 2935 kvm->arch.n_free_mmu_pages; 2937 2936 cache_count += npages; 2938 2937 if (!kvm_freed && nr_to_scan > 0 && npages > 0) { 2939 - kvm_mmu_remove_one_alloc_mmu_page(kvm); 2940 - cache_count--; 2938 + freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); 2939 + cache_count -= freed_pages; 2941 2940 kvm_freed = kvm; 2942 2941 } 2943 2942 nr_to_scan--; ··· 3012 3011 unsigned int nr_pages = 0; 3013 3012 struct kvm_memslots *slots; 3014 3013 3015 - slots = rcu_dereference(kvm->memslots); 3014 + slots = kvm_memslots(kvm); 3015 + 3016 3016 for (i = 0; i < slots->nmemslots; i++) 3017 3017 nr_pages += slots->memslots[i].npages; 3018 3018 ··· 3176 3174 } 3177 3175 3178 3176 3179 - typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, 3180 - u64 *sptep); 3177 + typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); 3181 3178 3182 3179 static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, 3183 3180 inspect_spte_fn fn) ··· 3192 3191 child = page_header(ent & PT64_BASE_ADDR_MASK); 3193 3192 __mmu_spte_walk(kvm, child, fn); 3194 3193 } else 3195 - fn(kvm, sp, &sp->spt[i]); 3194 + fn(kvm, &sp->spt[i]); 3196 3195 } 3197 3196 } 3198 3197 } ··· 3283 3282 3284 3283 static int count_rmaps(struct kvm_vcpu *vcpu) 3285 3284 { 3285 + struct kvm *kvm = vcpu->kvm; 3286 + struct kvm_memslots *slots; 3286 3287 int nmaps = 0; 3287 3288 int i, j, k, idx; 3288 3289 
3289 3290 idx = srcu_read_lock(&kvm->srcu); 3290 - slots = rcu_dereference(kvm->memslots); 3291 + slots = kvm_memslots(kvm); 3291 3292 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 3292 3293 struct kvm_memory_slot *m = &slots->memslots[i]; 3293 3294 struct kvm_rmap_desc *d; ··· 3318 3315 return nmaps; 3319 3316 } 3320 3317 3321 - void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) 3318 + void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) 3322 3319 { 3323 3320 unsigned long *rmapp; 3324 3321 struct kvm_mmu_page *rev_sp; ··· 3334 3331 printk(KERN_ERR "%s: no memslot for gfn %ld\n", 3335 3332 audit_msg, gfn); 3336 3333 printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", 3337 - audit_msg, sptep - rev_sp->spt, 3334 + audit_msg, (long int)(sptep - rev_sp->spt), 3338 3335 rev_sp->gfn); 3339 3336 dump_stack(); 3340 3337 return; 3341 3338 } 3342 3339 3343 3340 rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], 3344 - is_large_pte(*sptep)); 3341 + rev_sp->role.level); 3345 3342 if (!*rmapp) { 3346 3343 if (!printk_ratelimit()) 3347 3344 return; ··· 3376 3373 continue; 3377 3374 if (!(ent & PT_WRITABLE_MASK)) 3378 3375 continue; 3379 - inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); 3376 + inspect_spte_has_rmap(vcpu->kvm, &pt[i]); 3380 3377 } 3381 3378 } 3382 3379 return;
+36 -50
arch/x86/kvm/mmutrace.h
··· 6 6 7 7 #undef TRACE_SYSTEM 8 8 #define TRACE_SYSTEM kvmmmu 9 - #define TRACE_INCLUDE_PATH . 10 - #define TRACE_INCLUDE_FILE mmutrace 11 9 12 10 #define KVM_MMU_PAGE_FIELDS \ 13 11 __field(__u64, gfn) \ 14 12 __field(__u32, role) \ 15 13 __field(__u32, root_count) \ 16 - __field(__u32, unsync) 14 + __field(bool, unsync) 17 15 18 16 #define KVM_MMU_PAGE_ASSIGN(sp) \ 19 17 __entry->gfn = sp->gfn; \ ··· 28 30 \ 29 31 role.word = __entry->role; \ 30 32 \ 31 - trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ 33 + trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ 32 34 " %snxe root %u %s%c", \ 33 - __entry->gfn, role.level, role.glevels, \ 35 + __entry->gfn, role.level, \ 36 + role.cr4_pae ? " pae" : "", \ 34 37 role.quadrant, \ 35 38 role.direct ? " direct" : "", \ 36 39 access_str[role.access], \ 37 40 role.invalid ? " invalid" : "", \ 38 - role.cr4_pge ? "" : "!", \ 39 41 role.nxe ? "" : "!", \ 40 42 __entry->root_count, \ 41 43 __entry->unsync ? "unsync" : "sync", 0); \ ··· 92 94 TP_printk("pte %llx level %u", __entry->pte, __entry->level) 93 95 ); 94 96 95 - /* We set a pte accessed bit */ 96 - TRACE_EVENT( 97 - kvm_mmu_set_accessed_bit, 97 + DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, 98 + 98 99 TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), 100 + 99 101 TP_ARGS(table_gfn, index, size), 100 102 101 103 TP_STRUCT__entry( 102 104 __field(__u64, gpa) 103 - ), 105 + ), 104 106 105 107 TP_fast_assign( 106 108 __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) ··· 110 112 TP_printk("gpa %llx", __entry->gpa) 111 113 ); 112 114 113 - /* We set a pte dirty bit */ 114 - TRACE_EVENT( 115 - kvm_mmu_set_dirty_bit, 115 + /* We set a pte accessed bit */ 116 + DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, 117 + 116 118 TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), 117 - TP_ARGS(table_gfn, index, size), 118 119 119 - TP_STRUCT__entry( 120 - __field(__u64, gpa) 121 - ), 120 + TP_ARGS(table_gfn, index, size) 121 + ); 
122 122 123 - TP_fast_assign( 124 - __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) 125 - + index * size; 126 - ), 123 + /* We set a pte dirty bit */ 124 + DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, 127 125 128 - TP_printk("gpa %llx", __entry->gpa) 126 + TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), 127 + 128 + TP_ARGS(table_gfn, index, size) 129 129 ); 130 130 131 131 TRACE_EVENT( ··· 162 166 __entry->created ? "new" : "existing") 163 167 ); 164 168 165 - TRACE_EVENT( 166 - kvm_mmu_sync_page, 169 + DECLARE_EVENT_CLASS(kvm_mmu_page_class, 170 + 167 171 TP_PROTO(struct kvm_mmu_page *sp), 168 172 TP_ARGS(sp), 169 173 170 174 TP_STRUCT__entry( 171 175 KVM_MMU_PAGE_FIELDS 172 - ), 176 + ), 173 177 174 178 TP_fast_assign( 175 179 KVM_MMU_PAGE_ASSIGN(sp) 176 - ), 180 + ), 177 181 178 182 TP_printk("%s", KVM_MMU_PAGE_PRINTK()) 179 183 ); 180 184 181 - TRACE_EVENT( 182 - kvm_mmu_unsync_page, 185 + DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, 183 186 TP_PROTO(struct kvm_mmu_page *sp), 184 - TP_ARGS(sp), 185 187 186 - TP_STRUCT__entry( 187 - KVM_MMU_PAGE_FIELDS 188 - ), 189 - 190 - TP_fast_assign( 191 - KVM_MMU_PAGE_ASSIGN(sp) 192 - ), 193 - 194 - TP_printk("%s", KVM_MMU_PAGE_PRINTK()) 188 + TP_ARGS(sp) 195 189 ); 196 190 197 - TRACE_EVENT( 198 - kvm_mmu_zap_page, 191 + DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, 199 192 TP_PROTO(struct kvm_mmu_page *sp), 200 - TP_ARGS(sp), 201 193 202 - TP_STRUCT__entry( 203 - KVM_MMU_PAGE_FIELDS 204 - ), 205 - 206 - TP_fast_assign( 207 - KVM_MMU_PAGE_ASSIGN(sp) 208 - ), 209 - 210 - TP_printk("%s", KVM_MMU_PAGE_PRINTK()) 194 + TP_ARGS(sp) 211 195 ); 212 196 197 + DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, 198 + TP_PROTO(struct kvm_mmu_page *sp), 199 + 200 + TP_ARGS(sp) 201 + ); 213 202 #endif /* _TRACE_KVMMMU_H */ 203 + 204 + #undef TRACE_INCLUDE_PATH 205 + #define TRACE_INCLUDE_PATH . 
206 + #undef TRACE_INCLUDE_FILE 207 + #define TRACE_INCLUDE_FILE mmutrace 214 208 215 209 /* This part must be outside protection */ 216 210 #include <trace/define_trace.h>
+36 -10
arch/x86/kvm/paging_tmpl.h
··· 170 170 goto access_error; 171 171 172 172 #if PTTYPE == 64 173 - if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) 173 + if (fetch_fault && (pte & PT64_NX_MASK)) 174 174 goto access_error; 175 175 #endif 176 176 ··· 190 190 191 191 if ((walker->level == PT_PAGE_TABLE_LEVEL) || 192 192 ((walker->level == PT_DIRECTORY_LEVEL) && 193 - (pte & PT_PAGE_SIZE_MASK) && 193 + is_large_pte(pte) && 194 194 (PTTYPE == 64 || is_pse(vcpu))) || 195 195 ((walker->level == PT_PDPE_LEVEL) && 196 - (pte & PT_PAGE_SIZE_MASK) && 196 + is_large_pte(pte) && 197 197 is_long_mode(vcpu))) { 198 198 int lvl = walker->level; 199 199 ··· 258 258 pt_element_t gpte; 259 259 unsigned pte_access; 260 260 pfn_t pfn; 261 + u64 new_spte; 261 262 262 263 gpte = *(const pt_element_t *)pte; 263 264 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { 264 - if (!is_present_gpte(gpte)) 265 - __set_spte(spte, shadow_notrap_nonpresent_pte); 265 + if (!is_present_gpte(gpte)) { 266 + if (page->unsync) 267 + new_spte = shadow_trap_nonpresent_pte; 268 + else 269 + new_spte = shadow_notrap_nonpresent_pte; 270 + __set_spte(spte, new_spte); 271 + } 266 272 return; 267 273 } 268 274 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); ··· 463 457 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 464 458 { 465 459 struct kvm_shadow_walk_iterator iterator; 460 + gpa_t pte_gpa = -1; 466 461 int level; 467 462 u64 *sptep; 468 463 int need_flush = 0; ··· 474 467 level = iterator.level; 475 468 sptep = iterator.sptep; 476 469 477 - if (level == PT_PAGE_TABLE_LEVEL || 478 - ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || 479 - ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { 470 + if (is_last_spte(*sptep, level)) { 471 + struct kvm_mmu_page *sp = page_header(__pa(sptep)); 472 + int offset, shift; 473 + 474 + shift = PAGE_SHIFT - 475 + (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; 476 + offset = sp->role.quadrant << shift; 477 + 478 + pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; 
479 + pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); 480 480 481 481 if (is_shadow_present_pte(*sptep)) { 482 482 rmap_remove(vcpu->kvm, sptep); ··· 501 487 502 488 if (need_flush) 503 489 kvm_flush_remote_tlbs(vcpu->kvm); 490 + 491 + atomic_inc(&vcpu->kvm->arch.invlpg_counter); 492 + 504 493 spin_unlock(&vcpu->kvm->mmu_lock); 494 + 495 + if (pte_gpa == -1) 496 + return; 497 + 498 + if (mmu_topup_memory_caches(vcpu)) 499 + return; 500 + kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); 505 501 } 506 502 507 503 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, ··· 575 551 { 576 552 int i, offset, nr_present; 577 553 bool reset_host_protection; 554 + gpa_t first_pte_gpa; 578 555 579 556 offset = nr_present = 0; 580 557 581 558 if (PTTYPE == 32) 582 559 offset = sp->role.quadrant << PT64_LEVEL_BITS; 560 + 561 + first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); 583 562 584 563 for (i = 0; i < PT64_ENT_PER_PAGE; i++) { 585 564 unsigned pte_access; ··· 593 566 if (!is_shadow_present_pte(sp->spt[i])) 594 567 continue; 595 568 596 - pte_gpa = gfn_to_gpa(sp->gfn); 597 - pte_gpa += (i+offset) * sizeof(pt_element_t); 569 + pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); 598 570 599 571 if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, 600 572 sizeof(pt_element_t)))
+623 -329
arch/x86/kvm/svm.c
··· 44 44 #define SEG_TYPE_LDT 2 45 45 #define SEG_TYPE_BUSY_TSS16 3 46 46 47 - #define SVM_FEATURE_NPT (1 << 0) 48 - #define SVM_FEATURE_LBRV (1 << 1) 49 - #define SVM_FEATURE_SVML (1 << 2) 50 - #define SVM_FEATURE_PAUSE_FILTER (1 << 10) 47 + #define SVM_FEATURE_NPT (1 << 0) 48 + #define SVM_FEATURE_LBRV (1 << 1) 49 + #define SVM_FEATURE_SVML (1 << 2) 50 + #define SVM_FEATURE_NRIP (1 << 3) 51 + #define SVM_FEATURE_PAUSE_FILTER (1 << 10) 51 52 52 53 #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ 53 54 #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ ··· 71 70 struct nested_state { 72 71 struct vmcb *hsave; 73 72 u64 hsave_msr; 73 + u64 vm_cr_msr; 74 74 u64 vmcb; 75 75 76 76 /* These are the merged vectors */ ··· 79 77 80 78 /* gpa pointers to the real vectors */ 81 79 u64 vmcb_msrpm; 80 + u64 vmcb_iopm; 82 81 83 82 /* A VMEXIT is required but not yet emulated */ 84 83 bool exit_required; ··· 93 90 u64 intercept; 94 91 95 92 }; 93 + 94 + #define MSRPM_OFFSETS 16 95 + static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; 96 96 97 97 struct vcpu_svm { 98 98 struct kvm_vcpu vcpu; ··· 116 110 struct nested_state nested; 117 111 118 112 bool nmi_singlestep; 113 + 114 + unsigned int3_injected; 115 + unsigned long int3_rip; 116 + }; 117 + 118 + #define MSR_INVALID 0xffffffffU 119 + 120 + static struct svm_direct_access_msrs { 121 + u32 index; /* Index of the MSR */ 122 + bool always; /* True if intercept is always on */ 123 + } direct_access_msrs[] = { 124 + { .index = MSR_K6_STAR, .always = true }, 125 + { .index = MSR_IA32_SYSENTER_CS, .always = true }, 126 + #ifdef CONFIG_X86_64 127 + { .index = MSR_GS_BASE, .always = true }, 128 + { .index = MSR_FS_BASE, .always = true }, 129 + { .index = MSR_KERNEL_GS_BASE, .always = true }, 130 + { .index = MSR_LSTAR, .always = true }, 131 + { .index = MSR_CSTAR, .always = true }, 132 + { .index = MSR_SYSCALL_MASK, .always = true }, 133 + #endif 134 + { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, 
135 + { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, 136 + { .index = MSR_IA32_LASTINTFROMIP, .always = false }, 137 + { .index = MSR_IA32_LASTINTTOIP, .always = false }, 138 + { .index = MSR_INVALID, .always = false }, 119 139 }; 120 140 121 141 /* enable NPT for AMD64 and X86 with PAE */ 122 142 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 123 143 static bool npt_enabled = true; 124 144 #else 125 - static bool npt_enabled = false; 145 + static bool npt_enabled; 126 146 #endif 127 147 static int npt = 1; 128 148 ··· 161 129 static void svm_complete_interrupts(struct vcpu_svm *svm); 162 130 163 131 static int nested_svm_exit_handled(struct vcpu_svm *svm); 132 + static int nested_svm_intercept(struct vcpu_svm *svm); 164 133 static int nested_svm_vmexit(struct vcpu_svm *svm); 165 134 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 166 135 bool has_error_code, u32 error_code); ··· 196 163 struct kvm_ldttss_desc { 197 164 u16 limit0; 198 165 u16 base0; 199 - unsigned base1 : 8, type : 5, dpl : 2, p : 1; 200 - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; 166 + unsigned base1:8, type:5, dpl:2, p:1; 167 + unsigned limit1:4, zero0:3, g:1, base2:8; 201 168 u32 base3; 202 169 u32 zero1; 203 170 } __attribute__((packed)); ··· 227 194 #define MSRS_RANGE_SIZE 2048 228 195 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) 229 196 197 + static u32 svm_msrpm_offset(u32 msr) 198 + { 199 + u32 offset; 200 + int i; 201 + 202 + for (i = 0; i < NUM_MSR_MAPS; i++) { 203 + if (msr < msrpm_ranges[i] || 204 + msr >= msrpm_ranges[i] + MSRS_IN_RANGE) 205 + continue; 206 + 207 + offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ 208 + offset += (i * MSRS_RANGE_SIZE); /* add range offset */ 209 + 210 + /* Now we have the u8 offset - but need the u32 offset */ 211 + return offset / 4; 212 + } 213 + 214 + /* MSR not in any range */ 215 + return MSR_INVALID; 216 + } 217 + 230 218 #define MAX_INST_SIZE 15 231 219 232 220 static inline u32 
svm_has(u32 feat) ··· 267 213 268 214 static inline void invlpga(unsigned long addr, u32 asid) 269 215 { 270 - asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); 216 + asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); 271 217 } 272 218 273 219 static inline void force_new_asid(struct kvm_vcpu *vcpu) ··· 289 235 vcpu->arch.efer = efer; 290 236 } 291 237 292 - static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 293 - bool has_error_code, u32 error_code) 294 - { 295 - struct vcpu_svm *svm = to_svm(vcpu); 296 - 297 - /* If we are within a nested VM we'd better #VMEXIT and let the 298 - guest handle the exception */ 299 - if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) 300 - return; 301 - 302 - svm->vmcb->control.event_inj = nr 303 - | SVM_EVTINJ_VALID 304 - | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) 305 - | SVM_EVTINJ_TYPE_EXEPT; 306 - svm->vmcb->control.event_inj_err = error_code; 307 - } 308 - 309 238 static int is_external_interrupt(u32 info) 310 239 { 311 240 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; ··· 301 264 u32 ret = 0; 302 265 303 266 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) 304 - ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; 267 + ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; 305 268 return ret & mask; 306 269 } 307 270 ··· 320 283 { 321 284 struct vcpu_svm *svm = to_svm(vcpu); 322 285 286 + if (svm->vmcb->control.next_rip != 0) 287 + svm->next_rip = svm->vmcb->control.next_rip; 288 + 323 289 if (!svm->next_rip) { 324 290 if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != 325 291 EMULATE_DONE) ··· 335 295 336 296 kvm_rip_write(vcpu, svm->next_rip); 337 297 svm_set_interrupt_shadow(vcpu, 0); 298 + } 299 + 300 + static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 301 + bool has_error_code, u32 error_code, 302 + bool reinject) 303 + { 304 + struct vcpu_svm *svm = to_svm(vcpu); 305 + 306 + /* 307 + * If we are within a nested VM we'd better 
#VMEXIT and let the guest 308 + * handle the exception 309 + */ 310 + if (!reinject && 311 + nested_svm_check_exception(svm, nr, has_error_code, error_code)) 312 + return; 313 + 314 + if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { 315 + unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); 316 + 317 + /* 318 + * For guest debugging where we have to reinject #BP if some 319 + * INT3 is guest-owned: 320 + * Emulate nRIP by moving RIP forward. Will fail if injection 321 + * raises a fault that is not intercepted. Still better than 322 + * failing in all cases. 323 + */ 324 + skip_emulated_instruction(&svm->vcpu); 325 + rip = kvm_rip_read(&svm->vcpu); 326 + svm->int3_rip = rip + svm->vmcb->save.cs.base; 327 + svm->int3_injected = rip - old_rip; 328 + } 329 + 330 + svm->vmcb->control.event_inj = nr 331 + | SVM_EVTINJ_VALID 332 + | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) 333 + | SVM_EVTINJ_TYPE_EXEPT; 334 + svm->vmcb->control.event_inj_err = error_code; 338 335 } 339 336 340 337 static int has_svm(void) ··· 396 319 397 320 struct svm_cpu_data *sd; 398 321 uint64_t efer; 399 - struct descriptor_table gdt_descr; 322 + struct desc_ptr gdt_descr; 400 323 struct desc_struct *gdt; 401 324 int me = raw_smp_processor_id(); 402 325 ··· 421 344 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 422 345 sd->next_asid = sd->max_asid + 1; 423 346 424 - kvm_get_gdt(&gdt_descr); 425 - gdt = (struct desc_struct *)gdt_descr.base; 347 + native_store_gdt(&gdt_descr); 348 + gdt = (struct desc_struct *)gdt_descr.address; 426 349 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 427 350 428 351 wrmsrl(MSR_EFER, efer | EFER_SVME); ··· 468 391 469 392 } 470 393 471 - static void set_msr_interception(u32 *msrpm, unsigned msr, 472 - int read, int write) 394 + static bool valid_msr_intercept(u32 index) 473 395 { 474 396 int i; 475 397 476 - for (i = 0; i < NUM_MSR_MAPS; i++) { 477 - if (msr >= msrpm_ranges[i] && 478 - msr < msrpm_ranges[i] + MSRS_IN_RANGE) { 479 - u32 
msr_offset = (i * MSRS_IN_RANGE + msr - 480 - msrpm_ranges[i]) * 2; 398 + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) 399 + if (direct_access_msrs[i].index == index) 400 + return true; 481 401 482 - u32 *base = msrpm + (msr_offset / 32); 483 - u32 msr_shift = msr_offset % 32; 484 - u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); 485 - *base = (*base & ~(0x3 << msr_shift)) | 486 - (mask << msr_shift); 487 - return; 488 - } 489 - } 490 - BUG(); 402 + return false; 403 + } 404 + 405 + static void set_msr_interception(u32 *msrpm, unsigned msr, 406 + int read, int write) 407 + { 408 + u8 bit_read, bit_write; 409 + unsigned long tmp; 410 + u32 offset; 411 + 412 + /* 413 + * If this warning triggers extend the direct_access_msrs list at the 414 + * beginning of the file 415 + */ 416 + WARN_ON(!valid_msr_intercept(msr)); 417 + 418 + offset = svm_msrpm_offset(msr); 419 + bit_read = 2 * (msr & 0x0f); 420 + bit_write = 2 * (msr & 0x0f) + 1; 421 + tmp = msrpm[offset]; 422 + 423 + BUG_ON(offset == MSR_INVALID); 424 + 425 + read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); 426 + write ? 
clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); 427 + 428 + msrpm[offset] = tmp; 491 429 } 492 430 493 431 static void svm_vcpu_init_msrpm(u32 *msrpm) 494 432 { 433 + int i; 434 + 495 435 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); 496 436 497 - #ifdef CONFIG_X86_64 498 - set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); 499 - set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); 500 - set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); 501 - set_msr_interception(msrpm, MSR_LSTAR, 1, 1); 502 - set_msr_interception(msrpm, MSR_CSTAR, 1, 1); 503 - set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); 504 - #endif 505 - set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); 506 - set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); 437 + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { 438 + if (!direct_access_msrs[i].always) 439 + continue; 440 + 441 + set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); 442 + } 443 + } 444 + 445 + static void add_msr_offset(u32 offset) 446 + { 447 + int i; 448 + 449 + for (i = 0; i < MSRPM_OFFSETS; ++i) { 450 + 451 + /* Offset already in list? */ 452 + if (msrpm_offsets[i] == offset) 453 + return; 454 + 455 + /* Slot used by another offset? */ 456 + if (msrpm_offsets[i] != MSR_INVALID) 457 + continue; 458 + 459 + /* Add offset to list */ 460 + msrpm_offsets[i] = offset; 461 + 462 + return; 463 + } 464 + 465 + /* 466 + * If this BUG triggers the msrpm_offsets table has an overflow. Just 467 + * increase MSRPM_OFFSETS in this case. 
468 + */ 469 + BUG(); 470 + } 471 + 472 + static void init_msrpm_offsets(void) 473 + { 474 + int i; 475 + 476 + memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); 477 + 478 + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { 479 + u32 offset; 480 + 481 + offset = svm_msrpm_offset(direct_access_msrs[i].index); 482 + BUG_ON(offset == MSR_INVALID); 483 + 484 + add_msr_offset(offset); 485 + } 507 486 } 508 487 509 488 static void svm_enable_lbrv(struct vcpu_svm *svm) ··· 599 466 iopm_va = page_address(iopm_pages); 600 467 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); 601 468 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; 469 + 470 + init_msrpm_offsets(); 602 471 603 472 if (boot_cpu_has(X86_FEATURE_NX)) 604 473 kvm_enable_efer_bits(EFER_NX); ··· 658 523 { 659 524 seg->selector = 0; 660 525 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | 661 - SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ 526 + SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ 662 527 seg->limit = 0xffff; 663 528 seg->base = 0; 664 529 } ··· 678 543 679 544 svm->vcpu.fpu_active = 1; 680 545 681 - control->intercept_cr_read = INTERCEPT_CR0_MASK | 546 + control->intercept_cr_read = INTERCEPT_CR0_MASK | 682 547 INTERCEPT_CR3_MASK | 683 548 INTERCEPT_CR4_MASK; 684 549 685 - control->intercept_cr_write = INTERCEPT_CR0_MASK | 550 + control->intercept_cr_write = INTERCEPT_CR0_MASK | 686 551 INTERCEPT_CR3_MASK | 687 552 INTERCEPT_CR4_MASK | 688 553 INTERCEPT_CR8_MASK; 689 554 690 - control->intercept_dr_read = INTERCEPT_DR0_MASK | 555 + control->intercept_dr_read = INTERCEPT_DR0_MASK | 691 556 INTERCEPT_DR1_MASK | 692 557 INTERCEPT_DR2_MASK | 693 558 INTERCEPT_DR3_MASK | ··· 696 561 INTERCEPT_DR6_MASK | 697 562 INTERCEPT_DR7_MASK; 698 563 699 - control->intercept_dr_write = INTERCEPT_DR0_MASK | 564 + control->intercept_dr_write = INTERCEPT_DR0_MASK | 700 565 INTERCEPT_DR1_MASK | 701 566 INTERCEPT_DR2_MASK | 702 567 INTERCEPT_DR3_MASK | ··· 710 575 (1 << 
MC_VECTOR); 711 576 712 577 713 - control->intercept = (1ULL << INTERCEPT_INTR) | 578 + control->intercept = (1ULL << INTERCEPT_INTR) | 714 579 (1ULL << INTERCEPT_NMI) | 715 580 (1ULL << INTERCEPT_SMI) | 716 581 (1ULL << INTERCEPT_SELECTIVE_CR0) | ··· 771 636 save->rip = 0x0000fff0; 772 637 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; 773 638 774 - /* This is the guest-visible cr0 value. 639 + /* 640 + * This is the guest-visible cr0 value. 775 641 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. 776 642 */ 777 643 svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; ··· 865 729 svm_vcpu_init_msrpm(svm->msrpm); 866 730 867 731 svm->nested.msrpm = page_address(nested_msrpm_pages); 732 + svm_vcpu_init_msrpm(svm->nested.msrpm); 868 733 869 734 svm->vmcb = page_address(page); 870 735 clear_page(svm->vmcb); ··· 1019 882 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 1020 883 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 1021 884 1022 - /* AMD's VMCB does not have an explicit unusable field, so emulate it 885 + /* 886 + * AMD's VMCB does not have an explicit unusable field, so emulate it 1023 887 * for cross vendor migration purposes by "not present" 1024 888 */ 1025 889 var->unusable = !var->present || (var->type == 0); ··· 1056 918 var->type |= 0x1; 1057 919 break; 1058 920 case VCPU_SREG_SS: 1059 - /* On AMD CPUs sometimes the DB bit in the segment 921 + /* 922 + * On AMD CPUs sometimes the DB bit in the segment 1060 923 * descriptor is left as 1, although the whole segment has 1061 924 * been made unusable. Clear it here to pass an Intel VMX 1062 925 * entry check when cross vendor migrating. 
··· 1075 936 return save->cpl; 1076 937 } 1077 938 1078 - static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 939 + static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1079 940 { 1080 941 struct vcpu_svm *svm = to_svm(vcpu); 1081 942 1082 - dt->limit = svm->vmcb->save.idtr.limit; 1083 - dt->base = svm->vmcb->save.idtr.base; 943 + dt->size = svm->vmcb->save.idtr.limit; 944 + dt->address = svm->vmcb->save.idtr.base; 1084 945 } 1085 946 1086 - static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 947 + static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1087 948 { 1088 949 struct vcpu_svm *svm = to_svm(vcpu); 1089 950 1090 - svm->vmcb->save.idtr.limit = dt->limit; 1091 - svm->vmcb->save.idtr.base = dt->base ; 951 + svm->vmcb->save.idtr.limit = dt->size; 952 + svm->vmcb->save.idtr.base = dt->address ; 1092 953 } 1093 954 1094 - static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 955 + static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1095 956 { 1096 957 struct vcpu_svm *svm = to_svm(vcpu); 1097 958 1098 - dt->limit = svm->vmcb->save.gdtr.limit; 1099 - dt->base = svm->vmcb->save.gdtr.base; 959 + dt->size = svm->vmcb->save.gdtr.limit; 960 + dt->address = svm->vmcb->save.gdtr.base; 1100 961 } 1101 962 1102 - static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 963 + static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1103 964 { 1104 965 struct vcpu_svm *svm = to_svm(vcpu); 1105 966 1106 - svm->vmcb->save.gdtr.limit = dt->limit; 1107 - svm->vmcb->save.gdtr.base = dt->base ; 967 + svm->vmcb->save.gdtr.limit = dt->size; 968 + svm->vmcb->save.gdtr.base = dt->address ; 1108 969 } 1109 970 1110 971 static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) ··· 1117 978 1118 979 static void update_cr0_intercept(struct vcpu_svm *svm) 1119 980 { 981 + struct vmcb *vmcb = svm->vmcb; 1120 982 ulong gcr0 = 
svm->vcpu.arch.cr0; 1121 983 u64 *hcr0 = &svm->vmcb->save.cr0; 1122 984 ··· 1129 989 1130 990 1131 991 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { 1132 - svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; 1133 - svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; 992 + vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; 993 + vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; 994 + if (is_nested(svm)) { 995 + struct vmcb *hsave = svm->nested.hsave; 996 + 997 + hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; 998 + hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; 999 + vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; 1000 + vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; 1001 + } 1134 1002 } else { 1135 1003 svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; 1136 1004 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; 1005 + if (is_nested(svm)) { 1006 + struct vmcb *hsave = svm->nested.hsave; 1007 + 1008 + hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; 1009 + hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; 1010 + } 1137 1011 } 1138 1012 } 1139 1013 1140 1014 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1141 1015 { 1142 1016 struct vcpu_svm *svm = to_svm(vcpu); 1017 + 1018 + if (is_nested(svm)) { 1019 + /* 1020 + * We are here because we run in nested mode, the host kvm 1021 + * intercepts cr0 writes but the l1 hypervisor does not. 1022 + * But the L1 hypervisor may intercept selective cr0 writes. 1023 + * This needs to be checked here. 
1024 + */ 1025 + unsigned long old, new; 1026 + 1027 + /* Remove bits that would trigger a real cr0 write intercept */ 1028 + old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; 1029 + new = cr0 & SVM_CR0_SELECTIVE_MASK; 1030 + 1031 + if (old == new) { 1032 + /* cr0 write with ts and mp unchanged */ 1033 + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; 1034 + if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) 1035 + return; 1036 + } 1037 + } 1143 1038 1144 1039 #ifdef CONFIG_X86_64 1145 1040 if (vcpu->arch.efer & EFER_LME) { ··· 1309 1134 svm->vmcb->control.asid = sd->next_asid++; 1310 1135 } 1311 1136 1312 - static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) 1137 + static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) 1313 1138 { 1314 1139 struct vcpu_svm *svm = to_svm(vcpu); 1315 1140 1316 - switch (dr) { 1317 - case 0 ... 3: 1318 - *dest = vcpu->arch.db[dr]; 1319 - break; 1320 - case 4: 1321 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 1322 - return EMULATE_FAIL; /* will re-inject UD */ 1323 - /* fall through */ 1324 - case 6: 1325 - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1326 - *dest = vcpu->arch.dr6; 1327 - else 1328 - *dest = svm->vmcb->save.dr6; 1329 - break; 1330 - case 5: 1331 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 1332 - return EMULATE_FAIL; /* will re-inject UD */ 1333 - /* fall through */ 1334 - case 7: 1335 - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1336 - *dest = vcpu->arch.dr7; 1337 - else 1338 - *dest = svm->vmcb->save.dr7; 1339 - break; 1340 - } 1341 - 1342 - return EMULATE_DONE; 1343 - } 1344 - 1345 - static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) 1346 - { 1347 - struct vcpu_svm *svm = to_svm(vcpu); 1348 - 1349 - switch (dr) { 1350 - case 0 ... 
3: 1351 - vcpu->arch.db[dr] = value; 1352 - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 1353 - vcpu->arch.eff_db[dr] = value; 1354 - break; 1355 - case 4: 1356 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 1357 - return EMULATE_FAIL; /* will re-inject UD */ 1358 - /* fall through */ 1359 - case 6: 1360 - vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; 1361 - break; 1362 - case 5: 1363 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 1364 - return EMULATE_FAIL; /* will re-inject UD */ 1365 - /* fall through */ 1366 - case 7: 1367 - vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; 1368 - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 1369 - svm->vmcb->save.dr7 = vcpu->arch.dr7; 1370 - vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); 1371 - } 1372 - break; 1373 - } 1374 - 1375 - return EMULATE_DONE; 1141 + svm->vmcb->save.dr7 = value; 1376 1142 } 1377 1143 1378 1144 static int pf_interception(struct vcpu_svm *svm) ··· 1350 1234 } 1351 1235 1352 1236 if (svm->vcpu.guest_debug & 1353 - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ 1237 + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { 1354 1238 kvm_run->exit_reason = KVM_EXIT_DEBUG; 1355 1239 kvm_run->debug.arch.pc = 1356 1240 svm->vmcb->save.cs.base + svm->vmcb->save.rip; ··· 1384 1268 static void svm_fpu_activate(struct kvm_vcpu *vcpu) 1385 1269 { 1386 1270 struct vcpu_svm *svm = to_svm(vcpu); 1387 - svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1271 + u32 excp; 1272 + 1273 + if (is_nested(svm)) { 1274 + u32 h_excp, n_excp; 1275 + 1276 + h_excp = svm->nested.hsave->control.intercept_exceptions; 1277 + n_excp = svm->nested.intercept_exceptions; 1278 + h_excp &= ~(1 << NM_VECTOR); 1279 + excp = h_excp | n_excp; 1280 + } else { 1281 + excp = svm->vmcb->control.intercept_exceptions; 1282 + excp &= ~(1 << NM_VECTOR); 1283 + } 1284 + 1285 + svm->vmcb->control.intercept_exceptions = excp; 1286 + 1388 1287 svm->vcpu.fpu_active = 1; 1389 1288 update_cr0_intercept(svm); 1390 
1289 } ··· 1440 1309 1441 1310 static int io_interception(struct vcpu_svm *svm) 1442 1311 { 1312 + struct kvm_vcpu *vcpu = &svm->vcpu; 1443 1313 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ 1444 1314 int size, in, string; 1445 1315 unsigned port; 1446 1316 1447 1317 ++svm->vcpu.stat.io_exits; 1448 - 1449 - svm->next_rip = svm->vmcb->control.exit_info_2; 1450 - 1451 1318 string = (io_info & SVM_IOIO_STR_MASK) != 0; 1452 - 1453 - if (string) { 1454 - if (emulate_instruction(&svm->vcpu, 1455 - 0, 0, 0) == EMULATE_DO_MMIO) 1456 - return 0; 1457 - return 1; 1458 - } 1459 - 1460 1319 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1320 + if (string || in) 1321 + return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); 1322 + 1461 1323 port = io_info >> 16; 1462 1324 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1463 - 1325 + svm->next_rip = svm->vmcb->control.exit_info_2; 1464 1326 skip_emulated_instruction(&svm->vcpu); 1465 - return kvm_emulate_pio(&svm->vcpu, in, size, port); 1327 + 1328 + return kvm_fast_pio_out(vcpu, size, port); 1466 1329 } 1467 1330 1468 1331 static int nmi_interception(struct vcpu_svm *svm) ··· 1509 1384 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 1510 1385 bool has_error_code, u32 error_code) 1511 1386 { 1387 + int vmexit; 1388 + 1512 1389 if (!is_nested(svm)) 1513 1390 return 0; 1514 1391 ··· 1519 1392 svm->vmcb->control.exit_info_1 = error_code; 1520 1393 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; 1521 1394 1522 - return nested_svm_exit_handled(svm); 1395 + vmexit = nested_svm_intercept(svm); 1396 + if (vmexit == NESTED_EXIT_DONE) 1397 + svm->nested.exit_required = true; 1398 + 1399 + return vmexit; 1523 1400 } 1524 1401 1525 - static inline int nested_svm_intr(struct vcpu_svm *svm) 1402 + /* This function returns true if it is save to enable the irq window */ 1403 + static inline bool nested_svm_intr(struct vcpu_svm *svm) 1526 1404 { 1527 1405 if 
(!is_nested(svm)) 1528 - return 0; 1406 + return true; 1529 1407 1530 1408 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) 1531 - return 0; 1409 + return true; 1532 1410 1533 1411 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) 1534 - return 0; 1412 + return false; 1535 1413 1536 - svm->vmcb->control.exit_code = SVM_EXIT_INTR; 1414 + svm->vmcb->control.exit_code = SVM_EXIT_INTR; 1415 + svm->vmcb->control.exit_info_1 = 0; 1416 + svm->vmcb->control.exit_info_2 = 0; 1537 1417 1538 1418 if (svm->nested.intercept & 1ULL) { 1539 1419 /* ··· 1551 1417 */ 1552 1418 svm->nested.exit_required = true; 1553 1419 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); 1554 - return 1; 1420 + return false; 1555 1421 } 1556 1422 1557 - return 0; 1423 + return true; 1558 1424 } 1559 1425 1560 - static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) 1426 + /* This function returns true if it is save to enable the nmi window */ 1427 + static inline bool nested_svm_nmi(struct vcpu_svm *svm) 1428 + { 1429 + if (!is_nested(svm)) 1430 + return true; 1431 + 1432 + if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) 1433 + return true; 1434 + 1435 + svm->vmcb->control.exit_code = SVM_EXIT_NMI; 1436 + svm->nested.exit_required = true; 1437 + 1438 + return false; 1439 + } 1440 + 1441 + static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) 1561 1442 { 1562 1443 struct page *page; 1444 + 1445 + might_sleep(); 1563 1446 1564 1447 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); 1565 1448 if (is_error_page(page)) 1566 1449 goto error; 1567 1450 1568 - return kmap_atomic(page, idx); 1451 + *_page = page; 1452 + 1453 + return kmap(page); 1569 1454 1570 1455 error: 1571 1456 kvm_release_page_clean(page); ··· 1593 1440 return NULL; 1594 1441 } 1595 1442 1596 - static void nested_svm_unmap(void *addr, enum km_type idx) 1443 + static void nested_svm_unmap(struct page *page) 1597 1444 { 1598 - struct page *page; 1599 - 1600 - if (!addr) 1601 - return; 1602 
- 1603 - page = kmap_atomic_to_page(addr); 1604 - 1605 - kunmap_atomic(addr, idx); 1445 + kunmap(page); 1606 1446 kvm_release_page_dirty(page); 1607 1447 } 1608 1448 1609 - static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) 1449 + static int nested_svm_intercept_ioio(struct vcpu_svm *svm) 1610 1450 { 1611 - u32 param = svm->vmcb->control.exit_info_1 & 1; 1612 - u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 1613 - bool ret = false; 1614 - u32 t0, t1; 1615 - u8 *msrpm; 1451 + unsigned port; 1452 + u8 val, bit; 1453 + u64 gpa; 1454 + 1455 + if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) 1456 + return NESTED_EXIT_HOST; 1457 + 1458 + port = svm->vmcb->control.exit_info_1 >> 16; 1459 + gpa = svm->nested.vmcb_iopm + (port / 8); 1460 + bit = port % 8; 1461 + val = 0; 1462 + 1463 + if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) 1464 + val &= (1 << bit); 1465 + 1466 + return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; 1467 + } 1468 + 1469 + static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) 1470 + { 1471 + u32 offset, msr, value; 1472 + int write, mask; 1616 1473 1617 1474 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) 1618 - return false; 1475 + return NESTED_EXIT_HOST; 1619 1476 1620 - msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); 1477 + msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 1478 + offset = svm_msrpm_offset(msr); 1479 + write = svm->vmcb->control.exit_info_1 & 1; 1480 + mask = 1 << ((2 * (msr & 0xf)) + write); 1621 1481 1622 - if (!msrpm) 1623 - goto out; 1482 + if (offset == MSR_INVALID) 1483 + return NESTED_EXIT_DONE; 1624 1484 1625 - switch (msr) { 1626 - case 0 ... 0x1fff: 1627 - t0 = (msr * 2) % 8; 1628 - t1 = msr / 8; 1629 - break; 1630 - case 0xc0000000 ... 0xc0001fff: 1631 - t0 = (8192 + msr - 0xc0000000) * 2; 1632 - t1 = (t0 / 8); 1633 - t0 %= 8; 1634 - break; 1635 - case 0xc0010000 ... 
0xc0011fff: 1636 - t0 = (16384 + msr - 0xc0010000) * 2; 1637 - t1 = (t0 / 8); 1638 - t0 %= 8; 1639 - break; 1640 - default: 1641 - ret = true; 1642 - goto out; 1643 - } 1485 + /* Offset is in 32 bit units but need in 8 bit units */ 1486 + offset *= 4; 1644 1487 1645 - ret = msrpm[t1] & ((1 << param) << t0); 1488 + if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) 1489 + return NESTED_EXIT_DONE; 1646 1490 1647 - out: 1648 - nested_svm_unmap(msrpm, KM_USER0); 1649 - 1650 - return ret; 1491 + return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; 1651 1492 } 1652 1493 1653 1494 static int nested_svm_exit_special(struct vcpu_svm *svm) ··· 1651 1504 switch (exit_code) { 1652 1505 case SVM_EXIT_INTR: 1653 1506 case SVM_EXIT_NMI: 1507 + case SVM_EXIT_EXCP_BASE + MC_VECTOR: 1654 1508 return NESTED_EXIT_HOST; 1655 - /* For now we are always handling NPFs when using them */ 1656 1509 case SVM_EXIT_NPF: 1510 + /* For now we are always handling NPFs when using them */ 1657 1511 if (npt_enabled) 1658 1512 return NESTED_EXIT_HOST; 1659 1513 break; 1660 - /* When we're shadowing, trap PFs */ 1661 1514 case SVM_EXIT_EXCP_BASE + PF_VECTOR: 1515 + /* When we're shadowing, trap PFs */ 1662 1516 if (!npt_enabled) 1663 1517 return NESTED_EXIT_HOST; 1518 + break; 1519 + case SVM_EXIT_EXCP_BASE + NM_VECTOR: 1520 + nm_interception(svm); 1664 1521 break; 1665 1522 default: 1666 1523 break; ··· 1676 1525 /* 1677 1526 * If this function returns true, this #vmexit was already handled 1678 1527 */ 1679 - static int nested_svm_exit_handled(struct vcpu_svm *svm) 1528 + static int nested_svm_intercept(struct vcpu_svm *svm) 1680 1529 { 1681 1530 u32 exit_code = svm->vmcb->control.exit_code; 1682 1531 int vmexit = NESTED_EXIT_HOST; ··· 1684 1533 switch (exit_code) { 1685 1534 case SVM_EXIT_MSR: 1686 1535 vmexit = nested_svm_exit_handled_msr(svm); 1536 + break; 1537 + case SVM_EXIT_IOIO: 1538 + vmexit = nested_svm_intercept_ioio(svm); 1687 1539 break; 1688 1540 
case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { 1689 1541 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); ··· 1718 1564 vmexit = NESTED_EXIT_DONE; 1719 1565 break; 1720 1566 } 1567 + case SVM_EXIT_ERR: { 1568 + vmexit = NESTED_EXIT_DONE; 1569 + break; 1570 + } 1721 1571 default: { 1722 1572 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); 1723 1573 if (svm->nested.intercept & exit_bits) ··· 1729 1571 } 1730 1572 } 1731 1573 1732 - if (vmexit == NESTED_EXIT_DONE) { 1574 + return vmexit; 1575 + } 1576 + 1577 + static int nested_svm_exit_handled(struct vcpu_svm *svm) 1578 + { 1579 + int vmexit; 1580 + 1581 + vmexit = nested_svm_intercept(svm); 1582 + 1583 + if (vmexit == NESTED_EXIT_DONE) 1733 1584 nested_svm_vmexit(svm); 1734 - } 1735 1585 1736 1586 return vmexit; 1737 1587 } ··· 1781 1615 struct vmcb *nested_vmcb; 1782 1616 struct vmcb *hsave = svm->nested.hsave; 1783 1617 struct vmcb *vmcb = svm->vmcb; 1618 + struct page *page; 1784 1619 1785 1620 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, 1786 1621 vmcb->control.exit_info_1, ··· 1789 1622 vmcb->control.exit_int_info, 1790 1623 vmcb->control.exit_int_info_err); 1791 1624 1792 - nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); 1625 + nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); 1793 1626 if (!nested_vmcb) 1794 1627 return 1; 1628 + 1629 + /* Exit nested SVM mode */ 1630 + svm->nested.vmcb = 0; 1795 1631 1796 1632 /* Give the current vmcb to the guest */ 1797 1633 disable_gif(svm); ··· 1805 1635 nested_vmcb->save.ds = vmcb->save.ds; 1806 1636 nested_vmcb->save.gdtr = vmcb->save.gdtr; 1807 1637 nested_vmcb->save.idtr = vmcb->save.idtr; 1808 - if (npt_enabled) 1809 - nested_vmcb->save.cr3 = vmcb->save.cr3; 1638 + nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); 1639 + nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; 1810 1640 nested_vmcb->save.cr2 = vmcb->save.cr2; 1641 + nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; 1811 1642 nested_vmcb->save.rflags = vmcb->save.rflags; 
1812 1643 nested_vmcb->save.rip = vmcb->save.rip; 1813 1644 nested_vmcb->save.rsp = vmcb->save.rsp; ··· 1880 1709 svm->vmcb->save.cpl = 0; 1881 1710 svm->vmcb->control.exit_int_info = 0; 1882 1711 1883 - /* Exit nested SVM mode */ 1884 - svm->nested.vmcb = 0; 1885 - 1886 - nested_svm_unmap(nested_vmcb, KM_USER0); 1712 + nested_svm_unmap(page); 1887 1713 1888 1714 kvm_mmu_reset_context(&svm->vcpu); 1889 1715 kvm_mmu_load(&svm->vcpu); ··· 1890 1722 1891 1723 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) 1892 1724 { 1893 - u32 *nested_msrpm; 1725 + /* 1726 + * This function merges the msr permission bitmaps of kvm and the 1727 + * nested vmcb. It is omptimized in that it only merges the parts where 1728 + * the kvm msr permission bitmap may contain zero bits 1729 + */ 1894 1730 int i; 1895 1731 1896 - nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); 1897 - if (!nested_msrpm) 1898 - return false; 1732 + if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) 1733 + return true; 1899 1734 1900 - for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) 1901 - svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; 1735 + for (i = 0; i < MSRPM_OFFSETS; i++) { 1736 + u32 value, p; 1737 + u64 offset; 1738 + 1739 + if (msrpm_offsets[i] == 0xffffffff) 1740 + break; 1741 + 1742 + p = msrpm_offsets[i]; 1743 + offset = svm->nested.vmcb_msrpm + (p * 4); 1744 + 1745 + if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) 1746 + return false; 1747 + 1748 + svm->nested.msrpm[p] = svm->msrpm[p] | value; 1749 + } 1902 1750 1903 1751 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); 1904 - 1905 - nested_svm_unmap(nested_msrpm, KM_USER0); 1906 1752 1907 1753 return true; 1908 1754 } ··· 1926 1744 struct vmcb *nested_vmcb; 1927 1745 struct vmcb *hsave = svm->nested.hsave; 1928 1746 struct vmcb *vmcb = svm->vmcb; 1747 + struct page *page; 1748 + u64 vmcb_gpa; 1929 1749 1930 - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, 
KM_USER0); 1750 + vmcb_gpa = svm->vmcb->save.rax; 1751 + 1752 + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 1931 1753 if (!nested_vmcb) 1932 1754 return false; 1933 1755 1934 - /* nested_vmcb is our indicator if nested SVM is activated */ 1935 - svm->nested.vmcb = svm->vmcb->save.rax; 1936 - 1937 - trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, 1756 + trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, 1938 1757 nested_vmcb->save.rip, 1939 1758 nested_vmcb->control.int_ctl, 1940 1759 nested_vmcb->control.event_inj, 1941 1760 nested_vmcb->control.nested_ctl); 1942 1761 1762 + trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, 1763 + nested_vmcb->control.intercept_cr_write, 1764 + nested_vmcb->control.intercept_exceptions, 1765 + nested_vmcb->control.intercept); 1766 + 1943 1767 /* Clear internal status */ 1944 1768 kvm_clear_exception_queue(&svm->vcpu); 1945 1769 kvm_clear_interrupt_queue(&svm->vcpu); 1946 1770 1947 - /* Save the old vmcb, so we don't need to pick what we save, but 1948 - can restore everything when a VMEXIT occurs */ 1771 + /* 1772 + * Save the old vmcb, so we don't need to pick what we save, but can 1773 + * restore everything when a VMEXIT occurs 1774 + */ 1949 1775 hsave->save.es = vmcb->save.es; 1950 1776 hsave->save.cs = vmcb->save.cs; 1951 1777 hsave->save.ss = vmcb->save.ss; ··· 1993 1803 if (npt_enabled) { 1994 1804 svm->vmcb->save.cr3 = nested_vmcb->save.cr3; 1995 1805 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; 1996 - } else { 1806 + } else 1997 1807 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); 1998 - kvm_mmu_reset_context(&svm->vcpu); 1999 - } 1808 + 1809 + /* Guest paging mode is active - reset mmu */ 1810 + kvm_mmu_reset_context(&svm->vcpu); 1811 + 2000 1812 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; 2001 1813 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); 2002 1814 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, 
nested_vmcb->save.rsp); 2003 1815 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); 1816 + 2004 1817 /* In case we don't even reach vcpu_run, the fields are not updated */ 2005 1818 svm->vmcb->save.rax = nested_vmcb->save.rax; 2006 1819 svm->vmcb->save.rsp = nested_vmcb->save.rsp; ··· 2012 1819 svm->vmcb->save.dr6 = nested_vmcb->save.dr6; 2013 1820 svm->vmcb->save.cpl = nested_vmcb->save.cpl; 2014 1821 2015 - /* We don't want a nested guest to be more powerful than the guest, 2016 - so all intercepts are ORed */ 2017 - svm->vmcb->control.intercept_cr_read |= 2018 - nested_vmcb->control.intercept_cr_read; 2019 - svm->vmcb->control.intercept_cr_write |= 2020 - nested_vmcb->control.intercept_cr_write; 2021 - svm->vmcb->control.intercept_dr_read |= 2022 - nested_vmcb->control.intercept_dr_read; 2023 - svm->vmcb->control.intercept_dr_write |= 2024 - nested_vmcb->control.intercept_dr_write; 2025 - svm->vmcb->control.intercept_exceptions |= 2026 - nested_vmcb->control.intercept_exceptions; 2027 - 2028 - svm->vmcb->control.intercept |= nested_vmcb->control.intercept; 2029 - 2030 - svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; 1822 + svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; 1823 + svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; 2031 1824 2032 1825 /* cache intercepts */ 2033 1826 svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; ··· 2030 1851 else 2031 1852 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; 2032 1853 1854 + if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { 1855 + /* We only want the cr8 intercept bits of the guest */ 1856 + svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; 1857 + svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; 1858 + } 1859 + 1860 + /* We don't want to see VMMCALLs from a nested guest */ 1861 + svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); 1862 + 1863 + /* 1864 + * We don't want a nested guest to be 
more powerful than the guest, so 1865 + * all intercepts are ORed 1866 + */ 1867 + svm->vmcb->control.intercept_cr_read |= 1868 + nested_vmcb->control.intercept_cr_read; 1869 + svm->vmcb->control.intercept_cr_write |= 1870 + nested_vmcb->control.intercept_cr_write; 1871 + svm->vmcb->control.intercept_dr_read |= 1872 + nested_vmcb->control.intercept_dr_read; 1873 + svm->vmcb->control.intercept_dr_write |= 1874 + nested_vmcb->control.intercept_dr_write; 1875 + svm->vmcb->control.intercept_exceptions |= 1876 + nested_vmcb->control.intercept_exceptions; 1877 + 1878 + svm->vmcb->control.intercept |= nested_vmcb->control.intercept; 1879 + 1880 + svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; 2033 1881 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; 2034 1882 svm->vmcb->control.int_state = nested_vmcb->control.int_state; 2035 1883 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; 2036 1884 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; 2037 1885 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; 2038 1886 2039 - nested_svm_unmap(nested_vmcb, KM_USER0); 1887 + nested_svm_unmap(page); 1888 + 1889 + /* nested_vmcb is our indicator if nested SVM is activated */ 1890 + svm->nested.vmcb = vmcb_gpa; 2040 1891 2041 1892 enable_gif(svm); 2042 1893 ··· 2092 1883 static int vmload_interception(struct vcpu_svm *svm) 2093 1884 { 2094 1885 struct vmcb *nested_vmcb; 1886 + struct page *page; 2095 1887 2096 1888 if (nested_svm_check_permissions(svm)) 2097 1889 return 1; ··· 2100 1890 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2101 1891 skip_emulated_instruction(&svm->vcpu); 2102 1892 2103 - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); 1893 + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2104 1894 if (!nested_vmcb) 2105 1895 return 1; 2106 1896 2107 1897 nested_svm_vmloadsave(nested_vmcb, svm->vmcb); 2108 - nested_svm_unmap(nested_vmcb, KM_USER0); 1898 + 
nested_svm_unmap(page); 2109 1899 2110 1900 return 1; 2111 1901 } ··· 2113 1903 static int vmsave_interception(struct vcpu_svm *svm) 2114 1904 { 2115 1905 struct vmcb *nested_vmcb; 1906 + struct page *page; 2116 1907 2117 1908 if (nested_svm_check_permissions(svm)) 2118 1909 return 1; ··· 2121 1910 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2122 1911 skip_emulated_instruction(&svm->vcpu); 2123 1912 2124 - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); 1913 + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2125 1914 if (!nested_vmcb) 2126 1915 return 1; 2127 1916 2128 1917 nested_svm_vmloadsave(svm->vmcb, nested_vmcb); 2129 - nested_svm_unmap(nested_vmcb, KM_USER0); 1918 + nested_svm_unmap(page); 2130 1919 2131 1920 return 1; 2132 1921 } ··· 2229 2018 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; 2230 2019 uint32_t idt_v = 2231 2020 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; 2021 + bool has_error_code = false; 2022 + u32 error_code = 0; 2232 2023 2233 2024 tss_selector = (u16)svm->vmcb->control.exit_info_1; 2234 2025 ··· 2251 2038 svm->vcpu.arch.nmi_injected = false; 2252 2039 break; 2253 2040 case SVM_EXITINTINFO_TYPE_EXEPT: 2041 + if (svm->vmcb->control.exit_info_2 & 2042 + (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { 2043 + has_error_code = true; 2044 + error_code = 2045 + (u32)svm->vmcb->control.exit_info_2; 2046 + } 2254 2047 kvm_clear_exception_queue(&svm->vcpu); 2255 2048 break; 2256 2049 case SVM_EXITINTINFO_TYPE_INTR: ··· 2273 2054 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) 2274 2055 skip_emulated_instruction(&svm->vcpu); 2275 2056 2276 - return kvm_task_switch(&svm->vcpu, tss_selector, reason); 2057 + if (kvm_task_switch(&svm->vcpu, tss_selector, reason, 2058 + has_error_code, error_code) == EMULATE_FAIL) { 2059 + svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 2060 + svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 2061 + svm->vcpu.run->internal.ndata = 0; 
2062 + return 0; 2063 + } 2064 + return 1; 2277 2065 } 2278 2066 2279 2067 static int cpuid_interception(struct vcpu_svm *svm) ··· 2371 2145 case MSR_IA32_SYSENTER_ESP: 2372 2146 *data = svm->sysenter_esp; 2373 2147 break; 2374 - /* Nobody will change the following 5 values in the VMCB so 2375 - we can safely return them on rdmsr. They will always be 0 2376 - until LBRV is implemented. */ 2148 + /* 2149 + * Nobody will change the following 5 values in the VMCB so we can 2150 + * safely return them on rdmsr. They will always be 0 until LBRV is 2151 + * implemented. 2152 + */ 2377 2153 case MSR_IA32_DEBUGCTLMSR: 2378 2154 *data = svm->vmcb->save.dbgctl; 2379 2155 break; ··· 2395 2167 *data = svm->nested.hsave_msr; 2396 2168 break; 2397 2169 case MSR_VM_CR: 2398 - *data = 0; 2170 + *data = svm->nested.vm_cr_msr; 2399 2171 break; 2400 2172 case MSR_IA32_UCODE_REV: 2401 2173 *data = 0x01000065; ··· 2423 2195 skip_emulated_instruction(&svm->vcpu); 2424 2196 } 2425 2197 return 1; 2198 + } 2199 + 2200 + static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) 2201 + { 2202 + struct vcpu_svm *svm = to_svm(vcpu); 2203 + int svm_dis, chg_mask; 2204 + 2205 + if (data & ~SVM_VM_CR_VALID_MASK) 2206 + return 1; 2207 + 2208 + chg_mask = SVM_VM_CR_VALID_MASK; 2209 + 2210 + if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) 2211 + chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); 2212 + 2213 + svm->nested.vm_cr_msr &= ~chg_mask; 2214 + svm->nested.vm_cr_msr |= (data & chg_mask); 2215 + 2216 + svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; 2217 + 2218 + /* check for svm_disable while efer.svme is set */ 2219 + if (svm_dis && (vcpu->arch.efer & EFER_SVME)) 2220 + return 1; 2221 + 2222 + return 0; 2426 2223 } 2427 2224 2428 2225 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) ··· 2516 2263 svm->nested.hsave_msr = data; 2517 2264 break; 2518 2265 case MSR_VM_CR: 2266 + return svm_set_vm_cr(vcpu, data); 2519 2267 case MSR_VM_IGNNE: 2520 
2268 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 2521 2269 break; ··· 2580 2326 } 2581 2327 2582 2328 static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { 2583 - [SVM_EXIT_READ_CR0] = emulate_on_interception, 2584 - [SVM_EXIT_READ_CR3] = emulate_on_interception, 2585 - [SVM_EXIT_READ_CR4] = emulate_on_interception, 2586 - [SVM_EXIT_READ_CR8] = emulate_on_interception, 2329 + [SVM_EXIT_READ_CR0] = emulate_on_interception, 2330 + [SVM_EXIT_READ_CR3] = emulate_on_interception, 2331 + [SVM_EXIT_READ_CR4] = emulate_on_interception, 2332 + [SVM_EXIT_READ_CR8] = emulate_on_interception, 2587 2333 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, 2588 - [SVM_EXIT_WRITE_CR0] = emulate_on_interception, 2589 - [SVM_EXIT_WRITE_CR3] = emulate_on_interception, 2590 - [SVM_EXIT_WRITE_CR4] = emulate_on_interception, 2591 - [SVM_EXIT_WRITE_CR8] = cr8_write_interception, 2592 - [SVM_EXIT_READ_DR0] = emulate_on_interception, 2334 + [SVM_EXIT_WRITE_CR0] = emulate_on_interception, 2335 + [SVM_EXIT_WRITE_CR3] = emulate_on_interception, 2336 + [SVM_EXIT_WRITE_CR4] = emulate_on_interception, 2337 + [SVM_EXIT_WRITE_CR8] = cr8_write_interception, 2338 + [SVM_EXIT_READ_DR0] = emulate_on_interception, 2593 2339 [SVM_EXIT_READ_DR1] = emulate_on_interception, 2594 2340 [SVM_EXIT_READ_DR2] = emulate_on_interception, 2595 2341 [SVM_EXIT_READ_DR3] = emulate_on_interception, ··· 2608 2354 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, 2609 2355 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, 2610 2356 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 2611 - [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 2612 - [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 2613 - [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, 2614 - [SVM_EXIT_INTR] = intr_interception, 2357 + [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 2358 + [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 2359 + [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, 2360 + 
[SVM_EXIT_INTR] = intr_interception, 2615 2361 [SVM_EXIT_NMI] = nmi_interception, 2616 2362 [SVM_EXIT_SMI] = nop_on_interception, 2617 2363 [SVM_EXIT_INIT] = nop_on_interception, 2618 2364 [SVM_EXIT_VINTR] = interrupt_window_interception, 2619 - /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ 2620 2365 [SVM_EXIT_CPUID] = cpuid_interception, 2621 2366 [SVM_EXIT_IRET] = iret_interception, 2622 2367 [SVM_EXIT_INVD] = emulate_on_interception, ··· 2623 2370 [SVM_EXIT_HLT] = halt_interception, 2624 2371 [SVM_EXIT_INVLPG] = invlpg_interception, 2625 2372 [SVM_EXIT_INVLPGA] = invlpga_interception, 2626 - [SVM_EXIT_IOIO] = io_interception, 2373 + [SVM_EXIT_IOIO] = io_interception, 2627 2374 [SVM_EXIT_MSR] = msr_interception, 2628 2375 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 2629 2376 [SVM_EXIT_SHUTDOWN] = shutdown_interception, ··· 2646 2393 struct kvm_run *kvm_run = vcpu->run; 2647 2394 u32 exit_code = svm->vmcb->control.exit_code; 2648 2395 2649 - trace_kvm_exit(exit_code, svm->vmcb->save.rip); 2396 + trace_kvm_exit(exit_code, vcpu); 2397 + 2398 + if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) 2399 + vcpu->arch.cr0 = svm->vmcb->save.cr0; 2400 + if (npt_enabled) 2401 + vcpu->arch.cr3 = svm->vmcb->save.cr3; 2650 2402 2651 2403 if (unlikely(svm->nested.exit_required)) { 2652 2404 nested_svm_vmexit(svm); ··· 2679 2421 } 2680 2422 2681 2423 svm_complete_interrupts(svm); 2682 - 2683 - if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) 2684 - vcpu->arch.cr0 = svm->vmcb->save.cr0; 2685 - if (npt_enabled) 2686 - vcpu->arch.cr3 = svm->vmcb->save.cr3; 2687 2424 2688 2425 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 2689 2426 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; ··· 2764 2511 { 2765 2512 struct vcpu_svm *svm = to_svm(vcpu); 2766 2513 2514 + if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) 2515 + return; 2516 + 2767 2517 if (irr == -1) 2768 2518 return; 2769 2519 ··· 2778 2522 { 2779 2523 struct vcpu_svm 
*svm = to_svm(vcpu); 2780 2524 struct vmcb *vmcb = svm->vmcb; 2781 - return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && 2782 - !(svm->vcpu.arch.hflags & HF_NMI_MASK); 2525 + int ret; 2526 + ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && 2527 + !(svm->vcpu.arch.hflags & HF_NMI_MASK); 2528 + ret = ret && gif_set(svm) && nested_svm_nmi(svm); 2529 + 2530 + return ret; 2783 2531 } 2784 2532 2785 2533 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) ··· 2828 2568 { 2829 2569 struct vcpu_svm *svm = to_svm(vcpu); 2830 2570 2831 - nested_svm_intr(svm); 2832 - 2833 - /* In case GIF=0 we can't rely on the CPU to tell us when 2834 - * GIF becomes 1, because that's a separate STGI/VMRUN intercept. 2835 - * The next time we get that intercept, this function will be 2836 - * called again though and we'll get the vintr intercept. */ 2837 - if (gif_set(svm)) { 2571 + /* 2572 + * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes 2573 + * 1, because that's a separate STGI/VMRUN intercept. The next time we 2574 + * get that intercept, this function will be called again though and 2575 + * we'll get the vintr intercept. 2576 + */ 2577 + if (gif_set(svm) && nested_svm_intr(svm)) { 2838 2578 svm_set_vintr(svm); 2839 2579 svm_inject_irq(svm, 0x0); 2840 2580 } ··· 2848 2588 == HF_NMI_MASK) 2849 2589 return; /* IRET will cause a vm exit */ 2850 2590 2851 - /* Something prevents NMI from been injected. Single step over 2852 - possible problem (IRET or exception injection or interrupt 2853 - shadow) */ 2591 + /* 2592 + * Something prevents NMI from been injected. 
Single step over possible 2593 + * problem (IRET or exception injection or interrupt shadow) 2594 + */ 2854 2595 svm->nmi_singlestep = true; 2855 2596 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); 2856 2597 update_db_intercept(vcpu); ··· 2875 2614 { 2876 2615 struct vcpu_svm *svm = to_svm(vcpu); 2877 2616 2617 + if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) 2618 + return; 2619 + 2878 2620 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { 2879 2621 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; 2880 2622 kvm_set_cr8(vcpu, cr8); ··· 2889 2625 struct vcpu_svm *svm = to_svm(vcpu); 2890 2626 u64 cr8; 2891 2627 2628 + if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) 2629 + return; 2630 + 2892 2631 cr8 = kvm_get_cr8(vcpu); 2893 2632 svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 2894 2633 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; ··· 2902 2635 u8 vector; 2903 2636 int type; 2904 2637 u32 exitintinfo = svm->vmcb->control.exit_int_info; 2638 + unsigned int3_injected = svm->int3_injected; 2639 + 2640 + svm->int3_injected = 0; 2905 2641 2906 2642 if (svm->vcpu.arch.hflags & HF_IRET_MASK) 2907 2643 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); ··· 2924 2654 svm->vcpu.arch.nmi_injected = true; 2925 2655 break; 2926 2656 case SVM_EXITINTINFO_TYPE_EXEPT: 2927 - /* In case of software exception do not reinject an exception 2928 - vector, but re-execute and instruction instead */ 2929 - if (is_nested(svm)) 2657 + /* 2658 + * In case of software exceptions, do not reinject the vector, 2659 + * but re-execute the instruction instead. Rewind RIP first 2660 + * if we emulated INT3 before. 
2661 + */ 2662 + if (kvm_exception_is_soft(vector)) { 2663 + if (vector == BP_VECTOR && int3_injected && 2664 + kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) 2665 + kvm_rip_write(&svm->vcpu, 2666 + kvm_rip_read(&svm->vcpu) - 2667 + int3_injected); 2930 2668 break; 2931 - if (kvm_exception_is_soft(vector)) 2932 - break; 2669 + } 2933 2670 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { 2934 2671 u32 err = svm->vmcb->control.exit_int_info_err; 2935 - kvm_queue_exception_e(&svm->vcpu, vector, err); 2672 + kvm_requeue_exception_e(&svm->vcpu, vector, err); 2936 2673 2937 2674 } else 2938 - kvm_queue_exception(&svm->vcpu, vector); 2675 + kvm_requeue_exception(&svm->vcpu, vector); 2939 2676 break; 2940 2677 case SVM_EXITINTINFO_TYPE_INTR: 2941 2678 kvm_queue_interrupt(&svm->vcpu, vector, false); ··· 2965 2688 u16 gs_selector; 2966 2689 u16 ldt_selector; 2967 2690 2691 + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; 2692 + svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; 2693 + svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; 2694 + 2968 2695 /* 2969 2696 * A vmexit emulation is required before the vcpu can be executed 2970 2697 * again. 
2971 2698 */ 2972 2699 if (unlikely(svm->nested.exit_required)) 2973 2700 return; 2974 - 2975 - svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; 2976 - svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; 2977 - svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; 2978 2701 2979 2702 pre_svm_run(svm); 2980 2703 ··· 3156 2879 { 3157 2880 } 3158 2881 2882 + static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) 2883 + { 2884 + switch (func) { 2885 + case 0x8000000A: 2886 + entry->eax = 1; /* SVM revision 1 */ 2887 + entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper 2888 + ASID emulation to nested SVM */ 2889 + entry->ecx = 0; /* Reserved */ 2890 + entry->edx = 0; /* Do not support any additional features */ 2891 + 2892 + break; 2893 + } 2894 + } 2895 + 3159 2896 static const struct trace_print_flags svm_exit_reasons_str[] = { 3160 - { SVM_EXIT_READ_CR0, "read_cr0" }, 3161 - { SVM_EXIT_READ_CR3, "read_cr3" }, 3162 - { SVM_EXIT_READ_CR4, "read_cr4" }, 3163 - { SVM_EXIT_READ_CR8, "read_cr8" }, 3164 - { SVM_EXIT_WRITE_CR0, "write_cr0" }, 3165 - { SVM_EXIT_WRITE_CR3, "write_cr3" }, 3166 - { SVM_EXIT_WRITE_CR4, "write_cr4" }, 3167 - { SVM_EXIT_WRITE_CR8, "write_cr8" }, 3168 - { SVM_EXIT_READ_DR0, "read_dr0" }, 3169 - { SVM_EXIT_READ_DR1, "read_dr1" }, 3170 - { SVM_EXIT_READ_DR2, "read_dr2" }, 3171 - { SVM_EXIT_READ_DR3, "read_dr3" }, 3172 - { SVM_EXIT_WRITE_DR0, "write_dr0" }, 3173 - { SVM_EXIT_WRITE_DR1, "write_dr1" }, 3174 - { SVM_EXIT_WRITE_DR2, "write_dr2" }, 3175 - { SVM_EXIT_WRITE_DR3, "write_dr3" }, 3176 - { SVM_EXIT_WRITE_DR5, "write_dr5" }, 3177 - { SVM_EXIT_WRITE_DR7, "write_dr7" }, 2897 + { SVM_EXIT_READ_CR0, "read_cr0" }, 2898 + { SVM_EXIT_READ_CR3, "read_cr3" }, 2899 + { SVM_EXIT_READ_CR4, "read_cr4" }, 2900 + { SVM_EXIT_READ_CR8, "read_cr8" }, 2901 + { SVM_EXIT_WRITE_CR0, "write_cr0" }, 2902 + { SVM_EXIT_WRITE_CR3, "write_cr3" }, 2903 + { SVM_EXIT_WRITE_CR4, "write_cr4" }, 2904 + { SVM_EXIT_WRITE_CR8, "write_cr8" }, 
2905 + { SVM_EXIT_READ_DR0, "read_dr0" }, 2906 + { SVM_EXIT_READ_DR1, "read_dr1" }, 2907 + { SVM_EXIT_READ_DR2, "read_dr2" }, 2908 + { SVM_EXIT_READ_DR3, "read_dr3" }, 2909 + { SVM_EXIT_WRITE_DR0, "write_dr0" }, 2910 + { SVM_EXIT_WRITE_DR1, "write_dr1" }, 2911 + { SVM_EXIT_WRITE_DR2, "write_dr2" }, 2912 + { SVM_EXIT_WRITE_DR3, "write_dr3" }, 2913 + { SVM_EXIT_WRITE_DR5, "write_dr5" }, 2914 + { SVM_EXIT_WRITE_DR7, "write_dr7" }, 3178 2915 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, 3179 2916 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, 3180 2917 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, ··· 3237 2946 { 3238 2947 struct vcpu_svm *svm = to_svm(vcpu); 3239 2948 3240 - update_cr0_intercept(svm); 3241 2949 svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; 2950 + if (is_nested(svm)) 2951 + svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; 2952 + update_cr0_intercept(svm); 3242 2953 } 3243 2954 3244 2955 static struct kvm_x86_ops svm_x86_ops = { ··· 3279 2986 .set_idt = svm_set_idt, 3280 2987 .get_gdt = svm_get_gdt, 3281 2988 .set_gdt = svm_set_gdt, 3282 - .get_dr = svm_get_dr, 3283 - .set_dr = svm_set_dr, 2989 + .set_dr7 = svm_set_dr7, 3284 2990 .cache_reg = svm_cache_reg, 3285 2991 .get_rflags = svm_get_rflags, 3286 2992 .set_rflags = svm_set_rflags, ··· 3315 3023 .cpuid_update = svm_cpuid_update, 3316 3024 3317 3025 .rdtscp_supported = svm_rdtscp_supported, 3026 + 3027 + .set_supported_cpuid = svm_set_supported_cpuid, 3318 3028 }; 3319 3029 3320 3030 static int __init svm_init(void) 3321 3031 { 3322 3032 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), 3323 - THIS_MODULE); 3033 + __alignof__(struct vcpu_svm), THIS_MODULE); 3324 3034 } 3325 3035 3326 3036 static void __exit svm_exit(void)
+2 -1
arch/x86/kvm/timer.c
··· 12 12 /* 13 13 * There is a race window between reading and incrementing, but we do 14 14 * not care about potentially loosing timer events in the !reinject 15 - * case anyway. 15 + * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked 16 + * in vcpu_enter_guest. 16 17 */ 17 18 if (ktimer->reinject || !atomic_read(&ktimer->pending)) { 18 19 atomic_inc(&ktimer->pending);
+154 -11
arch/x86/kvm/trace.h
··· 5 5 6 6 #undef TRACE_SYSTEM 7 7 #define TRACE_SYSTEM kvm 8 - #define TRACE_INCLUDE_PATH arch/x86/kvm 9 - #define TRACE_INCLUDE_FILE trace 10 8 11 9 /* 12 10 * Tracepoint for guest mode entry. ··· 182 184 * Tracepoint for kvm guest exit: 183 185 */ 184 186 TRACE_EVENT(kvm_exit, 185 - TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), 186 - TP_ARGS(exit_reason, guest_rip), 187 + TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), 188 + TP_ARGS(exit_reason, vcpu), 187 189 188 190 TP_STRUCT__entry( 189 191 __field( unsigned int, exit_reason ) ··· 192 194 193 195 TP_fast_assign( 194 196 __entry->exit_reason = exit_reason; 195 - __entry->guest_rip = guest_rip; 197 + __entry->guest_rip = kvm_rip_read(vcpu); 196 198 ), 197 199 198 200 TP_printk("reason %s rip 0x%lx", ··· 217 219 ), 218 220 219 221 TP_printk("irq %u", __entry->irq) 222 + ); 223 + 224 + #define EXS(x) { x##_VECTOR, "#" #x } 225 + 226 + #define kvm_trace_sym_exc \ 227 + EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ 228 + EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ 229 + EXS(MF), EXS(MC) 230 + 231 + /* 232 + * Tracepoint for kvm interrupt injection: 233 + */ 234 + TRACE_EVENT(kvm_inj_exception, 235 + TP_PROTO(unsigned exception, bool has_error, unsigned error_code), 236 + TP_ARGS(exception, has_error, error_code), 237 + 238 + TP_STRUCT__entry( 239 + __field( u8, exception ) 240 + __field( u8, has_error ) 241 + __field( u32, error_code ) 242 + ), 243 + 244 + TP_fast_assign( 245 + __entry->exception = exception; 246 + __entry->has_error = has_error; 247 + __entry->error_code = error_code; 248 + ), 249 + 250 + TP_printk("%s (0x%x)", 251 + __print_symbolic(__entry->exception, kvm_trace_sym_exc), 252 + /* FIXME: don't print error_code if not present */ 253 + __entry->has_error ? 
__entry->error_code : 0) 220 254 ); 221 255 222 256 /* ··· 443 413 ), 444 414 445 415 TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " 446 - "event_inj: 0x%08x npt: %s\n", 416 + "event_inj: 0x%08x npt: %s", 447 417 __entry->rip, __entry->vmcb, __entry->nested_rip, 448 418 __entry->int_ctl, __entry->event_inj, 449 419 __entry->npt ? "on" : "off") 450 420 ); 451 421 422 + TRACE_EVENT(kvm_nested_intercepts, 423 + TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), 424 + TP_ARGS(cr_read, cr_write, exceptions, intercept), 425 + 426 + TP_STRUCT__entry( 427 + __field( __u16, cr_read ) 428 + __field( __u16, cr_write ) 429 + __field( __u32, exceptions ) 430 + __field( __u64, intercept ) 431 + ), 432 + 433 + TP_fast_assign( 434 + __entry->cr_read = cr_read; 435 + __entry->cr_write = cr_write; 436 + __entry->exceptions = exceptions; 437 + __entry->intercept = intercept; 438 + ), 439 + 440 + TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", 441 + __entry->cr_read, __entry->cr_write, __entry->exceptions, 442 + __entry->intercept) 443 + ); 452 444 /* 453 445 * Tracepoint for #VMEXIT while nested 454 446 */ ··· 499 447 __entry->exit_int_info_err = exit_int_info_err; 500 448 ), 501 449 TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " 502 - "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", 450 + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", 503 451 __entry->rip, 504 452 ftrace_print_symbols_seq(p, __entry->exit_code, 505 453 kvm_x86_ops->exit_reasons_str), ··· 534 482 ), 535 483 536 484 TP_printk("reason: %s ext_inf1: 0x%016llx " 537 - "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", 485 + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", 538 486 ftrace_print_symbols_seq(p, __entry->exit_code, 539 487 kvm_x86_ops->exit_reasons_str), 540 488 __entry->exit_info1, __entry->exit_info2, ··· 556 504 __entry->rip = rip 557 505 ), 558 506 559 - TP_printk("rip: 
0x%016llx\n", __entry->rip) 507 + TP_printk("rip: 0x%016llx", __entry->rip) 560 508 ); 561 509 562 510 /* ··· 578 526 __entry->address = address; 579 527 ), 580 528 581 - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", 529 + TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", 582 530 __entry->rip, __entry->asid, __entry->address) 583 531 ); 584 532 ··· 599 547 __entry->slb = slb; 600 548 ), 601 549 602 - TP_printk("rip: 0x%016llx slb: 0x%08x\n", 550 + TP_printk("rip: 0x%016llx slb: 0x%08x", 603 551 __entry->rip, __entry->slb) 604 552 ); 605 553 554 + #define __print_insn(insn, ilen) ({ \ 555 + int i; \ 556 + const char *ret = p->buffer + p->len; \ 557 + \ 558 + for (i = 0; i < ilen; ++i) \ 559 + trace_seq_printf(p, " %02x", insn[i]); \ 560 + trace_seq_printf(p, "%c", 0); \ 561 + ret; \ 562 + }) 563 + 564 + #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) 565 + #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) 566 + #define KVM_EMUL_INSN_F_CS_D (1 << 2) 567 + #define KVM_EMUL_INSN_F_CS_L (1 << 3) 568 + 569 + #define kvm_trace_symbol_emul_flags \ 570 + { 0, "real" }, \ 571 + { KVM_EMUL_INSN_F_CR0_PE \ 572 + | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ 573 + { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ 574 + { KVM_EMUL_INSN_F_CR0_PE \ 575 + | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ 576 + { KVM_EMUL_INSN_F_CR0_PE \ 577 + | KVM_EMUL_INSN_F_CS_L, "prot64" } 578 + 579 + #define kei_decode_mode(mode) ({ \ 580 + u8 flags = 0xff; \ 581 + switch (mode) { \ 582 + case X86EMUL_MODE_REAL: \ 583 + flags = 0; \ 584 + break; \ 585 + case X86EMUL_MODE_VM86: \ 586 + flags = KVM_EMUL_INSN_F_EFL_VM; \ 587 + break; \ 588 + case X86EMUL_MODE_PROT16: \ 589 + flags = KVM_EMUL_INSN_F_CR0_PE; \ 590 + break; \ 591 + case X86EMUL_MODE_PROT32: \ 592 + flags = KVM_EMUL_INSN_F_CR0_PE \ 593 + | KVM_EMUL_INSN_F_CS_D; \ 594 + break; \ 595 + case X86EMUL_MODE_PROT64: \ 596 + flags = KVM_EMUL_INSN_F_CR0_PE \ 597 + | KVM_EMUL_INSN_F_CS_L; \ 598 + break; \ 599 + } \ 600 + flags; \ 601 + }) 602 + 603 + 
TRACE_EVENT(kvm_emulate_insn, 604 + TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), 605 + TP_ARGS(vcpu, failed), 606 + 607 + TP_STRUCT__entry( 608 + __field( __u64, rip ) 609 + __field( __u32, csbase ) 610 + __field( __u8, len ) 611 + __array( __u8, insn, 15 ) 612 + __field( __u8, flags ) 613 + __field( __u8, failed ) 614 + ), 615 + 616 + TP_fast_assign( 617 + __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; 618 + __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); 619 + __entry->len = vcpu->arch.emulate_ctxt.decode.eip 620 + - vcpu->arch.emulate_ctxt.decode.fetch.start; 621 + memcpy(__entry->insn, 622 + vcpu->arch.emulate_ctxt.decode.fetch.data, 623 + 15); 624 + __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); 625 + __entry->failed = failed; 626 + ), 627 + 628 + TP_printk("%x:%llx:%s (%s)%s", 629 + __entry->csbase, __entry->rip, 630 + __print_insn(__entry->insn, __entry->len), 631 + __print_symbolic(__entry->flags, 632 + kvm_trace_symbol_emul_flags), 633 + __entry->failed ? " failed" : "" 634 + ) 635 + ); 636 + 637 + #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) 638 + #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) 639 + 606 640 #endif /* _TRACE_KVM_H */ 641 + 642 + #undef TRACE_INCLUDE_PATH 643 + #define TRACE_INCLUDE_PATH arch/x86/kvm 644 + #undef TRACE_INCLUDE_FILE 645 + #define TRACE_INCLUDE_FILE trace 607 646 608 647 /* This part must be outside protection */ 609 648 #include <trace/define_trace.h>
+230 -150
arch/x86/kvm/vmx.c
··· 27 27 #include <linux/moduleparam.h> 28 28 #include <linux/ftrace_event.h> 29 29 #include <linux/slab.h> 30 + #include <linux/tboot.h> 30 31 #include "kvm_cache_regs.h" 31 32 #include "x86.h" 32 33 ··· 99 98 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; 100 99 module_param(ple_window, int, S_IRUGO); 101 100 101 + #define NR_AUTOLOAD_MSRS 1 102 + 102 103 struct vmcs { 103 104 u32 revision_id; 104 105 u32 abort; ··· 128 125 u64 msr_guest_kernel_gs_base; 129 126 #endif 130 127 struct vmcs *vmcs; 128 + struct msr_autoload { 129 + unsigned nr; 130 + struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; 131 + struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; 132 + } msr_autoload; 131 133 struct { 132 134 int loaded; 133 135 u16 fs_sel, gs_sel, ldt_sel; ··· 242 234 }; 243 235 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) 244 236 245 - static inline int is_page_fault(u32 intr_info) 237 + static inline bool is_page_fault(u32 intr_info) 246 238 { 247 239 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 248 240 INTR_INFO_VALID_MASK)) == 249 241 (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); 250 242 } 251 243 252 - static inline int is_no_device(u32 intr_info) 244 + static inline bool is_no_device(u32 intr_info) 253 245 { 254 246 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 255 247 INTR_INFO_VALID_MASK)) == 256 248 (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); 257 249 } 258 250 259 - static inline int is_invalid_opcode(u32 intr_info) 251 + static inline bool is_invalid_opcode(u32 intr_info) 260 252 { 261 253 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 262 254 INTR_INFO_VALID_MASK)) == 263 255 (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); 264 256 } 265 257 266 - static inline int is_external_interrupt(u32 intr_info) 258 + static inline bool is_external_interrupt(u32 intr_info) 267 259 { 268 260 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | 
INTR_INFO_VALID_MASK)) 269 261 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 270 262 } 271 263 272 - static inline int is_machine_check(u32 intr_info) 264 + static inline bool is_machine_check(u32 intr_info) 273 265 { 274 266 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 275 267 INTR_INFO_VALID_MASK)) == 276 268 (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); 277 269 } 278 270 279 - static inline int cpu_has_vmx_msr_bitmap(void) 271 + static inline bool cpu_has_vmx_msr_bitmap(void) 280 272 { 281 273 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; 282 274 } 283 275 284 - static inline int cpu_has_vmx_tpr_shadow(void) 276 + static inline bool cpu_has_vmx_tpr_shadow(void) 285 277 { 286 278 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; 287 279 } 288 280 289 - static inline int vm_need_tpr_shadow(struct kvm *kvm) 281 + static inline bool vm_need_tpr_shadow(struct kvm *kvm) 290 282 { 291 283 return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); 292 284 } 293 285 294 - static inline int cpu_has_secondary_exec_ctrls(void) 286 + static inline bool cpu_has_secondary_exec_ctrls(void) 295 287 { 296 288 return vmcs_config.cpu_based_exec_ctrl & 297 289 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; ··· 311 303 312 304 static inline bool cpu_has_vmx_ept_execute_only(void) 313 305 { 314 - return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); 306 + return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; 315 307 } 316 308 317 309 static inline bool cpu_has_vmx_eptp_uncacheable(void) 318 310 { 319 - return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); 311 + return vmx_capability.ept & VMX_EPTP_UC_BIT; 320 312 } 321 313 322 314 static inline bool cpu_has_vmx_eptp_writeback(void) 323 315 { 324 - return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); 316 + return vmx_capability.ept & VMX_EPTP_WB_BIT; 325 317 } 326 318 327 319 static inline bool cpu_has_vmx_ept_2m_page(void) 328 320 { 329 - return 
!!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); 321 + return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; 330 322 } 331 323 332 324 static inline bool cpu_has_vmx_ept_1g_page(void) 333 325 { 334 - return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); 326 + return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; 335 327 } 336 328 337 - static inline int cpu_has_vmx_invept_individual_addr(void) 329 + static inline bool cpu_has_vmx_invept_individual_addr(void) 338 330 { 339 - return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); 331 + return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; 340 332 } 341 333 342 - static inline int cpu_has_vmx_invept_context(void) 334 + static inline bool cpu_has_vmx_invept_context(void) 343 335 { 344 - return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); 336 + return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; 345 337 } 346 338 347 - static inline int cpu_has_vmx_invept_global(void) 339 + static inline bool cpu_has_vmx_invept_global(void) 348 340 { 349 - return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); 341 + return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; 350 342 } 351 343 352 - static inline int cpu_has_vmx_ept(void) 344 + static inline bool cpu_has_vmx_ept(void) 353 345 { 354 346 return vmcs_config.cpu_based_2nd_exec_ctrl & 355 347 SECONDARY_EXEC_ENABLE_EPT; 356 348 } 357 349 358 - static inline int cpu_has_vmx_unrestricted_guest(void) 350 + static inline bool cpu_has_vmx_unrestricted_guest(void) 359 351 { 360 352 return vmcs_config.cpu_based_2nd_exec_ctrl & 361 353 SECONDARY_EXEC_UNRESTRICTED_GUEST; 362 354 } 363 355 364 - static inline int cpu_has_vmx_ple(void) 356 + static inline bool cpu_has_vmx_ple(void) 365 357 { 366 358 return vmcs_config.cpu_based_2nd_exec_ctrl & 367 359 SECONDARY_EXEC_PAUSE_LOOP_EXITING; 368 360 } 369 361 370 - static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 362 + static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) 371 363 { 372 364 return 
flexpriority_enabled && irqchip_in_kernel(kvm); 373 365 } 374 366 375 - static inline int cpu_has_vmx_vpid(void) 367 + static inline bool cpu_has_vmx_vpid(void) 376 368 { 377 369 return vmcs_config.cpu_based_2nd_exec_ctrl & 378 370 SECONDARY_EXEC_ENABLE_VPID; 379 371 } 380 372 381 - static inline int cpu_has_vmx_rdtscp(void) 373 + static inline bool cpu_has_vmx_rdtscp(void) 382 374 { 383 375 return vmcs_config.cpu_based_2nd_exec_ctrl & 384 376 SECONDARY_EXEC_RDTSCP; 385 377 } 386 378 387 - static inline int cpu_has_virtual_nmis(void) 379 + static inline bool cpu_has_virtual_nmis(void) 388 380 { 389 381 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 390 382 } ··· 603 595 vmcs_write32(EXCEPTION_BITMAP, eb); 604 596 } 605 597 598 + static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) 599 + { 600 + unsigned i; 601 + struct msr_autoload *m = &vmx->msr_autoload; 602 + 603 + for (i = 0; i < m->nr; ++i) 604 + if (m->guest[i].index == msr) 605 + break; 606 + 607 + if (i == m->nr) 608 + return; 609 + --m->nr; 610 + m->guest[i] = m->guest[m->nr]; 611 + m->host[i] = m->host[m->nr]; 612 + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); 613 + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); 614 + } 615 + 616 + static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, 617 + u64 guest_val, u64 host_val) 618 + { 619 + unsigned i; 620 + struct msr_autoload *m = &vmx->msr_autoload; 621 + 622 + for (i = 0; i < m->nr; ++i) 623 + if (m->guest[i].index == msr) 624 + break; 625 + 626 + if (i == m->nr) { 627 + ++m->nr; 628 + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); 629 + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); 630 + } 631 + 632 + m->guest[i].index = msr; 633 + m->guest[i].value = guest_val; 634 + m->host[i].index = msr; 635 + m->host[i].value = host_val; 636 + } 637 + 606 638 static void reload_tss(void) 607 639 { 608 640 /* 609 641 * VT restores TR but not its size. Useless. 
610 642 */ 611 - struct descriptor_table gdt; 643 + struct desc_ptr gdt; 612 644 struct desc_struct *descs; 613 645 614 - kvm_get_gdt(&gdt); 615 - descs = (void *)gdt.base; 646 + native_store_gdt(&gdt); 647 + descs = (void *)gdt.address; 616 648 descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ 617 649 load_TR_desc(); 618 650 } ··· 679 631 guest_efer |= host_efer & ignore_bits; 680 632 vmx->guest_msrs[efer_offset].data = guest_efer; 681 633 vmx->guest_msrs[efer_offset].mask = ~ignore_bits; 634 + 635 + clear_atomic_switch_msr(vmx, MSR_EFER); 636 + /* On ept, can't emulate nx, and must switch nx atomically */ 637 + if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { 638 + guest_efer = vmx->vcpu.arch.efer; 639 + if (!(guest_efer & EFER_LMA)) 640 + guest_efer &= ~EFER_LME; 641 + add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); 642 + return false; 643 + } 644 + 682 645 return true; 646 + } 647 + 648 + static unsigned long segment_base(u16 selector) 649 + { 650 + struct desc_ptr gdt; 651 + struct desc_struct *d; 652 + unsigned long table_base; 653 + unsigned long v; 654 + 655 + if (!(selector & ~3)) 656 + return 0; 657 + 658 + native_store_gdt(&gdt); 659 + table_base = gdt.address; 660 + 661 + if (selector & 4) { /* from ldt */ 662 + u16 ldt_selector = kvm_read_ldt(); 663 + 664 + if (!(ldt_selector & ~3)) 665 + return 0; 666 + 667 + table_base = segment_base(ldt_selector); 668 + } 669 + d = (struct desc_struct *)(table_base + (selector & ~7)); 670 + v = get_desc_base(d); 671 + #ifdef CONFIG_X86_64 672 + if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) 673 + v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; 674 + #endif 675 + return v; 676 + } 677 + 678 + static inline unsigned long kvm_read_tr_base(void) 679 + { 680 + u16 tr; 681 + asm("str %0" : "=g"(tr)); 682 + return segment_base(tr); 683 683 } 684 684 685 685 static void vmx_save_host_state(struct kvm_vcpu *vcpu) ··· 854 758 } 855 759 856 760 if (vcpu->cpu != 
cpu) { 857 - struct descriptor_table dt; 761 + struct desc_ptr dt; 858 762 unsigned long sysenter_esp; 859 763 860 764 vcpu->cpu = cpu; ··· 863 767 * processors. 864 768 */ 865 769 vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ 866 - kvm_get_gdt(&dt); 867 - vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ 770 + native_store_gdt(&dt); 771 + vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ 868 772 869 773 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); 870 774 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ ··· 942 846 int ret = 0; 943 847 944 848 if (interruptibility & GUEST_INTR_STATE_STI) 945 - ret |= X86_SHADOW_INT_STI; 849 + ret |= KVM_X86_SHADOW_INT_STI; 946 850 if (interruptibility & GUEST_INTR_STATE_MOV_SS) 947 - ret |= X86_SHADOW_INT_MOV_SS; 851 + ret |= KVM_X86_SHADOW_INT_MOV_SS; 948 852 949 853 return ret & mask; 950 854 } ··· 956 860 957 861 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); 958 862 959 - if (mask & X86_SHADOW_INT_MOV_SS) 863 + if (mask & KVM_X86_SHADOW_INT_MOV_SS) 960 864 interruptibility |= GUEST_INTR_STATE_MOV_SS; 961 - if (mask & X86_SHADOW_INT_STI) 865 + else if (mask & KVM_X86_SHADOW_INT_STI) 962 866 interruptibility |= GUEST_INTR_STATE_STI; 963 867 964 868 if ((interruptibility != interruptibility_old)) ··· 978 882 } 979 883 980 884 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 981 - bool has_error_code, u32 error_code) 885 + bool has_error_code, u32 error_code, 886 + bool reinject) 982 887 { 983 888 struct vcpu_vmx *vmx = to_vmx(vcpu); 984 889 u32 intr_info = nr | INTR_INFO_VALID_MASK; ··· 1273 1176 u64 msr; 1274 1177 1275 1178 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 1276 - return (msr & (FEATURE_CONTROL_LOCKED | 1277 - FEATURE_CONTROL_VMXON_ENABLED)) 1278 - == FEATURE_CONTROL_LOCKED; 1179 + if (msr & FEATURE_CONTROL_LOCKED) { 1180 + if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) 1181 + && tboot_enabled()) 1182 + return 1; 1183 + if (!(msr & 
FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) 1184 + && !tboot_enabled()) 1185 + return 1; 1186 + } 1187 + 1188 + return 0; 1279 1189 /* locked but not enabled */ 1280 1190 } 1281 1191 ··· 1290 1186 { 1291 1187 int cpu = raw_smp_processor_id(); 1292 1188 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 1293 - u64 old; 1189 + u64 old, test_bits; 1294 1190 1295 1191 if (read_cr4() & X86_CR4_VMXE) 1296 1192 return -EBUSY; 1297 1193 1298 1194 INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); 1299 1195 rdmsrl(MSR_IA32_FEATURE_CONTROL, old); 1300 - if ((old & (FEATURE_CONTROL_LOCKED | 1301 - FEATURE_CONTROL_VMXON_ENABLED)) 1302 - != (FEATURE_CONTROL_LOCKED | 1303 - FEATURE_CONTROL_VMXON_ENABLED)) 1196 + 1197 + test_bits = FEATURE_CONTROL_LOCKED; 1198 + test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1199 + if (tboot_enabled()) 1200 + test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; 1201 + 1202 + if ((old & test_bits) != test_bits) { 1304 1203 /* enable and lock */ 1305 - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 1306 - FEATURE_CONTROL_LOCKED | 1307 - FEATURE_CONTROL_VMXON_ENABLED); 1204 + wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); 1205 + } 1308 1206 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ 1309 1207 asm volatile (ASM_VMX_VMXON_RAX 1310 1208 : : "a"(&phys_addr), "m"(phys_addr) ··· 1627 1521 struct kvm_memslots *slots; 1628 1522 gfn_t base_gfn; 1629 1523 1630 - slots = rcu_dereference(kvm->memslots); 1524 + slots = kvm_memslots(kvm); 1631 1525 base_gfn = kvm->memslots->memslots[0].base_gfn + 1632 1526 kvm->memslots->memslots[0].npages - 3; 1633 1527 return base_gfn << PAGE_SHIFT; ··· 1755 1649 vmcs_write32(VM_ENTRY_CONTROLS, 1756 1650 vmcs_read32(VM_ENTRY_CONTROLS) 1757 1651 & ~VM_ENTRY_IA32E_MODE); 1652 + vmx_set_efer(vcpu, vcpu->arch.efer); 1758 1653 } 1759 1654 1760 1655 #endif ··· 2041 1934 *l = (ar >> 13) & 1; 2042 1935 } 2043 1936 2044 - static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 1937 + static 
void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 2045 1938 { 2046 - dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); 2047 - dt->base = vmcs_readl(GUEST_IDTR_BASE); 1939 + dt->size = vmcs_read32(GUEST_IDTR_LIMIT); 1940 + dt->address = vmcs_readl(GUEST_IDTR_BASE); 2048 1941 } 2049 1942 2050 - static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 1943 + static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 2051 1944 { 2052 - vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); 2053 - vmcs_writel(GUEST_IDTR_BASE, dt->base); 1945 + vmcs_write32(GUEST_IDTR_LIMIT, dt->size); 1946 + vmcs_writel(GUEST_IDTR_BASE, dt->address); 2054 1947 } 2055 1948 2056 - static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 1949 + static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 2057 1950 { 2058 - dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); 2059 - dt->base = vmcs_readl(GUEST_GDTR_BASE); 1951 + dt->size = vmcs_read32(GUEST_GDTR_LIMIT); 1952 + dt->address = vmcs_readl(GUEST_GDTR_BASE); 2060 1953 } 2061 1954 2062 - static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 1955 + static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 2063 1956 { 2064 - vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); 2065 - vmcs_writel(GUEST_GDTR_BASE, dt->base); 1957 + vmcs_write32(GUEST_GDTR_LIMIT, dt->size); 1958 + vmcs_writel(GUEST_GDTR_BASE, dt->address); 2066 1959 } 2067 1960 2068 1961 static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) ··· 2403 2296 spin_unlock(&vmx_vpid_lock); 2404 2297 } 2405 2298 2299 + static void free_vpid(struct vcpu_vmx *vmx) 2300 + { 2301 + if (!enable_vpid) 2302 + return; 2303 + spin_lock(&vmx_vpid_lock); 2304 + if (vmx->vpid != 0) 2305 + __clear_bit(vmx->vpid, vmx_vpid_bitmap); 2306 + spin_unlock(&vmx_vpid_lock); 2307 + } 2308 + 2406 2309 static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) 2407 2310 { 2408 2311 int f = sizeof(unsigned long); 
··· 2451 2334 u32 junk; 2452 2335 u64 host_pat, tsc_this, tsc_base; 2453 2336 unsigned long a; 2454 - struct descriptor_table dt; 2337 + struct desc_ptr dt; 2455 2338 int i; 2456 2339 unsigned long kvm_vmx_return; 2457 2340 u32 exec_control; ··· 2532 2415 2533 2416 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ 2534 2417 2535 - kvm_get_idt(&dt); 2536 - vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ 2418 + native_store_idt(&dt); 2419 + vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ 2537 2420 2538 2421 asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); 2539 2422 vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ 2540 2423 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); 2541 2424 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); 2425 + vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); 2542 2426 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); 2427 + vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); 2543 2428 2544 2429 rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); 2545 2430 vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); ··· 3066 2947 int size, in, string; 3067 2948 unsigned port; 3068 2949 3069 - ++vcpu->stat.io_exits; 3070 2950 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3071 2951 string = (exit_qualification & 16) != 0; 3072 - 3073 - if (string) { 3074 - if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) 3075 - return 0; 3076 - return 1; 3077 - } 3078 - 3079 - size = (exit_qualification & 7) + 1; 3080 2952 in = (exit_qualification & 8) != 0; 3081 - port = exit_qualification >> 16; 3082 2953 2954 + ++vcpu->stat.io_exits; 2955 + 2956 + if (string || in) 2957 + return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); 2958 + 2959 + port = exit_qualification >> 16; 2960 + size = (exit_qualification & 7) + 1; 3083 2961 skip_emulated_instruction(vcpu); 3084 - return kvm_emulate_pio(vcpu, in, size, port); 2962 + 2963 + return kvm_fast_pio_out(vcpu, size, port); 3085 2964 } 3086 2965 3087 
2966 static void ··· 3170 3053 return 0; 3171 3054 } 3172 3055 3173 - static int check_dr_alias(struct kvm_vcpu *vcpu) 3174 - { 3175 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 3176 - kvm_queue_exception(vcpu, UD_VECTOR); 3177 - return -1; 3178 - } 3179 - return 0; 3180 - } 3181 - 3182 3056 static int handle_dr(struct kvm_vcpu *vcpu) 3183 3057 { 3184 3058 unsigned long exit_qualification; 3185 - unsigned long val; 3186 3059 int dr, reg; 3187 3060 3188 3061 /* Do not handle if the CPL > 0, will trigger GP on re-entry */ ··· 3207 3100 dr = exit_qualification & DEBUG_REG_ACCESS_NUM; 3208 3101 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 3209 3102 if (exit_qualification & TYPE_MOV_FROM_DR) { 3210 - switch (dr) { 3211 - case 0 ... 3: 3212 - val = vcpu->arch.db[dr]; 3213 - break; 3214 - case 4: 3215 - if (check_dr_alias(vcpu) < 0) 3216 - return 1; 3217 - /* fall through */ 3218 - case 6: 3219 - val = vcpu->arch.dr6; 3220 - break; 3221 - case 5: 3222 - if (check_dr_alias(vcpu) < 0) 3223 - return 1; 3224 - /* fall through */ 3225 - default: /* 7 */ 3226 - val = vcpu->arch.dr7; 3227 - break; 3228 - } 3229 - kvm_register_write(vcpu, reg, val); 3230 - } else { 3231 - val = vcpu->arch.regs[reg]; 3232 - switch (dr) { 3233 - case 0 ... 
3: 3234 - vcpu->arch.db[dr] = val; 3235 - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 3236 - vcpu->arch.eff_db[dr] = val; 3237 - break; 3238 - case 4: 3239 - if (check_dr_alias(vcpu) < 0) 3240 - return 1; 3241 - /* fall through */ 3242 - case 6: 3243 - if (val & 0xffffffff00000000ULL) { 3244 - kvm_inject_gp(vcpu, 0); 3245 - return 1; 3246 - } 3247 - vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 3248 - break; 3249 - case 5: 3250 - if (check_dr_alias(vcpu) < 0) 3251 - return 1; 3252 - /* fall through */ 3253 - default: /* 7 */ 3254 - if (val & 0xffffffff00000000ULL) { 3255 - kvm_inject_gp(vcpu, 0); 3256 - return 1; 3257 - } 3258 - vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 3259 - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 3260 - vmcs_writel(GUEST_DR7, vcpu->arch.dr7); 3261 - vcpu->arch.switch_db_regs = 3262 - (val & DR7_BP_EN_MASK); 3263 - } 3264 - break; 3265 - } 3266 - } 3103 + unsigned long val; 3104 + if (!kvm_get_dr(vcpu, dr, &val)) 3105 + kvm_register_write(vcpu, reg, val); 3106 + } else 3107 + kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); 3267 3108 skip_emulated_instruction(vcpu); 3268 3109 return 1; 3110 + } 3111 + 3112 + static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 3113 + { 3114 + vmcs_writel(GUEST_DR7, val); 3269 3115 } 3270 3116 3271 3117 static int handle_cpuid(struct kvm_vcpu *vcpu) ··· 3352 3292 { 3353 3293 struct vcpu_vmx *vmx = to_vmx(vcpu); 3354 3294 unsigned long exit_qualification; 3295 + bool has_error_code = false; 3296 + u32 error_code = 0; 3355 3297 u16 tss_selector; 3356 3298 int reason, type, idt_v; 3357 3299 ··· 3376 3314 kvm_clear_interrupt_queue(vcpu); 3377 3315 break; 3378 3316 case INTR_TYPE_HARD_EXCEPTION: 3317 + if (vmx->idt_vectoring_info & 3318 + VECTORING_INFO_DELIVER_CODE_MASK) { 3319 + has_error_code = true; 3320 + error_code = 3321 + vmcs_read32(IDT_VECTORING_ERROR_CODE); 3322 + } 3323 + /* fall through */ 3379 3324 case INTR_TYPE_SOFT_EXCEPTION: 3380 3325 
kvm_clear_exception_queue(vcpu); 3381 3326 break; ··· 3397 3328 type != INTR_TYPE_NMI_INTR)) 3398 3329 skip_emulated_instruction(vcpu); 3399 3330 3400 - if (!kvm_task_switch(vcpu, tss_selector, reason)) 3331 + if (kvm_task_switch(vcpu, tss_selector, reason, 3332 + has_error_code, error_code) == EMULATE_FAIL) { 3333 + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 3334 + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 3335 + vcpu->run->internal.ndata = 0; 3401 3336 return 0; 3337 + } 3402 3338 3403 3339 /* clear all local breakpoint enable flags */ 3404 3340 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); ··· 3648 3574 u32 exit_reason = vmx->exit_reason; 3649 3575 u32 vectoring_info = vmx->idt_vectoring_info; 3650 3576 3651 - trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); 3577 + trace_kvm_exit(exit_reason, vcpu); 3652 3578 3653 3579 /* If guest state is invalid, start emulating */ 3654 3580 if (vmx->emulation_required && emulate_invalid_guest_state) ··· 3997 3923 { 3998 3924 struct vcpu_vmx *vmx = to_vmx(vcpu); 3999 3925 4000 - spin_lock(&vmx_vpid_lock); 4001 - if (vmx->vpid != 0) 4002 - __clear_bit(vmx->vpid, vmx_vpid_bitmap); 4003 - spin_unlock(&vmx_vpid_lock); 3926 + free_vpid(vmx); 4004 3927 vmx_free_vmcs(vcpu); 4005 3928 kfree(vmx->guest_msrs); 4006 3929 kvm_vcpu_uninit(vcpu); ··· 4059 3988 uninit_vcpu: 4060 3989 kvm_vcpu_uninit(&vmx->vcpu); 4061 3990 free_vcpu: 3991 + free_vpid(vmx); 4062 3992 kmem_cache_free(kvm_vcpu_cache, vmx); 4063 3993 return ERR_PTR(err); 4064 3994 } ··· 4190 4118 } 4191 4119 } 4192 4120 4121 + static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) 4122 + { 4123 + } 4124 + 4193 4125 static struct kvm_x86_ops vmx_x86_ops = { 4194 4126 .cpu_has_kvm_support = cpu_has_kvm_support, 4195 4127 .disabled_by_bios = vmx_disabled_by_bios, ··· 4230 4154 .set_idt = vmx_set_idt, 4231 4155 .get_gdt = vmx_get_gdt, 4232 4156 .set_gdt = vmx_set_gdt, 4157 + .set_dr7 = vmx_set_dr7, 4233 4158 .cache_reg = 
vmx_cache_reg, 4234 4159 .get_rflags = vmx_get_rflags, 4235 4160 .set_rflags = vmx_set_rflags, ··· 4266 4189 .cpuid_update = vmx_cpuid_update, 4267 4190 4268 4191 .rdtscp_supported = vmx_rdtscp_supported, 4192 + 4193 + .set_supported_cpuid = vmx_set_supported_cpuid, 4269 4194 }; 4270 4195 4271 4196 static int __init vmx_init(void) ··· 4315 4236 4316 4237 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ 4317 4238 4318 - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); 4239 + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), 4240 + __alignof__(struct vcpu_vmx), THIS_MODULE); 4319 4241 if (r) 4320 4242 goto out3; 4321 4243
+640 -959
arch/x86/kvm/x86.c
··· 42 42 #include <linux/slab.h> 43 43 #include <linux/perf_event.h> 44 44 #include <trace/events/kvm.h> 45 - #undef TRACE_INCLUDE_FILE 45 + 46 46 #define CREATE_TRACE_POINTS 47 47 #include "trace.h" 48 48 ··· 224 224 kvm_on_user_return(&smsr->urn); 225 225 } 226 226 227 - unsigned long segment_base(u16 selector) 228 - { 229 - struct descriptor_table gdt; 230 - struct desc_struct *d; 231 - unsigned long table_base; 232 - unsigned long v; 233 - 234 - if (selector == 0) 235 - return 0; 236 - 237 - kvm_get_gdt(&gdt); 238 - table_base = gdt.base; 239 - 240 - if (selector & 4) { /* from ldt */ 241 - u16 ldt_selector = kvm_read_ldt(); 242 - 243 - table_base = segment_base(ldt_selector); 244 - } 245 - d = (struct desc_struct *)(table_base + (selector & ~7)); 246 - v = get_desc_base(d); 247 - #ifdef CONFIG_X86_64 248 - if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) 249 - v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; 250 - #endif 251 - return v; 252 - } 253 - EXPORT_SYMBOL_GPL(segment_base); 254 - 255 227 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) 256 228 { 257 229 if (irqchip_in_kernel(vcpu->kvm)) ··· 265 293 } 266 294 267 295 static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 268 - unsigned nr, bool has_error, u32 error_code) 296 + unsigned nr, bool has_error, u32 error_code, 297 + bool reinject) 269 298 { 270 299 u32 prev_nr; 271 300 int class1, class2; ··· 277 304 vcpu->arch.exception.has_error_code = has_error; 278 305 vcpu->arch.exception.nr = nr; 279 306 vcpu->arch.exception.error_code = error_code; 307 + vcpu->arch.exception.reinject = reinject; 280 308 return; 281 309 } 282 310 ··· 306 332 307 333 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 308 334 { 309 - kvm_multiple_exception(vcpu, nr, false, 0); 335 + kvm_multiple_exception(vcpu, nr, false, 0, false); 310 336 } 311 337 EXPORT_SYMBOL_GPL(kvm_queue_exception); 338 + 339 + void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) 340 + { 341 + 
kvm_multiple_exception(vcpu, nr, false, 0, true); 342 + } 343 + EXPORT_SYMBOL_GPL(kvm_requeue_exception); 312 344 313 345 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, 314 346 u32 error_code) ··· 332 352 333 353 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 334 354 { 335 - kvm_multiple_exception(vcpu, nr, true, error_code); 355 + kvm_multiple_exception(vcpu, nr, true, error_code, false); 336 356 } 337 357 EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 358 + 359 + void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 360 + { 361 + kvm_multiple_exception(vcpu, nr, true, error_code, true); 362 + } 363 + EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); 338 364 339 365 /* 340 366 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue ··· 462 476 } 463 477 464 478 kvm_x86_ops->set_cr0(vcpu, cr0); 465 - vcpu->arch.cr0 = cr0; 466 479 467 480 kvm_mmu_reset_context(vcpu); 468 481 return; ··· 470 485 471 486 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 472 487 { 473 - kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); 488 + kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); 474 489 } 475 490 EXPORT_SYMBOL_GPL(kvm_lmsw); 476 491 ··· 502 517 } 503 518 kvm_x86_ops->set_cr4(vcpu, cr4); 504 519 vcpu->arch.cr4 = cr4; 505 - vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; 506 520 kvm_mmu_reset_context(vcpu); 507 521 } 508 522 EXPORT_SYMBOL_GPL(kvm_set_cr4); ··· 576 592 } 577 593 EXPORT_SYMBOL_GPL(kvm_get_cr8); 578 594 595 + int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 596 + { 597 + switch (dr) { 598 + case 0 ... 
3: 599 + vcpu->arch.db[dr] = val; 600 + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 601 + vcpu->arch.eff_db[dr] = val; 602 + break; 603 + case 4: 604 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 605 + kvm_queue_exception(vcpu, UD_VECTOR); 606 + return 1; 607 + } 608 + /* fall through */ 609 + case 6: 610 + if (val & 0xffffffff00000000ULL) { 611 + kvm_inject_gp(vcpu, 0); 612 + return 1; 613 + } 614 + vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 615 + break; 616 + case 5: 617 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 618 + kvm_queue_exception(vcpu, UD_VECTOR); 619 + return 1; 620 + } 621 + /* fall through */ 622 + default: /* 7 */ 623 + if (val & 0xffffffff00000000ULL) { 624 + kvm_inject_gp(vcpu, 0); 625 + return 1; 626 + } 627 + vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 628 + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 629 + kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); 630 + vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); 631 + } 632 + break; 633 + } 634 + 635 + return 0; 636 + } 637 + EXPORT_SYMBOL_GPL(kvm_set_dr); 638 + 639 + int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) 640 + { 641 + switch (dr) { 642 + case 0 ... 3: 643 + *val = vcpu->arch.db[dr]; 644 + break; 645 + case 4: 646 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 647 + kvm_queue_exception(vcpu, UD_VECTOR); 648 + return 1; 649 + } 650 + /* fall through */ 651 + case 6: 652 + *val = vcpu->arch.dr6; 653 + break; 654 + case 5: 655 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 656 + kvm_queue_exception(vcpu, UD_VECTOR); 657 + return 1; 658 + } 659 + /* fall through */ 660 + default: /* 7 */ 661 + *val = vcpu->arch.dr7; 662 + break; 663 + } 664 + 665 + return 0; 666 + } 667 + EXPORT_SYMBOL_GPL(kvm_get_dr); 668 + 579 669 static inline u32 bit(int bitno) 580 670 { 581 671 return 1 << (bitno & 31); ··· 664 606 * kvm-specific. Those are put in the beginning of the list. 
665 607 */ 666 608 667 - #define KVM_SAVE_MSRS_BEGIN 5 609 + #define KVM_SAVE_MSRS_BEGIN 7 668 610 static u32 msrs_to_save[] = { 669 611 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 612 + MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, 670 613 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 671 614 HV_X64_MSR_APIC_ASSIST_PAGE, 672 615 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, ··· 684 625 MSR_IA32_MISC_ENABLE, 685 626 }; 686 627 687 - static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 628 + static int set_efer(struct kvm_vcpu *vcpu, u64 efer) 688 629 { 689 - if (efer & efer_reserved_bits) { 690 - kvm_inject_gp(vcpu, 0); 691 - return; 692 - } 630 + if (efer & efer_reserved_bits) 631 + return 1; 693 632 694 633 if (is_paging(vcpu) 695 - && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 696 - kvm_inject_gp(vcpu, 0); 697 - return; 698 - } 634 + && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) 635 + return 1; 699 636 700 637 if (efer & EFER_FFXSR) { 701 638 struct kvm_cpuid_entry2 *feat; 702 639 703 640 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 704 - if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 705 - kvm_inject_gp(vcpu, 0); 706 - return; 707 - } 641 + if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) 642 + return 1; 708 643 } 709 644 710 645 if (efer & EFER_SVME) { 711 646 struct kvm_cpuid_entry2 *feat; 712 647 713 648 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 714 - if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 715 - kvm_inject_gp(vcpu, 0); 716 - return; 717 - } 649 + if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) 650 + return 1; 718 651 } 719 - 720 - kvm_x86_ops->set_efer(vcpu, efer); 721 652 722 653 efer &= ~EFER_LMA; 723 654 efer |= vcpu->arch.efer & EFER_LMA; 655 + 656 + kvm_x86_ops->set_efer(vcpu, efer); 724 657 725 658 vcpu->arch.efer = efer; 726 659 727 660 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 728 661 kvm_mmu_reset_context(vcpu); 662 + 663 + return 0; 729 664 } 
730 665 731 666 void kvm_enable_efer_bits(u64 mask) ··· 749 696 750 697 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) 751 698 { 752 - static int version; 699 + int version; 700 + int r; 753 701 struct pvclock_wall_clock wc; 754 702 struct timespec boot; 755 703 756 704 if (!wall_clock) 757 705 return; 758 706 759 - version++; 707 + r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); 708 + if (r) 709 + return; 710 + 711 + if (version & 1) 712 + ++version; /* first time write, random junk */ 713 + 714 + ++version; 760 715 761 716 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 762 717 ··· 856 795 857 796 vcpu->hv_clock.system_time = ts.tv_nsec + 858 797 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; 798 + 799 + vcpu->hv_clock.flags = 0; 859 800 860 801 /* 861 802 * The interface expects us to write an even number signaling that the ··· 1150 1087 { 1151 1088 switch (msr) { 1152 1089 case MSR_EFER: 1153 - set_efer(vcpu, data); 1154 - break; 1090 + return set_efer(vcpu, data); 1155 1091 case MSR_K7_HWCR: 1156 1092 data &= ~(u64)0x40; /* ignore flush filter disable */ 1093 + data &= ~(u64)0x100; /* ignore ignne emulation enable */ 1157 1094 if (data != 0) { 1158 1095 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 1159 1096 data); ··· 1196 1133 case MSR_IA32_MISC_ENABLE: 1197 1134 vcpu->arch.ia32_misc_enable_msr = data; 1198 1135 break; 1136 + case MSR_KVM_WALL_CLOCK_NEW: 1199 1137 case MSR_KVM_WALL_CLOCK: 1200 1138 vcpu->kvm->arch.wall_clock = data; 1201 1139 kvm_write_wall_clock(vcpu->kvm, data); 1202 1140 break; 1141 + case MSR_KVM_SYSTEM_TIME_NEW: 1203 1142 case MSR_KVM_SYSTEM_TIME: { 1204 1143 if (vcpu->arch.time_page) { 1205 1144 kvm_release_page_dirty(vcpu->arch.time_page); ··· 1473 1408 data = vcpu->arch.efer; 1474 1409 break; 1475 1410 case MSR_KVM_WALL_CLOCK: 1411 + case MSR_KVM_WALL_CLOCK_NEW: 1476 1412 data = vcpu->kvm->arch.wall_clock; 1477 1413 break; 1478 1414 case MSR_KVM_SYSTEM_TIME: 
1415 + case MSR_KVM_SYSTEM_TIME_NEW: 1479 1416 data = vcpu->arch.time; 1480 1417 break; 1481 1418 case MSR_IA32_P5_MC_ADDR: ··· 1616 1549 case KVM_CAP_HYPERV_VAPIC: 1617 1550 case KVM_CAP_HYPERV_SPIN: 1618 1551 case KVM_CAP_PCI_SEGMENT: 1552 + case KVM_CAP_DEBUGREGS: 1619 1553 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1620 1554 r = 1; 1621 1555 break; ··· 1837 1769 { 1838 1770 int r; 1839 1771 1772 + vcpu_load(vcpu); 1840 1773 r = -E2BIG; 1841 1774 if (cpuid->nent < vcpu->arch.cpuid_nent) 1842 1775 goto out; ··· 1849 1780 1850 1781 out: 1851 1782 cpuid->nent = vcpu->arch.cpuid_nent; 1783 + vcpu_put(vcpu); 1852 1784 return r; 1853 1785 } 1854 1786 ··· 1980 1910 } 1981 1911 break; 1982 1912 } 1913 + case KVM_CPUID_SIGNATURE: { 1914 + char signature[12] = "KVMKVMKVM\0\0"; 1915 + u32 *sigptr = (u32 *)signature; 1916 + entry->eax = 0; 1917 + entry->ebx = sigptr[0]; 1918 + entry->ecx = sigptr[1]; 1919 + entry->edx = sigptr[2]; 1920 + break; 1921 + } 1922 + case KVM_CPUID_FEATURES: 1923 + entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | 1924 + (1 << KVM_FEATURE_NOP_IO_DELAY) | 1925 + (1 << KVM_FEATURE_CLOCKSOURCE2) | 1926 + (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); 1927 + entry->ebx = 0; 1928 + entry->ecx = 0; 1929 + entry->edx = 0; 1930 + break; 1983 1931 case 0x80000000: 1984 1932 entry->eax = min(entry->eax, 0x8000001a); 1985 1933 break; ··· 2006 1918 entry->ecx &= kvm_supported_word6_x86_features; 2007 1919 break; 2008 1920 } 1921 + 1922 + kvm_x86_ops->set_supported_cpuid(function, entry); 1923 + 2009 1924 put_cpu(); 2010 1925 } 2011 1926 ··· 2044 1953 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) 2045 1954 do_cpuid_ent(&cpuid_entries[nent], func, 0, 2046 1955 &nent, cpuid->nent); 1956 + 1957 + 1958 + 1959 + r = -E2BIG; 1960 + if (nent >= cpuid->nent) 1961 + goto out_free; 1962 + 1963 + do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, 1964 + cpuid->nent); 1965 + 1966 + r = -E2BIG; 1967 + if (nent >= cpuid->nent) 1968 + goto out_free; 
1969 + 1970 + do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, 1971 + cpuid->nent); 1972 + 2047 1973 r = -E2BIG; 2048 1974 if (nent >= cpuid->nent) 2049 1975 goto out_free; ··· 2140 2032 int r; 2141 2033 unsigned bank_num = mcg_cap & 0xff, bank; 2142 2034 2035 + vcpu_load(vcpu); 2143 2036 r = -EINVAL; 2144 2037 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) 2145 2038 goto out; ··· 2155 2046 for (bank = 0; bank < bank_num; bank++) 2156 2047 vcpu->arch.mce_banks[bank*4] = ~(u64)0; 2157 2048 out: 2049 + vcpu_put(vcpu); 2158 2050 return r; 2159 2051 } 2160 2052 ··· 2215 2105 { 2216 2106 vcpu_load(vcpu); 2217 2107 2218 - events->exception.injected = vcpu->arch.exception.pending; 2108 + events->exception.injected = 2109 + vcpu->arch.exception.pending && 2110 + !kvm_exception_is_soft(vcpu->arch.exception.nr); 2219 2111 events->exception.nr = vcpu->arch.exception.nr; 2220 2112 events->exception.has_error_code = vcpu->arch.exception.has_error_code; 2221 2113 events->exception.error_code = vcpu->arch.exception.error_code; 2222 2114 2223 - events->interrupt.injected = vcpu->arch.interrupt.pending; 2115 + events->interrupt.injected = 2116 + vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; 2224 2117 events->interrupt.nr = vcpu->arch.interrupt.nr; 2225 - events->interrupt.soft = vcpu->arch.interrupt.soft; 2118 + events->interrupt.soft = 0; 2119 + events->interrupt.shadow = 2120 + kvm_x86_ops->get_interrupt_shadow(vcpu, 2121 + KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); 2226 2122 2227 2123 events->nmi.injected = vcpu->arch.nmi_injected; 2228 2124 events->nmi.pending = vcpu->arch.nmi_pending; ··· 2237 2121 events->sipi_vector = vcpu->arch.sipi_vector; 2238 2122 2239 2123 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2240 - | KVM_VCPUEVENT_VALID_SIPI_VECTOR); 2124 + | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2125 + | KVM_VCPUEVENT_VALID_SHADOW); 2241 2126 2242 2127 vcpu_put(vcpu); 2243 2128 } ··· 2247 2130 struct kvm_vcpu_events *events) 2248 2131 { 
2249 2132 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING 2250 - | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) 2133 + | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2134 + | KVM_VCPUEVENT_VALID_SHADOW)) 2251 2135 return -EINVAL; 2252 2136 2253 2137 vcpu_load(vcpu); ··· 2263 2145 vcpu->arch.interrupt.soft = events->interrupt.soft; 2264 2146 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) 2265 2147 kvm_pic_clear_isr_ack(vcpu->kvm); 2148 + if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) 2149 + kvm_x86_ops->set_interrupt_shadow(vcpu, 2150 + events->interrupt.shadow); 2266 2151 2267 2152 vcpu->arch.nmi_injected = events->nmi.injected; 2268 2153 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) ··· 2274 2153 2275 2154 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) 2276 2155 vcpu->arch.sipi_vector = events->sipi_vector; 2156 + 2157 + vcpu_put(vcpu); 2158 + 2159 + return 0; 2160 + } 2161 + 2162 + static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2163 + struct kvm_debugregs *dbgregs) 2164 + { 2165 + vcpu_load(vcpu); 2166 + 2167 + memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2168 + dbgregs->dr6 = vcpu->arch.dr6; 2169 + dbgregs->dr7 = vcpu->arch.dr7; 2170 + dbgregs->flags = 0; 2171 + 2172 + vcpu_put(vcpu); 2173 + } 2174 + 2175 + static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, 2176 + struct kvm_debugregs *dbgregs) 2177 + { 2178 + if (dbgregs->flags) 2179 + return -EINVAL; 2180 + 2181 + vcpu_load(vcpu); 2182 + 2183 + memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 2184 + vcpu->arch.dr6 = dbgregs->dr6; 2185 + vcpu->arch.dr7 = dbgregs->dr7; 2277 2186 2278 2187 vcpu_put(vcpu); 2279 2188 ··· 2464 2313 r = -EFAULT; 2465 2314 if (copy_from_user(&mce, argp, sizeof mce)) 2466 2315 goto out; 2316 + vcpu_load(vcpu); 2467 2317 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2318 + vcpu_put(vcpu); 2468 2319 break; 2469 2320 } 2470 2321 case KVM_GET_VCPU_EVENTS: { ··· 2488 2335 break; 2489 2336 2490 2337 r = 
kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); 2338 + break; 2339 + } 2340 + case KVM_GET_DEBUGREGS: { 2341 + struct kvm_debugregs dbgregs; 2342 + 2343 + kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); 2344 + 2345 + r = -EFAULT; 2346 + if (copy_to_user(argp, &dbgregs, 2347 + sizeof(struct kvm_debugregs))) 2348 + break; 2349 + r = 0; 2350 + break; 2351 + } 2352 + case KVM_SET_DEBUGREGS: { 2353 + struct kvm_debugregs dbgregs; 2354 + 2355 + r = -EFAULT; 2356 + if (copy_from_user(&dbgregs, argp, 2357 + sizeof(struct kvm_debugregs))) 2358 + break; 2359 + 2360 + r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); 2491 2361 break; 2492 2362 } 2493 2363 default: ··· 2566 2390 struct kvm_mem_alias *alias; 2567 2391 struct kvm_mem_aliases *aliases; 2568 2392 2569 - aliases = rcu_dereference(kvm->arch.aliases); 2393 + aliases = kvm_aliases(kvm); 2570 2394 2571 2395 for (i = 0; i < aliases->naliases; ++i) { 2572 2396 alias = &aliases->aliases[i]; ··· 2585 2409 struct kvm_mem_alias *alias; 2586 2410 struct kvm_mem_aliases *aliases; 2587 2411 2588 - aliases = rcu_dereference(kvm->arch.aliases); 2412 + aliases = kvm_aliases(kvm); 2589 2413 2590 2414 for (i = 0; i < aliases->naliases; ++i) { 2591 2415 alias = &aliases->aliases[i]; ··· 2980 2804 r = -EFAULT; 2981 2805 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 2982 2806 goto out; 2807 + r = -ENXIO; 2983 2808 if (irqchip_in_kernel(kvm)) { 2984 2809 __s32 status; 2985 2810 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 2986 2811 irq_event.irq, irq_event.level); 2987 2812 if (ioctl == KVM_IRQ_LINE_STATUS) { 2813 + r = -EFAULT; 2988 2814 irq_event.status = status; 2989 2815 if (copy_to_user(argp, &irq_event, 2990 2816 sizeof irq_event)) ··· 3202 3024 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3203 3025 } 3204 3026 3027 + static void kvm_set_segment(struct kvm_vcpu *vcpu, 3028 + struct kvm_segment *var, int seg) 3029 + { 3030 + kvm_x86_ops->set_segment(vcpu, var, seg); 3031 + } 3032 + 
3033 + void kvm_get_segment(struct kvm_vcpu *vcpu, 3034 + struct kvm_segment *var, int seg) 3035 + { 3036 + kvm_x86_ops->get_segment(vcpu, var, seg); 3037 + } 3038 + 3205 3039 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3206 3040 { 3207 3041 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ··· 3294 3104 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); 3295 3105 } 3296 3106 3297 - static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3298 - struct kvm_vcpu *vcpu, u32 *error) 3107 + static int kvm_write_guest_virt_system(gva_t addr, void *val, 3108 + unsigned int bytes, 3109 + struct kvm_vcpu *vcpu, 3110 + u32 *error) 3299 3111 { 3300 3112 void *data = val; 3301 3113 int r = X86EMUL_CONTINUE; 3302 3114 3303 3115 while (bytes) { 3304 - gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); 3116 + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, 3117 + PFERR_WRITE_MASK, error); 3305 3118 unsigned offset = addr & (PAGE_SIZE-1); 3306 3119 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3307 3120 int ret; ··· 3326 3133 out: 3327 3134 return r; 3328 3135 } 3329 - 3330 3136 3331 3137 static int emulator_read_emulated(unsigned long addr, 3332 3138 void *val, ··· 3429 3237 } 3430 3238 3431 3239 int emulator_write_emulated(unsigned long addr, 3432 - const void *val, 3433 - unsigned int bytes, 3434 - struct kvm_vcpu *vcpu) 3240 + const void *val, 3241 + unsigned int bytes, 3242 + struct kvm_vcpu *vcpu) 3435 3243 { 3436 3244 /* Crossing a page boundary? 
*/ 3437 3245 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { ··· 3449 3257 } 3450 3258 EXPORT_SYMBOL_GPL(emulator_write_emulated); 3451 3259 3260 + #define CMPXCHG_TYPE(t, ptr, old, new) \ 3261 + (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) 3262 + 3263 + #ifdef CONFIG_X86_64 3264 + # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) 3265 + #else 3266 + # define CMPXCHG64(ptr, old, new) \ 3267 + (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) 3268 + #endif 3269 + 3452 3270 static int emulator_cmpxchg_emulated(unsigned long addr, 3453 3271 const void *old, 3454 3272 const void *new, 3455 3273 unsigned int bytes, 3456 3274 struct kvm_vcpu *vcpu) 3457 3275 { 3458 - printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3459 - #ifndef CONFIG_X86_64 3276 + gpa_t gpa; 3277 + struct page *page; 3278 + char *kaddr; 3279 + bool exchanged; 3280 + 3460 3281 /* guests cmpxchg8b have to be emulated atomically */ 3461 - if (bytes == 8) { 3462 - gpa_t gpa; 3463 - struct page *page; 3464 - char *kaddr; 3465 - u64 val; 3282 + if (bytes > 8 || (bytes & (bytes - 1))) 3283 + goto emul_write; 3466 3284 3467 - gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); 3285 + gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); 3468 3286 3469 - if (gpa == UNMAPPED_GVA || 3470 - (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3471 - goto emul_write; 3287 + if (gpa == UNMAPPED_GVA || 3288 + (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3289 + goto emul_write; 3472 3290 3473 - if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) 3474 - goto emul_write; 3291 + if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) 3292 + goto emul_write; 3475 3293 3476 - val = *(u64 *)new; 3294 + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3477 3295 3478 - page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3479 - 3480 - kaddr = kmap_atomic(page, KM_USER0); 3481 - set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 3482 - kunmap_atomic(kaddr, 
KM_USER0); 3483 - kvm_release_page_dirty(page); 3296 + kaddr = kmap_atomic(page, KM_USER0); 3297 + kaddr += offset_in_page(gpa); 3298 + switch (bytes) { 3299 + case 1: 3300 + exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); 3301 + break; 3302 + case 2: 3303 + exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); 3304 + break; 3305 + case 4: 3306 + exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); 3307 + break; 3308 + case 8: 3309 + exchanged = CMPXCHG64(kaddr, old, new); 3310 + break; 3311 + default: 3312 + BUG(); 3484 3313 } 3314 + kunmap_atomic(kaddr, KM_USER0); 3315 + kvm_release_page_dirty(page); 3316 + 3317 + if (!exchanged) 3318 + return X86EMUL_CMPXCHG_FAILED; 3319 + 3320 + kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); 3321 + 3322 + return X86EMUL_CONTINUE; 3323 + 3485 3324 emul_write: 3486 - #endif 3325 + printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3487 3326 3488 3327 return emulator_write_emulated(addr, new, bytes, vcpu); 3328 + } 3329 + 3330 + static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) 3331 + { 3332 + /* TODO: String I/O for in kernel device */ 3333 + int r; 3334 + 3335 + if (vcpu->arch.pio.in) 3336 + r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, 3337 + vcpu->arch.pio.size, pd); 3338 + else 3339 + r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, 3340 + vcpu->arch.pio.port, vcpu->arch.pio.size, 3341 + pd); 3342 + return r; 3343 + } 3344 + 3345 + 3346 + static int emulator_pio_in_emulated(int size, unsigned short port, void *val, 3347 + unsigned int count, struct kvm_vcpu *vcpu) 3348 + { 3349 + if (vcpu->arch.pio.count) 3350 + goto data_avail; 3351 + 3352 + trace_kvm_pio(1, port, size, 1); 3353 + 3354 + vcpu->arch.pio.port = port; 3355 + vcpu->arch.pio.in = 1; 3356 + vcpu->arch.pio.count = count; 3357 + vcpu->arch.pio.size = size; 3358 + 3359 + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3360 + data_avail: 3361 + memcpy(val, vcpu->arch.pio_data, size * count); 3362 + vcpu->arch.pio.count = 0; 3363 + return 1; 3364 + } 
3365 + 3366 + vcpu->run->exit_reason = KVM_EXIT_IO; 3367 + vcpu->run->io.direction = KVM_EXIT_IO_IN; 3368 + vcpu->run->io.size = size; 3369 + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 3370 + vcpu->run->io.count = count; 3371 + vcpu->run->io.port = port; 3372 + 3373 + return 0; 3374 + } 3375 + 3376 + static int emulator_pio_out_emulated(int size, unsigned short port, 3377 + const void *val, unsigned int count, 3378 + struct kvm_vcpu *vcpu) 3379 + { 3380 + trace_kvm_pio(0, port, size, 1); 3381 + 3382 + vcpu->arch.pio.port = port; 3383 + vcpu->arch.pio.in = 0; 3384 + vcpu->arch.pio.count = count; 3385 + vcpu->arch.pio.size = size; 3386 + 3387 + memcpy(vcpu->arch.pio_data, val, size * count); 3388 + 3389 + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3390 + vcpu->arch.pio.count = 0; 3391 + return 1; 3392 + } 3393 + 3394 + vcpu->run->exit_reason = KVM_EXIT_IO; 3395 + vcpu->run->io.direction = KVM_EXIT_IO_OUT; 3396 + vcpu->run->io.size = size; 3397 + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 3398 + vcpu->run->io.count = count; 3399 + vcpu->run->io.port = port; 3400 + 3401 + return 0; 3489 3402 } 3490 3403 3491 3404 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) ··· 3613 3316 3614 3317 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3615 3318 { 3616 - return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); 3319 + return kvm_get_dr(ctxt->vcpu, dr, dest); 3617 3320 } 3618 3321 3619 3322 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3620 3323 { 3621 3324 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? 
~0ULL : ~0U; 3622 3325 3623 - return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); 3326 + return kvm_set_dr(ctxt->vcpu, dr, value & mask); 3624 3327 } 3625 3328 3626 3329 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) ··· 3641 3344 } 3642 3345 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3643 3346 3347 + static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3348 + { 3349 + return (curr_cr & ~((1ULL << 32) - 1)) | new_val; 3350 + } 3351 + 3352 + static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) 3353 + { 3354 + unsigned long value; 3355 + 3356 + switch (cr) { 3357 + case 0: 3358 + value = kvm_read_cr0(vcpu); 3359 + break; 3360 + case 2: 3361 + value = vcpu->arch.cr2; 3362 + break; 3363 + case 3: 3364 + value = vcpu->arch.cr3; 3365 + break; 3366 + case 4: 3367 + value = kvm_read_cr4(vcpu); 3368 + break; 3369 + case 8: 3370 + value = kvm_get_cr8(vcpu); 3371 + break; 3372 + default: 3373 + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3374 + return 0; 3375 + } 3376 + 3377 + return value; 3378 + } 3379 + 3380 + static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) 3381 + { 3382 + switch (cr) { 3383 + case 0: 3384 + kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); 3385 + break; 3386 + case 2: 3387 + vcpu->arch.cr2 = val; 3388 + break; 3389 + case 3: 3390 + kvm_set_cr3(vcpu, val); 3391 + break; 3392 + case 4: 3393 + kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); 3394 + break; 3395 + case 8: 3396 + kvm_set_cr8(vcpu, val & 0xfUL); 3397 + break; 3398 + default: 3399 + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3400 + } 3401 + } 3402 + 3403 + static int emulator_get_cpl(struct kvm_vcpu *vcpu) 3404 + { 3405 + return kvm_x86_ops->get_cpl(vcpu); 3406 + } 3407 + 3408 + static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) 3409 + { 3410 + kvm_x86_ops->get_gdt(vcpu, dt); 3411 + } 3412 + 3413 + static bool emulator_get_cached_descriptor(struct 
desc_struct *desc, int seg, 3414 + struct kvm_vcpu *vcpu) 3415 + { 3416 + struct kvm_segment var; 3417 + 3418 + kvm_get_segment(vcpu, &var, seg); 3419 + 3420 + if (var.unusable) 3421 + return false; 3422 + 3423 + if (var.g) 3424 + var.limit >>= 12; 3425 + set_desc_limit(desc, var.limit); 3426 + set_desc_base(desc, (unsigned long)var.base); 3427 + desc->type = var.type; 3428 + desc->s = var.s; 3429 + desc->dpl = var.dpl; 3430 + desc->p = var.present; 3431 + desc->avl = var.avl; 3432 + desc->l = var.l; 3433 + desc->d = var.db; 3434 + desc->g = var.g; 3435 + 3436 + return true; 3437 + } 3438 + 3439 + static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, 3440 + struct kvm_vcpu *vcpu) 3441 + { 3442 + struct kvm_segment var; 3443 + 3444 + /* needed to preserve selector */ 3445 + kvm_get_segment(vcpu, &var, seg); 3446 + 3447 + var.base = get_desc_base(desc); 3448 + var.limit = get_desc_limit(desc); 3449 + if (desc->g) 3450 + var.limit = (var.limit << 12) | 0xfff; 3451 + var.type = desc->type; 3452 + var.present = desc->p; 3453 + var.dpl = desc->dpl; 3454 + var.db = desc->d; 3455 + var.s = desc->s; 3456 + var.l = desc->l; 3457 + var.g = desc->g; 3458 + var.avl = desc->avl; 3459 + var.present = desc->p; 3460 + var.unusable = !var.present; 3461 + var.padding = 0; 3462 + 3463 + kvm_set_segment(vcpu, &var, seg); 3464 + return; 3465 + } 3466 + 3467 + static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) 3468 + { 3469 + struct kvm_segment kvm_seg; 3470 + 3471 + kvm_get_segment(vcpu, &kvm_seg, seg); 3472 + return kvm_seg.selector; 3473 + } 3474 + 3475 + static void emulator_set_segment_selector(u16 sel, int seg, 3476 + struct kvm_vcpu *vcpu) 3477 + { 3478 + struct kvm_segment kvm_seg; 3479 + 3480 + kvm_get_segment(vcpu, &kvm_seg, seg); 3481 + kvm_seg.selector = sel; 3482 + kvm_set_segment(vcpu, &kvm_seg, seg); 3483 + } 3484 + 3485 + static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 3486 + { 3487 + 
kvm_x86_ops->set_rflags(vcpu, rflags); 3488 + } 3489 + 3644 3490 static struct x86_emulate_ops emulate_ops = { 3645 3491 .read_std = kvm_read_guest_virt_system, 3492 + .write_std = kvm_write_guest_virt_system, 3646 3493 .fetch = kvm_fetch_guest_virt, 3647 3494 .read_emulated = emulator_read_emulated, 3648 3495 .write_emulated = emulator_write_emulated, 3649 3496 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3497 + .pio_in_emulated = emulator_pio_in_emulated, 3498 + .pio_out_emulated = emulator_pio_out_emulated, 3499 + .get_cached_descriptor = emulator_get_cached_descriptor, 3500 + .set_cached_descriptor = emulator_set_cached_descriptor, 3501 + .get_segment_selector = emulator_get_segment_selector, 3502 + .set_segment_selector = emulator_set_segment_selector, 3503 + .get_gdt = emulator_get_gdt, 3504 + .get_cr = emulator_get_cr, 3505 + .set_cr = emulator_set_cr, 3506 + .cpl = emulator_get_cpl, 3507 + .set_rflags = emulator_set_rflags, 3650 3508 }; 3651 3509 3652 3510 static void cache_all_regs(struct kvm_vcpu *vcpu) ··· 3832 3380 cache_all_regs(vcpu); 3833 3381 3834 3382 vcpu->mmio_is_write = 0; 3835 - vcpu->arch.pio.string = 0; 3836 3383 3837 3384 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3838 3385 int cs_db, cs_l; 3839 3386 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3840 3387 3841 3388 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3842 - vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3389 + vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); 3390 + vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); 3843 3391 vcpu->arch.emulate_ctxt.mode = 3844 3392 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 3845 3393 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) ··· 3848 3396 ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3849 3397 3850 3398 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3399 + trace_kvm_emulate_insn_start(vcpu); 3851 3400 3852 3401 /* Only allow emulation of specific instructions on #UD 3853 3402 * (namely VMMCALL, sysenter, sysexit, syscall)*/ ··· 3881 3428 ++vcpu->stat.insn_emulation; 3882 3429 if (r) { 3883 3430 ++vcpu->stat.insn_emulation_fail; 3431 + trace_kvm_emulate_insn_failed(vcpu); 3884 3432 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3885 3433 return EMULATE_DONE; 3886 3434 return EMULATE_FAIL; ··· 3893 3439 return EMULATE_DONE; 3894 3440 } 3895 3441 3442 + restart: 3896 3443 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3897 3444 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; 3898 3445 3899 3446 if (r == 0) 3900 3447 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); 3901 3448 3902 - if (vcpu->arch.pio.string) 3449 + if (vcpu->arch.pio.count) { 3450 + if (!vcpu->arch.pio.in) 3451 + vcpu->arch.pio.count = 0; 3903 3452 return EMULATE_DO_MMIO; 3453 + } 3904 3454 3905 - if ((r || vcpu->mmio_is_write) && run) { 3455 + if (r || vcpu->mmio_is_write) { 3906 3456 run->exit_reason = KVM_EXIT_MMIO; 3907 3457 run->mmio.phys_addr = vcpu->mmio_phys_addr; 3908 3458 memcpy(run->mmio.data, vcpu->mmio_data, 8); ··· 3916 3458 3917 3459 if (r) { 3918 3460 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3919 - return EMULATE_DONE; 3461 + goto done; 3920 3462 if (!vcpu->mmio_needed) { 3463 + ++vcpu->stat.insn_emulation_fail; 3464 + trace_kvm_emulate_insn_failed(vcpu); 3921 3465 kvm_report_emulation_failure(vcpu, "mmio"); 3922 3466 return EMULATE_FAIL; 3923 3467 } 3924 3468 return EMULATE_DO_MMIO; 3925 3469 } 3926 3470 3927 - kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 3928 - 3929 3471 if (vcpu->mmio_is_write) { 3930 3472 vcpu->mmio_needed = 0; 3931 3473 return EMULATE_DO_MMIO; 3932 3474 } 3933 3475 3476 + done: 3477 + if (vcpu->arch.exception.pending) 3478 + 
vcpu->arch.emulate_ctxt.restart = false; 3479 + 3480 + if (vcpu->arch.emulate_ctxt.restart) 3481 + goto restart; 3482 + 3934 3483 return EMULATE_DONE; 3935 3484 } 3936 3485 EXPORT_SYMBOL_GPL(emulate_instruction); 3937 3486 3938 - static int pio_copy_data(struct kvm_vcpu *vcpu) 3487 + int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) 3939 3488 { 3940 - void *p = vcpu->arch.pio_data; 3941 - gva_t q = vcpu->arch.pio.guest_gva; 3942 - unsigned bytes; 3943 - int ret; 3944 - u32 error_code; 3945 - 3946 - bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 3947 - if (vcpu->arch.pio.in) 3948 - ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); 3949 - else 3950 - ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); 3951 - 3952 - if (ret == X86EMUL_PROPAGATE_FAULT) 3953 - kvm_inject_page_fault(vcpu, q, error_code); 3954 - 3489 + unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3490 + int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); 3491 + /* do not return to emulator after return from userspace */ 3492 + vcpu->arch.pio.count = 0; 3955 3493 return ret; 3956 3494 } 3957 - 3958 - int complete_pio(struct kvm_vcpu *vcpu) 3959 - { 3960 - struct kvm_pio_request *io = &vcpu->arch.pio; 3961 - long delta; 3962 - int r; 3963 - unsigned long val; 3964 - 3965 - if (!io->string) { 3966 - if (io->in) { 3967 - val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3968 - memcpy(&val, vcpu->arch.pio_data, io->size); 3969 - kvm_register_write(vcpu, VCPU_REGS_RAX, val); 3970 - } 3971 - } else { 3972 - if (io->in) { 3973 - r = pio_copy_data(vcpu); 3974 - if (r) 3975 - goto out; 3976 - } 3977 - 3978 - delta = 1; 3979 - if (io->rep) { 3980 - delta *= io->cur_count; 3981 - /* 3982 - * The size of the register should really depend on 3983 - * current address size. 
3984 - */ 3985 - val = kvm_register_read(vcpu, VCPU_REGS_RCX); 3986 - val -= delta; 3987 - kvm_register_write(vcpu, VCPU_REGS_RCX, val); 3988 - } 3989 - if (io->down) 3990 - delta = -delta; 3991 - delta *= io->size; 3992 - if (io->in) { 3993 - val = kvm_register_read(vcpu, VCPU_REGS_RDI); 3994 - val += delta; 3995 - kvm_register_write(vcpu, VCPU_REGS_RDI, val); 3996 - } else { 3997 - val = kvm_register_read(vcpu, VCPU_REGS_RSI); 3998 - val += delta; 3999 - kvm_register_write(vcpu, VCPU_REGS_RSI, val); 4000 - } 4001 - } 4002 - out: 4003 - io->count -= io->cur_count; 4004 - io->cur_count = 0; 4005 - 4006 - return 0; 4007 - } 4008 - 4009 - static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) 4010 - { 4011 - /* TODO: String I/O for in kernel device */ 4012 - int r; 4013 - 4014 - if (vcpu->arch.pio.in) 4015 - r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, 4016 - vcpu->arch.pio.size, pd); 4017 - else 4018 - r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, 4019 - vcpu->arch.pio.port, vcpu->arch.pio.size, 4020 - pd); 4021 - return r; 4022 - } 4023 - 4024 - static int pio_string_write(struct kvm_vcpu *vcpu) 4025 - { 4026 - struct kvm_pio_request *io = &vcpu->arch.pio; 4027 - void *pd = vcpu->arch.pio_data; 4028 - int i, r = 0; 4029 - 4030 - for (i = 0; i < io->cur_count; i++) { 4031 - if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, 4032 - io->port, io->size, pd)) { 4033 - r = -EOPNOTSUPP; 4034 - break; 4035 - } 4036 - pd += io->size; 4037 - } 4038 - return r; 4039 - } 4040 - 4041 - int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) 4042 - { 4043 - unsigned long val; 4044 - 4045 - trace_kvm_pio(!in, port, size, 1); 4046 - 4047 - vcpu->run->exit_reason = KVM_EXIT_IO; 4048 - vcpu->run->io.direction = in ? 
KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 4049 - vcpu->run->io.size = vcpu->arch.pio.size = size; 4050 - vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 4051 - vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; 4052 - vcpu->run->io.port = vcpu->arch.pio.port = port; 4053 - vcpu->arch.pio.in = in; 4054 - vcpu->arch.pio.string = 0; 4055 - vcpu->arch.pio.down = 0; 4056 - vcpu->arch.pio.rep = 0; 4057 - 4058 - if (!vcpu->arch.pio.in) { 4059 - val = kvm_register_read(vcpu, VCPU_REGS_RAX); 4060 - memcpy(vcpu->arch.pio_data, &val, 4); 4061 - } 4062 - 4063 - if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 4064 - complete_pio(vcpu); 4065 - return 1; 4066 - } 4067 - return 0; 4068 - } 4069 - EXPORT_SYMBOL_GPL(kvm_emulate_pio); 4070 - 4071 - int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, 4072 - int size, unsigned long count, int down, 4073 - gva_t address, int rep, unsigned port) 4074 - { 4075 - unsigned now, in_page; 4076 - int ret = 0; 4077 - 4078 - trace_kvm_pio(!in, port, size, count); 4079 - 4080 - vcpu->run->exit_reason = KVM_EXIT_IO; 4081 - vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 4082 - vcpu->run->io.size = vcpu->arch.pio.size = size; 4083 - vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 4084 - vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; 4085 - vcpu->run->io.port = vcpu->arch.pio.port = port; 4086 - vcpu->arch.pio.in = in; 4087 - vcpu->arch.pio.string = 1; 4088 - vcpu->arch.pio.down = down; 4089 - vcpu->arch.pio.rep = rep; 4090 - 4091 - if (!count) { 4092 - kvm_x86_ops->skip_emulated_instruction(vcpu); 4093 - return 1; 4094 - } 4095 - 4096 - if (!down) 4097 - in_page = PAGE_SIZE - offset_in_page(address); 4098 - else 4099 - in_page = offset_in_page(address) + size; 4100 - now = min(count, (unsigned long)in_page / size); 4101 - if (!now) 4102 - now = 1; 4103 - if (down) { 4104 - /* 4105 - * String I/O in reverse. Yuck. Kill the guest, fix later. 
4106 - */ 4107 - pr_unimpl(vcpu, "guest string pio down\n"); 4108 - kvm_inject_gp(vcpu, 0); 4109 - return 1; 4110 - } 4111 - vcpu->run->io.count = now; 4112 - vcpu->arch.pio.cur_count = now; 4113 - 4114 - if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) 4115 - kvm_x86_ops->skip_emulated_instruction(vcpu); 4116 - 4117 - vcpu->arch.pio.guest_gva = address; 4118 - 4119 - if (!vcpu->arch.pio.in) { 4120 - /* string PIO write */ 4121 - ret = pio_copy_data(vcpu); 4122 - if (ret == X86EMUL_PROPAGATE_FAULT) 4123 - return 1; 4124 - if (ret == 0 && !pio_string_write(vcpu)) { 4125 - complete_pio(vcpu); 4126 - if (vcpu->arch.pio.count == 0) 4127 - ret = 1; 4128 - } 4129 - } 4130 - /* no string PIO read support yet */ 4131 - 4132 - return ret; 4133 - } 4134 - EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); 3495 + EXPORT_SYMBOL_GPL(kvm_fast_pio_out); 4135 3496 4136 3497 static void bounce_off(void *info) 4137 3498 { ··· 4273 3996 return emulator_write_emulated(rip, instruction, 3, vcpu); 4274 3997 } 4275 3998 4276 - static u64 mk_cr_64(u64 curr_cr, u32 new_val) 4277 - { 4278 - return (curr_cr & ~((1ULL << 32) - 1)) | new_val; 4279 - } 4280 - 4281 3999 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4282 4000 { 4283 - struct descriptor_table dt = { limit, base }; 4001 + struct desc_ptr dt = { limit, base }; 4284 4002 4285 4003 kvm_x86_ops->set_gdt(vcpu, &dt); 4286 4004 } 4287 4005 4288 4006 void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4289 4007 { 4290 - struct descriptor_table dt = { limit, base }; 4008 + struct desc_ptr dt = { limit, base }; 4291 4009 4292 4010 kvm_x86_ops->set_idt(vcpu, &dt); 4293 - } 4294 - 4295 - void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, 4296 - unsigned long *rflags) 4297 - { 4298 - kvm_lmsw(vcpu, msw); 4299 - *rflags = kvm_get_rflags(vcpu); 4300 - } 4301 - 4302 - unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 4303 - { 4304 - unsigned long value; 4305 - 4306 - switch (cr) { 
4307 - case 0: 4308 - value = kvm_read_cr0(vcpu); 4309 - break; 4310 - case 2: 4311 - value = vcpu->arch.cr2; 4312 - break; 4313 - case 3: 4314 - value = vcpu->arch.cr3; 4315 - break; 4316 - case 4: 4317 - value = kvm_read_cr4(vcpu); 4318 - break; 4319 - case 8: 4320 - value = kvm_get_cr8(vcpu); 4321 - break; 4322 - default: 4323 - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 4324 - return 0; 4325 - } 4326 - 4327 - return value; 4328 - } 4329 - 4330 - void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, 4331 - unsigned long *rflags) 4332 - { 4333 - switch (cr) { 4334 - case 0: 4335 - kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); 4336 - *rflags = kvm_get_rflags(vcpu); 4337 - break; 4338 - case 2: 4339 - vcpu->arch.cr2 = val; 4340 - break; 4341 - case 3: 4342 - kvm_set_cr3(vcpu, val); 4343 - break; 4344 - case 4: 4345 - kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); 4346 - break; 4347 - case 8: 4348 - kvm_set_cr8(vcpu, val & 0xfUL); 4349 - break; 4350 - default: 4351 - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 4352 - } 4353 4011 } 4354 4012 4355 4013 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) ··· 4350 4138 { 4351 4139 struct kvm_cpuid_entry2 *best; 4352 4140 4141 + best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); 4142 + if (!best || best->eax < 0x80000008) 4143 + goto not_found; 4353 4144 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); 4354 4145 if (best) 4355 4146 return best->eax & 0xff; 4147 + not_found: 4356 4148 return 36; 4357 4149 } 4358 4150 ··· 4470 4254 { 4471 4255 /* try to reinject previous events if any */ 4472 4256 if (vcpu->arch.exception.pending) { 4257 + trace_kvm_inj_exception(vcpu->arch.exception.nr, 4258 + vcpu->arch.exception.has_error_code, 4259 + vcpu->arch.exception.error_code); 4473 4260 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 4474 4261 vcpu->arch.exception.has_error_code, 4475 - vcpu->arch.exception.error_code); 4262 + 
vcpu->arch.exception.error_code, 4263 + vcpu->arch.exception.reinject); 4476 4264 return; 4477 4265 } 4478 4266 ··· 4706 4486 } 4707 4487 4708 4488 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4709 - post_kvm_run_save(vcpu); 4710 4489 4711 4490 vapic_exit(vcpu); 4712 4491 ··· 4733 4514 if (!irqchip_in_kernel(vcpu->kvm)) 4734 4515 kvm_set_cr8(vcpu, kvm_run->cr8); 4735 4516 4736 - if (vcpu->arch.pio.cur_count) { 4517 + if (vcpu->arch.pio.count || vcpu->mmio_needed || 4518 + vcpu->arch.emulate_ctxt.restart) { 4519 + if (vcpu->mmio_needed) { 4520 + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 4521 + vcpu->mmio_read_completed = 1; 4522 + vcpu->mmio_needed = 0; 4523 + } 4737 4524 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4738 - r = complete_pio(vcpu); 4739 - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4740 - if (r) 4741 - goto out; 4742 - } 4743 - if (vcpu->mmio_needed) { 4744 - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 4745 - vcpu->mmio_read_completed = 1; 4746 - vcpu->mmio_needed = 0; 4747 - 4748 - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4749 - r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4750 - EMULTYPE_NO_DECODE); 4525 + r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); 4751 4526 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4752 4527 if (r == EMULATE_DO_MMIO) { 4753 - /* 4754 - * Read-modify-write. Back to userspace. 
4755 - */ 4756 4528 r = 0; 4757 4529 goto out; 4758 4530 } ··· 4755 4545 r = __vcpu_run(vcpu); 4756 4546 4757 4547 out: 4548 + post_kvm_run_save(vcpu); 4758 4549 if (vcpu->sigset_active) 4759 4550 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 4760 4551 ··· 4827 4616 return 0; 4828 4617 } 4829 4618 4830 - void kvm_get_segment(struct kvm_vcpu *vcpu, 4831 - struct kvm_segment *var, int seg) 4832 - { 4833 - kvm_x86_ops->get_segment(vcpu, var, seg); 4834 - } 4835 - 4836 4619 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 4837 4620 { 4838 4621 struct kvm_segment cs; ··· 4840 4635 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 4841 4636 struct kvm_sregs *sregs) 4842 4637 { 4843 - struct descriptor_table dt; 4638 + struct desc_ptr dt; 4844 4639 4845 4640 vcpu_load(vcpu); 4846 4641 ··· 4855 4650 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 4856 4651 4857 4652 kvm_x86_ops->get_idt(vcpu, &dt); 4858 - sregs->idt.limit = dt.limit; 4859 - sregs->idt.base = dt.base; 4653 + sregs->idt.limit = dt.size; 4654 + sregs->idt.base = dt.address; 4860 4655 kvm_x86_ops->get_gdt(vcpu, &dt); 4861 - sregs->gdt.limit = dt.limit; 4862 - sregs->gdt.base = dt.base; 4656 + sregs->gdt.limit = dt.size; 4657 + sregs->gdt.base = dt.address; 4863 4658 4864 4659 sregs->cr0 = kvm_read_cr0(vcpu); 4865 4660 sregs->cr2 = vcpu->arch.cr2; ··· 4898 4693 return 0; 4899 4694 } 4900 4695 4901 - static void kvm_set_segment(struct kvm_vcpu *vcpu, 4902 - struct kvm_segment *var, int seg) 4696 + int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 4697 + bool has_error_code, u32 error_code) 4903 4698 { 4904 - kvm_x86_ops->set_segment(vcpu, var, seg); 4905 - } 4699 + int cs_db, cs_l, ret; 4700 + cache_all_regs(vcpu); 4906 4701 4907 - static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, 4908 - struct kvm_segment *kvm_desct) 4909 - { 4910 - kvm_desct->base = get_desc_base(seg_desc); 4911 - kvm_desct->limit = get_desc_limit(seg_desc); 4912 - if 
(seg_desc->g) { 4913 - kvm_desct->limit <<= 12; 4914 - kvm_desct->limit |= 0xfff; 4915 - } 4916 - kvm_desct->selector = selector; 4917 - kvm_desct->type = seg_desc->type; 4918 - kvm_desct->present = seg_desc->p; 4919 - kvm_desct->dpl = seg_desc->dpl; 4920 - kvm_desct->db = seg_desc->d; 4921 - kvm_desct->s = seg_desc->s; 4922 - kvm_desct->l = seg_desc->l; 4923 - kvm_desct->g = seg_desc->g; 4924 - kvm_desct->avl = seg_desc->avl; 4925 - if (!selector) 4926 - kvm_desct->unusable = 1; 4927 - else 4928 - kvm_desct->unusable = 0; 4929 - kvm_desct->padding = 0; 4930 - } 4702 + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 4931 4703 4932 - static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, 4933 - u16 selector, 4934 - struct descriptor_table *dtable) 4935 - { 4936 - if (selector & 1 << 2) { 4937 - struct kvm_segment kvm_seg; 4704 + vcpu->arch.emulate_ctxt.vcpu = vcpu; 4705 + vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); 4706 + vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); 4707 + vcpu->arch.emulate_ctxt.mode = 4708 + (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 4709 + (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 4710 + ? X86EMUL_MODE_VM86 : cs_l 4711 + ? X86EMUL_MODE_PROT64 : cs_db 4712 + ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 4938 4713 4939 - kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); 4714 + ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, 4715 + tss_selector, reason, has_error_code, 4716 + error_code); 4940 4717 4941 - if (kvm_seg.unusable) 4942 - dtable->limit = 0; 4943 - else 4944 - dtable->limit = kvm_seg.limit; 4945 - dtable->base = kvm_seg.base; 4946 - } 4947 - else 4948 - kvm_x86_ops->get_gdt(vcpu, dtable); 4949 - } 4950 - 4951 - /* allowed just for 8 bytes segments */ 4952 - static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4953 - struct desc_struct *seg_desc) 4954 - { 4955 - struct descriptor_table dtable; 4956 - u16 index = selector >> 3; 4957 - int ret; 4958 - u32 err; 4959 - gva_t addr; 4960 - 4961 - get_segment_descriptor_dtable(vcpu, selector, &dtable); 4962 - 4963 - if (dtable.limit < index * 8 + 7) { 4964 - kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 4965 - return X86EMUL_PROPAGATE_FAULT; 4966 - } 4967 - addr = dtable.base + index * 8; 4968 - ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), 4969 - vcpu, &err); 4970 - if (ret == X86EMUL_PROPAGATE_FAULT) 4971 - kvm_inject_page_fault(vcpu, addr, err); 4972 - 4973 - return ret; 4974 - } 4975 - 4976 - /* allowed just for 8 bytes segments */ 4977 - static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4978 - struct desc_struct *seg_desc) 4979 - { 4980 - struct descriptor_table dtable; 4981 - u16 index = selector >> 3; 4982 - 4983 - get_segment_descriptor_dtable(vcpu, selector, &dtable); 4984 - 4985 - if (dtable.limit < index * 8 + 7) 4986 - return 1; 4987 - return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); 4988 - } 4989 - 4990 - static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, 4991 - struct desc_struct *seg_desc) 4992 - { 4993 - u32 base_addr = get_desc_base(seg_desc); 4994 - 4995 - return kvm_mmu_gva_to_gpa_write(vcpu, 
base_addr, NULL); 4996 - } 4997 - 4998 - static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, 4999 - struct desc_struct *seg_desc) 5000 - { 5001 - u32 base_addr = get_desc_base(seg_desc); 5002 - 5003 - return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); 5004 - } 5005 - 5006 - static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) 5007 - { 5008 - struct kvm_segment kvm_seg; 5009 - 5010 - kvm_get_segment(vcpu, &kvm_seg, seg); 5011 - return kvm_seg.selector; 5012 - } 5013 - 5014 - static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) 5015 - { 5016 - struct kvm_segment segvar = { 5017 - .base = selector << 4, 5018 - .limit = 0xffff, 5019 - .selector = selector, 5020 - .type = 3, 5021 - .present = 1, 5022 - .dpl = 3, 5023 - .db = 0, 5024 - .s = 1, 5025 - .l = 0, 5026 - .g = 0, 5027 - .avl = 0, 5028 - .unusable = 0, 5029 - }; 5030 - kvm_x86_ops->set_segment(vcpu, &segvar, seg); 5031 - return X86EMUL_CONTINUE; 5032 - } 5033 - 5034 - static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) 5035 - { 5036 - return (seg != VCPU_SREG_LDTR) && 5037 - (seg != VCPU_SREG_TR) && 5038 - (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); 5039 - } 5040 - 5041 - int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) 5042 - { 5043 - struct kvm_segment kvm_seg; 5044 - struct desc_struct seg_desc; 5045 - u8 dpl, rpl, cpl; 5046 - unsigned err_vec = GP_VECTOR; 5047 - u32 err_code = 0; 5048 - bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ 5049 - int ret; 5050 - 5051 - if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) 5052 - return kvm_load_realmode_segment(vcpu, selector, seg); 5053 - 5054 - /* NULL selector is not valid for TR, CS and SS */ 5055 - if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) 5056 - && null_selector) 5057 - goto exception; 5058 - 5059 - /* TR should be in GDT only */ 5060 - if (seg == VCPU_SREG_TR && (selector & (1 << 2))) 5061 - goto exception; 5062 - 5063 - ret = 
load_guest_segment_descriptor(vcpu, selector, &seg_desc); 5064 4718 if (ret) 5065 - return ret; 4719 + return EMULATE_FAIL; 5066 4720 5067 - seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); 5068 - 5069 - if (null_selector) { /* for NULL selector skip all following checks */ 5070 - kvm_seg.unusable = 1; 5071 - goto load; 5072 - } 5073 - 5074 - err_code = selector & 0xfffc; 5075 - err_vec = GP_VECTOR; 5076 - 5077 - /* can't load system descriptor into segment selecor */ 5078 - if (seg <= VCPU_SREG_GS && !kvm_seg.s) 5079 - goto exception; 5080 - 5081 - if (!kvm_seg.present) { 5082 - err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; 5083 - goto exception; 5084 - } 5085 - 5086 - rpl = selector & 3; 5087 - dpl = kvm_seg.dpl; 5088 - cpl = kvm_x86_ops->get_cpl(vcpu); 5089 - 5090 - switch (seg) { 5091 - case VCPU_SREG_SS: 5092 - /* 5093 - * segment is not a writable data segment or segment 5094 - * selector's RPL != CPL or segment selector's RPL != CPL 5095 - */ 5096 - if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) 5097 - goto exception; 5098 - break; 5099 - case VCPU_SREG_CS: 5100 - if (!(kvm_seg.type & 8)) 5101 - goto exception; 5102 - 5103 - if (kvm_seg.type & 4) { 5104 - /* conforming */ 5105 - if (dpl > cpl) 5106 - goto exception; 5107 - } else { 5108 - /* nonconforming */ 5109 - if (rpl > cpl || dpl != cpl) 5110 - goto exception; 5111 - } 5112 - /* CS(RPL) <- CPL */ 5113 - selector = (selector & 0xfffc) | cpl; 5114 - break; 5115 - case VCPU_SREG_TR: 5116 - if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) 5117 - goto exception; 5118 - break; 5119 - case VCPU_SREG_LDTR: 5120 - if (kvm_seg.s || kvm_seg.type != 2) 5121 - goto exception; 5122 - break; 5123 - default: /* DS, ES, FS, or GS */ 5124 - /* 5125 - * segment is not a data or readable code segment or 5126 - * ((segment is a data or nonconforming code segment) 5127 - * and (both RPL and CPL > DPL)) 5128 - */ 5129 - if ((kvm_seg.type & 0xa) == 0x8 || 5130 - (((kvm_seg.type & 0xc) 
!= 0xc) && (rpl > dpl && cpl > dpl))) 5131 - goto exception; 5132 - break; 5133 - } 5134 - 5135 - if (!kvm_seg.unusable && kvm_seg.s) { 5136 - /* mark segment as accessed */ 5137 - kvm_seg.type |= 1; 5138 - seg_desc.type |= 1; 5139 - save_guest_segment_descriptor(vcpu, selector, &seg_desc); 5140 - } 5141 - load: 5142 - kvm_set_segment(vcpu, &kvm_seg, seg); 5143 - return X86EMUL_CONTINUE; 5144 - exception: 5145 - kvm_queue_exception_e(vcpu, err_vec, err_code); 5146 - return X86EMUL_PROPAGATE_FAULT; 5147 - } 5148 - 5149 - static void save_state_to_tss32(struct kvm_vcpu *vcpu, 5150 - struct tss_segment_32 *tss) 5151 - { 5152 - tss->cr3 = vcpu->arch.cr3; 5153 - tss->eip = kvm_rip_read(vcpu); 5154 - tss->eflags = kvm_get_rflags(vcpu); 5155 - tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); 5156 - tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); 5157 - tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); 5158 - tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); 5159 - tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); 5160 - tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); 5161 - tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); 5162 - tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); 5163 - tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); 5164 - tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); 5165 - tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); 5166 - tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); 5167 - tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); 5168 - tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); 5169 - tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); 5170 - } 5171 - 5172 - static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) 5173 - { 5174 - struct kvm_segment kvm_seg; 5175 - kvm_get_segment(vcpu, &kvm_seg, seg); 5176 - kvm_seg.selector = sel; 5177 - kvm_set_segment(vcpu, &kvm_seg, seg); 5178 - } 5179 - 5180 - static int load_state_from_tss32(struct kvm_vcpu *vcpu, 5181 - struct 
tss_segment_32 *tss) 5182 - { 5183 - kvm_set_cr3(vcpu, tss->cr3); 5184 - 5185 - kvm_rip_write(vcpu, tss->eip); 5186 - kvm_set_rflags(vcpu, tss->eflags | 2); 5187 - 5188 - kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); 5189 - kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); 5190 - kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); 5191 - kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); 5192 - kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); 5193 - kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); 5194 - kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); 5195 - kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); 5196 - 5197 - /* 5198 - * SDM says that segment selectors are loaded before segment 5199 - * descriptors 5200 - */ 5201 - kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); 5202 - kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); 5203 - kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); 5204 - kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); 5205 - kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); 5206 - kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); 5207 - kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); 5208 - 5209 - /* 5210 - * Now load segment descriptors. 
If fault happenes at this stage 5211 - * it is handled in a context of new task 5212 - */ 5213 - if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) 5214 - return 1; 5215 - 5216 - if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) 5217 - return 1; 5218 - 5219 - if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) 5220 - return 1; 5221 - 5222 - if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) 5223 - return 1; 5224 - 5225 - if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) 5226 - return 1; 5227 - 5228 - if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) 5229 - return 1; 5230 - 5231 - if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) 5232 - return 1; 5233 - return 0; 5234 - } 5235 - 5236 - static void save_state_to_tss16(struct kvm_vcpu *vcpu, 5237 - struct tss_segment_16 *tss) 5238 - { 5239 - tss->ip = kvm_rip_read(vcpu); 5240 - tss->flag = kvm_get_rflags(vcpu); 5241 - tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); 5242 - tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); 5243 - tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); 5244 - tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); 5245 - tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); 5246 - tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); 5247 - tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); 5248 - tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); 5249 - 5250 - tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); 5251 - tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); 5252 - tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); 5253 - tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); 5254 - tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); 5255 - } 5256 - 5257 - static int load_state_from_tss16(struct kvm_vcpu *vcpu, 5258 - struct tss_segment_16 *tss) 5259 - { 5260 - kvm_rip_write(vcpu, tss->ip); 5261 - kvm_set_rflags(vcpu, tss->flag | 2); 5262 - kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); 5263 - 
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); 5264 - kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); 5265 - kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); 5266 - kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); 5267 - kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); 5268 - kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); 5269 - kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); 5270 - 5271 - /* 5272 - * SDM says that segment selectors are loaded before segment 5273 - * descriptors 5274 - */ 5275 - kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); 5276 - kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); 5277 - kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); 5278 - kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); 5279 - kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); 5280 - 5281 - /* 5282 - * Now load segment descriptors. If fault happenes at this stage 5283 - * it is handled in a context of new task 5284 - */ 5285 - if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) 5286 - return 1; 5287 - 5288 - if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) 5289 - return 1; 5290 - 5291 - if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) 5292 - return 1; 5293 - 5294 - if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) 5295 - return 1; 5296 - 5297 - if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) 5298 - return 1; 5299 - return 0; 5300 - } 5301 - 5302 - static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, 5303 - u16 old_tss_sel, u32 old_tss_base, 5304 - struct desc_struct *nseg_desc) 5305 - { 5306 - struct tss_segment_16 tss_segment_16; 5307 - int ret = 0; 5308 - 5309 - if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, 5310 - sizeof tss_segment_16)) 5311 - goto out; 5312 - 5313 - save_state_to_tss16(vcpu, &tss_segment_16); 5314 - 5315 - if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, 5316 - sizeof tss_segment_16)) 5317 - goto 
out; 5318 - 5319 - if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), 5320 - &tss_segment_16, sizeof tss_segment_16)) 5321 - goto out; 5322 - 5323 - if (old_tss_sel != 0xffff) { 5324 - tss_segment_16.prev_task_link = old_tss_sel; 5325 - 5326 - if (kvm_write_guest(vcpu->kvm, 5327 - get_tss_base_addr_write(vcpu, nseg_desc), 5328 - &tss_segment_16.prev_task_link, 5329 - sizeof tss_segment_16.prev_task_link)) 5330 - goto out; 5331 - } 5332 - 5333 - if (load_state_from_tss16(vcpu, &tss_segment_16)) 5334 - goto out; 5335 - 5336 - ret = 1; 5337 - out: 5338 - return ret; 5339 - } 5340 - 5341 - static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, 5342 - u16 old_tss_sel, u32 old_tss_base, 5343 - struct desc_struct *nseg_desc) 5344 - { 5345 - struct tss_segment_32 tss_segment_32; 5346 - int ret = 0; 5347 - 5348 - if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, 5349 - sizeof tss_segment_32)) 5350 - goto out; 5351 - 5352 - save_state_to_tss32(vcpu, &tss_segment_32); 5353 - 5354 - if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, 5355 - sizeof tss_segment_32)) 5356 - goto out; 5357 - 5358 - if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), 5359 - &tss_segment_32, sizeof tss_segment_32)) 5360 - goto out; 5361 - 5362 - if (old_tss_sel != 0xffff) { 5363 - tss_segment_32.prev_task_link = old_tss_sel; 5364 - 5365 - if (kvm_write_guest(vcpu->kvm, 5366 - get_tss_base_addr_write(vcpu, nseg_desc), 5367 - &tss_segment_32.prev_task_link, 5368 - sizeof tss_segment_32.prev_task_link)) 5369 - goto out; 5370 - } 5371 - 5372 - if (load_state_from_tss32(vcpu, &tss_segment_32)) 5373 - goto out; 5374 - 5375 - ret = 1; 5376 - out: 5377 - return ret; 5378 - } 5379 - 5380 - int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) 5381 - { 5382 - struct kvm_segment tr_seg; 5383 - struct desc_struct cseg_desc; 5384 - struct desc_struct nseg_desc; 5385 - int ret = 0; 5386 - u32 old_tss_base = 
get_segment_base(vcpu, VCPU_SREG_TR); 5387 - u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5388 - u32 desc_limit; 5389 - 5390 - old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); 5391 - 5392 - /* FIXME: Handle errors. Failure to read either TSS or their 5393 - * descriptors should generate a pagefault. 5394 - */ 5395 - if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) 5396 - goto out; 5397 - 5398 - if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) 5399 - goto out; 5400 - 5401 - if (reason != TASK_SWITCH_IRET) { 5402 - int cpl; 5403 - 5404 - cpl = kvm_x86_ops->get_cpl(vcpu); 5405 - if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { 5406 - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); 5407 - return 1; 5408 - } 5409 - } 5410 - 5411 - desc_limit = get_desc_limit(&nseg_desc); 5412 - if (!nseg_desc.p || 5413 - ((desc_limit < 0x67 && (nseg_desc.type & 8)) || 5414 - desc_limit < 0x2b)) { 5415 - kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); 5416 - return 1; 5417 - } 5418 - 5419 - if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { 5420 - cseg_desc.type &= ~(1 << 1); //clear the B flag 5421 - save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); 5422 - } 5423 - 5424 - if (reason == TASK_SWITCH_IRET) { 5425 - u32 eflags = kvm_get_rflags(vcpu); 5426 - kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); 5427 - } 5428 - 5429 - /* set back link to prev task only if NT bit is set in eflags 5430 - note that old_tss_sel is not used afetr this point */ 5431 - if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) 5432 - old_tss_sel = 0xffff; 5433 - 5434 - if (nseg_desc.type & 8) 5435 - ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, 5436 - old_tss_base, &nseg_desc); 5437 - else 5438 - ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, 5439 - old_tss_base, &nseg_desc); 5440 - 5441 - if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { 5442 - u32 
eflags = kvm_get_rflags(vcpu); 5443 - kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); 5444 - } 5445 - 5446 - if (reason != TASK_SWITCH_IRET) { 5447 - nseg_desc.type |= (1 << 1); 5448 - save_guest_segment_descriptor(vcpu, tss_selector, 5449 - &nseg_desc); 5450 - } 5451 - 5452 - kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); 5453 - seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 5454 - tr_seg.type = 11; 5455 - kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); 5456 - out: 5457 - return ret; 4721 + kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 4722 + return EMULATE_DONE; 5458 4723 } 5459 4724 EXPORT_SYMBOL_GPL(kvm_task_switch); 5460 4725 ··· 4933 5258 { 4934 5259 int mmu_reset_needed = 0; 4935 5260 int pending_vec, max_bits; 4936 - struct descriptor_table dt; 5261 + struct desc_ptr dt; 4937 5262 4938 5263 vcpu_load(vcpu); 4939 5264 4940 - dt.limit = sregs->idt.limit; 4941 - dt.base = sregs->idt.base; 5265 + dt.size = sregs->idt.limit; 5266 + dt.address = sregs->idt.base; 4942 5267 kvm_x86_ops->set_idt(vcpu, &dt); 4943 - dt.limit = sregs->gdt.limit; 4944 - dt.base = sregs->gdt.base; 5268 + dt.size = sregs->gdt.limit; 5269 + dt.address = sregs->gdt.base; 4945 5270 kvm_x86_ops->set_gdt(vcpu, &dt); 4946 5271 4947 5272 vcpu->arch.cr2 = sregs->cr2; ··· 5040 5365 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); 5041 5366 } 5042 5367 5043 - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 5044 - vcpu->arch.singlestep_cs = 5045 - get_segment_selector(vcpu, VCPU_SREG_CS); 5046 - vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); 5047 - } 5368 + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 5369 + vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + 5370 + get_segment_base(vcpu, VCPU_SREG_CS); 5048 5371 5049 5372 /* 5050 5373 * Trigger an rflags update that will inject or remove the trace ··· 5533 5860 return kvm_x86_ops->interrupt_allowed(vcpu); 5534 5861 } 5535 5862 5863 + bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned 
long linear_rip) 5864 + { 5865 + unsigned long current_rip = kvm_rip_read(vcpu) + 5866 + get_segment_base(vcpu, VCPU_SREG_CS); 5867 + 5868 + return current_rip == linear_rip; 5869 + } 5870 + EXPORT_SYMBOL_GPL(kvm_is_linear_rip); 5871 + 5536 5872 unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) 5537 5873 { 5538 5874 unsigned long rflags; 5539 5875 5540 5876 rflags = kvm_x86_ops->get_rflags(vcpu); 5541 5877 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 5542 - rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); 5878 + rflags &= ~X86_EFLAGS_TF; 5543 5879 return rflags; 5544 5880 } 5545 5881 EXPORT_SYMBOL_GPL(kvm_get_rflags); ··· 5556 5874 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 5557 5875 { 5558 5876 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && 5559 - vcpu->arch.singlestep_cs == 5560 - get_segment_selector(vcpu, VCPU_SREG_CS) && 5561 - vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) 5562 - rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; 5877 + kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) 5878 + rflags |= X86_EFLAGS_TF; 5563 5879 kvm_x86_ops->set_rflags(vcpu, rflags); 5564 5880 } 5565 5881 EXPORT_SYMBOL_GPL(kvm_set_rflags); ··· 5573 5893 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); 5574 5894 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); 5575 5895 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); 5896 + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
+7
arch/x86/kvm/x86.h
··· 65 65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 66 66 } 67 67 68 + static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) 69 + { 70 + return rcu_dereference_check(kvm->arch.aliases, 71 + srcu_read_lock_held(&kvm->srcu) 72 + || lockdep_is_held(&kvm->slots_lock)); 73 + } 74 + 68 75 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 69 76 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 70 77
+26
include/linux/kvm.h
··· 160 160 #define KVM_EXIT_DCR 15 161 161 #define KVM_EXIT_NMI 16 162 162 #define KVM_EXIT_INTERNAL_ERROR 17 163 + #define KVM_EXIT_OSI 18 163 164 164 165 /* For KVM_EXIT_INTERNAL_ERROR */ 165 166 #define KVM_INTERNAL_ERROR_EMULATION 1 ··· 260 259 __u32 ndata; 261 260 __u64 data[16]; 262 261 } internal; 262 + /* KVM_EXIT_OSI */ 263 + struct { 264 + __u64 gprs[32]; 265 + } osi; 263 266 /* Fix the size of the union. */ 264 267 char padding[256]; 265 268 }; ··· 405 400 __u8 pad[36]; 406 401 }; 407 402 403 + /* for KVM_ENABLE_CAP */ 404 + struct kvm_enable_cap { 405 + /* in */ 406 + __u32 cap; 407 + __u32 flags; 408 + __u64 args[4]; 409 + __u8 pad[64]; 410 + }; 411 + 408 412 #define KVMIO 0xAE 409 413 410 414 /* ··· 515 501 #define KVM_CAP_HYPERV_VAPIC 45 516 502 #define KVM_CAP_HYPERV_SPIN 46 517 503 #define KVM_CAP_PCI_SEGMENT 47 504 + #define KVM_CAP_PPC_PAIRED_SINGLES 48 505 + #define KVM_CAP_INTR_SHADOW 49 506 + #ifdef __KVM_HAVE_DEBUGREGS 507 + #define KVM_CAP_DEBUGREGS 50 508 + #endif 518 509 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 510 + #define KVM_CAP_PPC_OSI 52 511 + #define KVM_CAP_PPC_UNSET_IRQ 53 512 + #define KVM_CAP_ENABLE_CAP 54 519 513 520 514 #ifdef KVM_CAP_IRQ_ROUTING 521 515 ··· 710 688 /* Available with KVM_CAP_VCPU_EVENTS */ 711 689 #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) 712 690 #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) 691 + /* Available with KVM_CAP_DEBUGREGS */ 692 + #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) 693 + #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) 694 + #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) 713 695 714 696 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 715 697
+14 -2
include/linux/kvm_host.h
··· 105 105 struct kvm_vcpu_arch arch; 106 106 }; 107 107 108 + /* 109 + * Some of the bitops functions do not support too long bitmaps. 110 + * This number must be determined not to exceed such limits. 111 + */ 112 + #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) 113 + 108 114 struct kvm_memory_slot { 109 115 gfn_t base_gfn; 110 116 unsigned long npages; ··· 243 237 void vcpu_load(struct kvm_vcpu *vcpu); 244 238 void vcpu_put(struct kvm_vcpu *vcpu); 245 239 246 - int kvm_init(void *opaque, unsigned int vcpu_size, 240 + int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, 247 241 struct module *module); 248 242 void kvm_exit(void); 249 243 250 244 void kvm_get_kvm(struct kvm *kvm); 251 245 void kvm_put_kvm(struct kvm *kvm); 252 246 247 + static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) 248 + { 249 + return rcu_dereference_check(kvm->memslots, 250 + srcu_read_lock_held(&kvm->srcu) 251 + || lockdep_is_held(&kvm->slots_lock)); 252 + } 253 + 253 254 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) 254 255 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) 255 256 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } 256 - struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); 257 257 258 258 extern struct page *bad_page; 259 259 extern pfn_t bad_pfn;
+1
include/linux/tboot.h
··· 150 150 151 151 #else 152 152 153 + #define tboot_enabled() 0 153 154 #define tboot_probe() do { } while (0) 154 155 #define tboot_shutdown(shutdown_type) do { } while (0) 155 156 #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \
-1
include/trace/events/kvm.h
··· 5 5 6 6 #undef TRACE_SYSTEM 7 7 #define TRACE_SYSTEM kvm 8 - #define TRACE_INCLUDE_FILE kvm 9 8 10 9 #if defined(__KVM_HAVE_IOAPIC) 11 10 TRACE_EVENT(kvm_set_irq,
+6 -2
virt/kvm/assigned-dev.c
··· 316 316 kvm_assigned_dev_intr, 0, 317 317 "kvm_assigned_msix_device", 318 318 (void *)dev); 319 - /* FIXME: free requested_irq's on failure */ 320 319 if (r) 321 - return r; 320 + goto err; 322 321 } 323 322 324 323 return 0; 324 + err: 325 + for (i -= 1; i >= 0; i--) 326 + free_irq(dev->host_msix_entries[i].vector, (void *)dev); 327 + pci_disable_msix(dev->dev); 328 + return r; 325 329 } 326 330 327 331 #endif
+4 -2
virt/kvm/coalesced_mmio.c
··· 120 120 return ret; 121 121 122 122 out_free_dev: 123 + kvm->coalesced_mmio_dev = NULL; 123 124 kfree(dev); 124 125 out_free_page: 126 + kvm->coalesced_mmio_ring = NULL; 125 127 __free_page(page); 126 128 out_err: 127 129 return ret; ··· 141 139 struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; 142 140 143 141 if (dev == NULL) 144 - return -EINVAL; 142 + return -ENXIO; 145 143 146 144 mutex_lock(&kvm->slots_lock); 147 145 if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { ··· 164 162 struct kvm_coalesced_mmio_zone *z; 165 163 166 164 if (dev == NULL) 167 - return -EINVAL; 165 + return -ENXIO; 168 166 169 167 mutex_lock(&kvm->slots_lock); 170 168
+2 -2
virt/kvm/iommu.c
··· 127 127 int i, r = 0; 128 128 struct kvm_memslots *slots; 129 129 130 - slots = rcu_dereference(kvm->memslots); 130 + slots = kvm_memslots(kvm); 131 131 132 132 for (i = 0; i < slots->nmemslots; i++) { 133 133 r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); ··· 286 286 int i; 287 287 struct kvm_memslots *slots; 288 288 289 - slots = rcu_dereference(kvm->memslots); 289 + slots = kvm_memslots(kvm); 290 290 291 291 for (i = 0; i < slots->nmemslots; i++) { 292 292 kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
+31 -32
virt/kvm/kvm_main.c
··· 422 422 spin_lock(&kvm_lock); 423 423 list_add(&kvm->vm_list, &vm_list); 424 424 spin_unlock(&kvm_lock); 425 - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 426 - kvm_coalesced_mmio_init(kvm); 427 - #endif 428 425 out: 429 426 return kvm; 430 427 ··· 556 559 memslot = &kvm->memslots->memslots[mem->slot]; 557 560 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 558 561 npages = mem->memory_size >> PAGE_SHIFT; 562 + 563 + r = -EINVAL; 564 + if (npages > KVM_MEM_MAX_NR_PAGES) 565 + goto out; 559 566 560 567 if (!npages) 561 568 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; ··· 834 833 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) 835 834 { 836 835 int i; 837 - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 836 + struct kvm_memslots *slots = kvm_memslots(kvm); 838 837 839 838 for (i = 0; i < slots->nmemslots; ++i) { 840 839 struct kvm_memory_slot *memslot = &slots->memslots[i]; ··· 856 855 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 857 856 { 858 857 int i; 859 - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 858 + struct kvm_memslots *slots = kvm_memslots(kvm); 860 859 861 860 gfn = unalias_gfn_instantiation(kvm, gfn); 862 861 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { ··· 900 899 int memslot_id(struct kvm *kvm, gfn_t gfn) 901 900 { 902 901 int i; 903 - struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 902 + struct kvm_memslots *slots = kvm_memslots(kvm); 904 903 struct kvm_memory_slot *memslot = NULL; 905 904 906 905 gfn = unalias_gfn(kvm, gfn); ··· 915 914 return memslot - slots->memslots; 916 915 } 917 916 917 + static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) 918 + { 919 + return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; 920 + } 921 + 918 922 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 919 923 { 920 924 struct kvm_memory_slot *slot; ··· 928 922 slot = gfn_to_memslot_unaliased(kvm, gfn); 929 923 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 930 
924 return bad_hva(); 931 - return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 925 + return gfn_to_hva_memslot(slot, gfn); 932 926 } 933 927 EXPORT_SYMBOL_GPL(gfn_to_hva); 934 928 ··· 977 971 return hva_to_pfn(kvm, addr); 978 972 } 979 973 EXPORT_SYMBOL_GPL(gfn_to_pfn); 980 - 981 - static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) 982 - { 983 - return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 984 - } 985 974 986 975 pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 987 976 struct kvm_memory_slot *slot, gfn_t gfn) ··· 1191 1190 memslot = gfn_to_memslot_unaliased(kvm, gfn); 1192 1191 if (memslot && memslot->dirty_bitmap) { 1193 1192 unsigned long rel_gfn = gfn - memslot->base_gfn; 1194 - unsigned long *p = memslot->dirty_bitmap + 1195 - rel_gfn / BITS_PER_LONG; 1196 - int offset = rel_gfn % BITS_PER_LONG; 1197 1193 1198 - /* avoid RMW */ 1199 - if (!generic_test_le_bit(offset, p)) 1200 - generic___set_le_bit(offset, p); 1194 + generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); 1201 1195 } 1202 1196 } 1203 1197 ··· 1605 1609 r = -EFAULT; 1606 1610 if (copy_from_user(&zone, argp, sizeof zone)) 1607 1611 goto out; 1608 - r = -ENXIO; 1609 1612 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 1610 1613 if (r) 1611 1614 goto out; ··· 1616 1621 r = -EFAULT; 1617 1622 if (copy_from_user(&zone, argp, sizeof zone)) 1618 1623 goto out; 1619 - r = -ENXIO; 1620 1624 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 1621 1625 if (r) 1622 1626 goto out; ··· 1749 1755 1750 1756 static int kvm_dev_ioctl_create_vm(void) 1751 1757 { 1752 - int fd; 1758 + int fd, r; 1753 1759 struct kvm *kvm; 1754 1760 1755 1761 kvm = kvm_create_vm(); 1756 1762 if (IS_ERR(kvm)) 1757 1763 return PTR_ERR(kvm); 1764 + #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1765 + r = kvm_coalesced_mmio_init(kvm); 1766 + if (r < 0) { 1767 + kvm_put_kvm(kvm); 1768 + return r; 1769 + } 1770 + #endif 1758 1771 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, 
kvm, O_RDWR); 1759 1772 if (fd < 0) 1760 1773 kvm_put_kvm(kvm); ··· 1929 1928 cpu); 1930 1929 hardware_disable(NULL); 1931 1930 break; 1932 - case CPU_UP_CANCELED: 1933 - printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1934 - cpu); 1935 - smp_call_function_single(cpu, hardware_disable, NULL, 1); 1936 - break; 1937 1931 case CPU_ONLINE: 1938 1932 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 1939 1933 cpu); ··· 1987 1991 int len, const void *val) 1988 1992 { 1989 1993 int i; 1990 - struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); 1994 + struct kvm_io_bus *bus; 1995 + 1996 + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 1991 1997 for (i = 0; i < bus->dev_count; i++) 1992 1998 if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) 1993 1999 return 0; ··· 2001 2003 int len, void *val) 2002 2004 { 2003 2005 int i; 2004 - struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); 2006 + struct kvm_io_bus *bus; 2005 2007 2008 + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 2006 2009 for (i = 0; i < bus->dev_count; i++) 2007 2010 if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) 2008 2011 return 0; ··· 2178 2179 kvm_arch_vcpu_put(vcpu); 2179 2180 } 2180 2181 2181 - int kvm_init(void *opaque, unsigned int vcpu_size, 2182 + int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, 2182 2183 struct module *module) 2183 2184 { 2184 2185 int r; ··· 2228 2229 goto out_free_4; 2229 2230 2230 2231 /* A kmem cache lets us meet the alignment requirements of fx_save. 
*/ 2231 - kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, 2232 - __alignof__(struct kvm_vcpu), 2232 + if (!vcpu_align) 2233 + vcpu_align = __alignof__(struct kvm_vcpu); 2234 + kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, 2233 2235 0, NULL); 2234 2236 if (!kvm_vcpu_cache) { 2235 2237 r = -ENOMEM; ··· 2279 2279 2280 2280 void kvm_exit(void) 2281 2281 { 2282 - tracepoint_synchronize_unregister(); 2283 2282 kvm_exit_debug(); 2284 2283 misc_deregister(&kvm_dev); 2285 2284 kmem_cache_destroy(kvm_vcpu_cache);