Merge branch 'kvm-updates/2.6.36' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.36' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (198 commits)
KVM: VMX: Fix host GDT.LIMIT corruption
KVM: MMU: use __xchg_spte more smartly
KVM: MMU: clean up spte set and accessed/dirty tracking
KVM: MMU: don't atomically set spte if it's not present
KVM: MMU: fix page dirty tracking lost while sync page
KVM: MMU: fix broken page accessed tracking with ept enabled
KVM: MMU: add missing reserved bits check in speculative path
KVM: MMU: fix mmu notifier invalidate handler for huge spte
KVM: x86 emulator: fix xchg instruction emulation
KVM: x86: Call mask notifiers from pic
KVM: x86: never re-execute instruction with enabled tdp
KVM: Document KVM_GET_SUPPORTED_CPUID2 ioctl
KVM: x86: emulator: inc/dec can have lock prefix
KVM: MMU: Eliminate redundant temporaries in FNAME(fetch)
KVM: MMU: Validate all gptes during fetch, not just those used for new pages
KVM: MMU: Simplify spte fetch() function
KVM: MMU: Add gpte_valid() helper
KVM: MMU: Add validate_direct_spte() helper
KVM: MMU: Add drop_large_spte() helper
KVM: MMU: Use __set_spte to link shadow pages
...

+3336 -2111
-21
Documentation/feature-removal-schedule.txt
··· 487 488 ---------------------------- 489 490 - What: KVM memory aliases support 491 - When: July 2010 492 - Why: Memory aliasing support is used for speeding up guest vga access 493 - through the vga windows. 494 - 495 - Modern userspace no longer uses this feature, so it's just bitrotted 496 - code and can be removed with no impact. 497 - Who: Avi Kivity <avi@redhat.com> 498 - 499 - ---------------------------- 500 - 501 What: xtime, wall_to_monotonic 502 When: 2.6.36+ 503 Files: kernel/time/timekeeping.c include/linux/time.h ··· 494 existing timekeeping accessor functions to access 495 the equivalent functionality. 496 Who: John Stultz <johnstul@us.ibm.com> 497 - 498 - ---------------------------- 499 - 500 - What: KVM kernel-allocated memory slots 501 - When: July 2010 502 - Why: Since 2.6.25, kvm supports user-allocated memory slots, which are 503 - much more flexible than kernel-allocated slots. All current userspace 504 - supports the newer interface and this code can be removed with no 505 - impact. 506 - Who: Avi Kivity <avi@redhat.com> 507 508 ---------------------------- 509
··· 487 488 ---------------------------- 489 490 What: xtime, wall_to_monotonic 491 When: 2.6.36+ 492 Files: kernel/time/timekeeping.c include/linux/time.h ··· 505 existing timekeeping accessor functions to access 506 the equivalent functionality. 507 Who: John Stultz <johnstul@us.ibm.com> 508 509 ---------------------------- 510
+174 -34
Documentation/kvm/api.txt
··· 126 kvm adjusts nmsrs to reflect the actual number of msrs and fills in 127 the indices array with their numbers. 128 129 4.4 KVM_CHECK_EXTENSION 130 131 Capability: basic ··· 164 Parameters: struct kvm_memory_region (in) 165 Returns: 0 on success, -1 on error 166 167 - struct kvm_memory_region { 168 - __u32 slot; 169 - __u32 flags; 170 - __u64 guest_phys_addr; 171 - __u64 memory_size; /* bytes */ 172 - }; 173 - 174 - /* for kvm_memory_region::flags */ 175 - #define KVM_MEM_LOG_DIRTY_PAGES 1UL 176 - 177 - This ioctl allows the user to create or modify a guest physical memory 178 - slot. When changing an existing slot, it may be moved in the guest 179 - physical memory space, or its flags may be modified. It may not be 180 - resized. Slots may not overlap. 181 - 182 - The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which 183 - instructs kvm to keep track of writes to memory within the slot. See 184 - the KVM_GET_DIRTY_LOG ioctl. 185 - 186 - It is recommended to use the KVM_SET_USER_MEMORY_REGION ioctl instead 187 - of this API, if available. This newer API allows placing guest memory 188 - at specified locations in the host address space, yielding better 189 - control and easy access. 190 191 4.6 KVM_CREATE_VCPU 192 ··· 208 Parameters: struct kvm_memory_alias (in) 209 Returns: 0 (success), -1 (error) 210 211 - struct kvm_memory_alias { 212 - __u32 slot; /* this has a different namespace than memory slots */ 213 - __u32 flags; 214 - __u64 guest_phys_addr; 215 - __u64 memory_size; 216 - __u64 target_phys_addr; 217 - }; 218 - 219 - Defines a guest physical address space region as an alias to another 220 - region. Useful for aliased address, for example the VGA low memory 221 - window. Should not be used with userspace memory. 222 223 4.9 KVM_RUN 224 ··· 863 864 This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel 865 irqchip, the multiprocessing state must be maintained by userspace. 866 867 5. The kvm_run structure 868
··· 126 kvm adjusts nmsrs to reflect the actual number of msrs and fills in 127 the indices array with their numbers. 128 129 + Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are 130 + not returned in the MSR list, as different vcpus can have a different number 131 + of banks, as set via the KVM_X86_SETUP_MCE ioctl. 132 + 133 4.4 KVM_CHECK_EXTENSION 134 135 Capability: basic ··· 160 Parameters: struct kvm_memory_region (in) 161 Returns: 0 on success, -1 on error 162 163 + This ioctl is obsolete and has been removed. 164 165 4.6 KVM_CREATE_VCPU 166 ··· 226 Parameters: struct kvm_memory_alias (in) 227 Returns: 0 (success), -1 (error) 228 229 + This ioctl is obsolete and has been removed. 230 231 4.9 KVM_RUN 232 ··· 891 892 This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel 893 irqchip, the multiprocessing state must be maintained by userspace. 894 + 895 + 4.39 KVM_SET_IDENTITY_MAP_ADDR 896 + 897 + Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR 898 + Architectures: x86 899 + Type: vm ioctl 900 + Parameters: unsigned long identity (in) 901 + Returns: 0 on success, -1 on error 902 + 903 + This ioctl defines the physical address of a one-page region in the guest 904 + physical address space. The region must be within the first 4GB of the 905 + guest physical address space and must not conflict with any memory slot 906 + or any mmio address. The guest may malfunction if it accesses this memory 907 + region. 908 + 909 + This ioctl is required on Intel-based hosts. This is needed on Intel hardware 910 + because of a quirk in the virtualization implementation (see the internals 911 + documentation when it pops into existence). 912 + 913 + 4.40 KVM_SET_BOOT_CPU_ID 914 + 915 + Capability: KVM_CAP_SET_BOOT_CPU_ID 916 + Architectures: x86, ia64 917 + Type: vm ioctl 918 + Parameters: unsigned long vcpu_id 919 + Returns: 0 on success, -1 on error 920 + 921 + Define which vcpu is the Bootstrap Processor (BSP). Values are the same 922 + as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default 923 + is vcpu 0. 924 + 925 + 4.41 KVM_GET_XSAVE 926 + 927 + Capability: KVM_CAP_XSAVE 928 + Architectures: x86 929 + Type: vcpu ioctl 930 + Parameters: struct kvm_xsave (out) 931 + Returns: 0 on success, -1 on error 932 + 933 + struct kvm_xsave { 934 + __u32 region[1024]; 935 + }; 936 + 937 + This ioctl would copy current vcpu's xsave struct to the userspace. 938 + 939 + 4.42 KVM_SET_XSAVE 940 + 941 + Capability: KVM_CAP_XSAVE 942 + Architectures: x86 943 + Type: vcpu ioctl 944 + Parameters: struct kvm_xsave (in) 945 + Returns: 0 on success, -1 on error 946 + 947 + struct kvm_xsave { 948 + __u32 region[1024]; 949 + }; 950 + 951 + This ioctl would copy userspace's xsave struct to the kernel. 952 + 953 + 4.43 KVM_GET_XCRS 954 + 955 + Capability: KVM_CAP_XCRS 956 + Architectures: x86 957 + Type: vcpu ioctl 958 + Parameters: struct kvm_xcrs (out) 959 + Returns: 0 on success, -1 on error 960 + 961 + struct kvm_xcr { 962 + __u32 xcr; 963 + __u32 reserved; 964 + __u64 value; 965 + }; 966 + 967 + struct kvm_xcrs { 968 + __u32 nr_xcrs; 969 + __u32 flags; 970 + struct kvm_xcr xcrs[KVM_MAX_XCRS]; 971 + __u64 padding[16]; 972 + }; 973 + 974 + This ioctl would copy current vcpu's xcrs to the userspace. 
975 + 976 + 4.44 KVM_SET_XCRS 977 + 978 + Capability: KVM_CAP_XCRS 979 + Architectures: x86 980 + Type: vcpu ioctl 981 + Parameters: struct kvm_xcrs (in) 982 + Returns: 0 on success, -1 on error 983 + 984 + struct kvm_xcr { 985 + __u32 xcr; 986 + __u32 reserved; 987 + __u64 value; 988 + }; 989 + 990 + struct kvm_xcrs { 991 + __u32 nr_xcrs; 992 + __u32 flags; 993 + struct kvm_xcr xcrs[KVM_MAX_XCRS]; 994 + __u64 padding[16]; 995 + }; 996 + 997 + This ioctl would set vcpu's xcr to the value userspace specified. 998 + 999 + 4.45 KVM_GET_SUPPORTED_CPUID 1000 + 1001 + Capability: KVM_CAP_EXT_CPUID 1002 + Architectures: x86 1003 + Type: system ioctl 1004 + Parameters: struct kvm_cpuid2 (in/out) 1005 + Returns: 0 on success, -1 on error 1006 + 1007 + struct kvm_cpuid2 { 1008 + __u32 nent; 1009 + __u32 padding; 1010 + struct kvm_cpuid_entry2 entries[0]; 1011 + }; 1012 + 1013 + #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 1014 + #define KVM_CPUID_FLAG_STATEFUL_FUNC 2 1015 + #define KVM_CPUID_FLAG_STATE_READ_NEXT 4 1016 + 1017 + struct kvm_cpuid_entry2 { 1018 + __u32 function; 1019 + __u32 index; 1020 + __u32 flags; 1021 + __u32 eax; 1022 + __u32 ebx; 1023 + __u32 ecx; 1024 + __u32 edx; 1025 + __u32 padding[3]; 1026 + }; 1027 + 1028 + This ioctl returns x86 cpuid features which are supported by both the hardware 1029 + and kvm. Userspace can use the information returned by this ioctl to 1030 + construct cpuid information (for KVM_SET_CPUID2) that is consistent with 1031 + hardware, kernel, and userspace capabilities, and with user requirements (for 1032 + example, the user may wish to constrain cpuid to emulate older hardware, 1033 + or for feature consistency across a cluster). 1034 + 1035 + Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure 1036 + with the 'nent' field indicating the number of entries in the variable-size 1037 + array 'entries'. If the number of entries is too low to describe the cpu 1038 + capabilities, an error (E2BIG) is returned. If the number is too high, 1039 + the 'nent' field is adjusted and an error (ENOMEM) is returned. If the 1040 + number is just right, the 'nent' field is adjusted to the number of valid 1041 + entries in the 'entries' array, which is then filled. 1042 + 1043 + The entries returned are the host cpuid as returned by the cpuid instruction, 1044 + with unknown or unsupported features masked out. The fields in each entry 1045 + are defined as follows: 1046 + 1047 + function: the eax value used to obtain the entry 1048 + index: the ecx value used to obtain the entry (for entries that are 1049 + affected by ecx) 1050 + flags: an OR of zero or more of the following: 1051 + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: 1052 + if the index field is valid 1053 + KVM_CPUID_FLAG_STATEFUL_FUNC: 1054 + if cpuid for this function returns different values for successive 1055 + invocations; there will be several entries with the same function, 1056 + all with this flag set 1057 + KVM_CPUID_FLAG_STATE_READ_NEXT: 1058 + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is 1059 + the first entry to be read by a cpu 1060 + eax, ebx, ecx, edx: the values returned by the cpuid instruction for 1061 + this function/index combination 1062 1063 5. The kvm_run structure 1064
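
A minimal userspace sketch of the KVM_GET_SUPPORTED_CPUID size negotiation described in section 4.45 above, assuming a caller that starts with a small entries array and grows it on E2BIG; the helper name and starting size are illustrative, not part of the patch:

    #include <errno.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Grow the entries array until the kernel accepts it: E2BIG means the
     * array was too small; on success nent is adjusted to the number of
     * valid entries, as documented above. */
    static struct kvm_cpuid2 *get_supported_cpuid(int kvm_fd)
    {
            int nent = 8;                     /* illustrative first guess */

            for (;;) {
                    struct kvm_cpuid2 *cpuid;

                    cpuid = calloc(1, sizeof(*cpuid) +
                                   nent * sizeof(struct kvm_cpuid_entry2));
                    if (!cpuid)
                            return NULL;
                    cpuid->nent = nent;

                    if (ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid) == 0)
                            return cpuid; /* cpuid->nent now holds the valid count */

                    free(cpuid);
                    if (errno != E2BIG)
                            return NULL;  /* real error, not a size problem */
                    nent *= 2;            /* too small: retry with a larger array */
            }
    }
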
+48 -4
Documentation/kvm/mmu.txt
··· 77 78 Guest memory (gpa) is part of the user address space of the process that is 79 using kvm. Userspace defines the translation between guest addresses and user 80 - addresses (gpa->hva); note that two gpas may alias to the same gva, but not 81 vice versa. 82 83 - These gvas may be backed using any method available to the host: anonymous 84 memory, file backed memory, and device memory. Memory might be paged by the 85 host at any time. 86 ··· 161 role.cr4_pae: 162 Contains the value of cr4.pae for which the page is valid (e.g. whether 163 32-bit or 64-bit gptes are in use). 164 - role.cr4_nxe: 165 Contains the value of efer.nxe for which the page is valid. 166 role.cr0_wp: 167 Contains the value of cr0.wp for which the page is valid. ··· 180 guest pages as leaves. 181 gfns: 182 An array of 512 guest frame numbers, one for each present pte. Used to 183 - perform a reverse map from a pte to a gfn. 184 slot_bitmap: 185 A bitmap containing one bit per memory slot. If the page contains a pte 186 mapping a page from memory slot n, then bit n of slot_bitmap will be set ··· 297 - mmu notifier called with updated hva 298 - look up affected sptes through reverse map 299 - drop (or update) translations 300 301 Further reading 302 ===============
··· 77 78 Guest memory (gpa) is part of the user address space of the process that is 79 using kvm. Userspace defines the translation between guest addresses and user 80 + addresses (gpa->hva); note that two gpas may alias to the same hva, but not 81 vice versa. 82 83 + These hvas may be backed using any method available to the host: anonymous 84 memory, file backed memory, and device memory. Memory might be paged by the 85 host at any time. 86 ··· 161 role.cr4_pae: 162 Contains the value of cr4.pae for which the page is valid (e.g. whether 163 32-bit or 64-bit gptes are in use). 164 + role.nxe: 165 Contains the value of efer.nxe for which the page is valid. 166 role.cr0_wp: 167 Contains the value of cr0.wp for which the page is valid. ··· 180 guest pages as leaves. 181 gfns: 182 An array of 512 guest frame numbers, one for each present pte. Used to 183 + perform a reverse map from a pte to a gfn. When role.direct is set, any 184 + element of this array can be calculated from the gfn field when used, in 185 + this case, the array of gfns is not allocated. See role.direct and gfn. 186 slot_bitmap: 187 A bitmap containing one bit per memory slot. If the page contains a pte 188 mapping a page from memory slot n, then bit n of slot_bitmap will be set ··· 295 - mmu notifier called with updated hva 296 - look up affected sptes through reverse map 297 - drop (or update) translations 298 + 299 + Emulating cr0.wp 300 + ================ 301 + 302 + If tdp is not enabled, the host must keep cr0.wp=1 so page write protection 303 + works for the guest kernel, not guest guest userspace. When the guest 304 + cr0.wp=1, this does not present a problem. However when the guest cr0.wp=0, 305 + we cannot map the permissions for gpte.u=1, gpte.w=0 to any spte (the 306 + semantics require allowing any guest kernel access plus user read access). 307 + 308 + We handle this by mapping the permissions to two possible sptes, depending 309 + on fault type: 310 + 311 + - kernel write fault: spte.u=0, spte.w=1 (allows full kernel access, 312 + disallows user access) 313 + - read fault: spte.u=1, spte.w=0 (allows full read access, disallows kernel 314 + write access) 315 + 316 + (user write faults generate a #PF) 317 + 318 + Large pages 319 + =========== 320 + 321 + The mmu supports all combinations of large and small guest and host pages. 322 + Supported page sizes include 4k, 2M, 4M, and 1G. 4M pages are treated as 323 + two separate 2M pages, on both guest and host, since the mmu always uses PAE 324 + paging. 325 + 326 + To instantiate a large spte, four constraints must be satisfied: 327 + 328 + - the spte must point to a large host page 329 + - the guest pte must be a large pte of at least equivalent size (if tdp is 330 + enabled, there is no guest pte and this condition is satisified) 331 + - if the spte will be writeable, the large page frame may not overlap any 332 + write-protected pages 333 + - the guest page must be wholly contained by a single memory slot 334 + 335 + To check the last two conditions, the mmu maintains a ->write_count set of 336 + arrays for each memory slot and large page size. Every write protected page 337 + causes its write_count to be incremented, thus preventing instantiation of 338 + a large spte. The frames at the end of an unaligned memory slot have 339 + artificically inflated ->write_counts so they can never be instantiated. 340 341 Further reading 342 ===============
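
The cr0.wp=0 handling documented above reduces to a small decision on fault type. A schematic C sketch of that mapping follows; the names (shadow_perms, map_wp0_gpte) are invented for illustration and do not correspond to KVM's actual internals:

    /* Invented names for illustration; not KVM's internal API. */
    struct shadow_perms {
            int user;       /* spte.u */
            int write;      /* spte.w */
    };

    /* Guest has cr0.wp=0 and the gpte is user-readable but read-only
     * (gpte.u=1, gpte.w=0): pick the spte permissions by fault type. */
    static struct shadow_perms map_wp0_gpte(int write_fault, int user_fault)
    {
            if (write_fault && !user_fault)
                    /* kernel write fault: full kernel access, no user access */
                    return (struct shadow_perms){ .user = 0, .write = 1 };
            if (!write_fault)
                    /* read fault: full read access, no kernel write access */
                    return (struct shadow_perms){ .user = 1, .write = 0 };
            /* user write fault: the guest takes a #PF instead of a mapping */
            return (struct shadow_perms){ .user = 0, .write = 0 };
    }
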
+153
Documentation/kvm/msr.txt
···
··· 1 + KVM-specific MSRs. 2 + Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 3 + ===================================================== 4 + 5 + KVM makes use of some custom MSRs to service some requests. 6 + At present, this facility is only used by kvmclock. 7 + 8 + Custom MSRs have a range reserved for them, that goes from 9 + 0x4b564d00 to 0x4b564dff. There are MSRs outside this area, 10 + but they are deprecated and their use is discouraged. 11 + 12 + Custom MSR list 13 + -------- 14 + 15 + The current supported Custom MSR list is: 16 + 17 + MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00 18 + 19 + data: 4-byte alignment physical address of a memory area which must be 20 + in guest RAM. This memory is expected to hold a copy of the following 21 + structure: 22 + 23 + struct pvclock_wall_clock { 24 + u32 version; 25 + u32 sec; 26 + u32 nsec; 27 + } __attribute__((__packed__)); 28 + 29 + whose data will be filled in by the hypervisor. The hypervisor is only 30 + guaranteed to update this data at the moment of MSR write. 31 + Users that want to reliably query this information more than once have 32 + to write more than once to this MSR. Fields have the following meanings: 33 + 34 + version: guest has to check version before and after grabbing 35 + time information and check that they are both equal and even. 36 + An odd version indicates an in-progress update. 37 + 38 + sec: number of seconds for wallclock. 39 + 40 + nsec: number of nanoseconds for wallclock. 41 + 42 + Note that although MSRs are per-CPU entities, the effect of this 43 + particular MSR is global. 44 + 45 + Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid 46 + leaf prior to usage. 47 + 48 + MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01 49 + 50 + data: 4-byte aligned physical address of a memory area which must be in 51 + guest RAM, plus an enable bit in bit 0. This memory is expected to hold 52 + a copy of the following structure: 53 + 54 + struct pvclock_vcpu_time_info { 55 + u32 version; 56 + u32 pad0; 57 + u64 tsc_timestamp; 58 + u64 system_time; 59 + u32 tsc_to_system_mul; 60 + s8 tsc_shift; 61 + u8 flags; 62 + u8 pad[2]; 63 + } __attribute__((__packed__)); /* 32 bytes */ 64 + 65 + whose data will be filled in by the hypervisor periodically. Only one 66 + write, or registration, is needed for each VCPU. The interval between 67 + updates of this structure is arbitrary and implementation-dependent. 68 + The hypervisor may update this structure at any time it sees fit until 69 + anything with bit0 == 0 is written to it. 70 + 71 + Fields have the following meanings: 72 + 73 + version: guest has to check version before and after grabbing 74 + time information and check that they are both equal and even. 75 + An odd version indicates an in-progress update. 76 + 77 + tsc_timestamp: the tsc value at the current VCPU at the time 78 + of the update of this structure. Guests can subtract this value 79 + from current tsc to derive a notion of elapsed time since the 80 + structure update. 81 + 82 + system_time: a host notion of monotonic time, including sleep 83 + time at the time this structure was last updated. Unit is 84 + nanoseconds. 85 + 86 + tsc_to_system_mul: a function of the tsc frequency. One has 87 + to multiply any tsc-related quantity by this value to get 88 + a value in nanoseconds, besides dividing by 2^tsc_shift 89 + 90 + tsc_shift: cycle to nanosecond divider, as a power of two, to 91 + allow for shift rights. 
One has to shift right any tsc-related 92 + quantity by this value to get a value in nanoseconds, besides 93 + multiplying by tsc_to_system_mul. 94 + 95 + With this information, guests can derive per-CPU time by 96 + doing: 97 + 98 + time = (current_tsc - tsc_timestamp) 99 + time = (time * tsc_to_system_mul) >> tsc_shift 100 + time = time + system_time 101 + 102 + flags: bits in this field indicate extended capabilities 103 + coordinated between the guest and the hypervisor. Availability 104 + of specific flags has to be checked in 0x40000001 cpuid leaf. 105 + Current flags are: 106 + 107 + flag bit | cpuid bit | meaning 108 + ------------------------------------------------------------- 109 + | | time measures taken across 110 + 0 | 24 | multiple cpus are guaranteed to 111 + | | be monotonic 112 + ------------------------------------------------------------- 113 + 114 + Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid 115 + leaf prior to usage. 116 + 117 + 118 + MSR_KVM_WALL_CLOCK: 0x11 119 + 120 + data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead. 121 + 122 + This MSR falls outside the reserved KVM range and may be removed in the 123 + future. Its usage is deprecated. 124 + 125 + Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid 126 + leaf prior to usage. 127 + 128 + MSR_KVM_SYSTEM_TIME: 0x12 129 + 130 + data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead. 131 + 132 + This MSR falls outside the reserved KVM range and may be removed in the 133 + future. Its usage is deprecated. 134 + 135 + Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid 136 + leaf prior to usage. 137 + 138 + The suggested algorithm for detecting kvmclock presence is then: 139 + 140 + if (!kvm_para_available()) /* refer to cpuid.txt */ 141 + return NON_PRESENT; 142 + 143 + flags = cpuid_eax(0x40000001); 144 + if (flags & 3) { 145 + msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; 146 + msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; 147 + return PRESENT; 148 + } else if (flags & 0) { 149 + msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; 150 + msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; 151 + return PRESENT; 152 + } else 153 + return NON_PRESENT;
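
A guest-side sketch of reading the time area registered through MSR_KVM_SYSTEM_TIME_NEW, following the version protocol and conversion formula documented above. rdtsc() is assumed to be supplied elsewhere, and the widening multiply and negative tsc_shift handling that a production reader needs are omitted here:

    #include <stdint.h>

    #define barrier() __asm__ __volatile__("" ::: "memory")

    extern uint64_t rdtsc(void);       /* assumed: returns the guest TSC */

    struct pvclock_vcpu_time_info {    /* layout as documented above */
            uint32_t version;
            uint32_t pad0;
            uint64_t tsc_timestamp;
            uint64_t system_time;
            uint32_t tsc_to_system_mul;
            int8_t   tsc_shift;
            uint8_t  flags;
            uint8_t  pad[2];
    } __attribute__((__packed__));

    static uint64_t pvclock_read_ns(volatile struct pvclock_vcpu_time_info *ti)
    {
            uint32_t version;
            uint64_t ns;

            do {
                    version = ti->version;   /* odd: update in progress */
                    barrier();
                    ns  = rdtsc() - ti->tsc_timestamp;
                    ns  = (ns * ti->tsc_to_system_mul) >> ti->tsc_shift;
                    ns += ti->system_time;
                    barrier();
            } while ((version & 1) || version != ti->version);

            return ns;
    }
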
+38
Documentation/kvm/review-checklist.txt
···
··· 1 + Review checklist for kvm patches 2 + ================================ 3 + 4 + 1. The patch must follow Documentation/CodingStyle and 5 + Documentation/SubmittingPatches. 6 + 7 + 2. Patches should be against kvm.git master branch. 8 + 9 + 3. If the patch introduces or modifies a new userspace API: 10 + - the API must be documented in Documentation/kvm/api.txt 11 + - the API must be discoverable using KVM_CHECK_EXTENSION 12 + 13 + 4. New state must include support for save/restore. 14 + 15 + 5. New features must default to off (userspace should explicitly request them). 16 + Performance improvements can and should default to on. 17 + 18 + 6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2 19 + 20 + 7. Emulator changes should be accompanied by unit tests for qemu-kvm.git 21 + kvm/test directory. 22 + 23 + 8. Changes should be vendor neutral when possible. Changes to common code 24 + are better than duplicating changes to vendor code. 25 + 26 + 9. Similarly, prefer changes to arch independent code than to arch dependent 27 + code. 28 + 29 + 10. User/kernel interfaces and guest/host interfaces must be 64-bit clean 30 + (all variables and sizes naturally aligned on 64-bit; use specific types 31 + only - u64 rather than ulong). 32 + 33 + 11. New guest visible features must either be documented in a hardware manual 34 + or be accompanied by documentation. 35 + 36 + 12. Features must be robust against reset and kexec - for example, shared 37 + host/guest memory must be unshared to prevent the host from writing to 38 + guest memory that the guest has not reserved for this purpose.
+1
arch/ia64/include/asm/kvm_host.h
··· 235 #define KVM_REQ_PTC_G 32 236 #define KVM_REQ_RESUME 33 237 238 #define KVM_NR_PAGE_SIZES 1 239 #define KVM_PAGES_PER_HPAGE(x) 1 240
··· 235 #define KVM_REQ_PTC_G 32 236 #define KVM_REQ_RESUME 33 237 238 + #define KVM_HPAGE_GFN_SHIFT(x) 0 239 #define KVM_NR_PAGE_SIZES 1 240 #define KVM_PAGES_PER_HPAGE(x) 1 241
+13 -37
arch/ia64/kvm/kvm-ia64.c
··· 725 int r; 726 sigset_t sigsaved; 727 728 - vcpu_load(vcpu); 729 - 730 if (vcpu->sigset_active) 731 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 732 ··· 746 if (vcpu->sigset_active) 747 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 748 749 - vcpu_put(vcpu); 750 return r; 751 } 752 ··· 880 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 881 int i; 882 883 - vcpu_load(vcpu); 884 - 885 for (i = 0; i < 16; i++) { 886 vpd->vgr[i] = regs->vpd.vgr[i]; 887 vpd->vbgr[i] = regs->vpd.vbgr[i]; ··· 925 vcpu->arch.irq_new_pending = 1; 926 vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); 927 set_bit(KVM_REQ_RESUME, &vcpu->requests); 928 - 929 - vcpu_put(vcpu); 930 931 return 0; 932 } ··· 1795 kvm_vmm_info = NULL; 1796 } 1797 1798 - static int kvm_ia64_sync_dirty_log(struct kvm *kvm, 1799 - struct kvm_dirty_log *log) 1800 { 1801 - struct kvm_memory_slot *memslot; 1802 - int r, i; 1803 long base; 1804 unsigned long n; 1805 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + 1806 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); 1807 1808 - r = -EINVAL; 1809 - if (log->slot >= KVM_MEMORY_SLOTS) 1810 - goto out; 1811 - 1812 - memslot = &kvm->memslots->memslots[log->slot]; 1813 - r = -ENOENT; 1814 - if (!memslot->dirty_bitmap) 1815 - goto out; 1816 - 1817 n = kvm_dirty_bitmap_bytes(memslot); 1818 base = memslot->base_gfn / BITS_PER_LONG; 1819 1820 for (i = 0; i < n/sizeof(long); ++i) { 1821 memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; 1822 dirty_bitmap[base + i] = 0; 1823 } 1824 - r = 0; 1825 - out: 1826 - return r; 1827 } 1828 1829 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, ··· 1824 int is_dirty = 0; 1825 1826 mutex_lock(&kvm->slots_lock); 1827 - spin_lock(&kvm->arch.dirty_log_lock); 1828 1829 - r = kvm_ia64_sync_dirty_log(kvm, log); 1830 - if (r) 1831 goto out; 1832 1833 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1834 if (r) 1835 goto out; ··· 1842 /* If nothing is dirty, don't bother messing with page tables. */ 1843 if (is_dirty) { 1844 kvm_flush_remote_tlbs(kvm); 1845 - memslot = &kvm->memslots->memslots[log->slot]; 1846 n = kvm_dirty_bitmap_bytes(memslot); 1847 memset(memslot->dirty_bitmap, 0, n); 1848 } 1849 r = 0; 1850 out: 1851 mutex_unlock(&kvm->slots_lock); 1852 - spin_unlock(&kvm->arch.dirty_log_lock); 1853 return r; 1854 } 1855 ··· 1938 return vcpu->arch.timer_fired; 1939 } 1940 1941 - gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 1942 - { 1943 - return gfn; 1944 - } 1945 - 1946 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 1947 { 1948 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || ··· 1947 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 1948 struct kvm_mp_state *mp_state) 1949 { 1950 - vcpu_load(vcpu); 1951 mp_state->mp_state = vcpu->arch.mp_state; 1952 - vcpu_put(vcpu); 1953 return 0; 1954 } 1955 ··· 1978 { 1979 int r = 0; 1980 1981 - vcpu_load(vcpu); 1982 vcpu->arch.mp_state = mp_state->mp_state; 1983 if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) 1984 r = vcpu_reset(vcpu); 1985 - vcpu_put(vcpu); 1986 return r; 1987 }
··· 725 int r; 726 sigset_t sigsaved; 727 728 if (vcpu->sigset_active) 729 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 730 ··· 748 if (vcpu->sigset_active) 749 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 750 751 return r; 752 } 753 ··· 883 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 884 int i; 885 886 for (i = 0; i < 16; i++) { 887 vpd->vgr[i] = regs->vpd.vgr[i]; 888 vpd->vbgr[i] = regs->vpd.vbgr[i]; ··· 930 vcpu->arch.irq_new_pending = 1; 931 vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); 932 set_bit(KVM_REQ_RESUME, &vcpu->requests); 933 934 return 0; 935 } ··· 1802 kvm_vmm_info = NULL; 1803 } 1804 1805 + static void kvm_ia64_sync_dirty_log(struct kvm *kvm, 1806 + struct kvm_memory_slot *memslot) 1807 { 1808 + int i; 1809 long base; 1810 unsigned long n; 1811 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + 1812 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); 1813 1814 n = kvm_dirty_bitmap_bytes(memslot); 1815 base = memslot->base_gfn / BITS_PER_LONG; 1816 1817 + spin_lock(&kvm->arch.dirty_log_lock); 1818 for (i = 0; i < n/sizeof(long); ++i) { 1819 memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; 1820 dirty_bitmap[base + i] = 0; 1821 } 1822 + spin_unlock(&kvm->arch.dirty_log_lock); 1823 } 1824 1825 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, ··· 1842 int is_dirty = 0; 1843 1844 mutex_lock(&kvm->slots_lock); 1845 1846 + r = -EINVAL; 1847 + if (log->slot >= KVM_MEMORY_SLOTS) 1848 goto out; 1849 1850 + memslot = &kvm->memslots->memslots[log->slot]; 1851 + r = -ENOENT; 1852 + if (!memslot->dirty_bitmap) 1853 + goto out; 1854 + 1855 + kvm_ia64_sync_dirty_log(kvm, memslot); 1856 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1857 if (r) 1858 goto out; ··· 1855 /* If nothing is dirty, don't bother messing with page tables. */ 1856 if (is_dirty) { 1857 kvm_flush_remote_tlbs(kvm); 1858 n = kvm_dirty_bitmap_bytes(memslot); 1859 memset(memslot->dirty_bitmap, 0, n); 1860 } 1861 r = 0; 1862 out: 1863 mutex_unlock(&kvm->slots_lock); 1864 return r; 1865 } 1866 ··· 1953 return vcpu->arch.timer_fired; 1954 } 1955 1956 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 1957 { 1958 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || ··· 1967 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 1968 struct kvm_mp_state *mp_state) 1969 { 1970 mp_state->mp_state = vcpu->arch.mp_state; 1971 return 0; 1972 } 1973 ··· 2000 { 2001 int r = 0; 2002 2003 vcpu->arch.mp_state = mp_state->mp_state; 2004 if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) 2005 r = vcpu_reset(vcpu); 2006 return r; 2007 }
+9 -1
arch/powerpc/include/asm/kvm_book3s.h
··· 115 extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); 116 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); 117 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); 118 - extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); 119 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); 120 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); 121 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
··· 115 extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); 116 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); 117 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); 118 + 119 + extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); 120 + extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); 121 + extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); 122 + extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); 123 + extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); 124 + extern int kvmppc_mmu_hpte_sysinit(void); 125 + extern void kvmppc_mmu_hpte_sysexit(void); 126 + 127 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); 128 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); 129 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+15 -12
arch/powerpc/include/asm/kvm_fpu.h
··· 22 23 #include <linux/types.h> 24 25 - extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1); 26 - extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1); 27 - extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1); 28 29 - extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); 30 - extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); 31 - extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); 32 - extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); 33 34 - extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 35 u32 *src3); 36 - extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 37 u32 *src3); 38 - extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 39 u32 *src3); 40 - extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 41 u32 *src3); 42 - extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, 43 u32 *src3); 44 45 #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ ··· 81 FPD_THREE_IN(fmadd) 82 FPD_THREE_IN(fnmsub) 83 FPD_THREE_IN(fnmadd) 84 85 #endif
··· 22 23 #include <linux/types.h> 24 25 + extern void fps_fres(u64 *fpscr, u32 *dst, u32 *src1); 26 + extern void fps_frsqrte(u64 *fpscr, u32 *dst, u32 *src1); 27 + extern void fps_fsqrts(u64 *fpscr, u32 *dst, u32 *src1); 28 29 + extern void fps_fadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); 30 + extern void fps_fdivs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); 31 + extern void fps_fmuls(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); 32 + extern void fps_fsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); 33 34 + extern void fps_fmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, 35 u32 *src3); 36 + extern void fps_fmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, 37 u32 *src3); 38 + extern void fps_fnmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, 39 u32 *src3); 40 + extern void fps_fnmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, 41 u32 *src3); 42 + extern void fps_fsel(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, 43 u32 *src3); 44 45 #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ ··· 81 FPD_THREE_IN(fmadd) 82 FPD_THREE_IN(fnmsub) 83 FPD_THREE_IN(fnmadd) 84 + 85 + extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr); 86 + extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr); 87 88 #endif
+15 -3
arch/powerpc/include/asm/kvm_host.h
··· 35 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 36 37 /* We don't currently support large pages. */ 38 #define KVM_NR_PAGE_SIZES 1 39 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) 40 41 - #define HPTEG_CACHE_NUM 1024 42 43 struct kvm; 44 struct kvm_run; ··· 158 }; 159 160 struct hpte_cache { 161 u64 host_va; 162 u64 pfn; 163 ulong slot; ··· 292 unsigned long pending_exceptions; 293 294 #ifdef CONFIG_PPC_BOOK3S 295 - struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; 296 - int hpte_cache_offset; 297 #endif 298 }; 299
··· 35 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 36 37 /* We don't currently support large pages. */ 38 + #define KVM_HPAGE_GFN_SHIFT(x) 0 39 #define KVM_NR_PAGE_SIZES 1 40 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) 41 42 + #define HPTEG_CACHE_NUM (1 << 15) 43 + #define HPTEG_HASH_BITS_PTE 13 44 + #define HPTEG_HASH_BITS_VPTE 13 45 + #define HPTEG_HASH_BITS_VPTE_LONG 5 46 + #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) 47 + #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) 48 + #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) 49 50 struct kvm; 51 struct kvm_run; ··· 151 }; 152 153 struct hpte_cache { 154 + struct hlist_node list_pte; 155 + struct hlist_node list_vpte; 156 + struct hlist_node list_vpte_long; 157 u64 host_va; 158 u64 pfn; 159 ulong slot; ··· 282 unsigned long pending_exceptions; 283 284 #ifdef CONFIG_PPC_BOOK3S 285 + struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; 286 + struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; 287 + struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; 288 + int hpte_cache_count; 289 #endif 290 }; 291
-4
arch/powerpc/kernel/ppc_ksyms.c
··· 101 EXPORT_SYMBOL(start_thread); 102 EXPORT_SYMBOL(kernel_thread); 103 104 - #ifdef CONFIG_PPC_FPU 105 - EXPORT_SYMBOL_GPL(cvt_df); 106 - EXPORT_SYMBOL_GPL(cvt_fd); 107 - #endif 108 EXPORT_SYMBOL(giveup_fpu); 109 #ifdef CONFIG_ALTIVEC 110 EXPORT_SYMBOL(giveup_altivec);
··· 101 EXPORT_SYMBOL(start_thread); 102 EXPORT_SYMBOL(kernel_thread); 103 104 EXPORT_SYMBOL(giveup_fpu); 105 #ifdef CONFIG_ALTIVEC 106 EXPORT_SYMBOL(giveup_altivec);
+2 -1
arch/powerpc/kvm/44x_tlb.c
··· 316 gfn = gpaddr >> PAGE_SHIFT; 317 new_page = gfn_to_page(vcpu->kvm, gfn); 318 if (is_error_page(new_page)) { 319 - printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); 320 kvm_release_page_clean(new_page); 321 return; 322 }
··· 316 gfn = gpaddr >> PAGE_SHIFT; 317 new_page = gfn_to_page(vcpu->kvm, gfn); 318 if (is_error_page(new_page)) { 319 + printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", 320 + (unsigned long long)gfn); 321 kvm_release_page_clean(new_page); 322 return; 323 }
+2
arch/powerpc/kvm/Makefile
··· 45 book3s.o \ 46 book3s_emulate.o \ 47 book3s_interrupts.o \ 48 book3s_64_mmu_host.o \ 49 book3s_64_mmu.o \ 50 book3s_32_mmu.o ··· 58 book3s.o \ 59 book3s_emulate.o \ 60 book3s_interrupts.o \ 61 book3s_32_mmu_host.o \ 62 book3s_32_mmu.o 63 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
··· 45 book3s.o \ 46 book3s_emulate.o \ 47 book3s_interrupts.o \ 48 + book3s_mmu_hpte.o \ 49 book3s_64_mmu_host.o \ 50 book3s_64_mmu.o \ 51 book3s_32_mmu.o ··· 57 book3s.o \ 58 book3s_emulate.o \ 59 book3s_interrupts.o \ 60 + book3s_mmu_hpte.o \ 61 book3s_32_mmu_host.o \ 62 book3s_32_mmu.o 63 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
+39 -40
arch/powerpc/kvm/book3s.c
··· 1047 { 1048 int i; 1049 1050 - vcpu_load(vcpu); 1051 - 1052 regs->pc = kvmppc_get_pc(vcpu); 1053 regs->cr = kvmppc_get_cr(vcpu); 1054 regs->ctr = kvmppc_get_ctr(vcpu); ··· 1067 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1068 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 1069 1070 - vcpu_put(vcpu); 1071 - 1072 return 0; 1073 } 1074 1075 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1076 { 1077 int i; 1078 - 1079 - vcpu_load(vcpu); 1080 1081 kvmppc_set_pc(vcpu, regs->pc); 1082 kvmppc_set_cr(vcpu, regs->cr); ··· 1093 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1094 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 1095 1096 - vcpu_put(vcpu); 1097 - 1098 return 0; 1099 } 1100 ··· 1101 { 1102 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1103 int i; 1104 - 1105 - vcpu_load(vcpu); 1106 1107 sregs->pvr = vcpu->arch.pvr; 1108 ··· 1121 } 1122 } 1123 1124 - vcpu_put(vcpu); 1125 - 1126 return 0; 1127 } 1128 ··· 1129 { 1130 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1131 int i; 1132 - 1133 - vcpu_load(vcpu); 1134 1135 kvmppc_set_pvr(vcpu, sregs->pvr); 1136 ··· 1156 1157 /* Flush the MMU after messing with the segments */ 1158 kvmppc_mmu_pte_flush(vcpu, 0, 0); 1159 - 1160 - vcpu_put(vcpu); 1161 1162 return 0; 1163 } ··· 1293 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1294 { 1295 int ret; 1296 - struct thread_struct ext_bkp; 1297 #ifdef CONFIG_ALTIVEC 1298 - bool save_vec = current->thread.used_vr; 1299 #endif 1300 #ifdef CONFIG_VSX 1301 - bool save_vsx = current->thread.used_vsr; 1302 #endif 1303 ulong ext_msr; 1304 ··· 1316 /* Save FPU state in stack */ 1317 if (current->thread.regs->msr & MSR_FP) 1318 giveup_fpu(current); 1319 - memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr)); 1320 - ext_bkp.fpscr = current->thread.fpscr; 1321 - ext_bkp.fpexc_mode = current->thread.fpexc_mode; 1322 1323 #ifdef CONFIG_ALTIVEC 1324 /* Save Altivec state in stack */ 1325 - if (save_vec) { 1326 if (current->thread.regs->msr & MSR_VEC) 1327 giveup_altivec(current); 1328 - memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr)); 1329 - ext_bkp.vscr = current->thread.vscr; 1330 - ext_bkp.vrsave = current->thread.vrsave; 1331 } 1332 - ext_bkp.used_vr = current->thread.used_vr; 1333 #endif 1334 1335 #ifdef CONFIG_VSX 1336 /* Save VSX state in stack */ 1337 - if (save_vsx && (current->thread.regs->msr & MSR_VSX)) 1338 __giveup_vsx(current); 1339 - ext_bkp.used_vsr = current->thread.used_vsr; 1340 #endif 1341 1342 /* Remember the MSR with disabled extensions */ ··· 1361 kvmppc_giveup_ext(vcpu, MSR_VSX); 1362 1363 /* Restore FPU state from stack */ 1364 - memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr)); 1365 - current->thread.fpscr = ext_bkp.fpscr; 1366 - current->thread.fpexc_mode = ext_bkp.fpexc_mode; 1367 1368 #ifdef CONFIG_ALTIVEC 1369 /* Restore Altivec state from stack */ 1370 - if (save_vec && current->thread.used_vr) { 1371 - memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr)); 1372 - current->thread.vscr = ext_bkp.vscr; 1373 - current->thread.vrsave= ext_bkp.vrsave; 1374 } 1375 - current->thread.used_vr = ext_bkp.used_vr; 1376 #endif 1377 1378 #ifdef CONFIG_VSX 1379 - current->thread.used_vsr = ext_bkp.used_vsr; 1380 #endif 1381 1382 return ret; ··· 1384 1385 static int kvmppc_book3s_init(void) 1386 { 1387 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, 1388 - THIS_MODULE); 1389 } 1390 1391 static void kvmppc_book3s_exit(void) 1392 { 1393 kvm_exit(); 1394 } 1395
··· 1047 { 1048 int i; 1049 1050 regs->pc = kvmppc_get_pc(vcpu); 1051 regs->cr = kvmppc_get_cr(vcpu); 1052 regs->ctr = kvmppc_get_ctr(vcpu); ··· 1069 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1070 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 1071 1072 return 0; 1073 } 1074 1075 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1076 { 1077 int i; 1078 1079 kvmppc_set_pc(vcpu, regs->pc); 1080 kvmppc_set_cr(vcpu, regs->cr); ··· 1099 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1100 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 1101 1102 return 0; 1103 } 1104 ··· 1109 { 1110 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1111 int i; 1112 1113 sregs->pvr = vcpu->arch.pvr; 1114 ··· 1131 } 1132 } 1133 1134 return 0; 1135 } 1136 ··· 1141 { 1142 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1143 int i; 1144 1145 kvmppc_set_pvr(vcpu, sregs->pvr); 1146 ··· 1170 1171 /* Flush the MMU after messing with the segments */ 1172 kvmppc_mmu_pte_flush(vcpu, 0, 0); 1173 1174 return 0; 1175 } ··· 1309 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1310 { 1311 int ret; 1312 + double fpr[32][TS_FPRWIDTH]; 1313 + unsigned int fpscr; 1314 + int fpexc_mode; 1315 #ifdef CONFIG_ALTIVEC 1316 + vector128 vr[32]; 1317 + vector128 vscr; 1318 + unsigned long uninitialized_var(vrsave); 1319 + int used_vr; 1320 #endif 1321 #ifdef CONFIG_VSX 1322 + int used_vsr; 1323 #endif 1324 ulong ext_msr; 1325 ··· 1327 /* Save FPU state in stack */ 1328 if (current->thread.regs->msr & MSR_FP) 1329 giveup_fpu(current); 1330 + memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); 1331 + fpscr = current->thread.fpscr.val; 1332 + fpexc_mode = current->thread.fpexc_mode; 1333 1334 #ifdef CONFIG_ALTIVEC 1335 /* Save Altivec state in stack */ 1336 + used_vr = current->thread.used_vr; 1337 + if (used_vr) { 1338 if (current->thread.regs->msr & MSR_VEC) 1339 giveup_altivec(current); 1340 + memcpy(vr, current->thread.vr, sizeof(current->thread.vr)); 1341 + vscr = current->thread.vscr; 1342 + vrsave = current->thread.vrsave; 1343 } 1344 #endif 1345 1346 #ifdef CONFIG_VSX 1347 /* Save VSX state in stack */ 1348 + used_vsr = current->thread.used_vsr; 1349 + if (used_vsr && (current->thread.regs->msr & MSR_VSX)) 1350 __giveup_vsx(current); 1351 #endif 1352 1353 /* Remember the MSR with disabled extensions */ ··· 1372 kvmppc_giveup_ext(vcpu, MSR_VSX); 1373 1374 /* Restore FPU state from stack */ 1375 + memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); 1376 + current->thread.fpscr.val = fpscr; 1377 + current->thread.fpexc_mode = fpexc_mode; 1378 1379 #ifdef CONFIG_ALTIVEC 1380 /* Restore Altivec state from stack */ 1381 + if (used_vr && current->thread.used_vr) { 1382 + memcpy(current->thread.vr, vr, sizeof(current->thread.vr)); 1383 + current->thread.vscr = vscr; 1384 + current->thread.vrsave = vrsave; 1385 } 1386 + current->thread.used_vr = used_vr; 1387 #endif 1388 1389 #ifdef CONFIG_VSX 1390 + current->thread.used_vsr = used_vsr; 1391 #endif 1392 1393 return ret; ··· 1395 1396 static int kvmppc_book3s_init(void) 1397 { 1398 + int r; 1399 + 1400 + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, 1401 + THIS_MODULE); 1402 + 1403 + if (r) 1404 + return r; 1405 + 1406 + r = kvmppc_mmu_hpte_sysinit(); 1407 + 1408 + return r; 1409 } 1410 1411 static void kvmppc_book3s_exit(void) 1412 { 1413 + kvmppc_mmu_hpte_sysexit(); 1414 kvm_exit(); 1415 } 1416
+4 -4
arch/powerpc/kvm/book3s_32_mmu.c
··· 354 *vsid = VSID_REAL_DR | gvsid; 355 break; 356 case MSR_DR|MSR_IR: 357 - if (!sr->valid) 358 - return -1; 359 - 360 - *vsid = sr->vsid; 361 break; 362 default: 363 BUG();
··· 354 *vsid = VSID_REAL_DR | gvsid; 355 break; 356 case MSR_DR|MSR_IR: 357 + if (sr->valid) 358 + *vsid = sr->vsid; 359 + else 360 + *vsid = VSID_BAT | gvsid; 361 break; 362 default: 363 BUG();
+12 -122
arch/powerpc/kvm/book3s_32_mmu_host.c
··· 19 */ 20 21 #include <linux/kvm_host.h> 22 23 #include <asm/kvm_ppc.h> 24 #include <asm/kvm_book3s.h> ··· 58 static ulong htab; 59 static u32 htabmask; 60 61 - static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 62 { 63 volatile u32 *pteg; 64 65 - dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n", 66 - pte->pte.eaddr, pte->pte.vpage, pte->host_va); 67 - 68 pteg = (u32*)pte->slot; 69 - 70 pteg[0] = 0; 71 asm volatile ("sync"); 72 asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); 73 asm volatile ("sync"); 74 asm volatile ("tlbsync"); 75 - 76 - pte->host_va = 0; 77 - 78 - if (pte->pte.may_write) 79 - kvm_release_pfn_dirty(pte->pfn); 80 - else 81 - kvm_release_pfn_clean(pte->pfn); 82 - } 83 - 84 - void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) 85 - { 86 - int i; 87 - 88 - dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n", 89 - vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); 90 - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 91 - 92 - guest_ea &= ea_mask; 93 - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 94 - struct hpte_cache *pte; 95 - 96 - pte = &vcpu->arch.hpte_cache[i]; 97 - if (!pte->host_va) 98 - continue; 99 - 100 - if ((pte->pte.eaddr & ea_mask) == guest_ea) { 101 - invalidate_pte(vcpu, pte); 102 - } 103 - } 104 - 105 - /* Doing a complete flush -> start from scratch */ 106 - if (!ea_mask) 107 - vcpu->arch.hpte_cache_offset = 0; 108 - } 109 - 110 - void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) 111 - { 112 - int i; 113 - 114 - dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", 115 - vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); 116 - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 117 - 118 - guest_vp &= vp_mask; 119 - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 120 - struct hpte_cache *pte; 121 - 122 - pte = &vcpu->arch.hpte_cache[i]; 123 - if (!pte->host_va) 124 - continue; 125 - 126 - if ((pte->pte.vpage & vp_mask) == guest_vp) { 127 - invalidate_pte(vcpu, pte); 128 - } 129 - } 130 - } 131 - 132 - void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 133 - { 134 - int i; 135 - 136 - dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", 137 - vcpu->arch.hpte_cache_offset, pa_start, pa_end); 138 - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 139 - 140 - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 141 - struct hpte_cache *pte; 142 - 143 - pte = &vcpu->arch.hpte_cache[i]; 144 - if (!pte->host_va) 145 - continue; 146 - 147 - if ((pte->pte.raddr >= pa_start) && 148 - (pte->pte.raddr < pa_end)) { 149 - invalidate_pte(vcpu, pte); 150 - } 151 - } 152 - } 153 - 154 - struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) 155 - { 156 - int i; 157 - u64 guest_vp; 158 - 159 - guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); 160 - for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { 161 - struct hpte_cache *pte; 162 - 163 - pte = &vcpu->arch.hpte_cache[i]; 164 - if (!pte->host_va) 165 - continue; 166 - 167 - if (pte->pte.vpage == guest_vp) 168 - return &pte->pte; 169 - } 170 - 171 - return NULL; 172 - } 173 - 174 - static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) 175 - { 176 - if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) 177 - kvmppc_mmu_pte_flush(vcpu, 0, 0); 178 - 179 - return vcpu->arch.hpte_cache_offset++; 180 } 181 182 /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using 183 * a hash, so we 
don't waste cycles on looping */ 184 static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) 185 { 186 - return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ 187 - ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ 188 - ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ 189 - ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ 190 - ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ 191 - ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ 192 - ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ 193 - ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); 194 } 195 196 ··· 144 register int rr = 0; 145 bool primary = false; 146 bool evict = false; 147 - int hpte_id; 148 struct hpte_cache *pte; 149 150 /* Get host physical address for gpa */ ··· 228 229 /* Now tell our Shadow PTE code about the new page */ 230 231 - hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); 232 - pte = &vcpu->arch.hpte_cache[hpte_id]; 233 234 dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", 235 orig_pte->may_write ? 'w' : '-', ··· 240 pte->host_va = va; 241 pte->pte = *orig_pte; 242 pte->pfn = hpaddr >> PAGE_SHIFT; 243 244 return 0; 245 } ··· 327 328 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 329 { 330 - kvmppc_mmu_pte_flush(vcpu, 0, 0); 331 preempt_disable(); 332 __destroy_context(to_book3s(vcpu)->context_id); 333 preempt_enable(); ··· 366 asm ( "mfsdr1 %0" : "=r"(sdr1) ); 367 htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; 368 htab = (ulong)__va(sdr1 & 0xffff0000); 369 370 return 0; 371 }
··· 19 */ 20 21 #include <linux/kvm_host.h> 22 + #include <linux/hash.h> 23 24 #include <asm/kvm_ppc.h> 25 #include <asm/kvm_book3s.h> ··· 57 static ulong htab; 58 static u32 htabmask; 59 60 + void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 61 { 62 volatile u32 *pteg; 63 64 + /* Remove from host HTAB */ 65 pteg = (u32*)pte->slot; 66 pteg[0] = 0; 67 + 68 + /* And make sure it's gone from the TLB too */ 69 asm volatile ("sync"); 70 asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); 71 asm volatile ("sync"); 72 asm volatile ("tlbsync"); 73 } 74 75 /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using 76 * a hash, so we don't waste cycles on looping */ 77 static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) 78 { 79 + return hash_64(gvsid, SID_MAP_BITS); 80 } 81 82 ··· 256 register int rr = 0; 257 bool primary = false; 258 bool evict = false; 259 struct hpte_cache *pte; 260 261 /* Get host physical address for gpa */ ··· 341 342 /* Now tell our Shadow PTE code about the new page */ 343 344 + pte = kvmppc_mmu_hpte_cache_next(vcpu); 345 346 dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", 347 orig_pte->may_write ? 'w' : '-', ··· 354 pte->host_va = va; 355 pte->pte = *orig_pte; 356 pte->pfn = hpaddr >> PAGE_SHIFT; 357 + 358 + kvmppc_mmu_hpte_cache_map(vcpu, pte); 359 360 return 0; 361 } ··· 439 440 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 441 { 442 + kvmppc_mmu_hpte_destroy(vcpu); 443 preempt_disable(); 444 __destroy_context(to_book3s(vcpu)->context_id); 445 preempt_enable(); ··· 478 asm ( "mfsdr1 %0" : "=r"(sdr1) ); 479 htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; 480 htab = (ulong)__va(sdr1 & 0xffff0000); 481 + 482 + kvmppc_mmu_hpte_init(vcpu); 483 484 return 0; 485 }
+9 -120
arch/powerpc/kvm/book3s_64_mmu_host.c
··· 20 */ 21 22 #include <linux/kvm_host.h> 23 24 #include <asm/kvm_ppc.h> 25 #include <asm/kvm_book3s.h> ··· 47 #define dprintk_slb(a, ...) do { } while(0) 48 #endif 49 50 - static void invalidate_pte(struct hpte_cache *pte) 51 { 52 - dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", 53 - pte->pte.eaddr, pte->pte.vpage, pte->host_va); 54 - 55 ppc_md.hpte_invalidate(pte->slot, pte->host_va, 56 MMU_PAGE_4K, MMU_SEGSIZE_256M, 57 false); 58 - pte->host_va = 0; 59 - 60 - if (pte->pte.may_write) 61 - kvm_release_pfn_dirty(pte->pfn); 62 - else 63 - kvm_release_pfn_clean(pte->pfn); 64 - } 65 - 66 - void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) 67 - { 68 - int i; 69 - 70 - dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", 71 - vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); 72 - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 73 - 74 - guest_ea &= ea_mask; 75 - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 76 - struct hpte_cache *pte; 77 - 78 - pte = &vcpu->arch.hpte_cache[i]; 79 - if (!pte->host_va) 80 - continue; 81 - 82 - if ((pte->pte.eaddr & ea_mask) == guest_ea) { 83 - invalidate_pte(pte); 84 - } 85 - } 86 - 87 - /* Doing a complete flush -> start from scratch */ 88 - if (!ea_mask) 89 - vcpu->arch.hpte_cache_offset = 0; 90 - } 91 - 92 - void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) 93 - { 94 - int i; 95 - 96 - dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", 97 - vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); 98 - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 99 - 100 - guest_vp &= vp_mask; 101 - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 102 - struct hpte_cache *pte; 103 - 104 - pte = &vcpu->arch.hpte_cache[i]; 105 - if (!pte->host_va) 106 - continue; 107 - 108 - if ((pte->pte.vpage & vp_mask) == guest_vp) { 109 - invalidate_pte(pte); 110 - } 111 - } 112 - } 113 - 114 - void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 115 - { 116 - int i; 117 - 118 - dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx & 0x%lx\n", 119 - vcpu->arch.hpte_cache_offset, pa_start, pa_end); 120 - BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 121 - 122 - for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 123 - struct hpte_cache *pte; 124 - 125 - pte = &vcpu->arch.hpte_cache[i]; 126 - if (!pte->host_va) 127 - continue; 128 - 129 - if ((pte->pte.raddr >= pa_start) && 130 - (pte->pte.raddr < pa_end)) { 131 - invalidate_pte(pte); 132 - } 133 - } 134 - } 135 - 136 - struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) 137 - { 138 - int i; 139 - u64 guest_vp; 140 - 141 - guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); 142 - for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { 143 - struct hpte_cache *pte; 144 - 145 - pte = &vcpu->arch.hpte_cache[i]; 146 - if (!pte->host_va) 147 - continue; 148 - 149 - if (pte->pte.vpage == guest_vp) 150 - return &pte->pte; 151 - } 152 - 153 - return NULL; 154 - } 155 - 156 - static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) 157 - { 158 - if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) 159 - kvmppc_mmu_pte_flush(vcpu, 0, 0); 160 - 161 - return vcpu->arch.hpte_cache_offset++; 162 } 163 164 /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using 165 * a hash, so we don't waste cycles on looping */ 166 static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) 167 { 168 - return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ 169 - 
((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ 170 - ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ 171 - ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ 172 - ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ 173 - ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ 174 - ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ 175 - ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); 176 } 177 - 178 179 static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) 180 { ··· 159 attempt++; 160 goto map_again; 161 } else { 162 - int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); 163 - struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; 164 165 dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", 166 ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', ··· 177 pte->host_va = va; 178 pte->pte = *orig_pte; 179 pte->pfn = hpaddr >> PAGE_SHIFT; 180 } 181 182 return 0; ··· 305 306 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 307 { 308 - kvmppc_mmu_pte_flush(vcpu, 0, 0); 309 __destroy_context(to_book3s(vcpu)->context_id); 310 } 311 ··· 322 vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1; 323 vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; 324 vcpu3s->vsid_next = vcpu3s->vsid_first; 325 326 return 0; 327 }
··· 20 */ 21 22 #include <linux/kvm_host.h> 23 + #include <linux/hash.h> 24 25 #include <asm/kvm_ppc.h> 26 #include <asm/kvm_book3s.h> ··· 46 #define dprintk_slb(a, ...) do { } while(0) 47 #endif 48 49 + void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 50 { 51 ppc_md.hpte_invalidate(pte->slot, pte->host_va, 52 MMU_PAGE_4K, MMU_SEGSIZE_256M, 53 false); 54 } 55 56 /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using 57 * a hash, so we don't waste cycles on looping */ 58 static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) 59 { 60 + return hash_64(gvsid, SID_MAP_BITS); 61 } 62 63 static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) 64 { ··· 273 attempt++; 274 goto map_again; 275 } else { 276 + struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); 277 278 dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", 279 ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', ··· 292 pte->host_va = va; 293 pte->pte = *orig_pte; 294 pte->pfn = hpaddr >> PAGE_SHIFT; 295 + 296 + kvmppc_mmu_hpte_cache_map(vcpu, pte); 297 } 298 299 return 0; ··· 418 419 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 420 { 421 + kvmppc_mmu_hpte_destroy(vcpu); 422 __destroy_context(to_book3s(vcpu)->context_id); 423 } 424 ··· 435 vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1; 436 vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; 437 vcpu3s->vsid_next = vcpu3s->vsid_first; 438 + 439 + kvmppc_mmu_hpte_init(vcpu); 440 441 return 0; 442 }
+277
arch/powerpc/kvm/book3s_mmu_hpte.c
···
··· 1 + /* 2 + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. 3 + * 4 + * Authors: 5 + * Alexander Graf <agraf@suse.de> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License, version 2, as 9 + * published by the Free Software Foundation. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write to the Free Software 18 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + 21 + #include <linux/kvm_host.h> 22 + #include <linux/hash.h> 23 + #include <linux/slab.h> 24 + 25 + #include <asm/kvm_ppc.h> 26 + #include <asm/kvm_book3s.h> 27 + #include <asm/machdep.h> 28 + #include <asm/mmu_context.h> 29 + #include <asm/hw_irq.h> 30 + 31 + #define PTE_SIZE 12 32 + 33 + /* #define DEBUG_MMU */ 34 + 35 + #ifdef DEBUG_MMU 36 + #define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) 37 + #else 38 + #define dprintk_mmu(a, ...) do { } while(0) 39 + #endif 40 + 41 + static struct kmem_cache *hpte_cache; 42 + 43 + static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) 44 + { 45 + return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE); 46 + } 47 + 48 + static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) 49 + { 50 + return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE); 51 + } 52 + 53 + static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage) 54 + { 55 + return hash_64((vpage & 0xffffff000ULL) >> 12, 56 + HPTEG_HASH_BITS_VPTE_LONG); 57 + } 58 + 59 + void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 60 + { 61 + u64 index; 62 + 63 + /* Add to ePTE list */ 64 + index = kvmppc_mmu_hash_pte(pte->pte.eaddr); 65 + hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); 66 + 67 + /* Add to vPTE list */ 68 + index = kvmppc_mmu_hash_vpte(pte->pte.vpage); 69 + hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); 70 + 71 + /* Add to vPTE_long list */ 72 + index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); 73 + hlist_add_head(&pte->list_vpte_long, 74 + &vcpu->arch.hpte_hash_vpte_long[index]); 75 + } 76 + 77 + static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 78 + { 79 + dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", 80 + pte->pte.eaddr, pte->pte.vpage, pte->host_va); 81 + 82 + /* Different for 32 and 64 bit */ 83 + kvmppc_mmu_invalidate_pte(vcpu, pte); 84 + 85 + if (pte->pte.may_write) 86 + kvm_release_pfn_dirty(pte->pfn); 87 + else 88 + kvm_release_pfn_clean(pte->pfn); 89 + 90 + hlist_del(&pte->list_pte); 91 + hlist_del(&pte->list_vpte); 92 + hlist_del(&pte->list_vpte_long); 93 + 94 + vcpu->arch.hpte_cache_count--; 95 + kmem_cache_free(hpte_cache, pte); 96 + } 97 + 98 + static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) 99 + { 100 + struct hpte_cache *pte; 101 + struct hlist_node *node, *tmp; 102 + int i; 103 + 104 + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { 105 + struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; 106 + 107 + hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) 108 + invalidate_pte(vcpu, pte); 109 + } 110 + } 111 + 112 + static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) 113 + { 114 
+ struct hlist_head *list; 115 + struct hlist_node *node, *tmp; 116 + struct hpte_cache *pte; 117 + 118 + /* Find the list of entries in the map */ 119 + list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; 120 + 121 + /* Check the list for matching entries and invalidate */ 122 + hlist_for_each_entry_safe(pte, node, tmp, list, list_pte) 123 + if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) 124 + invalidate_pte(vcpu, pte); 125 + } 126 + 127 + void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) 128 + { 129 + u64 i; 130 + 131 + dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", 132 + vcpu->arch.hpte_cache_count, guest_ea, ea_mask); 133 + 134 + guest_ea &= ea_mask; 135 + 136 + switch (ea_mask) { 137 + case ~0xfffUL: 138 + kvmppc_mmu_pte_flush_page(vcpu, guest_ea); 139 + break; 140 + case 0x0ffff000: 141 + /* 32-bit flush w/o segment, go through all possible segments */ 142 + for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL) 143 + kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL); 144 + break; 145 + case 0: 146 + /* Doing a complete flush -> start from scratch */ 147 + kvmppc_mmu_pte_flush_all(vcpu); 148 + break; 149 + default: 150 + WARN_ON(1); 151 + break; 152 + } 153 + } 154 + 155 + /* Flush with mask 0xfffffffff */ 156 + static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) 157 + { 158 + struct hlist_head *list; 159 + struct hlist_node *node, *tmp; 160 + struct hpte_cache *pte; 161 + u64 vp_mask = 0xfffffffffULL; 162 + 163 + list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; 164 + 165 + /* Check the list for matching entries and invalidate */ 166 + hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte) 167 + if ((pte->pte.vpage & vp_mask) == guest_vp) 168 + invalidate_pte(vcpu, pte); 169 + } 170 + 171 + /* Flush with mask 0xffffff000 */ 172 + static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) 173 + { 174 + struct hlist_head *list; 175 + struct hlist_node *node, *tmp; 176 + struct hpte_cache *pte; 177 + u64 vp_mask = 0xffffff000ULL; 178 + 179 + list = &vcpu->arch.hpte_hash_vpte_long[ 180 + kvmppc_mmu_hash_vpte_long(guest_vp)]; 181 + 182 + /* Check the list for matching entries and invalidate */ 183 + hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) 184 + if ((pte->pte.vpage & vp_mask) == guest_vp) 185 + invalidate_pte(vcpu, pte); 186 + } 187 + 188 + void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) 189 + { 190 + dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", 191 + vcpu->arch.hpte_cache_count, guest_vp, vp_mask); 192 + guest_vp &= vp_mask; 193 + 194 + switch(vp_mask) { 195 + case 0xfffffffffULL: 196 + kvmppc_mmu_pte_vflush_short(vcpu, guest_vp); 197 + break; 198 + case 0xffffff000ULL: 199 + kvmppc_mmu_pte_vflush_long(vcpu, guest_vp); 200 + break; 201 + default: 202 + WARN_ON(1); 203 + return; 204 + } 205 + } 206 + 207 + void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 208 + { 209 + struct hlist_node *node, *tmp; 210 + struct hpte_cache *pte; 211 + int i; 212 + 213 + dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n", 214 + vcpu->arch.hpte_cache_count, pa_start, pa_end); 215 + 216 + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { 217 + struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; 218 + 219 + hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) 220 + if ((pte->pte.raddr >= pa_start) && 221 + (pte->pte.raddr < pa_end)) 222 + invalidate_pte(vcpu, pte); 
223 + } 224 + } 225 + 226 + struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) 227 + { 228 + struct hpte_cache *pte; 229 + 230 + pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); 231 + vcpu->arch.hpte_cache_count++; 232 + 233 + if (vcpu->arch.hpte_cache_count == HPTEG_CACHE_NUM) 234 + kvmppc_mmu_pte_flush_all(vcpu); 235 + 236 + return pte; 237 + } 238 + 239 + void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) 240 + { 241 + kvmppc_mmu_pte_flush(vcpu, 0, 0); 242 + } 243 + 244 + static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len) 245 + { 246 + int i; 247 + 248 + for (i = 0; i < len; i++) 249 + INIT_HLIST_HEAD(&hash_list[i]); 250 + } 251 + 252 + int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) 253 + { 254 + /* init hpte lookup hashes */ 255 + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte, 256 + ARRAY_SIZE(vcpu->arch.hpte_hash_pte)); 257 + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte, 258 + ARRAY_SIZE(vcpu->arch.hpte_hash_vpte)); 259 + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, 260 + ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long)); 261 + 262 + return 0; 263 + } 264 + 265 + int kvmppc_mmu_hpte_sysinit(void) 266 + { 267 + /* init hpte slab cache */ 268 + hpte_cache = kmem_cache_create("kvm-spt", sizeof(struct hpte_cache), 269 + sizeof(struct hpte_cache), 0, NULL); 270 + 271 + return 0; 272 + } 273 + 274 + void kvmppc_mmu_hpte_sysexit(void) 275 + { 276 + kmem_cache_destroy(hpte_cache); 277 + }
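The new cache above replaces the old flat hpte_cache array with three per-vCPU hash lists (by effective address, by vpage, and by long vpage), so a targeted flush walks a single bucket instead of every cached entry. Below is a minimal standalone sketch of that bucketed-lookup idea; the multiplicative hash constant, the plain singly linked list, and the 16-bucket table are illustrative stand-ins for the kernel's hash_64(), the hlist helpers, and the HPTEG_HASH_BITS_* sizes, not copies of them.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define HASH_BITS 4                     /* 16 buckets; the kernel uses HPTEG_HASH_BITS_* */
    #define NBUCKETS (1u << HASH_BITS)

    struct spte {                           /* stand-in for struct hpte_cache */
        uint64_t eaddr;
        struct spte *next;                  /* stand-in for the hlist linkage */
    };

    static struct spte *buckets[NBUCKETS];

    /* Multiplicative hash in the spirit of hash_64(); the constant is
     * illustrative, not the kernel's golden-ratio prime. */
    static unsigned hash_ea(uint64_t eaddr)
    {
        return (unsigned)(((eaddr >> 12) * 0x9E3779B97F4A7C15ULL) >> (64 - HASH_BITS));
    }

    static void map_spte(uint64_t eaddr)
    {
        struct spte *p = calloc(1, sizeof(*p));
        unsigned i = hash_ea(eaddr);

        p->eaddr = eaddr;
        p->next = buckets[i];               /* head insertion, like hlist_add_head() */
        buckets[i] = p;
    }

    static void flush_page(uint64_t ea)
    {
        struct spte **pp = &buckets[hash_ea(ea)];

        while (*pp) {                       /* walk one bucket only, as in kvmppc_mmu_pte_flush_page() */
            if (((*pp)->eaddr & ~0xfffULL) == (ea & ~0xfffULL)) {
                struct spte *victim = *pp;
                *pp = victim->next;
                free(victim);
            } else {
                pp = &(*pp)->next;
            }
        }
    }

    int main(void)
    {
        map_spte(0x1000);
        map_spte(0x2000);
        flush_page(0x1000);
        printf("0x2000 still cached: %d\n", buckets[hash_ea(0x2000)] != NULL);
        return 0;
    }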
+37 -57
arch/powerpc/kvm/book3s_paired_singles.c
··· 159 160 static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) 161 { 162 - struct thread_struct t; 163 - 164 - t.fpscr.val = vcpu->arch.fpscr; 165 - cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t); 166 } 167 168 static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) ··· 180 int rs, ulong addr, int ls_type) 181 { 182 int emulated = EMULATE_FAIL; 183 - struct thread_struct t; 184 int r; 185 char tmp[8]; 186 int len = sizeof(u32); 187 188 if (ls_type == FPU_LS_DOUBLE) 189 len = sizeof(u64); 190 - 191 - t.fpscr.val = vcpu->arch.fpscr; 192 193 /* read from memory */ 194 r = kvmppc_ld(vcpu, &addr, len, tmp, true); ··· 204 /* put in registers */ 205 switch (ls_type) { 206 case FPU_LS_SINGLE: 207 - cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t); 208 vcpu->arch.qpr[rs] = *((u32*)tmp); 209 break; 210 case FPU_LS_DOUBLE: ··· 223 int rs, ulong addr, int ls_type) 224 { 225 int emulated = EMULATE_FAIL; 226 - struct thread_struct t; 227 int r; 228 char tmp[8]; 229 u64 val; 230 int len; 231 232 - t.fpscr.val = vcpu->arch.fpscr; 233 - 234 switch (ls_type) { 235 case FPU_LS_SINGLE: 236 - cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t); 237 val = *((u32*)tmp); 238 len = sizeof(u32); 239 break; ··· 269 int rs, ulong addr, bool w, int i) 270 { 271 int emulated = EMULATE_FAIL; 272 - struct thread_struct t; 273 int r; 274 float one = 1.0; 275 u32 tmp[2]; 276 - 277 - t.fpscr.val = vcpu->arch.fpscr; 278 279 /* read from memory */ 280 if (w) { ··· 296 emulated = EMULATE_DONE; 297 298 /* put in registers */ 299 - cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t); 300 vcpu->arch.qpr[rs] = tmp[1]; 301 302 dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], ··· 310 int rs, ulong addr, bool w, int i) 311 { 312 int emulated = EMULATE_FAIL; 313 - struct thread_struct t; 314 int r; 315 u32 tmp[2]; 316 int len = w ? 
sizeof(u32) : sizeof(u64); 317 318 - t.fpscr.val = vcpu->arch.fpscr; 319 - 320 - cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); 321 tmp[1] = vcpu->arch.qpr[rs]; 322 323 r = kvmppc_st(vcpu, &addr, len, tmp, true); ··· 502 static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, 503 int reg_out, int reg_in1, int reg_in2, 504 int reg_in3, int scalar, 505 - void (*func)(struct thread_struct *t, 506 u32 *dst, u32 *src1, 507 u32 *src2, u32 *src3)) 508 { ··· 511 u32 ps0_out; 512 u32 ps0_in1, ps0_in2, ps0_in3; 513 u32 ps1_in1, ps1_in2, ps1_in3; 514 - struct thread_struct t; 515 - t.fpscr.val = vcpu->arch.fpscr; 516 517 /* RC */ 518 WARN_ON(rc); 519 520 /* PS0 */ 521 - cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); 522 - cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); 523 - cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); 524 525 if (scalar & SCALAR_LOW) 526 ps0_in2 = qpr[reg_in2]; 527 528 - func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); 529 530 dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", 531 ps0_in1, ps0_in2, ps0_in3, ps0_out); 532 533 if (!(scalar & SCALAR_NO_PS0)) 534 - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); 535 536 /* PS1 */ 537 ps1_in1 = qpr[reg_in1]; ··· 540 ps1_in2 = ps0_in2; 541 542 if (!(scalar & SCALAR_NO_PS1)) 543 - func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); 544 545 dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", 546 ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); ··· 551 static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, 552 int reg_out, int reg_in1, int reg_in2, 553 int scalar, 554 - void (*func)(struct thread_struct *t, 555 u32 *dst, u32 *src1, 556 u32 *src2)) 557 { ··· 561 u32 ps0_in1, ps0_in2; 562 u32 ps1_out; 563 u32 ps1_in1, ps1_in2; 564 - struct thread_struct t; 565 - t.fpscr.val = vcpu->arch.fpscr; 566 567 /* RC */ 568 WARN_ON(rc); 569 570 /* PS0 */ 571 - cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); 572 573 if (scalar & SCALAR_LOW) 574 ps0_in2 = qpr[reg_in2]; 575 else 576 - cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); 577 578 - func(&t, &ps0_out, &ps0_in1, &ps0_in2); 579 580 if (!(scalar & SCALAR_NO_PS0)) { 581 dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", 582 ps0_in1, ps0_in2, ps0_out); 583 584 - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); 585 } 586 587 /* PS1 */ ··· 589 if (scalar & SCALAR_HIGH) 590 ps1_in2 = ps0_in2; 591 592 - func(&t, &ps1_out, &ps1_in1, &ps1_in2); 593 594 if (!(scalar & SCALAR_NO_PS1)) { 595 qpr[reg_out] = ps1_out; ··· 603 604 static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, 605 int reg_out, int reg_in, 606 - void (*func)(struct thread_struct *t, 607 u32 *dst, u32 *src1)) 608 { 609 u32 *qpr = vcpu->arch.qpr; 610 u64 *fpr = vcpu->arch.fpr; 611 u32 ps0_out, ps0_in; 612 u32 ps1_in; 613 - struct thread_struct t; 614 - t.fpscr.val = vcpu->arch.fpscr; 615 616 /* RC */ 617 WARN_ON(rc); 618 619 /* PS0 */ 620 - cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); 621 - func(&t, &ps0_out, &ps0_in); 622 623 dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", 624 ps0_in, ps0_out); 625 626 - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); 627 628 /* PS1 */ 629 ps1_in = qpr[reg_in]; 630 - func(&t, &qpr[reg_out], &ps1_in); 631 632 dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", 633 ps1_in, qpr[reg_out]); ··· 651 652 bool rcomp = (inst & 1) ? 
true : false; 653 u32 cr = kvmppc_get_cr(vcpu); 654 - struct thread_struct t; 655 #ifdef DEBUG 656 int i; 657 #endif 658 - 659 - t.fpscr.val = vcpu->arch.fpscr; 660 661 if (!kvmppc_inst_is_paired_single(vcpu, inst)) 662 return EMULATE_FAIL; ··· 671 #ifdef DEBUG 672 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { 673 u32 f; 674 - cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); 675 dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", 676 i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); 677 } ··· 795 WARN_ON(rcomp); 796 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; 797 /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ 798 - cvt_df((double*)&vcpu->arch.fpr[ax_rb], 799 - (float*)&vcpu->arch.qpr[ax_rd], &t); 800 break; 801 case OP_4X_PS_MERGE01: 802 WARN_ON(rcomp); ··· 807 case OP_4X_PS_MERGE10: 808 WARN_ON(rcomp); 809 /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ 810 - cvt_fd((float*)&vcpu->arch.qpr[ax_ra], 811 - (double*)&vcpu->arch.fpr[ax_rd], &t); 812 /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ 813 - cvt_df((double*)&vcpu->arch.fpr[ax_rb], 814 - (float*)&vcpu->arch.qpr[ax_rd], &t); 815 break; 816 case OP_4X_PS_MERGE11: 817 WARN_ON(rcomp); 818 /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ 819 - cvt_fd((float*)&vcpu->arch.qpr[ax_ra], 820 - (double*)&vcpu->arch.fpr[ax_rd], &t); 821 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; 822 break; 823 } ··· 1255 #ifdef DEBUG 1256 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { 1257 u32 f; 1258 - cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); 1259 dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); 1260 } 1261 #endif
··· 159 160 static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) 161 { 162 + kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr); 163 } 164 165 static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) ··· 183 int rs, ulong addr, int ls_type) 184 { 185 int emulated = EMULATE_FAIL; 186 int r; 187 char tmp[8]; 188 int len = sizeof(u32); 189 190 if (ls_type == FPU_LS_DOUBLE) 191 len = sizeof(u64); 192 193 /* read from memory */ 194 r = kvmppc_ld(vcpu, &addr, len, tmp, true); ··· 210 /* put in registers */ 211 switch (ls_type) { 212 case FPU_LS_SINGLE: 213 + kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); 214 vcpu->arch.qpr[rs] = *((u32*)tmp); 215 break; 216 case FPU_LS_DOUBLE: ··· 229 int rs, ulong addr, int ls_type) 230 { 231 int emulated = EMULATE_FAIL; 232 int r; 233 char tmp[8]; 234 u64 val; 235 int len; 236 237 switch (ls_type) { 238 case FPU_LS_SINGLE: 239 + kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr); 240 val = *((u32*)tmp); 241 len = sizeof(u32); 242 break; ··· 278 int rs, ulong addr, bool w, int i) 279 { 280 int emulated = EMULATE_FAIL; 281 int r; 282 float one = 1.0; 283 u32 tmp[2]; 284 285 /* read from memory */ 286 if (w) { ··· 308 emulated = EMULATE_DONE; 309 310 /* put in registers */ 311 + kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); 312 vcpu->arch.qpr[rs] = tmp[1]; 313 314 dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], ··· 322 int rs, ulong addr, bool w, int i) 323 { 324 int emulated = EMULATE_FAIL; 325 int r; 326 u32 tmp[2]; 327 int len = w ? sizeof(u32) : sizeof(u64); 328 329 + kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr); 330 tmp[1] = vcpu->arch.qpr[rs]; 331 332 r = kvmppc_st(vcpu, &addr, len, tmp, true); ··· 517 static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, 518 int reg_out, int reg_in1, int reg_in2, 519 int reg_in3, int scalar, 520 + void (*func)(u64 *fpscr, 521 u32 *dst, u32 *src1, 522 u32 *src2, u32 *src3)) 523 { ··· 526 u32 ps0_out; 527 u32 ps0_in1, ps0_in2, ps0_in3; 528 u32 ps1_in1, ps1_in2, ps1_in3; 529 530 /* RC */ 531 WARN_ON(rc); 532 533 /* PS0 */ 534 + kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); 535 + kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); 536 + kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr); 537 538 if (scalar & SCALAR_LOW) 539 ps0_in2 = qpr[reg_in2]; 540 541 + func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); 542 543 dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", 544 ps0_in1, ps0_in2, ps0_in3, ps0_out); 545 546 if (!(scalar & SCALAR_NO_PS0)) 547 + kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); 548 549 /* PS1 */ 550 ps1_in1 = qpr[reg_in1]; ··· 557 ps1_in2 = ps0_in2; 558 559 if (!(scalar & SCALAR_NO_PS1)) 560 + func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); 561 562 dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", 563 ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); ··· 568 static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, 569 int reg_out, int reg_in1, int reg_in2, 570 int scalar, 571 + void (*func)(u64 *fpscr, 572 u32 *dst, u32 *src1, 573 u32 *src2)) 574 { ··· 578 u32 ps0_in1, ps0_in2; 579 u32 ps1_out; 580 u32 ps1_in1, ps1_in2; 581 582 /* RC */ 583 WARN_ON(rc); 584 585 /* PS0 */ 586 + kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); 587 588 if (scalar & SCALAR_LOW) 589 ps0_in2 = qpr[reg_in2]; 590 else 591 + kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); 592 593 + 
func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); 594 595 if (!(scalar & SCALAR_NO_PS0)) { 596 dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", 597 ps0_in1, ps0_in2, ps0_out); 598 599 + kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); 600 } 601 602 /* PS1 */ ··· 608 if (scalar & SCALAR_HIGH) 609 ps1_in2 = ps0_in2; 610 611 + func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); 612 613 if (!(scalar & SCALAR_NO_PS1)) { 614 qpr[reg_out] = ps1_out; ··· 622 623 static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, 624 int reg_out, int reg_in, 625 + void (*func)(u64 *t, 626 u32 *dst, u32 *src1)) 627 { 628 u32 *qpr = vcpu->arch.qpr; 629 u64 *fpr = vcpu->arch.fpr; 630 u32 ps0_out, ps0_in; 631 u32 ps1_in; 632 633 /* RC */ 634 WARN_ON(rc); 635 636 /* PS0 */ 637 + kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr); 638 + func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); 639 640 dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", 641 ps0_in, ps0_out); 642 643 + kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); 644 645 /* PS1 */ 646 ps1_in = qpr[reg_in]; 647 + func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in); 648 649 dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", 650 ps1_in, qpr[reg_out]); ··· 672 673 bool rcomp = (inst & 1) ? true : false; 674 u32 cr = kvmppc_get_cr(vcpu); 675 #ifdef DEBUG 676 int i; 677 #endif 678 679 if (!kvmppc_inst_is_paired_single(vcpu, inst)) 680 return EMULATE_FAIL; ··· 695 #ifdef DEBUG 696 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { 697 u32 f; 698 + kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); 699 dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", 700 i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); 701 } ··· 819 WARN_ON(rcomp); 820 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; 821 /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ 822 + kvm_cvt_df(&vcpu->arch.fpr[ax_rb], 823 + &vcpu->arch.qpr[ax_rd], 824 + &vcpu->arch.fpscr); 825 break; 826 case OP_4X_PS_MERGE01: 827 WARN_ON(rcomp); ··· 830 case OP_4X_PS_MERGE10: 831 WARN_ON(rcomp); 832 /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ 833 + kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], 834 + &vcpu->arch.fpr[ax_rd], 835 + &vcpu->arch.fpscr); 836 /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ 837 + kvm_cvt_df(&vcpu->arch.fpr[ax_rb], 838 + &vcpu->arch.qpr[ax_rd], 839 + &vcpu->arch.fpscr); 840 break; 841 case OP_4X_PS_MERGE11: 842 WARN_ON(rcomp); 843 /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ 844 + kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], 845 + &vcpu->arch.fpr[ax_rd], 846 + &vcpu->arch.fpscr); 847 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; 848 break; 849 } ··· 1275 #ifdef DEBUG 1276 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { 1277 u32 f; 1278 + kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); 1279 dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); 1280 } 1281 #endif
+1 -11
arch/powerpc/kvm/booke.c
··· 144 unsigned int priority) 145 { 146 int allowed = 0; 147 - ulong msr_mask; 148 bool update_esr = false, update_dear = false; 149 150 switch (priority) { ··· 485 { 486 int i; 487 488 - vcpu_load(vcpu); 489 - 490 regs->pc = vcpu->arch.pc; 491 regs->cr = kvmppc_get_cr(vcpu); 492 regs->ctr = vcpu->arch.ctr; ··· 505 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 506 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 507 508 - vcpu_put(vcpu); 509 - 510 return 0; 511 } 512 513 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 514 { 515 int i; 516 - 517 - vcpu_load(vcpu); 518 519 vcpu->arch.pc = regs->pc; 520 kvmppc_set_cr(vcpu, regs->cr); ··· 530 531 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 532 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 533 - 534 - vcpu_put(vcpu); 535 536 return 0; 537 } ··· 561 { 562 int r; 563 564 - vcpu_load(vcpu); 565 r = kvmppc_core_vcpu_translate(vcpu, tr); 566 - vcpu_put(vcpu); 567 return r; 568 } 569
··· 144 unsigned int priority) 145 { 146 int allowed = 0; 147 + ulong uninitialized_var(msr_mask); 148 bool update_esr = false, update_dear = false; 149 150 switch (priority) { ··· 485 { 486 int i; 487 488 regs->pc = vcpu->arch.pc; 489 regs->cr = kvmppc_get_cr(vcpu); 490 regs->ctr = vcpu->arch.ctr; ··· 507 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 508 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 509 510 return 0; 511 } 512 513 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 514 { 515 int i; 516 517 vcpu->arch.pc = regs->pc; 518 kvmppc_set_cr(vcpu, regs->cr); ··· 536 537 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 538 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 539 540 return 0; 541 } ··· 569 { 570 int r; 571 572 r = kvmppc_core_vcpu_translate(vcpu, tr); 573 return r; 574 } 575
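msr_mask here, and the MMIO gpr further down in powerpc.c, are now wrapped in uninitialized_var() to quiet gcc's false "may be used uninitialized" warning; the paths that go on to use them do assign them first. As a sketch of what the annotation amounts to, assuming the usual compiler-gcc.h definition of this era (a plain self-assignment):

    /* Assumed definition, as in include/linux/compiler-gcc.h of this period */
    #define uninitialized_var(x) x = x

    int demo(int priority)
    {
        unsigned long uninitialized_var(msr_mask);  /* expands to: msr_mask = msr_mask */

        switch (priority) {
        case 0:
            msr_mask = 0x1;
            break;
        default:
            msr_mask = 0x2;
            break;
        }
        return (int)msr_mask;   /* no bogus "may be used uninitialized" warning */
    }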
+18
arch/powerpc/kvm/fpu.S
··· 271 FPD_THREE_IN(fmadd) 272 FPD_THREE_IN(fnmsub) 273 FPD_THREE_IN(fnmadd)
··· 271 FPD_THREE_IN(fmadd) 272 FPD_THREE_IN(fnmsub) 273 FPD_THREE_IN(fnmadd) 274 + 275 + _GLOBAL(kvm_cvt_fd) 276 + lfd 0,0(r5) /* load up fpscr value */ 277 + MTFSF_L(0) 278 + lfs 0,0(r3) 279 + stfd 0,0(r4) 280 + mffs 0 281 + stfd 0,0(r5) /* save new fpscr value */ 282 + blr 283 + 284 + _GLOBAL(kvm_cvt_df) 285 + lfd 0,0(r5) /* load up fpscr value */ 286 + MTFSF_L(0) 287 + lfd 0,0(r3) 288 + stfs 0,0(r4) 289 + mffs 0 290 + stfd 0,0(r5) /* save new fpscr value */ 291 + blr
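The two helpers added above perform the single/double conversions for paired-single emulation under the guest's FPSCR: they load the saved FPSCR image, install it with MTFSF_L, convert via lfs/stfd or lfd/stfs, and store the updated FPSCR back with mffs. A rough, hedged C analogue of the kvm_cvt_df direction, using the hosted fenv.h interface purely to illustrate the semantics rather than anything the kernel does:

    #include <fenv.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustration only: narrow a 64-bit FPR image to a single under a
     * chosen rounding mode and observe the status flags it raises. */
    static void cvt_df_demo(const uint64_t *fpr, uint32_t *out, int rounding)
    {
        double d;
        float f;
        int saved = fegetround();

        memcpy(&d, fpr, sizeof(d));
        fesetround(rounding);               /* stands in for MTFSF_L of the guest FPSCR */
        feclearexcept(FE_ALL_EXCEPT);

        f = (float)d;                       /* may raise inexact/overflow/underflow */
        memcpy(out, &f, sizeof(f));

        if (fetestexcept(FE_INEXACT))       /* stands in for reading FPSCR back with mffs */
            printf("narrowing was inexact\n");
        fesetround(saved);
    }

    int main(void)
    {
        double d = 1.0 / 3.0;
        uint64_t fpr;
        uint32_t s;

        memcpy(&fpr, &d, sizeof(fpr));
        cvt_df_demo(&fpr, &s, FE_TOWARDZERO);
        printf("single image: 0x%08x\n", s);
        return 0;
    }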
+3 -11
arch/powerpc/kvm/powerpc.c
··· 36 #define CREATE_TRACE_POINTS 37 #include "trace.h" 38 39 - gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 40 - { 41 - return gfn; 42 - } 43 - 44 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 45 { 46 return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); ··· 282 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 283 struct kvm_run *run) 284 { 285 - u64 gpr; 286 287 if (run->mmio.len > sizeof(gpr)) { 288 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); ··· 418 int r; 419 sigset_t sigsaved; 420 421 - vcpu_load(vcpu); 422 - 423 if (vcpu->sigset_active) 424 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 425 ··· 448 449 if (vcpu->sigset_active) 450 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 451 - 452 - vcpu_put(vcpu); 453 454 return r; 455 } ··· 514 if (copy_from_user(&irq, argp, sizeof(irq))) 515 goto out; 516 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 517 - break; 518 } 519 case KVM_ENABLE_CAP: 520 { 521 struct kvm_enable_cap cap;
··· 36 #define CREATE_TRACE_POINTS 37 #include "trace.h" 38 39 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 40 { 41 return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); ··· 287 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 288 struct kvm_run *run) 289 { 290 + u64 uninitialized_var(gpr); 291 292 if (run->mmio.len > sizeof(gpr)) { 293 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); ··· 423 int r; 424 sigset_t sigsaved; 425 426 if (vcpu->sigset_active) 427 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 428 ··· 455 456 if (vcpu->sigset_active) 457 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 458 459 return r; 460 } ··· 523 if (copy_from_user(&irq, argp, sizeof(irq))) 524 goto out; 525 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 526 + goto out; 527 } 528 + 529 case KVM_ENABLE_CAP: 530 { 531 struct kvm_enable_cap cap;
+3 -2
arch/s390/include/asm/kvm_host.h
··· 26 27 struct sca_entry { 28 atomic_t scn; 29 - __u64 reserved; 30 __u64 sda; 31 __u64 reserved2[2]; 32 } __attribute__((packed)); ··· 41 } __attribute__((packed)); 42 43 #define KVM_NR_PAGE_SIZES 2 44 - #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + ((x) - 1) * 8) 45 #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) 46 #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 47 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
··· 26 27 struct sca_entry { 28 atomic_t scn; 29 + __u32 reserved; 30 __u64 sda; 31 __u64 reserved2[2]; 32 } __attribute__((packed)); ··· 41 } __attribute__((packed)); 42 43 #define KVM_NR_PAGE_SIZES 2 44 + #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8) 45 + #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) 46 #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) 47 #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 48 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
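The reworked macros above split out KVM_HPAGE_GFN_SHIFT so that gfn-level and byte-level arithmetic share one definition. A quick standalone check of what they expand to on s390, where PAGE_SHIFT is 12 and the two supported sizes are 4 KiB pages and 1 MiB segments:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8)
    #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
    #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
    #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / (1UL << PAGE_SHIFT))

    int main(void)
    {
        printf("level 1: %lu KiB, %lu base pages\n",
               KVM_HPAGE_SIZE(1) >> 10, KVM_PAGES_PER_HPAGE(1));   /* 4 KiB, 1 */
        printf("level 2: %lu KiB, %lu base pages\n",
               KVM_HPAGE_SIZE(2) >> 10, KVM_PAGES_PER_HPAGE(2));   /* 1024 KiB, 256 */
        return 0;
    }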
+1 -1
arch/s390/kvm/intercept.c
··· 135 spin_lock_bh(&vcpu->arch.local_int.lock); 136 if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { 137 vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; 138 - rc = __kvm_s390_vcpu_store_status(vcpu, 139 KVM_S390_STORE_STATUS_NOADDR); 140 if (rc >= 0) 141 rc = -EOPNOTSUPP;
··· 135 spin_lock_bh(&vcpu->arch.local_int.lock); 136 if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { 137 vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; 138 + rc = kvm_s390_vcpu_store_status(vcpu, 139 KVM_S390_STORE_STATUS_NOADDR); 140 if (rc >= 0) 141 rc = -EOPNOTSUPP;
+19 -45
arch/s390/kvm/kvm-s390.c
··· 207 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 208 { 209 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 210 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == 211 (__u64) vcpu->arch.sie_block) 212 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; ··· 297 { 298 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); 299 set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests); 300 - vcpu->arch.sie_block->ecb = 2; 301 vcpu->arch.sie_block->eca = 0xC1002001U; 302 vcpu->arch.sie_block->fac = (int) (long) facilities; 303 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); ··· 330 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; 331 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); 332 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; 333 334 spin_lock_init(&vcpu->arch.local_int.lock); 335 INIT_LIST_HEAD(&vcpu->arch.local_int.list); ··· 365 366 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 367 { 368 - vcpu_load(vcpu); 369 kvm_s390_vcpu_initial_reset(vcpu); 370 - vcpu_put(vcpu); 371 return 0; 372 } 373 374 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 375 { 376 - vcpu_load(vcpu); 377 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs)); 378 - vcpu_put(vcpu); 379 return 0; 380 } 381 382 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 383 { 384 - vcpu_load(vcpu); 385 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); 386 - vcpu_put(vcpu); 387 return 0; 388 } 389 390 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 391 struct kvm_sregs *sregs) 392 { 393 - vcpu_load(vcpu); 394 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); 395 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 396 - vcpu_put(vcpu); 397 return 0; 398 } 399 400 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 401 struct kvm_sregs *sregs) 402 { 403 - vcpu_load(vcpu); 404 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); 405 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 406 - vcpu_put(vcpu); 407 return 0; 408 } 409 410 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 411 { 412 - vcpu_load(vcpu); 413 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 414 vcpu->arch.guest_fpregs.fpc = fpu->fpc; 415 - vcpu_put(vcpu); 416 return 0; 417 } 418 419 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 420 { 421 - vcpu_load(vcpu); 422 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); 423 fpu->fpc = vcpu->arch.guest_fpregs.fpc; 424 - vcpu_put(vcpu); 425 return 0; 426 } 427 ··· 415 { 416 int rc = 0; 417 418 - vcpu_load(vcpu); 419 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) 420 rc = -EBUSY; 421 else { 422 vcpu->run->psw_mask = psw.mask; 423 vcpu->run->psw_addr = psw.addr; 424 } 425 - vcpu_put(vcpu); 426 return rc; 427 } 428 ··· 483 { 484 int rc; 485 sigset_t sigsaved; 486 - 487 - vcpu_load(vcpu); 488 489 rerun_vcpu: 490 if (vcpu->requests) ··· 552 if (vcpu->sigset_active) 553 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 554 555 - vcpu_put(vcpu); 556 - 557 vcpu->stat.exit_userspace++; 558 return rc; 559 } ··· 571 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 572 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 573 */ 574 - int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 575 { 576 const unsigned char archmode = 1; 577 int prefix; ··· 633 return 0; 634 } 635 636 - 
static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 637 - { 638 - int rc; 639 - 640 - vcpu_load(vcpu); 641 - rc = __kvm_s390_vcpu_store_status(vcpu, addr); 642 - vcpu_put(vcpu); 643 - return rc; 644 - } 645 - 646 long kvm_arch_vcpu_ioctl(struct file *filp, 647 unsigned int ioctl, unsigned long arg) 648 { 649 struct kvm_vcpu *vcpu = filp->private_data; 650 void __user *argp = (void __user *)arg; 651 652 switch (ioctl) { 653 case KVM_S390_INTERRUPT: { 654 struct kvm_s390_interrupt s390int; 655 656 if (copy_from_user(&s390int, argp, sizeof(s390int))) 657 - return -EFAULT; 658 - return kvm_s390_inject_vcpu(vcpu, &s390int); 659 } 660 case KVM_S390_STORE_STATUS: 661 - return kvm_s390_vcpu_store_status(vcpu, arg); 662 case KVM_S390_SET_INITIAL_PSW: { 663 psw_t psw; 664 665 if (copy_from_user(&psw, argp, sizeof(psw))) 666 - return -EFAULT; 667 - return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 668 } 669 case KVM_S390_INITIAL_RESET: 670 - return kvm_arch_vcpu_ioctl_initial_reset(vcpu); 671 default: 672 - ; 673 } 674 - return -EINVAL; 675 } 676 677 /* Section: memory related */ ··· 721 722 void kvm_arch_flush_shadow(struct kvm *kvm) 723 { 724 - } 725 - 726 - gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 727 - { 728 - return gfn; 729 } 730 731 static int __init kvm_s390_init(void)
··· 207 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 208 { 209 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 210 + clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); 211 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == 212 (__u64) vcpu->arch.sie_block) 213 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; ··· 296 { 297 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); 298 set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests); 299 + vcpu->arch.sie_block->ecb = 6; 300 vcpu->arch.sie_block->eca = 0xC1002001U; 301 vcpu->arch.sie_block->fac = (int) (long) facilities; 302 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); ··· 329 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; 330 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); 331 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; 332 + set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); 333 334 spin_lock_init(&vcpu->arch.local_int.lock); 335 INIT_LIST_HEAD(&vcpu->arch.local_int.list); ··· 363 364 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 365 { 366 kvm_s390_vcpu_initial_reset(vcpu); 367 return 0; 368 } 369 370 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 371 { 372 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs)); 373 return 0; 374 } 375 376 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 377 { 378 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); 379 return 0; 380 } 381 382 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 383 struct kvm_sregs *sregs) 384 { 385 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); 386 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 387 return 0; 388 } 389 390 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 391 struct kvm_sregs *sregs) 392 { 393 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); 394 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 395 return 0; 396 } 397 398 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 399 { 400 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 401 vcpu->arch.guest_fpregs.fpc = fpu->fpc; 402 return 0; 403 } 404 405 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 406 { 407 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); 408 fpu->fpc = vcpu->arch.guest_fpregs.fpc; 409 return 0; 410 } 411 ··· 427 { 428 int rc = 0; 429 430 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) 431 rc = -EBUSY; 432 else { 433 vcpu->run->psw_mask = psw.mask; 434 vcpu->run->psw_addr = psw.addr; 435 } 436 return rc; 437 } 438 ··· 497 { 498 int rc; 499 sigset_t sigsaved; 500 501 rerun_vcpu: 502 if (vcpu->requests) ··· 568 if (vcpu->sigset_active) 569 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 570 571 vcpu->stat.exit_userspace++; 572 return rc; 573 } ··· 589 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 590 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 591 */ 592 + int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 593 { 594 const unsigned char archmode = 1; 595 int prefix; ··· 651 return 0; 652 } 653 654 long kvm_arch_vcpu_ioctl(struct file *filp, 655 unsigned int ioctl, unsigned long arg) 656 { 657 struct kvm_vcpu *vcpu = filp->private_data; 658 void __user *argp = (void __user *)arg; 659 + long r; 660 661 switch (ioctl) { 662 case KVM_S390_INTERRUPT: { 663 struct 
kvm_s390_interrupt s390int; 664 665 + r = -EFAULT; 666 if (copy_from_user(&s390int, argp, sizeof(s390int))) 667 + break; 668 + r = kvm_s390_inject_vcpu(vcpu, &s390int); 669 + break; 670 } 671 case KVM_S390_STORE_STATUS: 672 + r = kvm_s390_vcpu_store_status(vcpu, arg); 673 + break; 674 case KVM_S390_SET_INITIAL_PSW: { 675 psw_t psw; 676 677 + r = -EFAULT; 678 if (copy_from_user(&psw, argp, sizeof(psw))) 679 + break; 680 + r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 681 + break; 682 } 683 case KVM_S390_INITIAL_RESET: 684 + r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); 685 + break; 686 default: 687 + r = -EINVAL; 688 } 689 + return r; 690 } 691 692 /* Section: memory related */ ··· 742 743 void kvm_arch_flush_shadow(struct kvm *kvm) 744 { 745 } 746 747 static int __init kvm_s390_init(void)
+1 -1
arch/s390/kvm/kvm-s390.h
··· 92 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); 93 94 /* implemented in kvm-s390.c */ 95 - int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, 96 unsigned long addr); 97 /* implemented in diag.c */ 98 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
··· 92 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); 93 94 /* implemented in kvm-s390.c */ 95 + int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, 96 unsigned long addr); 97 /* implemented in diag.c */ 98 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+2
arch/x86/include/asm/i387.h
··· 482 memcpy(dst->state, src->state, xstate_size); 483 } 484 485 #endif /* __ASSEMBLY__ */ 486 487 #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
··· 482 memcpy(dst->state, src->state, xstate_size); 483 } 484 485 + extern void fpu_finit(struct fpu *fpu); 486 + 487 #endif /* __ASSEMBLY__ */ 488 489 #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
+22
arch/x86/include/asm/kvm.h
··· 22 #define __KVM_HAVE_XEN_HVM 23 #define __KVM_HAVE_VCPU_EVENTS 24 #define __KVM_HAVE_DEBUGREGS 25 26 /* Architectural interrupt line count. */ 27 #define KVM_NR_INTERRUPTS 256 ··· 299 __u64 dr7; 300 __u64 flags; 301 __u64 reserved[9]; 302 }; 303 304 #endif /* _ASM_X86_KVM_H */
··· 22 #define __KVM_HAVE_XEN_HVM 23 #define __KVM_HAVE_VCPU_EVENTS 24 #define __KVM_HAVE_DEBUGREGS 25 + #define __KVM_HAVE_XSAVE 26 + #define __KVM_HAVE_XCRS 27 28 /* Architectural interrupt line count. */ 29 #define KVM_NR_INTERRUPTS 256 ··· 297 __u64 dr7; 298 __u64 flags; 299 __u64 reserved[9]; 300 + }; 301 + 302 + /* for KVM_CAP_XSAVE */ 303 + struct kvm_xsave { 304 + __u32 region[1024]; 305 + }; 306 + 307 + #define KVM_MAX_XCRS 16 308 + 309 + struct kvm_xcr { 310 + __u32 xcr; 311 + __u32 reserved; 312 + __u64 value; 313 + }; 314 + 315 + struct kvm_xcrs { 316 + __u32 nr_xcrs; 317 + __u32 flags; 318 + struct kvm_xcr xcrs[KVM_MAX_XCRS]; 319 + __u64 padding[16]; 320 }; 321 322 #endif /* _ASM_X86_KVM_H */
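struct kvm_xsave and struct kvm_xcrs above define the userspace ABI for extended-state save/restore; the matching KVM_CAP_XSAVE capability and the KVM_GET/SET_XSAVE and KVM_GET/SET_XCRS ioctls come from other patches in this series rather than this hunk. A hedged userspace sketch of reading a vcpu's XSAVE image, assuming those definitions are present in the installed <linux/kvm.h>:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        struct kvm_xsave xsave;
        int kvm = open("/dev/kvm", O_RDWR);
        int vm = ioctl(kvm, KVM_CREATE_VM, 0);
        int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

        if (kvm < 0 || vm < 0 || vcpu < 0)
            return 1;

        if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE) <= 0) {
            fprintf(stderr, "KVM_CAP_XSAVE not reported\n");
            return 1;
        }

        if (ioctl(vcpu, KVM_GET_XSAVE, &xsave) == 0)
            /* region[] is the raw XSAVE image: 512-byte legacy area,
             * 64-byte header, then the extended components, so the
             * low word of XSTATE_BV sits at 32-bit index 128. */
            printf("xstate_bv (low 32 bits): 0x%x\n", xsave.region[128]);

        return 0;
    }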
+25 -5
arch/x86/include/asm/kvm_emulate.h
··· 51 #define X86EMUL_UNHANDLEABLE 1 52 /* Terminate emulation but return success to the caller. */ 53 #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ 54 - #define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ 55 - #define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ 56 struct x86_emulate_ops { 57 /* 58 * read_std: Read bytes of standard (non-emulated/special) memory. ··· 94 int (*read_emulated)(unsigned long addr, 95 void *val, 96 unsigned int bytes, 97 struct kvm_vcpu *vcpu); 98 99 /* ··· 107 int (*write_emulated)(unsigned long addr, 108 const void *val, 109 unsigned int bytes, 110 struct kvm_vcpu *vcpu); 111 112 /* ··· 122 const void *old, 123 const void *new, 124 unsigned int bytes, 125 struct kvm_vcpu *vcpu); 126 127 int (*pio_in_emulated)(int size, unsigned short port, void *val, ··· 137 int seg, struct kvm_vcpu *vcpu); 138 u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); 139 void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); 140 void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); 141 ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); 142 - void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); 143 int (*cpl)(struct kvm_vcpu *vcpu); 144 - void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 145 }; 146 147 /* Type, address-of, and value of an instruction's operand. */ 148 struct operand { 149 enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; 150 unsigned int bytes; 151 - unsigned long val, orig_val, *ptr; 152 }; 153 154 struct fetch_cache { ··· 199 unsigned long modrm_val; 200 struct fetch_cache fetch; 201 struct read_cache io_read; 202 }; 203 204 struct x86_emulate_ctxt { ··· 216 int interruptibility; 217 218 bool restart; /* restart string instruction after writeback */ 219 /* decode cache */ 220 struct decode_cache decode; 221 };
··· 51 #define X86EMUL_UNHANDLEABLE 1 52 /* Terminate emulation but return success to the caller. */ 53 #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ 54 + #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ 55 + #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ 56 + #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ 57 + 58 struct x86_emulate_ops { 59 /* 60 * read_std: Read bytes of standard (non-emulated/special) memory. ··· 92 int (*read_emulated)(unsigned long addr, 93 void *val, 94 unsigned int bytes, 95 + unsigned int *error, 96 struct kvm_vcpu *vcpu); 97 98 /* ··· 104 int (*write_emulated)(unsigned long addr, 105 const void *val, 106 unsigned int bytes, 107 + unsigned int *error, 108 struct kvm_vcpu *vcpu); 109 110 /* ··· 118 const void *old, 119 const void *new, 120 unsigned int bytes, 121 + unsigned int *error, 122 struct kvm_vcpu *vcpu); 123 124 int (*pio_in_emulated)(int size, unsigned short port, void *val, ··· 132 int seg, struct kvm_vcpu *vcpu); 133 u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); 134 void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); 135 + unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); 136 void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); 137 ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); 138 + int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); 139 int (*cpl)(struct kvm_vcpu *vcpu); 140 + int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); 141 + int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); 142 + int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 143 + int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); 144 }; 145 146 /* Type, address-of, and value of an instruction's operand. */ 147 struct operand { 148 enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; 149 unsigned int bytes; 150 + unsigned long orig_val, *ptr; 151 + union { 152 + unsigned long val; 153 + char valptr[sizeof(unsigned long) + 2]; 154 + }; 155 }; 156 157 struct fetch_cache { ··· 186 unsigned long modrm_val; 187 struct fetch_cache fetch; 188 struct read_cache io_read; 189 + struct read_cache mem_read; 190 }; 191 192 struct x86_emulate_ctxt { ··· 202 int interruptibility; 203 204 bool restart; /* restart string instruction after writeback */ 205 + 206 + int exception; /* exception that happens during emulation or -1 */ 207 + u32 error_code; /* error code for exception */ 208 + bool error_code_valid; 209 + unsigned long cr2; /* faulted address in case of #PF */ 210 + 211 /* decode cache */ 212 struct decode_cache decode; 213 };
+17 -53
arch/x86/include/asm/kvm_host.h
··· 15 #include <linux/mm.h> 16 #include <linux/mmu_notifier.h> 17 #include <linux/tracepoint.h> 18 19 #include <linux/kvm.h> 20 #include <linux/kvm_para.h> ··· 40 0xFFFFFF0000000000ULL) 41 42 #define INVALID_PAGE (~(hpa_t)0) 43 #define UNMAPPED_GVA (~(gpa_t)0) 44 45 /* KVM Hugepage definitions for x86 */ 46 #define KVM_NR_PAGE_SIZES 3 47 - #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) 48 #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) 49 #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 50 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) ··· 72 #define SELECTOR_RPL_MASK 0x03 73 74 #define IOPL_SHIFT 12 75 - 76 - #define KVM_ALIAS_SLOTS 4 77 78 #define KVM_PERMILLE_MMU_PAGES 20 79 #define KVM_MIN_ALLOC_MMU_PAGES 64 ··· 243 void (*prefetch_page)(struct kvm_vcpu *vcpu, 244 struct kvm_mmu_page *page); 245 int (*sync_page)(struct kvm_vcpu *vcpu, 246 - struct kvm_mmu_page *sp); 247 void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); 248 hpa_t root_hpa; 249 int root_level; ··· 303 unsigned long mmu_seq; 304 } update_pte; 305 306 - struct i387_fxsave_struct host_fx_image; 307 - struct i387_fxsave_struct guest_fx_image; 308 309 gva_t mmio_fault_cr2; 310 struct kvm_pio_request pio; ··· 362 363 /* fields used by HYPER-V emulation */ 364 u64 hv_vapic; 365 - }; 366 367 - struct kvm_mem_alias { 368 - gfn_t base_gfn; 369 - unsigned long npages; 370 - gfn_t target_gfn; 371 - #define KVM_ALIAS_INVALID 1UL 372 - unsigned long flags; 373 - }; 374 - 375 - #define KVM_ARCH_HAS_UNALIAS_INSTANTIATION 376 - 377 - struct kvm_mem_aliases { 378 - struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; 379 - int naliases; 380 }; 381 382 struct kvm_arch { 383 - struct kvm_mem_aliases *aliases; 384 - 385 unsigned int n_free_mmu_pages; 386 unsigned int n_requested_mmu_pages; 387 unsigned int n_alloc_mmu_pages; ··· 520 521 void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); 522 523 const struct trace_print_flags *exit_reasons_str; 524 }; 525 ··· 565 #define EMULTYPE_SKIP (1 << 2) 566 int emulate_instruction(struct kvm_vcpu *vcpu, 567 unsigned long cr2, u16 error_code, int emulation_type); 568 - void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); 569 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 570 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 571 ··· 579 int kvm_emulate_halt(struct kvm_vcpu *vcpu); 580 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); 581 int emulate_clts(struct kvm_vcpu *vcpu); 582 - int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, 583 - unsigned long *dest); 584 - int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, 585 - unsigned long value); 586 587 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 588 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); ··· 587 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 588 bool has_error_code, u32 error_code); 589 590 - void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 591 - void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); 592 - void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); 593 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); 594 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); 595 int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); 596 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); 597 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); 598 
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); 599 600 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); 601 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); ··· 616 617 void kvm_inject_nmi(struct kvm_vcpu *vcpu); 618 619 - void fx_init(struct kvm_vcpu *vcpu); 620 - 621 - int emulator_write_emulated(unsigned long addr, 622 - const void *val, 623 - unsigned int bytes, 624 - struct kvm_vcpu *vcpu); 625 626 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 627 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ··· 644 645 int complete_pio(struct kvm_vcpu *vcpu); 646 bool kvm_check_iopl(struct kvm_vcpu *vcpu); 647 - 648 - struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); 649 650 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) 651 { ··· 697 return value; 698 } 699 #endif 700 - 701 - static inline void kvm_fx_save(struct i387_fxsave_struct *image) 702 - { 703 - asm("fxsave (%0)":: "r" (image)); 704 - } 705 - 706 - static inline void kvm_fx_restore(struct i387_fxsave_struct *image) 707 - { 708 - asm("fxrstor (%0)":: "r" (image)); 709 - } 710 - 711 - static inline void kvm_fx_finit(void) 712 - { 713 - asm("finit"); 714 - } 715 716 static inline u32 get_rdx_init_val(void) 717 {
··· 15 #include <linux/mm.h> 16 #include <linux/mmu_notifier.h> 17 #include <linux/tracepoint.h> 18 + #include <linux/cpumask.h> 19 20 #include <linux/kvm.h> 21 #include <linux/kvm_para.h> ··· 39 0xFFFFFF0000000000ULL) 40 41 #define INVALID_PAGE (~(hpa_t)0) 42 + #define VALID_PAGE(x) ((x) != INVALID_PAGE) 43 + 44 #define UNMAPPED_GVA (~(gpa_t)0) 45 46 /* KVM Hugepage definitions for x86 */ 47 #define KVM_NR_PAGE_SIZES 3 48 + #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) 49 + #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) 50 #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) 51 #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 52 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) ··· 68 #define SELECTOR_RPL_MASK 0x03 69 70 #define IOPL_SHIFT 12 71 72 #define KVM_PERMILLE_MMU_PAGES 20 73 #define KVM_MIN_ALLOC_MMU_PAGES 64 ··· 241 void (*prefetch_page)(struct kvm_vcpu *vcpu, 242 struct kvm_mmu_page *page); 243 int (*sync_page)(struct kvm_vcpu *vcpu, 244 + struct kvm_mmu_page *sp, bool clear_unsync); 245 void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); 246 hpa_t root_hpa; 247 int root_level; ··· 301 unsigned long mmu_seq; 302 } update_pte; 303 304 + struct fpu guest_fpu; 305 + u64 xcr0; 306 307 gva_t mmio_fault_cr2; 308 struct kvm_pio_request pio; ··· 360 361 /* fields used by HYPER-V emulation */ 362 u64 hv_vapic; 363 364 + cpumask_var_t wbinvd_dirty_mask; 365 }; 366 367 struct kvm_arch { 368 unsigned int n_free_mmu_pages; 369 unsigned int n_requested_mmu_pages; 370 unsigned int n_alloc_mmu_pages; ··· 533 534 void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); 535 536 + bool (*has_wbinvd_exit)(void); 537 + 538 const struct trace_print_flags *exit_reasons_str; 539 }; 540 ··· 576 #define EMULTYPE_SKIP (1 << 2) 577 int emulate_instruction(struct kvm_vcpu *vcpu, 578 unsigned long cr2, u16 error_code, int emulation_type); 579 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 580 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 581 ··· 591 int kvm_emulate_halt(struct kvm_vcpu *vcpu); 592 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); 593 int emulate_clts(struct kvm_vcpu *vcpu); 594 + int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); 595 596 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 597 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); ··· 602 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 603 bool has_error_code, u32 error_code); 604 605 + int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 606 + int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); 607 + int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); 608 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); 609 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); 610 int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); 611 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); 612 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); 613 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); 614 + int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); 615 616 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); 617 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); ··· 630 631 void kvm_inject_nmi(struct kvm_vcpu *vcpu); 632 633 + int fx_init(struct kvm_vcpu *vcpu); 634 635 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 636 void 
kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ··· 663 664 int complete_pio(struct kvm_vcpu *vcpu); 665 bool kvm_check_iopl(struct kvm_vcpu *vcpu); 666 667 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) 668 { ··· 718 return value; 719 } 720 #endif 721 722 static inline u32 get_rdx_init_val(void) 723 {
+2
arch/x86/include/asm/msr-index.h
··· 20 #define _EFER_LMA 10 /* Long mode active (read-only) */ 21 #define _EFER_NX 11 /* No execute enable */ 22 #define _EFER_SVME 12 /* Enable virtualization */ 23 #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ 24 25 #define EFER_SCE (1<<_EFER_SCE) ··· 28 #define EFER_LMA (1<<_EFER_LMA) 29 #define EFER_NX (1<<_EFER_NX) 30 #define EFER_SVME (1<<_EFER_SVME) 31 #define EFER_FFXSR (1<<_EFER_FFXSR) 32 33 /* Intel MSRs. Some also available on other CPUs */
··· 20 #define _EFER_LMA 10 /* Long mode active (read-only) */ 21 #define _EFER_NX 11 /* No execute enable */ 22 #define _EFER_SVME 12 /* Enable virtualization */ 23 + #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ 24 #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ 25 26 #define EFER_SCE (1<<_EFER_SCE) ··· 27 #define EFER_LMA (1<<_EFER_LMA) 28 #define EFER_NX (1<<_EFER_NX) 29 #define EFER_SVME (1<<_EFER_SVME) 30 + #define EFER_LMSLE (1<<_EFER_LMSLE) 31 #define EFER_FFXSR (1<<_EFER_FFXSR) 32 33 /* Intel MSRs. Some also available on other CPUs */
+5
arch/x86/include/asm/vmx.h
··· 257 #define EXIT_REASON_IO_INSTRUCTION 30 258 #define EXIT_REASON_MSR_READ 31 259 #define EXIT_REASON_MSR_WRITE 32 260 #define EXIT_REASON_MWAIT_INSTRUCTION 36 261 #define EXIT_REASON_MONITOR_INSTRUCTION 39 262 #define EXIT_REASON_PAUSE_INSTRUCTION 40 ··· 267 #define EXIT_REASON_EPT_VIOLATION 48 268 #define EXIT_REASON_EPT_MISCONFIG 49 269 #define EXIT_REASON_WBINVD 54 270 271 /* 272 * Interruption-information format ··· 376 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) 377 #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) 378 #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) 379 380 #define VMX_EPT_DEFAULT_GAW 3 381 #define VMX_EPT_MAX_GAW 0x4
··· 257 #define EXIT_REASON_IO_INSTRUCTION 30 258 #define EXIT_REASON_MSR_READ 31 259 #define EXIT_REASON_MSR_WRITE 32 260 + #define EXIT_REASON_INVALID_STATE 33 261 #define EXIT_REASON_MWAIT_INSTRUCTION 36 262 #define EXIT_REASON_MONITOR_INSTRUCTION 39 263 #define EXIT_REASON_PAUSE_INSTRUCTION 40 ··· 266 #define EXIT_REASON_EPT_VIOLATION 48 267 #define EXIT_REASON_EPT_MISCONFIG 49 268 #define EXIT_REASON_WBINVD 54 269 + #define EXIT_REASON_XSETBV 55 270 271 /* 272 * Interruption-information format ··· 374 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) 375 #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) 376 #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) 377 + 378 + #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ 379 + #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ 380 381 #define VMX_EPT_DEFAULT_GAW 3 382 #define VMX_EPT_MAX_GAW 0x4
+6
arch/x86/include/asm/xsave.h
··· 13 14 #define FXSAVE_SIZE 512 15 16 /* 17 * These are the features that the OS can handle currently. 18 */
··· 13 14 #define FXSAVE_SIZE 512 15 16 + #define XSAVE_HDR_SIZE 64 17 + #define XSAVE_HDR_OFFSET FXSAVE_SIZE 18 + 19 + #define XSAVE_YMM_SIZE 256 20 + #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) 21 + 22 /* 23 * These are the features that the OS can handle currently. 24 */
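The constants added above encode the standard XSAVE memory layout that struct kvm_xsave mirrors: the 512-byte legacy FXSAVE block first, the 64-byte XSAVE header at offset 512, and the 256-byte YMM_Hi128 area immediately after it at offset 576. A trivial standalone check of that arithmetic:

    #include <stdio.h>

    #define FXSAVE_SIZE      512
    #define XSAVE_HDR_SIZE   64
    #define XSAVE_HDR_OFFSET FXSAVE_SIZE
    #define XSAVE_YMM_SIZE   256
    #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)

    int main(void)
    {
        printf("header at %d, YMM state at %d..%d\n",
               XSAVE_HDR_OFFSET, XSAVE_YMM_OFFSET,
               XSAVE_YMM_OFFSET + XSAVE_YMM_SIZE - 1);   /* 512, 576..831 */
        return 0;
    }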
+2 -1
arch/x86/kernel/i387.c
··· 107 } 108 #endif /* CONFIG_X86_64 */ 109 110 - static void fpu_finit(struct fpu *fpu) 111 { 112 #ifdef CONFIG_X86_32 113 if (!HAVE_HWFP) { ··· 132 fp->fos = 0xffff0000u; 133 } 134 } 135 136 /* 137 * The _current_ task is using the FPU for the first time
··· 107 } 108 #endif /* CONFIG_X86_64 */ 109 110 + void fpu_finit(struct fpu *fpu) 111 { 112 #ifdef CONFIG_X86_32 113 if (!HAVE_HWFP) { ··· 132 fp->fos = 0xffff0000u; 133 } 134 } 135 + EXPORT_SYMBOL_GPL(fpu_finit); 136 137 /* 138 * The _current_ task is using the FPU for the first time
+1
arch/x86/kernel/process.c
··· 28 EXPORT_SYMBOL(idle_nomwait); 29 30 struct kmem_cache *task_xstate_cachep; 31 32 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 33 {
··· 28 EXPORT_SYMBOL(idle_nomwait); 29 30 struct kmem_cache *task_xstate_cachep; 31 + EXPORT_SYMBOL_GPL(task_xstate_cachep); 32 33 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 34 {
+422 -327
arch/x86/kvm/emulate.c
··· 9 * privileged instructions: 10 * 11 * Copyright (C) 2006 Qumranet 12 * 13 * Avi Kivity <avi@qumranet.com> 14 * Yaniv Kamay <yaniv@qumranet.com> ··· 68 #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ 69 #define SrcImmU (9<<4) /* Immediate operand, unsigned */ 70 #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ 71 #define SrcMask (0xf<<4) 72 /* Generic ModRM decode. */ 73 #define ModRM (1<<8) ··· 92 #define Src2CL (1<<29) 93 #define Src2ImmByte (2<<29) 94 #define Src2One (3<<29) 95 - #define Src2Imm16 (4<<29) 96 - #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be 97 - in memory and second argument is located 98 - immediately after the first one in memory. */ 99 #define Src2Mask (7<<29) 100 101 enum { ··· 124 /* 0x20 - 0x27 */ 125 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 126 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 127 - DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, 128 /* 0x28 - 0x2F */ 129 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 130 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 131 - 0, 0, 0, 0, 132 /* 0x30 - 0x37 */ 133 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 134 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 135 - 0, 0, 0, 0, 136 /* 0x38 - 0x3F */ 137 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 138 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ··· 170 /* 0x88 - 0x8F */ 171 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 172 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 173 - DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, 174 - DstReg | SrcMem | ModRM | Mov, Group | Group1A, 175 /* 0x90 - 0x97 */ 176 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, 177 /* 0x98 - 0x9F */ 178 - 0, 0, SrcImm | Src2Imm16 | No64, 0, 179 ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, 180 /* 0xA0 - 0xA7 */ 181 - ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, 182 - ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, 183 ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, 184 ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, 185 /* 0xA8 - 0xAF */ 186 - 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, 187 ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, 188 ByteOp | DstDI | String, DstDI | String, 189 /* 0xB0 - 0xB7 */ ··· 215 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, 216 /* 0xE8 - 0xEF */ 217 SrcImm | Stack, SrcImm | ImplicitOps, 218 - SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, 219 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, 220 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, 221 /* 0xF0 - 0xF7 */ ··· 337 [Group1A*8] = 338 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, 339 [Group3_Byte*8] = 340 - ByteOp | SrcImm | DstMem | ModRM, 0, 341 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, 342 0, 0, 0, 0, 343 [Group3*8] = 344 - DstMem | SrcImm | ModRM, 0, 345 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 346 0, 0, 0, 0, 347 [Group4*8] = 348 - ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, 349 0, 0, 0, 0, 0, 0, 350 [Group5*8] = 351 - DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 352 SrcMem | ModRM | Stack, 0, 353 - SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, 354 SrcMem | ModRM | Stack, 0, 355 [Group7*8] = 356 0, 0, ModRM | SrcMem 
| Priv, ModRM | SrcMem | Priv, ··· 576 (_type)_x; \ 577 }) 578 579 static inline unsigned long ad_mask(struct decode_cache *c) 580 { 581 return (1UL << (c->ad_bytes << 3)) - 1; ··· 624 c->seg_override = seg; 625 } 626 627 - static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) 628 { 629 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) 630 return 0; 631 632 - return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg); 633 } 634 635 static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, 636 struct decode_cache *c) 637 { 638 if (!c->has_seg_override) 639 return 0; 640 641 - return seg_base(ctxt, c->seg_override); 642 } 643 644 - static unsigned long es_base(struct x86_emulate_ctxt *ctxt) 645 { 646 - return seg_base(ctxt, VCPU_SREG_ES); 647 } 648 649 - static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) 650 { 651 - return seg_base(ctxt, VCPU_SREG_SS); 652 } 653 654 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, ··· 974 /* we cannot decode insn before we complete previous rep insn */ 975 WARN_ON(ctxt->restart); 976 977 - /* Shadow copy of register state. Committed on successful emulation. */ 978 - memset(c, 0, sizeof(struct decode_cache)); 979 c->eip = ctxt->eip; 980 c->fetch.start = c->fetch.end = c->eip; 981 - ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); 982 - memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 983 984 switch (mode) { 985 case X86EMUL_MODE_REAL: ··· 1099 set_seg_override(c, VCPU_SREG_DS); 1100 1101 if (!(!c->twobyte && c->b == 0x8d)) 1102 - c->modrm_ea += seg_override_base(ctxt, c); 1103 1104 if (c->ad_bytes != 8) 1105 c->modrm_ea = (u32)c->modrm_ea; ··· 1187 else 1188 c->src.val = insn_fetch(u8, 1, c->eip); 1189 break; 1190 case SrcOne: 1191 c->src.bytes = 1; 1192 c->src.val = 1; ··· 1214 c->src.type = OP_MEM; 1215 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1216 c->src.ptr = (unsigned long *) 1217 - register_address(c, seg_override_base(ctxt, c), 1218 c->regs[VCPU_REGS_RSI]); 1219 c->src.val = 0; 1220 break; 1221 } 1222 ··· 1248 c->src2.bytes = 1; 1249 c->src2.val = insn_fetch(u8, 1, c->eip); 1250 break; 1251 - case Src2Imm16: 1252 - c->src2.type = OP_IMM; 1253 - c->src2.ptr = (unsigned long *)c->eip; 1254 - c->src2.bytes = 2; 1255 - c->src2.val = insn_fetch(u16, 2, c->eip); 1256 - break; 1257 case Src2One: 1258 c->src2.bytes = 1; 1259 c->src2.val = 1; 1260 - break; 1261 - case Src2Mem16: 1262 - c->src2.type = OP_MEM; 1263 - c->src2.bytes = 2; 1264 - c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); 1265 - c->src2.val = 0; 1266 break; 1267 } 1268 ··· 1310 c->dst.type = OP_MEM; 1311 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1312 c->dst.ptr = (unsigned long *) 1313 - register_address(c, es_base(ctxt), 1314 c->regs[VCPU_REGS_RDI]); 1315 c->dst.val = 0; 1316 break; ··· 1318 1319 done: 1320 return (rc == X86EMUL_UNHANDLEABLE) ? 
-1 : 0; 1321 } 1322 1323 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, ··· 1418 get_descriptor_table_ptr(ctxt, ops, selector, &dt); 1419 1420 if (dt.size < index * 8 + 7) { 1421 - kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); 1422 return X86EMUL_PROPAGATE_FAULT; 1423 } 1424 addr = dt.address + index * 8; 1425 ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); 1426 if (ret == X86EMUL_PROPAGATE_FAULT) 1427 - kvm_inject_page_fault(ctxt->vcpu, addr, err); 1428 1429 return ret; 1430 } ··· 1443 get_descriptor_table_ptr(ctxt, ops, selector, &dt); 1444 1445 if (dt.size < index * 8 + 7) { 1446 - kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); 1447 return X86EMUL_PROPAGATE_FAULT; 1448 } 1449 1450 addr = dt.address + index * 8; 1451 ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); 1452 if (ret == X86EMUL_PROPAGATE_FAULT) 1453 - kvm_inject_page_fault(ctxt->vcpu, addr, err); 1454 1455 return ret; 1456 } ··· 1569 ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); 1570 return X86EMUL_CONTINUE; 1571 exception: 1572 - kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); 1573 return X86EMUL_PROPAGATE_FAULT; 1574 } 1575 1576 - static inline void emulate_push(struct x86_emulate_ctxt *ctxt) 1577 { 1578 struct decode_cache *c = &ctxt->decode; 1579 ··· 1640 c->dst.bytes = c->op_bytes; 1641 c->dst.val = c->src.val; 1642 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); 1643 - c->dst.ptr = (void *) register_address(c, ss_base(ctxt), 1644 c->regs[VCPU_REGS_RSP]); 1645 } 1646 ··· 1651 struct decode_cache *c = &ctxt->decode; 1652 int rc; 1653 1654 - rc = ops->read_emulated(register_address(c, ss_base(ctxt), 1655 - c->regs[VCPU_REGS_RSP]), 1656 - dest, len, ctxt->vcpu); 1657 if (rc != X86EMUL_CONTINUE) 1658 return rc; 1659 ··· 1688 break; 1689 case X86EMUL_MODE_VM86: 1690 if (iopl < 3) { 1691 - kvm_inject_gp(ctxt->vcpu, 0); 1692 return X86EMUL_PROPAGATE_FAULT; 1693 } 1694 change_mask |= EFLG_IF; ··· 1704 return rc; 1705 } 1706 1707 - static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) 1708 { 1709 struct decode_cache *c = &ctxt->decode; 1710 - struct kvm_segment segment; 1711 1712 - kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); 1713 1714 - c->src.val = segment.selector; 1715 - emulate_push(ctxt); 1716 } 1717 1718 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, ··· 1729 return rc; 1730 } 1731 1732 - static void emulate_pusha(struct x86_emulate_ctxt *ctxt) 1733 { 1734 struct decode_cache *c = &ctxt->decode; 1735 unsigned long old_esp = c->regs[VCPU_REGS_RSP]; 1736 int reg = VCPU_REGS_RAX; 1737 1738 while (reg <= VCPU_REGS_RDI) { 1739 (reg == VCPU_REGS_RSP) ? 1740 (c->src.val = old_esp) : (c->src.val = c->regs[reg]); 1741 1742 - emulate_push(ctxt); 1743 ++reg; 1744 } 1745 } 1746 1747 static int emulate_popa(struct x86_emulate_ctxt *ctxt, ··· 1853 old_eip = c->eip; 1854 c->eip = c->src.val; 1855 c->src.val = old_eip; 1856 - emulate_push(ctxt); 1857 break; 1858 } 1859 case 4: /* jmp abs */ 1860 c->eip = c->src.val; 1861 break; 1862 case 6: /* push */ 1863 - emulate_push(ctxt); 1864 break; 1865 } 1866 return X86EMUL_CONTINUE; ··· 1906 return rc; 1907 } 1908 1909 - static inline int writeback(struct x86_emulate_ctxt *ctxt, 1910 - struct x86_emulate_ops *ops) 1911 - { 1912 - int rc; 1913 - struct decode_cache *c = &ctxt->decode; 1914 - 1915 - switch (c->dst.type) { 1916 - case OP_REG: 1917 - /* The 4-byte case *is* correct: 1918 - * in 64-bit mode we zero-extend. 
1919 - */ 1920 - switch (c->dst.bytes) { 1921 - case 1: 1922 - *(u8 *)c->dst.ptr = (u8)c->dst.val; 1923 - break; 1924 - case 2: 1925 - *(u16 *)c->dst.ptr = (u16)c->dst.val; 1926 - break; 1927 - case 4: 1928 - *c->dst.ptr = (u32)c->dst.val; 1929 - break; /* 64b: zero-ext */ 1930 - case 8: 1931 - *c->dst.ptr = c->dst.val; 1932 - break; 1933 - } 1934 - break; 1935 - case OP_MEM: 1936 - if (c->lock_prefix) 1937 - rc = ops->cmpxchg_emulated( 1938 - (unsigned long)c->dst.ptr, 1939 - &c->dst.orig_val, 1940 - &c->dst.val, 1941 - c->dst.bytes, 1942 - ctxt->vcpu); 1943 - else 1944 - rc = ops->write_emulated( 1945 - (unsigned long)c->dst.ptr, 1946 - &c->dst.val, 1947 - c->dst.bytes, 1948 - ctxt->vcpu); 1949 - if (rc != X86EMUL_CONTINUE) 1950 - return rc; 1951 - break; 1952 - case OP_NONE: 1953 - /* no writeback */ 1954 - break; 1955 - default: 1956 - break; 1957 - } 1958 - return X86EMUL_CONTINUE; 1959 - } 1960 - 1961 - static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) 1962 - { 1963 - u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); 1964 - /* 1965 - * an sti; sti; sequence only disable interrupts for the first 1966 - * instruction. So, if the last instruction, be it emulated or 1967 - * not, left the system with the INT_STI flag enabled, it 1968 - * means that the last instruction is an sti. We should not 1969 - * leave the flag on in this case. The same goes for mov ss 1970 - */ 1971 - if (!(int_shadow & mask)) 1972 - ctxt->interruptibility = mask; 1973 - } 1974 - 1975 static inline void 1976 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, 1977 - struct kvm_segment *cs, struct kvm_segment *ss) 1978 { 1979 - memset(cs, 0, sizeof(struct kvm_segment)); 1980 - kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS); 1981 - memset(ss, 0, sizeof(struct kvm_segment)); 1982 1983 cs->l = 0; /* will be adjusted later */ 1984 - cs->base = 0; /* flat segment */ 1985 cs->g = 1; /* 4kb granularity */ 1986 - cs->limit = 0xffffffff; /* 4GB limit */ 1987 cs->type = 0x0b; /* Read, Execute, Accessed */ 1988 cs->s = 1; 1989 cs->dpl = 0; /* will be adjusted later */ 1990 - cs->present = 1; 1991 - cs->db = 1; 1992 1993 - ss->unusable = 0; 1994 - ss->base = 0; /* flat segment */ 1995 - ss->limit = 0xffffffff; /* 4GB limit */ 1996 ss->g = 1; /* 4kb granularity */ 1997 ss->s = 1; 1998 ss->type = 0x03; /* Read/Write, Accessed */ 1999 - ss->db = 1; /* 32bit stack segment */ 2000 ss->dpl = 0; 2001 - ss->present = 1; 2002 } 2003 2004 static int 2005 - emulate_syscall(struct x86_emulate_ctxt *ctxt) 2006 { 2007 struct decode_cache *c = &ctxt->decode; 2008 - struct kvm_segment cs, ss; 2009 u64 msr_data; 2010 2011 /* syscall is not available in real mode */ 2012 if (ctxt->mode == X86EMUL_MODE_REAL || 2013 ctxt->mode == X86EMUL_MODE_VM86) { 2014 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2015 return X86EMUL_PROPAGATE_FAULT; 2016 } 2017 2018 - setup_syscalls_segments(ctxt, &cs, &ss); 2019 2020 - kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); 2021 msr_data >>= 32; 2022 - cs.selector = (u16)(msr_data & 0xfffc); 2023 - ss.selector = (u16)(msr_data + 8); 2024 2025 if (is_long_mode(ctxt->vcpu)) { 2026 - cs.db = 0; 2027 cs.l = 1; 2028 } 2029 - kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); 2030 - kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); 2031 2032 c->regs[VCPU_REGS_RCX] = c->eip; 2033 if (is_long_mode(ctxt->vcpu)) { 2034 #ifdef CONFIG_X86_64 2035 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; 2036 2037 - kvm_x86_ops->get_msr(ctxt->vcpu, 2038 - 
ctxt->mode == X86EMUL_MODE_PROT64 ? 2039 - MSR_LSTAR : MSR_CSTAR, &msr_data); 2040 c->eip = msr_data; 2041 2042 - kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); 2043 ctxt->eflags &= ~(msr_data | EFLG_RF); 2044 #endif 2045 } else { 2046 /* legacy mode */ 2047 - kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); 2048 c->eip = (u32)msr_data; 2049 2050 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); ··· 1991 } 1992 1993 static int 1994 - emulate_sysenter(struct x86_emulate_ctxt *ctxt) 1995 { 1996 struct decode_cache *c = &ctxt->decode; 1997 - struct kvm_segment cs, ss; 1998 u64 msr_data; 1999 2000 /* inject #GP if in real mode */ 2001 if (ctxt->mode == X86EMUL_MODE_REAL) { 2002 - kvm_inject_gp(ctxt->vcpu, 0); 2003 return X86EMUL_PROPAGATE_FAULT; 2004 } 2005 ··· 2008 * Therefore, we inject an #UD. 2009 */ 2010 if (ctxt->mode == X86EMUL_MODE_PROT64) { 2011 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2012 return X86EMUL_PROPAGATE_FAULT; 2013 } 2014 2015 - setup_syscalls_segments(ctxt, &cs, &ss); 2016 2017 - kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); 2018 switch (ctxt->mode) { 2019 case X86EMUL_MODE_PROT32: 2020 if ((msr_data & 0xfffc) == 0x0) { 2021 - kvm_inject_gp(ctxt->vcpu, 0); 2022 return X86EMUL_PROPAGATE_FAULT; 2023 } 2024 break; 2025 case X86EMUL_MODE_PROT64: 2026 if (msr_data == 0x0) { 2027 - kvm_inject_gp(ctxt->vcpu, 0); 2028 return X86EMUL_PROPAGATE_FAULT; 2029 } 2030 break; 2031 } 2032 2033 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); 2034 - cs.selector = (u16)msr_data; 2035 - cs.selector &= ~SELECTOR_RPL_MASK; 2036 - ss.selector = cs.selector + 8; 2037 - ss.selector &= ~SELECTOR_RPL_MASK; 2038 if (ctxt->mode == X86EMUL_MODE_PROT64 2039 || is_long_mode(ctxt->vcpu)) { 2040 - cs.db = 0; 2041 cs.l = 1; 2042 } 2043 2044 - kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); 2045 - kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); 2046 2047 - kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); 2048 c->eip = msr_data; 2049 2050 - kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); 2051 c->regs[VCPU_REGS_RSP] = msr_data; 2052 2053 return X86EMUL_CONTINUE; 2054 } 2055 2056 static int 2057 - emulate_sysexit(struct x86_emulate_ctxt *ctxt) 2058 { 2059 struct decode_cache *c = &ctxt->decode; 2060 - struct kvm_segment cs, ss; 2061 u64 msr_data; 2062 int usermode; 2063 2064 /* inject #GP if in real mode or Virtual 8086 mode */ 2065 if (ctxt->mode == X86EMUL_MODE_REAL || 2066 ctxt->mode == X86EMUL_MODE_VM86) { 2067 - kvm_inject_gp(ctxt->vcpu, 0); 2068 return X86EMUL_PROPAGATE_FAULT; 2069 } 2070 2071 - setup_syscalls_segments(ctxt, &cs, &ss); 2072 2073 if ((c->rex_prefix & 0x8) != 0x0) 2074 usermode = X86EMUL_MODE_PROT64; ··· 2080 2081 cs.dpl = 3; 2082 ss.dpl = 3; 2083 - kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); 2084 switch (usermode) { 2085 case X86EMUL_MODE_PROT32: 2086 - cs.selector = (u16)(msr_data + 16); 2087 if ((msr_data & 0xfffc) == 0x0) { 2088 - kvm_inject_gp(ctxt->vcpu, 0); 2089 return X86EMUL_PROPAGATE_FAULT; 2090 } 2091 - ss.selector = (u16)(msr_data + 24); 2092 break; 2093 case X86EMUL_MODE_PROT64: 2094 - cs.selector = (u16)(msr_data + 32); 2095 if (msr_data == 0x0) { 2096 - kvm_inject_gp(ctxt->vcpu, 0); 2097 return X86EMUL_PROPAGATE_FAULT; 2098 } 2099 - ss.selector = cs.selector + 8; 2100 - cs.db = 0; 2101 cs.l = 1; 2102 break; 2103 } 2104 - cs.selector |= SELECTOR_RPL_MASK; 2105 - ss.selector |= SELECTOR_RPL_MASK; 2106 2107 - kvm_x86_ops->set_segment(ctxt->vcpu, &cs, 
VCPU_SREG_CS); 2108 - kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); 2109 2110 - c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; 2111 - c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; 2112 2113 return X86EMUL_CONTINUE; 2114 } ··· 2131 struct x86_emulate_ops *ops, 2132 u16 port, u16 len) 2133 { 2134 - struct kvm_segment tr_seg; 2135 int r; 2136 u16 io_bitmap_ptr; 2137 u8 perm, bit_idx = port & 0x7; 2138 unsigned mask = (1 << len) - 1; 2139 2140 - kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); 2141 - if (tr_seg.unusable) 2142 return false; 2143 - if (tr_seg.limit < 103) 2144 return false; 2145 - r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, 2146 - NULL); 2147 if (r != X86EMUL_CONTINUE) 2148 return false; 2149 - if (io_bitmap_ptr + port/8 > tr_seg.limit) 2150 return false; 2151 - r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, 2152 - ctxt->vcpu, NULL); 2153 if (r != X86EMUL_CONTINUE) 2154 return false; 2155 if ((perm >> bit_idx) & mask) ··· 2165 if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) 2166 return false; 2167 return true; 2168 - } 2169 - 2170 - static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, 2171 - struct x86_emulate_ops *ops, 2172 - int seg) 2173 - { 2174 - struct desc_struct desc; 2175 - if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) 2176 - return get_desc_base(&desc); 2177 - else 2178 - return ~0; 2179 } 2180 2181 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, ··· 2255 &err); 2256 if (ret == X86EMUL_PROPAGATE_FAULT) { 2257 /* FIXME: need to provide precise fault address */ 2258 - kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); 2259 return ret; 2260 } 2261 ··· 2265 &err); 2266 if (ret == X86EMUL_PROPAGATE_FAULT) { 2267 /* FIXME: need to provide precise fault address */ 2268 - kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); 2269 return ret; 2270 } 2271 ··· 2273 &err); 2274 if (ret == X86EMUL_PROPAGATE_FAULT) { 2275 /* FIXME: need to provide precise fault address */ 2276 - kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); 2277 return ret; 2278 } 2279 ··· 2286 ctxt->vcpu, &err); 2287 if (ret == X86EMUL_PROPAGATE_FAULT) { 2288 /* FIXME: need to provide precise fault address */ 2289 - kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); 2290 return ret; 2291 } 2292 } ··· 2328 struct decode_cache *c = &ctxt->decode; 2329 int ret; 2330 2331 - ops->set_cr(3, tss->cr3, ctxt->vcpu); 2332 c->eip = tss->eip; 2333 ctxt->eflags = tss->eflags | 2; 2334 c->regs[VCPU_REGS_RAX] = tss->eax; ··· 2397 &err); 2398 if (ret == X86EMUL_PROPAGATE_FAULT) { 2399 /* FIXME: need to provide precise fault address */ 2400 - kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); 2401 return ret; 2402 } 2403 ··· 2407 &err); 2408 if (ret == X86EMUL_PROPAGATE_FAULT) { 2409 /* FIXME: need to provide precise fault address */ 2410 - kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); 2411 return ret; 2412 } 2413 ··· 2415 &err); 2416 if (ret == X86EMUL_PROPAGATE_FAULT) { 2417 /* FIXME: need to provide precise fault address */ 2418 - kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); 2419 return ret; 2420 } 2421 ··· 2428 ctxt->vcpu, &err); 2429 if (ret == X86EMUL_PROPAGATE_FAULT) { 2430 /* FIXME: need to provide precise fault address */ 2431 - kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); 2432 return ret; 2433 } 2434 } ··· 2445 int ret; 2446 u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); 2447 ulong old_tss_base = 2448 - 
get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); 2449 u32 desc_limit; 2450 2451 /* FIXME: old_tss_base == ~0 ? */ ··· 2462 if (reason != TASK_SWITCH_IRET) { 2463 if ((tss_selector & 3) > next_tss_desc.dpl || 2464 ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { 2465 - kvm_inject_gp(ctxt->vcpu, 0); 2466 return X86EMUL_PROPAGATE_FAULT; 2467 } 2468 } ··· 2471 if (!next_tss_desc.p || 2472 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || 2473 desc_limit < 0x2b)) { 2474 - kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, 2475 - tss_selector & 0xfffc); 2476 return X86EMUL_PROPAGATE_FAULT; 2477 } 2478 ··· 2517 c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; 2518 c->lock_prefix = 0; 2519 c->src.val = (unsigned long) error_code; 2520 - emulate_push(ctxt); 2521 } 2522 2523 return ret; ··· 2531 struct decode_cache *c = &ctxt->decode; 2532 int rc; 2533 2534 - memset(c, 0, sizeof(struct decode_cache)); 2535 c->eip = ctxt->eip; 2536 - memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 2537 c->dst.type = OP_NONE; 2538 2539 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, 2540 has_error_code, error_code); 2541 2542 if (rc == X86EMUL_CONTINUE) { 2543 - memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); 2544 - kvm_rip_write(ctxt->vcpu, c->eip); 2545 rc = writeback(ctxt, ops); 2546 } 2547 2548 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; ··· 2564 int rc = X86EMUL_CONTINUE; 2565 int saved_dst_type = c->dst.type; 2566 2567 - ctxt->interruptibility = 0; 2568 - 2569 - /* Shadow copy of register state. Committed on successful emulation. 2570 - * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't 2571 - * modify them. 2572 - */ 2573 - 2574 - memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 2575 2576 if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { 2577 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2578 goto done; 2579 } 2580 2581 /* LOCK prefix is allowed only with some instructions */ 2582 if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { 2583 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2584 goto done; 2585 } 2586 2587 /* Privileged instruction can be executed only in CPL=0 */ 2588 if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { 2589 - kvm_inject_gp(ctxt->vcpu, 0); 2590 goto done; 2591 } 2592 ··· 2589 if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { 2590 string_done: 2591 ctxt->restart = false; 2592 - kvm_rip_write(ctxt->vcpu, c->eip); 2593 goto done; 2594 } 2595 /* The second termination condition only applies for REPE ··· 2612 } 2613 2614 if (c->src.type == OP_MEM) { 2615 - rc = ops->read_emulated((unsigned long)c->src.ptr, 2616 - &c->src.val, 2617 - c->src.bytes, 2618 - ctxt->vcpu); 2619 if (rc != X86EMUL_CONTINUE) 2620 goto done; 2621 c->src.orig_val = c->src.val; 2622 } 2623 2624 if (c->src2.type == OP_MEM) { 2625 - rc = ops->read_emulated((unsigned long)c->src2.ptr, 2626 - &c->src2.val, 2627 - c->src2.bytes, 2628 - ctxt->vcpu); 2629 if (rc != X86EMUL_CONTINUE) 2630 goto done; 2631 } ··· 2632 2633 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { 2634 /* optimisation - avoid slow emulated read if Mov */ 2635 - rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, 2636 - c->dst.bytes, ctxt->vcpu); 2637 if (rc != X86EMUL_CONTINUE) 2638 goto done; 2639 } ··· 2650 emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); 2651 break; 2652 case 0x06: /* push es */ 2653 - emulate_push_sreg(ctxt, VCPU_SREG_ES); 2654 break; 2655 case 0x07: /* pop es */ 2656 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); ··· 2662 
emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); 2663 break; 2664 case 0x0e: /* push cs */ 2665 - emulate_push_sreg(ctxt, VCPU_SREG_CS); 2666 break; 2667 case 0x10 ... 0x15: 2668 adc: /* adc */ 2669 emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); 2670 break; 2671 case 0x16: /* push ss */ 2672 - emulate_push_sreg(ctxt, VCPU_SREG_SS); 2673 break; 2674 case 0x17: /* pop ss */ 2675 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); ··· 2681 emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); 2682 break; 2683 case 0x1e: /* push ds */ 2684 - emulate_push_sreg(ctxt, VCPU_SREG_DS); 2685 break; 2686 case 0x1f: /* pop ds */ 2687 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); ··· 2711 emulate_1op("dec", c->dst, ctxt->eflags); 2712 break; 2713 case 0x50 ... 0x57: /* push reg */ 2714 - emulate_push(ctxt); 2715 break; 2716 case 0x58 ... 0x5f: /* pop reg */ 2717 pop_instruction: ··· 2720 goto done; 2721 break; 2722 case 0x60: /* pusha */ 2723 - emulate_pusha(ctxt); 2724 break; 2725 case 0x61: /* popa */ 2726 rc = emulate_popa(ctxt, ops); ··· 2736 break; 2737 case 0x68: /* push imm */ 2738 case 0x6a: /* push imm8 */ 2739 - emulate_push(ctxt); 2740 break; 2741 case 0x6c: /* insb */ 2742 case 0x6d: /* insw/insd */ 2743 c->dst.bytes = min(c->dst.bytes, 4u); 2744 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 2745 c->dst.bytes)) { 2746 - kvm_inject_gp(ctxt->vcpu, 0); 2747 goto done; 2748 } 2749 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, ··· 2755 c->src.bytes = min(c->src.bytes, 4u); 2756 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 2757 c->src.bytes)) { 2758 - kvm_inject_gp(ctxt->vcpu, 0); 2759 goto done; 2760 } 2761 ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], ··· 2788 } 2789 break; 2790 case 0x84 ... 0x85: 2791 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); 2792 break; 2793 case 0x86 ... 0x87: /* xchg */ ··· 2817 break; 2818 case 0x88 ... 0x8b: /* mov */ 2819 goto mov; 2820 - case 0x8c: { /* mov r/m, sreg */ 2821 - struct kvm_segment segreg; 2822 - 2823 - if (c->modrm_reg <= VCPU_SREG_GS) 2824 - kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); 2825 - else { 2826 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2827 goto done; 2828 } 2829 - c->dst.val = segreg.selector; 2830 break; 2831 - } 2832 case 0x8d: /* lea r16/r32, m */ 2833 c->dst.val = c->modrm_ea; 2834 break; ··· 2834 2835 if (c->modrm_reg == VCPU_SREG_CS || 2836 c->modrm_reg > VCPU_SREG_GS) { 2837 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2838 goto done; 2839 } 2840 2841 if (c->modrm_reg == VCPU_SREG_SS) 2842 - toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); 2843 2844 rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); 2845 ··· 2852 goto done; 2853 break; 2854 case 0x90: /* nop / xchg r8,rax */ 2855 - if (!(c->rex_prefix & 1)) { /* nop */ 2856 - c->dst.type = OP_NONE; 2857 break; 2858 } 2859 case 0x91 ... 0x97: /* xchg reg,rax */ 2860 - c->src.type = c->dst.type = OP_REG; 2861 - c->src.bytes = c->dst.bytes = c->op_bytes; 2862 c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; 2863 c->src.val = *(c->src.ptr); 2864 goto xchg; 2865 case 0x9c: /* pushf */ 2866 c->src.val = (unsigned long) ctxt->eflags; 2867 - emulate_push(ctxt); 2868 break; 2869 case 0x9d: /* popf */ 2870 c->dst.type = OP_REG; ··· 2874 if (rc != X86EMUL_CONTINUE) 2875 goto done; 2876 break; 2877 - case 0xa0 ... 0xa1: /* mov */ 2878 - c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 2879 - c->dst.val = c->src.val; 2880 - break; 2881 - case 0xa2 ... 
0xa3: /* mov */ 2882 - c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; 2883 - break; 2884 case 0xa4 ... 0xa5: /* movs */ 2885 goto mov; 2886 case 0xa6 ... 0xa7: /* cmps */ 2887 c->dst.type = OP_NONE; /* Disable writeback. */ 2888 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); 2889 goto cmp; 2890 case 0xaa ... 0xab: /* stos */ 2891 c->dst.val = c->regs[VCPU_REGS_RAX]; 2892 break; ··· 2928 long int rel = c->src.val; 2929 c->src.val = (unsigned long) c->eip; 2930 jmp_rel(c, rel); 2931 - emulate_push(ctxt); 2932 break; 2933 } 2934 case 0xe9: /* jmp rel */ 2935 goto jmp; 2936 - case 0xea: /* jmp far */ 2937 jump_far: 2938 - if (load_segment_descriptor(ctxt, ops, c->src2.val, 2939 - VCPU_SREG_CS)) 2940 goto done; 2941 2942 - c->eip = c->src.val; 2943 break; 2944 case 0xeb: 2945 jmp: /* jmp rel short */ 2946 jmp_rel(c, c->src.val); ··· 2956 do_io_in: 2957 c->dst.bytes = min(c->dst.bytes, 4u); 2958 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { 2959 - kvm_inject_gp(ctxt->vcpu, 0); 2960 goto done; 2961 } 2962 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, 2963 &c->dst.val)) 2964 goto done; /* IO is needed */ 2965 break; 2966 - case 0xee: /* out al,dx */ 2967 - case 0xef: /* out (e/r)ax,dx */ 2968 c->src.val = c->regs[VCPU_REGS_RDX]; 2969 do_io_out: 2970 c->dst.bytes = min(c->dst.bytes, 4u); 2971 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { 2972 - kvm_inject_gp(ctxt->vcpu, 0); 2973 goto done; 2974 } 2975 ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, ··· 2993 c->dst.type = OP_NONE; /* Disable writeback. */ 2994 break; 2995 case 0xfa: /* cli */ 2996 - if (emulator_bad_iopl(ctxt, ops)) 2997 - kvm_inject_gp(ctxt->vcpu, 0); 2998 - else { 2999 ctxt->eflags &= ~X86_EFLAGS_IF; 3000 c->dst.type = OP_NONE; /* Disable writeback. */ 3001 } 3002 break; 3003 case 0xfb: /* sti */ 3004 - if (emulator_bad_iopl(ctxt, ops)) 3005 - kvm_inject_gp(ctxt->vcpu, 0); 3006 - else { 3007 - toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); 3008 ctxt->eflags |= X86_EFLAGS_IF; 3009 c->dst.type = OP_NONE; /* Disable writeback. */ 3010 } ··· 3043 c->dst.type = saved_dst_type; 3044 3045 if ((c->d & SrcMask) == SrcSI) 3046 - string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, 3047 - &c->src); 3048 3049 if ((c->d & DstMask) == DstDI) 3050 - string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); 3051 3052 if (c->rep_prefix && (c->d & String)) { 3053 struct read_cache *rc = &ctxt->decode.io_read; ··· 3061 (rc->end != 0 && rc->end == rc->pos)) 3062 ctxt->restart = false; 3063 } 3064 - 3065 - /* Commit shadow register state. */ 3066 - memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); 3067 - kvm_rip_write(ctxt->vcpu, c->eip); 3068 - ops->set_rflags(ctxt->vcpu, ctxt->eflags); 3069 3070 done: 3071 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; ··· 3132 c->dst.type = OP_NONE; 3133 break; 3134 case 5: /* not defined */ 3135 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 3136 goto done; 3137 case 7: /* invlpg*/ 3138 emulate_invlpg(ctxt->vcpu, c->modrm_ea); ··· 3144 } 3145 break; 3146 case 0x05: /* syscall */ 3147 - rc = emulate_syscall(ctxt); 3148 if (rc != X86EMUL_CONTINUE) 3149 goto done; 3150 else ··· 3154 emulate_clts(ctxt->vcpu); 3155 c->dst.type = OP_NONE; 3156 break; 3157 - case 0x08: /* invd */ 3158 case 0x09: /* wbinvd */ 3159 case 0x0d: /* GrpP (prefetch) */ 3160 case 0x18: /* Grp16 (prefetch/nop) */ 3161 c->dst.type = OP_NONE; ··· 3168 case 1: 3169 case 5 ... 7: 3170 case 9 ... 
15: 3171 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 3172 goto done; 3173 } 3174 c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); ··· 3177 case 0x21: /* mov from dr to reg */ 3178 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 3179 (c->modrm_reg == 4 || c->modrm_reg == 5)) { 3180 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 3181 goto done; 3182 } 3183 - emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); 3184 c->dst.type = OP_NONE; /* no writeback */ 3185 break; 3186 case 0x22: /* mov reg, cr */ 3187 - ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); 3188 c->dst.type = OP_NONE; 3189 break; 3190 case 0x23: /* mov from reg to dr */ 3191 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 3192 (c->modrm_reg == 4 || c->modrm_reg == 5)) { 3193 - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 3194 goto done; 3195 } 3196 - emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); 3197 c->dst.type = OP_NONE; /* no writeback */ 3198 break; 3199 case 0x30: 3200 /* wrmsr */ 3201 msr_data = (u32)c->regs[VCPU_REGS_RAX] 3202 | ((u64)c->regs[VCPU_REGS_RDX] << 32); 3203 - if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { 3204 - kvm_inject_gp(ctxt->vcpu, 0); 3205 goto done; 3206 } 3207 rc = X86EMUL_CONTINUE; ··· 3220 break; 3221 case 0x32: 3222 /* rdmsr */ 3223 - if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { 3224 - kvm_inject_gp(ctxt->vcpu, 0); 3225 goto done; 3226 } else { 3227 c->regs[VCPU_REGS_RAX] = (u32)msr_data; ··· 3231 c->dst.type = OP_NONE; 3232 break; 3233 case 0x34: /* sysenter */ 3234 - rc = emulate_sysenter(ctxt); 3235 if (rc != X86EMUL_CONTINUE) 3236 goto done; 3237 else 3238 goto writeback; 3239 break; 3240 case 0x35: /* sysexit */ 3241 - rc = emulate_sysexit(ctxt); 3242 if (rc != X86EMUL_CONTINUE) 3243 goto done; 3244 else ··· 3255 c->dst.type = OP_NONE; 3256 break; 3257 case 0xa0: /* push fs */ 3258 - emulate_push_sreg(ctxt, VCPU_SREG_FS); 3259 break; 3260 case 0xa1: /* pop fs */ 3261 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); ··· 3274 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); 3275 break; 3276 case 0xa8: /* push gs */ 3277 - emulate_push_sreg(ctxt, VCPU_SREG_GS); 3278 break; 3279 case 0xa9: /* pop gs */ 3280 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
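The hunks above are the old side of arch/x86/kvm/emulate.c; the new side follows below. The recurring change is that the emulator stops calling into KVM directly (kvm_inject_gp(), kvm_queue_exception(), kvm_x86_ops->get_msr() and friends) and instead goes through the x86_emulate_ops callback table, with faults recorded in the emulation context rather than injected into the vcpu on the spot. What follows is only a minimal sketch of that record-then-inject pattern, using a simplified context structure; consume_pending_fault() is a hypothetical stand-in for the caller side in x86.c, which is not part of this diff.

	#include <stdbool.h>
	#include <stdio.h>

	/* Simplified stand-in for struct x86_emulate_ctxt; the real structure
	 * carries far more state (decode cache, mode, eflags, ...). */
	struct emu_ctxt {
		int exception;			/* pending vector, -1 when none */
		unsigned int error_code;
		bool error_code_valid;
		unsigned long cr2;		/* faulting address for #PF */
		bool restart;			/* rep-string restart flag */
	};

	/* Record a fault in the context; the guest is not touched here. */
	static void emulate_exception(struct emu_ctxt *ctxt, int vec,
				      unsigned int error, bool valid)
	{
		ctxt->exception = vec;
		ctxt->error_code = error;
		ctxt->error_code_valid = valid;
		ctxt->restart = false;		/* a fault cancels any pending restart */
	}

	static void emulate_gp(struct emu_ctxt *ctxt, unsigned int err)
	{
		emulate_exception(ctxt, 13, err, true);	/* #GP */
	}

	static void emulate_pf(struct emu_ctxt *ctxt, unsigned long addr,
			       unsigned int err)
	{
		ctxt->cr2 = addr;
		emulate_exception(ctxt, 14, err, true);	/* #PF */
	}

	/* Hypothetical caller-side hook: once emulation bails out, the host
	 * reads the recorded state and injects the exception exactly once. */
	static void consume_pending_fault(struct emu_ctxt *ctxt)
	{
		if (ctxt->exception < 0)
			return;
		printf("inject vector %d, error code %#x\n", ctxt->exception,
		       ctxt->error_code_valid ? ctxt->error_code : 0);
	}

	int main(void)
	{
		struct emu_ctxt ctxt = { .exception = -1 };

		emulate_gp(&ctxt, 0);		/* e.g. an I/O permission check failed */
		consume_pending_fault(&ctxt);

		emulate_pf(&ctxt, 0xdead000, 0x2);	/* fault at a made-up address */
		consume_pending_fault(&ctxt);
		return 0;
	}

Deferring the injection this way keeps emulate.c free of vcpu internals; in the new pane below it is why the kvm_inject_gp()/kvm_queue_exception() call sites become emulate_gp(), emulate_ud(), emulate_pf() and emulate_ts(), and why the failing paths simply return X86EMUL_PROPAGATE_FAULT or jump to done.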
··· 9 * privileged instructions: 10 * 11 * Copyright (C) 2006 Qumranet 12 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 13 * 14 * Avi Kivity <avi@qumranet.com> 15 * Yaniv Kamay <yaniv@qumranet.com> ··· 67 #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ 68 #define SrcImmU (9<<4) /* Immediate operand, unsigned */ 69 #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ 70 + #define SrcImmFAddr (0xb<<4) /* Source is immediate far address */ 71 + #define SrcMemFAddr (0xc<<4) /* Source is far address in memory */ 72 + #define SrcAcc (0xd<<4) /* Source Accumulator */ 73 #define SrcMask (0xf<<4) 74 /* Generic ModRM decode. */ 75 #define ModRM (1<<8) ··· 88 #define Src2CL (1<<29) 89 #define Src2ImmByte (2<<29) 90 #define Src2One (3<<29) 91 #define Src2Mask (7<<29) 92 93 enum { ··· 124 /* 0x20 - 0x27 */ 125 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 126 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 127 + ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, 128 /* 0x28 - 0x2F */ 129 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 130 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 131 + ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, 132 /* 0x30 - 0x37 */ 133 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 134 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 135 + ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, 136 /* 0x38 - 0x3F */ 137 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 138 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ··· 170 /* 0x88 - 0x8F */ 171 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 172 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 173 + DstMem | SrcNone | ModRM | Mov, ModRM | DstReg, 174 + ImplicitOps | SrcMem16 | ModRM, Group | Group1A, 175 /* 0x90 - 0x97 */ 176 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, 177 /* 0x98 - 0x9F */ 178 + 0, 0, SrcImmFAddr | No64, 0, 179 ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, 180 /* 0xA0 - 0xA7 */ 181 + ByteOp | DstAcc | SrcMem | Mov | MemAbs, DstAcc | SrcMem | Mov | MemAbs, 182 + ByteOp | DstMem | SrcAcc | Mov | MemAbs, DstMem | SrcAcc | Mov | MemAbs, 183 ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, 184 ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, 185 /* 0xA8 - 0xAF */ 186 + DstAcc | SrcImmByte | ByteOp, DstAcc | SrcImm, ByteOp | DstDI | Mov | String, DstDI | Mov | String, 187 ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, 188 ByteOp | DstDI | String, DstDI | String, 189 /* 0xB0 - 0xB7 */ ··· 215 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, 216 /* 0xE8 - 0xEF */ 217 SrcImm | Stack, SrcImm | ImplicitOps, 218 + SrcImmFAddr | No64, SrcImmByte | ImplicitOps, 219 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, 220 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, 221 /* 0xF0 - 0xF7 */ ··· 337 [Group1A*8] = 338 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, 339 [Group3_Byte*8] = 340 + ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM, 341 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, 342 0, 0, 0, 0, 343 [Group3*8] = 344 + DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 345 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 346 0, 0, 0, 0, 347 [Group4*8] = 348 + ByteOp | DstMem | SrcNone | ModRM | Lock, ByteOp | DstMem | SrcNone | ModRM | Lock, 349 0, 0, 0, 0, 0, 0, 350 [Group5*8] = 351 
+ DstMem | SrcNone | ModRM | Lock, DstMem | SrcNone | ModRM | Lock, 352 SrcMem | ModRM | Stack, 0, 353 + SrcMem | ModRM | Stack, SrcMemFAddr | ModRM | ImplicitOps, 354 SrcMem | ModRM | Stack, 0, 355 [Group7*8] = 356 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, ··· 576 (_type)_x; \ 577 }) 578 579 + #define insn_fetch_arr(_arr, _size, _eip) \ 580 + ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ 581 + if (rc != X86EMUL_CONTINUE) \ 582 + goto done; \ 583 + (_eip) += (_size); \ 584 + }) 585 + 586 static inline unsigned long ad_mask(struct decode_cache *c) 587 { 588 return (1UL << (c->ad_bytes << 3)) - 1; ··· 617 c->seg_override = seg; 618 } 619 620 + static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, 621 + struct x86_emulate_ops *ops, int seg) 622 { 623 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) 624 return 0; 625 626 + return ops->get_cached_segment_base(seg, ctxt->vcpu); 627 } 628 629 static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, 630 + struct x86_emulate_ops *ops, 631 struct decode_cache *c) 632 { 633 if (!c->has_seg_override) 634 return 0; 635 636 + return seg_base(ctxt, ops, c->seg_override); 637 } 638 639 + static unsigned long es_base(struct x86_emulate_ctxt *ctxt, 640 + struct x86_emulate_ops *ops) 641 { 642 + return seg_base(ctxt, ops, VCPU_SREG_ES); 643 } 644 645 + static unsigned long ss_base(struct x86_emulate_ctxt *ctxt, 646 + struct x86_emulate_ops *ops) 647 { 648 + return seg_base(ctxt, ops, VCPU_SREG_SS); 649 + } 650 + 651 + static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, 652 + u32 error, bool valid) 653 + { 654 + ctxt->exception = vec; 655 + ctxt->error_code = error; 656 + ctxt->error_code_valid = valid; 657 + ctxt->restart = false; 658 + } 659 + 660 + static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err) 661 + { 662 + emulate_exception(ctxt, GP_VECTOR, err, true); 663 + } 664 + 665 + static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr, 666 + int err) 667 + { 668 + ctxt->cr2 = addr; 669 + emulate_exception(ctxt, PF_VECTOR, err, true); 670 + } 671 + 672 + static void emulate_ud(struct x86_emulate_ctxt *ctxt) 673 + { 674 + emulate_exception(ctxt, UD_VECTOR, 0, false); 675 + } 676 + 677 + static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err) 678 + { 679 + emulate_exception(ctxt, TS_VECTOR, err, true); 680 } 681 682 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, ··· 932 /* we cannot decode insn before we complete previous rep insn */ 933 WARN_ON(ctxt->restart); 934 935 c->eip = ctxt->eip; 936 c->fetch.start = c->fetch.end = c->eip; 937 + ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); 938 939 switch (mode) { 940 case X86EMUL_MODE_REAL: ··· 1060 set_seg_override(c, VCPU_SREG_DS); 1061 1062 if (!(!c->twobyte && c->b == 0x8d)) 1063 + c->modrm_ea += seg_override_base(ctxt, ops, c); 1064 1065 if (c->ad_bytes != 8) 1066 c->modrm_ea = (u32)c->modrm_ea; ··· 1148 else 1149 c->src.val = insn_fetch(u8, 1, c->eip); 1150 break; 1151 + case SrcAcc: 1152 + c->src.type = OP_REG; 1153 + c->src.bytes = (c->d & ByteOp) ? 
1 : c->op_bytes; 1154 + c->src.ptr = &c->regs[VCPU_REGS_RAX]; 1155 + switch (c->src.bytes) { 1156 + case 1: 1157 + c->src.val = *(u8 *)c->src.ptr; 1158 + break; 1159 + case 2: 1160 + c->src.val = *(u16 *)c->src.ptr; 1161 + break; 1162 + case 4: 1163 + c->src.val = *(u32 *)c->src.ptr; 1164 + break; 1165 + case 8: 1166 + c->src.val = *(u64 *)c->src.ptr; 1167 + break; 1168 + } 1169 + break; 1170 case SrcOne: 1171 c->src.bytes = 1; 1172 c->src.val = 1; ··· 1156 c->src.type = OP_MEM; 1157 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1158 c->src.ptr = (unsigned long *) 1159 + register_address(c, seg_override_base(ctxt, ops, c), 1160 c->regs[VCPU_REGS_RSI]); 1161 c->src.val = 0; 1162 + break; 1163 + case SrcImmFAddr: 1164 + c->src.type = OP_IMM; 1165 + c->src.ptr = (unsigned long *)c->eip; 1166 + c->src.bytes = c->op_bytes + 2; 1167 + insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); 1168 + break; 1169 + case SrcMemFAddr: 1170 + c->src.type = OP_MEM; 1171 + c->src.ptr = (unsigned long *)c->modrm_ea; 1172 + c->src.bytes = c->op_bytes + 2; 1173 break; 1174 } 1175 ··· 1179 c->src2.bytes = 1; 1180 c->src2.val = insn_fetch(u8, 1, c->eip); 1181 break; 1182 case Src2One: 1183 c->src2.bytes = 1; 1184 c->src2.val = 1; 1185 break; 1186 } 1187 ··· 1253 c->dst.type = OP_MEM; 1254 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1255 c->dst.ptr = (unsigned long *) 1256 + register_address(c, es_base(ctxt, ops), 1257 c->regs[VCPU_REGS_RDI]); 1258 c->dst.val = 0; 1259 break; ··· 1261 1262 done: 1263 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 1264 + } 1265 + 1266 + static int read_emulated(struct x86_emulate_ctxt *ctxt, 1267 + struct x86_emulate_ops *ops, 1268 + unsigned long addr, void *dest, unsigned size) 1269 + { 1270 + int rc; 1271 + struct read_cache *mc = &ctxt->decode.mem_read; 1272 + u32 err; 1273 + 1274 + while (size) { 1275 + int n = min(size, 8u); 1276 + size -= n; 1277 + if (mc->pos < mc->end) 1278 + goto read_cached; 1279 + 1280 + rc = ops->read_emulated(addr, mc->data + mc->end, n, &err, 1281 + ctxt->vcpu); 1282 + if (rc == X86EMUL_PROPAGATE_FAULT) 1283 + emulate_pf(ctxt, addr, err); 1284 + if (rc != X86EMUL_CONTINUE) 1285 + return rc; 1286 + mc->end += n; 1287 + 1288 + read_cached: 1289 + memcpy(dest, mc->data + mc->pos, n); 1290 + mc->pos += n; 1291 + dest += n; 1292 + addr += n; 1293 + } 1294 + return X86EMUL_CONTINUE; 1295 } 1296 1297 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, ··· 1330 get_descriptor_table_ptr(ctxt, ops, selector, &dt); 1331 1332 if (dt.size < index * 8 + 7) { 1333 + emulate_gp(ctxt, selector & 0xfffc); 1334 return X86EMUL_PROPAGATE_FAULT; 1335 } 1336 addr = dt.address + index * 8; 1337 ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); 1338 if (ret == X86EMUL_PROPAGATE_FAULT) 1339 + emulate_pf(ctxt, addr, err); 1340 1341 return ret; 1342 } ··· 1355 get_descriptor_table_ptr(ctxt, ops, selector, &dt); 1356 1357 if (dt.size < index * 8 + 7) { 1358 + emulate_gp(ctxt, selector & 0xfffc); 1359 return X86EMUL_PROPAGATE_FAULT; 1360 } 1361 1362 addr = dt.address + index * 8; 1363 ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); 1364 if (ret == X86EMUL_PROPAGATE_FAULT) 1365 + emulate_pf(ctxt, addr, err); 1366 1367 return ret; 1368 } ··· 1481 ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); 1482 return X86EMUL_CONTINUE; 1483 exception: 1484 + emulate_exception(ctxt, err_vec, err_code, true); 1485 return X86EMUL_PROPAGATE_FAULT; 1486 } 1487 1488 + static inline int writeback(struct x86_emulate_ctxt *ctxt, 1489 + struct 
x86_emulate_ops *ops) 1490 + { 1491 + int rc; 1492 + struct decode_cache *c = &ctxt->decode; 1493 + u32 err; 1494 + 1495 + switch (c->dst.type) { 1496 + case OP_REG: 1497 + /* The 4-byte case *is* correct: 1498 + * in 64-bit mode we zero-extend. 1499 + */ 1500 + switch (c->dst.bytes) { 1501 + case 1: 1502 + *(u8 *)c->dst.ptr = (u8)c->dst.val; 1503 + break; 1504 + case 2: 1505 + *(u16 *)c->dst.ptr = (u16)c->dst.val; 1506 + break; 1507 + case 4: 1508 + *c->dst.ptr = (u32)c->dst.val; 1509 + break; /* 64b: zero-ext */ 1510 + case 8: 1511 + *c->dst.ptr = c->dst.val; 1512 + break; 1513 + } 1514 + break; 1515 + case OP_MEM: 1516 + if (c->lock_prefix) 1517 + rc = ops->cmpxchg_emulated( 1518 + (unsigned long)c->dst.ptr, 1519 + &c->dst.orig_val, 1520 + &c->dst.val, 1521 + c->dst.bytes, 1522 + &err, 1523 + ctxt->vcpu); 1524 + else 1525 + rc = ops->write_emulated( 1526 + (unsigned long)c->dst.ptr, 1527 + &c->dst.val, 1528 + c->dst.bytes, 1529 + &err, 1530 + ctxt->vcpu); 1531 + if (rc == X86EMUL_PROPAGATE_FAULT) 1532 + emulate_pf(ctxt, 1533 + (unsigned long)c->dst.ptr, err); 1534 + if (rc != X86EMUL_CONTINUE) 1535 + return rc; 1536 + break; 1537 + case OP_NONE: 1538 + /* no writeback */ 1539 + break; 1540 + default: 1541 + break; 1542 + } 1543 + return X86EMUL_CONTINUE; 1544 + } 1545 + 1546 + static inline void emulate_push(struct x86_emulate_ctxt *ctxt, 1547 + struct x86_emulate_ops *ops) 1548 { 1549 struct decode_cache *c = &ctxt->decode; 1550 ··· 1493 c->dst.bytes = c->op_bytes; 1494 c->dst.val = c->src.val; 1495 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); 1496 + c->dst.ptr = (void *) register_address(c, ss_base(ctxt, ops), 1497 c->regs[VCPU_REGS_RSP]); 1498 } 1499 ··· 1504 struct decode_cache *c = &ctxt->decode; 1505 int rc; 1506 1507 + rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops), 1508 + c->regs[VCPU_REGS_RSP]), 1509 + dest, len); 1510 if (rc != X86EMUL_CONTINUE) 1511 return rc; 1512 ··· 1541 break; 1542 case X86EMUL_MODE_VM86: 1543 if (iopl < 3) { 1544 + emulate_gp(ctxt, 0); 1545 return X86EMUL_PROPAGATE_FAULT; 1546 } 1547 change_mask |= EFLG_IF; ··· 1557 return rc; 1558 } 1559 1560 + static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, 1561 + struct x86_emulate_ops *ops, int seg) 1562 { 1563 struct decode_cache *c = &ctxt->decode; 1564 1565 + c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); 1566 1567 + emulate_push(ctxt, ops); 1568 } 1569 1570 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, ··· 1583 return rc; 1584 } 1585 1586 + static int emulate_pusha(struct x86_emulate_ctxt *ctxt, 1587 + struct x86_emulate_ops *ops) 1588 { 1589 struct decode_cache *c = &ctxt->decode; 1590 unsigned long old_esp = c->regs[VCPU_REGS_RSP]; 1591 + int rc = X86EMUL_CONTINUE; 1592 int reg = VCPU_REGS_RAX; 1593 1594 while (reg <= VCPU_REGS_RDI) { 1595 (reg == VCPU_REGS_RSP) ? 1596 (c->src.val = old_esp) : (c->src.val = c->regs[reg]); 1597 1598 + emulate_push(ctxt, ops); 1599 + 1600 + rc = writeback(ctxt, ops); 1601 + if (rc != X86EMUL_CONTINUE) 1602 + return rc; 1603 + 1604 ++reg; 1605 } 1606 + 1607 + /* Disable writeback. 
*/ 1608 + c->dst.type = OP_NONE; 1609 + 1610 + return rc; 1611 } 1612 1613 static int emulate_popa(struct x86_emulate_ctxt *ctxt, ··· 1695 old_eip = c->eip; 1696 c->eip = c->src.val; 1697 c->src.val = old_eip; 1698 + emulate_push(ctxt, ops); 1699 break; 1700 } 1701 case 4: /* jmp abs */ 1702 c->eip = c->src.val; 1703 break; 1704 case 6: /* push */ 1705 + emulate_push(ctxt, ops); 1706 break; 1707 } 1708 return X86EMUL_CONTINUE; ··· 1748 return rc; 1749 } 1750 1751 static inline void 1752 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, 1753 + struct x86_emulate_ops *ops, struct desc_struct *cs, 1754 + struct desc_struct *ss) 1755 { 1756 + memset(cs, 0, sizeof(struct desc_struct)); 1757 + ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu); 1758 + memset(ss, 0, sizeof(struct desc_struct)); 1759 1760 cs->l = 0; /* will be adjusted later */ 1761 + set_desc_base(cs, 0); /* flat segment */ 1762 cs->g = 1; /* 4kb granularity */ 1763 + set_desc_limit(cs, 0xfffff); /* 4GB limit */ 1764 cs->type = 0x0b; /* Read, Execute, Accessed */ 1765 cs->s = 1; 1766 cs->dpl = 0; /* will be adjusted later */ 1767 + cs->p = 1; 1768 + cs->d = 1; 1769 1770 + set_desc_base(ss, 0); /* flat segment */ 1771 + set_desc_limit(ss, 0xfffff); /* 4GB limit */ 1772 ss->g = 1; /* 4kb granularity */ 1773 ss->s = 1; 1774 ss->type = 0x03; /* Read/Write, Accessed */ 1775 + ss->d = 1; /* 32bit stack segment */ 1776 ss->dpl = 0; 1777 + ss->p = 1; 1778 } 1779 1780 static int 1781 + emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 1782 { 1783 struct decode_cache *c = &ctxt->decode; 1784 + struct desc_struct cs, ss; 1785 u64 msr_data; 1786 + u16 cs_sel, ss_sel; 1787 1788 /* syscall is not available in real mode */ 1789 if (ctxt->mode == X86EMUL_MODE_REAL || 1790 ctxt->mode == X86EMUL_MODE_VM86) { 1791 + emulate_ud(ctxt); 1792 return X86EMUL_PROPAGATE_FAULT; 1793 } 1794 1795 + setup_syscalls_segments(ctxt, ops, &cs, &ss); 1796 1797 + ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); 1798 msr_data >>= 32; 1799 + cs_sel = (u16)(msr_data & 0xfffc); 1800 + ss_sel = (u16)(msr_data + 8); 1801 1802 if (is_long_mode(ctxt->vcpu)) { 1803 + cs.d = 0; 1804 cs.l = 1; 1805 } 1806 + ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); 1807 + ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 1808 + ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); 1809 + ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); 1810 1811 c->regs[VCPU_REGS_RCX] = c->eip; 1812 if (is_long_mode(ctxt->vcpu)) { 1813 #ifdef CONFIG_X86_64 1814 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; 1815 1816 + ops->get_msr(ctxt->vcpu, 1817 + ctxt->mode == X86EMUL_MODE_PROT64 ? 1818 + MSR_LSTAR : MSR_CSTAR, &msr_data); 1819 c->eip = msr_data; 1820 1821 + ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); 1822 ctxt->eflags &= ~(msr_data | EFLG_RF); 1823 #endif 1824 } else { 1825 /* legacy mode */ 1826 + ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); 1827 c->eip = (u32)msr_data; 1828 1829 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); ··· 1896 } 1897 1898 static int 1899 + emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 1900 { 1901 struct decode_cache *c = &ctxt->decode; 1902 + struct desc_struct cs, ss; 1903 u64 msr_data; 1904 + u16 cs_sel, ss_sel; 1905 1906 /* inject #GP if in real mode */ 1907 if (ctxt->mode == X86EMUL_MODE_REAL) { 1908 + emulate_gp(ctxt, 0); 1909 return X86EMUL_PROPAGATE_FAULT; 1910 } 1911 ··· 1912 * Therefore, we inject an #UD. 
1913 */ 1914 if (ctxt->mode == X86EMUL_MODE_PROT64) { 1915 + emulate_ud(ctxt); 1916 return X86EMUL_PROPAGATE_FAULT; 1917 } 1918 1919 + setup_syscalls_segments(ctxt, ops, &cs, &ss); 1920 1921 + ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); 1922 switch (ctxt->mode) { 1923 case X86EMUL_MODE_PROT32: 1924 if ((msr_data & 0xfffc) == 0x0) { 1925 + emulate_gp(ctxt, 0); 1926 return X86EMUL_PROPAGATE_FAULT; 1927 } 1928 break; 1929 case X86EMUL_MODE_PROT64: 1930 if (msr_data == 0x0) { 1931 + emulate_gp(ctxt, 0); 1932 return X86EMUL_PROPAGATE_FAULT; 1933 } 1934 break; 1935 } 1936 1937 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); 1938 + cs_sel = (u16)msr_data; 1939 + cs_sel &= ~SELECTOR_RPL_MASK; 1940 + ss_sel = cs_sel + 8; 1941 + ss_sel &= ~SELECTOR_RPL_MASK; 1942 if (ctxt->mode == X86EMUL_MODE_PROT64 1943 || is_long_mode(ctxt->vcpu)) { 1944 + cs.d = 0; 1945 cs.l = 1; 1946 } 1947 1948 + ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); 1949 + ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 1950 + ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); 1951 + ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); 1952 1953 + ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); 1954 c->eip = msr_data; 1955 1956 + ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); 1957 c->regs[VCPU_REGS_RSP] = msr_data; 1958 1959 return X86EMUL_CONTINUE; 1960 } 1961 1962 static int 1963 + emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 1964 { 1965 struct decode_cache *c = &ctxt->decode; 1966 + struct desc_struct cs, ss; 1967 u64 msr_data; 1968 int usermode; 1969 + u16 cs_sel, ss_sel; 1970 1971 /* inject #GP if in real mode or Virtual 8086 mode */ 1972 if (ctxt->mode == X86EMUL_MODE_REAL || 1973 ctxt->mode == X86EMUL_MODE_VM86) { 1974 + emulate_gp(ctxt, 0); 1975 return X86EMUL_PROPAGATE_FAULT; 1976 } 1977 1978 + setup_syscalls_segments(ctxt, ops, &cs, &ss); 1979 1980 if ((c->rex_prefix & 0x8) != 0x0) 1981 usermode = X86EMUL_MODE_PROT64; ··· 1981 1982 cs.dpl = 3; 1983 ss.dpl = 3; 1984 + ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); 1985 switch (usermode) { 1986 case X86EMUL_MODE_PROT32: 1987 + cs_sel = (u16)(msr_data + 16); 1988 if ((msr_data & 0xfffc) == 0x0) { 1989 + emulate_gp(ctxt, 0); 1990 return X86EMUL_PROPAGATE_FAULT; 1991 } 1992 + ss_sel = (u16)(msr_data + 24); 1993 break; 1994 case X86EMUL_MODE_PROT64: 1995 + cs_sel = (u16)(msr_data + 32); 1996 if (msr_data == 0x0) { 1997 + emulate_gp(ctxt, 0); 1998 return X86EMUL_PROPAGATE_FAULT; 1999 } 2000 + ss_sel = cs_sel + 8; 2001 + cs.d = 0; 2002 cs.l = 1; 2003 break; 2004 } 2005 + cs_sel |= SELECTOR_RPL_MASK; 2006 + ss_sel |= SELECTOR_RPL_MASK; 2007 2008 + ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); 2009 + ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 2010 + ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); 2011 + ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); 2012 2013 + c->eip = c->regs[VCPU_REGS_RDX]; 2014 + c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; 2015 2016 return X86EMUL_CONTINUE; 2017 } ··· 2030 struct x86_emulate_ops *ops, 2031 u16 port, u16 len) 2032 { 2033 + struct desc_struct tr_seg; 2034 int r; 2035 u16 io_bitmap_ptr; 2036 u8 perm, bit_idx = port & 0x7; 2037 unsigned mask = (1 << len) - 1; 2038 2039 + ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu); 2040 + if (!tr_seg.p) 2041 return false; 2042 + if (desc_limit_scaled(&tr_seg) < 103) 2043 return false; 2044 + r = 
ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2, 2045 + ctxt->vcpu, NULL); 2046 if (r != X86EMUL_CONTINUE) 2047 return false; 2048 + if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) 2049 return false; 2050 + r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8, 2051 + &perm, 1, ctxt->vcpu, NULL); 2052 if (r != X86EMUL_CONTINUE) 2053 return false; 2054 if ((perm >> bit_idx) & mask) ··· 2064 if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) 2065 return false; 2066 return true; 2067 } 2068 2069 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, ··· 2165 &err); 2166 if (ret == X86EMUL_PROPAGATE_FAULT) { 2167 /* FIXME: need to provide precise fault address */ 2168 + emulate_pf(ctxt, old_tss_base, err); 2169 return ret; 2170 } 2171 ··· 2175 &err); 2176 if (ret == X86EMUL_PROPAGATE_FAULT) { 2177 /* FIXME: need to provide precise fault address */ 2178 + emulate_pf(ctxt, old_tss_base, err); 2179 return ret; 2180 } 2181 ··· 2183 &err); 2184 if (ret == X86EMUL_PROPAGATE_FAULT) { 2185 /* FIXME: need to provide precise fault address */ 2186 + emulate_pf(ctxt, new_tss_base, err); 2187 return ret; 2188 } 2189 ··· 2196 ctxt->vcpu, &err); 2197 if (ret == X86EMUL_PROPAGATE_FAULT) { 2198 /* FIXME: need to provide precise fault address */ 2199 + emulate_pf(ctxt, new_tss_base, err); 2200 return ret; 2201 } 2202 } ··· 2238 struct decode_cache *c = &ctxt->decode; 2239 int ret; 2240 2241 + if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) { 2242 + emulate_gp(ctxt, 0); 2243 + return X86EMUL_PROPAGATE_FAULT; 2244 + } 2245 c->eip = tss->eip; 2246 ctxt->eflags = tss->eflags | 2; 2247 c->regs[VCPU_REGS_RAX] = tss->eax; ··· 2304 &err); 2305 if (ret == X86EMUL_PROPAGATE_FAULT) { 2306 /* FIXME: need to provide precise fault address */ 2307 + emulate_pf(ctxt, old_tss_base, err); 2308 return ret; 2309 } 2310 ··· 2314 &err); 2315 if (ret == X86EMUL_PROPAGATE_FAULT) { 2316 /* FIXME: need to provide precise fault address */ 2317 + emulate_pf(ctxt, old_tss_base, err); 2318 return ret; 2319 } 2320 ··· 2322 &err); 2323 if (ret == X86EMUL_PROPAGATE_FAULT) { 2324 /* FIXME: need to provide precise fault address */ 2325 + emulate_pf(ctxt, new_tss_base, err); 2326 return ret; 2327 } 2328 ··· 2335 ctxt->vcpu, &err); 2336 if (ret == X86EMUL_PROPAGATE_FAULT) { 2337 /* FIXME: need to provide precise fault address */ 2338 + emulate_pf(ctxt, new_tss_base, err); 2339 return ret; 2340 } 2341 } ··· 2352 int ret; 2353 u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); 2354 ulong old_tss_base = 2355 + ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); 2356 u32 desc_limit; 2357 2358 /* FIXME: old_tss_base == ~0 ? */ ··· 2369 if (reason != TASK_SWITCH_IRET) { 2370 if ((tss_selector & 3) > next_tss_desc.dpl || 2371 ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { 2372 + emulate_gp(ctxt, 0); 2373 return X86EMUL_PROPAGATE_FAULT; 2374 } 2375 } ··· 2378 if (!next_tss_desc.p || 2379 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || 2380 desc_limit < 0x2b)) { 2381 + emulate_ts(ctxt, tss_selector & 0xfffc); 2382 return X86EMUL_PROPAGATE_FAULT; 2383 } 2384 ··· 2425 c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 
4 : 2; 2426 c->lock_prefix = 0; 2427 c->src.val = (unsigned long) error_code; 2428 + emulate_push(ctxt, ops); 2429 } 2430 2431 return ret; ··· 2439 struct decode_cache *c = &ctxt->decode; 2440 int rc; 2441 2442 c->eip = ctxt->eip; 2443 c->dst.type = OP_NONE; 2444 2445 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, 2446 has_error_code, error_code); 2447 2448 if (rc == X86EMUL_CONTINUE) { 2449 rc = writeback(ctxt, ops); 2450 + if (rc == X86EMUL_CONTINUE) 2451 + ctxt->eip = c->eip; 2452 } 2453 2454 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; ··· 2474 int rc = X86EMUL_CONTINUE; 2475 int saved_dst_type = c->dst.type; 2476 2477 + ctxt->decode.mem_read.pos = 0; 2478 2479 if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { 2480 + emulate_ud(ctxt); 2481 goto done; 2482 } 2483 2484 /* LOCK prefix is allowed only with some instructions */ 2485 if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { 2486 + emulate_ud(ctxt); 2487 goto done; 2488 } 2489 2490 /* Privileged instruction can be executed only in CPL=0 */ 2491 if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { 2492 + emulate_gp(ctxt, 0); 2493 goto done; 2494 } 2495 ··· 2506 if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { 2507 string_done: 2508 ctxt->restart = false; 2509 + ctxt->eip = c->eip; 2510 goto done; 2511 } 2512 /* The second termination condition only applies for REPE ··· 2529 } 2530 2531 if (c->src.type == OP_MEM) { 2532 + rc = read_emulated(ctxt, ops, (unsigned long)c->src.ptr, 2533 + c->src.valptr, c->src.bytes); 2534 if (rc != X86EMUL_CONTINUE) 2535 goto done; 2536 c->src.orig_val = c->src.val; 2537 } 2538 2539 if (c->src2.type == OP_MEM) { 2540 + rc = read_emulated(ctxt, ops, (unsigned long)c->src2.ptr, 2541 + &c->src2.val, c->src2.bytes); 2542 if (rc != X86EMUL_CONTINUE) 2543 goto done; 2544 } ··· 2553 2554 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { 2555 /* optimisation - avoid slow emulated read if Mov */ 2556 + rc = read_emulated(ctxt, ops, (unsigned long)c->dst.ptr, 2557 + &c->dst.val, c->dst.bytes); 2558 if (rc != X86EMUL_CONTINUE) 2559 goto done; 2560 } ··· 2571 emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); 2572 break; 2573 case 0x06: /* push es */ 2574 + emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); 2575 break; 2576 case 0x07: /* pop es */ 2577 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); ··· 2583 emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); 2584 break; 2585 case 0x0e: /* push cs */ 2586 + emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); 2587 break; 2588 case 0x10 ... 0x15: 2589 adc: /* adc */ 2590 emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); 2591 break; 2592 case 0x16: /* push ss */ 2593 + emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); 2594 break; 2595 case 0x17: /* pop ss */ 2596 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); ··· 2602 emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); 2603 break; 2604 case 0x1e: /* push ds */ 2605 + emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); 2606 break; 2607 case 0x1f: /* pop ds */ 2608 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); ··· 2632 emulate_1op("dec", c->dst, ctxt->eflags); 2633 break; 2634 case 0x50 ... 0x57: /* push reg */ 2635 + emulate_push(ctxt, ops); 2636 break; 2637 case 0x58 ... 
0x5f: /* pop reg */ 2638 pop_instruction: ··· 2641 goto done; 2642 break; 2643 case 0x60: /* pusha */ 2644 + rc = emulate_pusha(ctxt, ops); 2645 + if (rc != X86EMUL_CONTINUE) 2646 + goto done; 2647 break; 2648 case 0x61: /* popa */ 2649 rc = emulate_popa(ctxt, ops); ··· 2655 break; 2656 case 0x68: /* push imm */ 2657 case 0x6a: /* push imm8 */ 2658 + emulate_push(ctxt, ops); 2659 break; 2660 case 0x6c: /* insb */ 2661 case 0x6d: /* insw/insd */ 2662 c->dst.bytes = min(c->dst.bytes, 4u); 2663 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 2664 c->dst.bytes)) { 2665 + emulate_gp(ctxt, 0); 2666 goto done; 2667 } 2668 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, ··· 2674 c->src.bytes = min(c->src.bytes, 4u); 2675 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 2676 c->src.bytes)) { 2677 + emulate_gp(ctxt, 0); 2678 goto done; 2679 } 2680 ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], ··· 2707 } 2708 break; 2709 case 0x84 ... 0x85: 2710 + test: 2711 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); 2712 break; 2713 case 0x86 ... 0x87: /* xchg */ ··· 2735 break; 2736 case 0x88 ... 0x8b: /* mov */ 2737 goto mov; 2738 + case 0x8c: /* mov r/m, sreg */ 2739 + if (c->modrm_reg > VCPU_SREG_GS) { 2740 + emulate_ud(ctxt); 2741 goto done; 2742 } 2743 + c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); 2744 break; 2745 case 0x8d: /* lea r16/r32, m */ 2746 c->dst.val = c->modrm_ea; 2747 break; ··· 2757 2758 if (c->modrm_reg == VCPU_SREG_CS || 2759 c->modrm_reg > VCPU_SREG_GS) { 2760 + emulate_ud(ctxt); 2761 goto done; 2762 } 2763 2764 if (c->modrm_reg == VCPU_SREG_SS) 2765 + ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; 2766 2767 rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); 2768 ··· 2775 goto done; 2776 break; 2777 case 0x90: /* nop / xchg r8,rax */ 2778 + if (c->dst.ptr == (unsigned long *)&c->regs[VCPU_REGS_RAX]) { 2779 + c->dst.type = OP_NONE; /* nop */ 2780 break; 2781 } 2782 case 0x91 ... 0x97: /* xchg reg,rax */ 2783 + c->src.type = OP_REG; 2784 + c->src.bytes = c->op_bytes; 2785 c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; 2786 c->src.val = *(c->src.ptr); 2787 goto xchg; 2788 case 0x9c: /* pushf */ 2789 c->src.val = (unsigned long) ctxt->eflags; 2790 + emulate_push(ctxt, ops); 2791 break; 2792 case 0x9d: /* popf */ 2793 c->dst.type = OP_REG; ··· 2797 if (rc != X86EMUL_CONTINUE) 2798 goto done; 2799 break; 2800 + case 0xa0 ... 0xa3: /* mov */ 2801 case 0xa4 ... 0xa5: /* movs */ 2802 goto mov; 2803 case 0xa6 ... 0xa7: /* cmps */ 2804 c->dst.type = OP_NONE; /* Disable writeback. */ 2805 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); 2806 goto cmp; 2807 + case 0xa8 ... 0xa9: /* test ax, imm */ 2808 + goto test; 2809 case 0xaa ... 
0xab: /* stos */ 2810 c->dst.val = c->regs[VCPU_REGS_RAX]; 2811 break; ··· 2855 long int rel = c->src.val; 2856 c->src.val = (unsigned long) c->eip; 2857 jmp_rel(c, rel); 2858 + emulate_push(ctxt, ops); 2859 break; 2860 } 2861 case 0xe9: /* jmp rel */ 2862 goto jmp; 2863 + case 0xea: { /* jmp far */ 2864 + unsigned short sel; 2865 jump_far: 2866 + memcpy(&sel, c->src.valptr + c->op_bytes, 2); 2867 + 2868 + if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) 2869 goto done; 2870 2871 + c->eip = 0; 2872 + memcpy(&c->eip, c->src.valptr, c->op_bytes); 2873 break; 2874 + } 2875 case 0xeb: 2876 jmp: /* jmp rel short */ 2877 jmp_rel(c, c->src.val); ··· 2879 do_io_in: 2880 c->dst.bytes = min(c->dst.bytes, 4u); 2881 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { 2882 + emulate_gp(ctxt, 0); 2883 goto done; 2884 } 2885 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, 2886 &c->dst.val)) 2887 goto done; /* IO is needed */ 2888 break; 2889 + case 0xee: /* out dx,al */ 2890 + case 0xef: /* out dx,(e/r)ax */ 2891 c->src.val = c->regs[VCPU_REGS_RDX]; 2892 do_io_out: 2893 c->dst.bytes = min(c->dst.bytes, 4u); 2894 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { 2895 + emulate_gp(ctxt, 0); 2896 goto done; 2897 } 2898 ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, ··· 2916 c->dst.type = OP_NONE; /* Disable writeback. */ 2917 break; 2918 case 0xfa: /* cli */ 2919 + if (emulator_bad_iopl(ctxt, ops)) { 2920 + emulate_gp(ctxt, 0); 2921 + goto done; 2922 + } else { 2923 ctxt->eflags &= ~X86_EFLAGS_IF; 2924 c->dst.type = OP_NONE; /* Disable writeback. */ 2925 } 2926 break; 2927 case 0xfb: /* sti */ 2928 + if (emulator_bad_iopl(ctxt, ops)) { 2929 + emulate_gp(ctxt, 0); 2930 + goto done; 2931 + } else { 2932 + ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; 2933 ctxt->eflags |= X86_EFLAGS_IF; 2934 c->dst.type = OP_NONE; /* Disable writeback. */ 2935 } ··· 2964 c->dst.type = saved_dst_type; 2965 2966 if ((c->d & SrcMask) == SrcSI) 2967 + string_addr_inc(ctxt, seg_override_base(ctxt, ops, c), 2968 + VCPU_REGS_RSI, &c->src); 2969 2970 if ((c->d & DstMask) == DstDI) 2971 + string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI, 2972 + &c->dst); 2973 2974 if (c->rep_prefix && (c->d & String)) { 2975 struct read_cache *rc = &ctxt->decode.io_read; ··· 2981 (rc->end != 0 && rc->end == rc->pos)) 2982 ctxt->restart = false; 2983 } 2984 + /* 2985 + * reset read cache here in case string instruction is restared 2986 + * without decoding 2987 + */ 2988 + ctxt->decode.mem_read.end = 0; 2989 + ctxt->eip = c->eip; 2990 2991 done: 2992 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; ··· 3051 c->dst.type = OP_NONE; 3052 break; 3053 case 5: /* not defined */ 3054 + emulate_ud(ctxt); 3055 goto done; 3056 case 7: /* invlpg*/ 3057 emulate_invlpg(ctxt->vcpu, c->modrm_ea); ··· 3063 } 3064 break; 3065 case 0x05: /* syscall */ 3066 + rc = emulate_syscall(ctxt, ops); 3067 if (rc != X86EMUL_CONTINUE) 3068 goto done; 3069 else ··· 3073 emulate_clts(ctxt->vcpu); 3074 c->dst.type = OP_NONE; 3075 break; 3076 case 0x09: /* wbinvd */ 3077 + kvm_emulate_wbinvd(ctxt->vcpu); 3078 + c->dst.type = OP_NONE; 3079 + break; 3080 + case 0x08: /* invd */ 3081 case 0x0d: /* GrpP (prefetch) */ 3082 case 0x18: /* Grp16 (prefetch/nop) */ 3083 c->dst.type = OP_NONE; ··· 3084 case 1: 3085 case 5 ... 7: 3086 case 9 ... 
15: 3087 + emulate_ud(ctxt); 3088 goto done; 3089 } 3090 c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); ··· 3093 case 0x21: /* mov from dr to reg */ 3094 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 3095 (c->modrm_reg == 4 || c->modrm_reg == 5)) { 3096 + emulate_ud(ctxt); 3097 goto done; 3098 } 3099 + ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu); 3100 c->dst.type = OP_NONE; /* no writeback */ 3101 break; 3102 case 0x22: /* mov reg, cr */ 3103 + if (ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu)) { 3104 + emulate_gp(ctxt, 0); 3105 + goto done; 3106 + } 3107 c->dst.type = OP_NONE; 3108 break; 3109 case 0x23: /* mov from reg to dr */ 3110 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 3111 (c->modrm_reg == 4 || c->modrm_reg == 5)) { 3112 + emulate_ud(ctxt); 3113 goto done; 3114 } 3115 + 3116 + if (ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] & 3117 + ((ctxt->mode == X86EMUL_MODE_PROT64) ? 3118 + ~0ULL : ~0U), ctxt->vcpu) < 0) { 3119 + /* #UD condition is already handled by the code above */ 3120 + emulate_gp(ctxt, 0); 3121 + goto done; 3122 + } 3123 + 3124 c->dst.type = OP_NONE; /* no writeback */ 3125 break; 3126 case 0x30: 3127 /* wrmsr */ 3128 msr_data = (u32)c->regs[VCPU_REGS_RAX] 3129 | ((u64)c->regs[VCPU_REGS_RDX] << 32); 3130 + if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { 3131 + emulate_gp(ctxt, 0); 3132 goto done; 3133 } 3134 rc = X86EMUL_CONTINUE; ··· 3125 break; 3126 case 0x32: 3127 /* rdmsr */ 3128 + if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { 3129 + emulate_gp(ctxt, 0); 3130 goto done; 3131 } else { 3132 c->regs[VCPU_REGS_RAX] = (u32)msr_data; ··· 3136 c->dst.type = OP_NONE; 3137 break; 3138 case 0x34: /* sysenter */ 3139 + rc = emulate_sysenter(ctxt, ops); 3140 if (rc != X86EMUL_CONTINUE) 3141 goto done; 3142 else 3143 goto writeback; 3144 break; 3145 case 0x35: /* sysexit */ 3146 + rc = emulate_sysexit(ctxt, ops); 3147 if (rc != X86EMUL_CONTINUE) 3148 goto done; 3149 else ··· 3160 c->dst.type = OP_NONE; 3161 break; 3162 case 0xa0: /* push fs */ 3163 + emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); 3164 break; 3165 case 0xa1: /* pop fs */ 3166 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); ··· 3179 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); 3180 break; 3181 case 0xa8: /* push gs */ 3182 + emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); 3183 break; 3184 case 0xa9: /* pop gs */ 3185 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
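
The emulate.c hunk above converts the open-coded exception plumbing to the emulate_ud()/emulate_gp() helpers and front-loads the sanity checks (64-bit-only decode, LOCK prefix placement, CPL for privileged instructions) before any operand is touched. The fragment below is only a minimal sketch of that guard pattern, with stand-in flag names and a stand-in struct rather than the emulator's real struct decode_cache; it shows the shape of the checks, not the in-tree code.

#include <stdbool.h>

/* Stand-in flags and context; the real emulator keeps these in
 * struct x86_emulate_ctxt / struct decode_cache. */
#define F_NO64 (1u << 0)   /* undefined in 64-bit mode */
#define F_LOCK (1u << 1)   /* LOCK prefix architecturally allowed */
#define F_PRIV (1u << 2)   /* requires CPL 0 */

struct insn {
        unsigned int flags;
        bool lock_prefix;
        bool dst_is_mem;
};

enum fault { FAULT_NONE, FAULT_UD, FAULT_GP0 };

/*
 * Pre-execution guards: a failed check queues a fault to be injected
 * instead of executing the instruction, mirroring the emulate_ud() /
 * emulate_gp() calls in the hunk above.
 */
static enum fault pre_execute_checks(const struct insn *c,
                                     bool long_mode, int cpl)
{
        if (long_mode && (c->flags & F_NO64))
                return FAULT_UD;        /* #UD: invalid in 64-bit mode */

        if (c->lock_prefix && (!(c->flags & F_LOCK) || !c->dst_is_mem))
                return FAULT_UD;        /* #UD: LOCK needs a lockable memory op */

        if ((c->flags & F_PRIV) && cpl != 0)
                return FAULT_GP0;       /* #GP(0): privileged instruction */

        return FAULT_NONE;
}
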
+90 -56
arch/x86/kvm/i8254.c
··· 5 * Copyright (c) 2006 Intel Corporation 6 * Copyright (c) 2007 Keir Fraser, XenSource Inc 7 * Copyright (c) 2008 Intel Corporation 8 * 9 * Permission is hereby granted, free of charge, to any person obtaining a copy 10 * of this software and associated documentation files (the "Software"), to deal ··· 34 35 #include <linux/kvm_host.h> 36 #include <linux/slab.h> 37 38 #include "irq.h" 39 #include "i8254.h" ··· 245 { 246 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, 247 irq_ack_notifier); 248 - raw_spin_lock(&ps->inject_lock); 249 - if (atomic_dec_return(&ps->pit_timer.pending) < 0) 250 atomic_inc(&ps->pit_timer.pending); 251 ps->irq_ack = 1; 252 - raw_spin_unlock(&ps->inject_lock); 253 } 254 255 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) ··· 276 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 277 } 278 279 - static void destroy_pit_timer(struct kvm_timer *pt) 280 { 281 - pr_debug("execute del timer!\n"); 282 - hrtimer_cancel(&pt->timer); 283 } 284 285 static bool kpit_is_periodic(struct kvm_timer *ktimer) ··· 293 .is_periodic = kpit_is_periodic, 294 }; 295 296 static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) 297 { 298 struct kvm_timer *pt = &ps->pit_timer; ··· 358 359 /* TODO The new value only affected after the retriggered */ 360 hrtimer_cancel(&pt->timer); 361 pt->period = interval; 362 ps->is_periodic = is_period; 363 364 - pt->timer.function = kvm_timer_fn; 365 pt->t_ops = &kpit_ops; 366 pt->kvm = ps->pit->kvm; 367 - pt->vcpu = pt->kvm->bsp_vcpu; 368 369 atomic_set(&pt->pending, 0); 370 ps->irq_ack = 1; ··· 413 } 414 break; 415 default: 416 - destroy_pit_timer(&ps->pit_timer); 417 } 418 } 419 ··· 692 693 mutex_init(&pit->pit_state.lock); 694 mutex_lock(&pit->pit_state.lock); 695 - raw_spin_lock_init(&pit->pit_state.inject_lock); 696 697 kvm->arch.vpit = pit; 698 pit->kvm = kvm; ··· 752 struct hrtimer *timer; 753 754 if (kvm->arch.vpit) { 755 kvm_unregister_irq_mask_notifier(kvm, 0, 756 &kvm->arch.vpit->mask_notifier); 757 kvm_unregister_irq_ack_notifier(kvm, ··· 762 mutex_lock(&kvm->arch.vpit->pit_state.lock); 763 timer = &kvm->arch.vpit->pit_state.pit_timer.timer; 764 hrtimer_cancel(timer); 765 kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); 766 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 767 kfree(kvm->arch.vpit); 768 - } 769 - } 770 - 771 - static void __inject_pit_timer_intr(struct kvm *kvm) 772 - { 773 - struct kvm_vcpu *vcpu; 774 - int i; 775 - 776 - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 777 - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 778 - 779 - /* 780 - * Provides NMI watchdog support via Virtual Wire mode. 781 - * The route is: PIT -> PIC -> LVT0 in NMI mode. 782 - * 783 - * Note: Our Virtual Wire implementation is simplified, only 784 - * propagating PIT interrupts to all VCPUs when they have set 785 - * LVT0 to NMI delivery. Other PIC interrupts are just sent to 786 - * VCPU0, and only if its LVT0 is in EXTINT mode. 787 - */ 788 - if (kvm->arch.vapics_in_nmi_mode > 0) 789 - kvm_for_each_vcpu(i, vcpu, kvm) 790 - kvm_apic_nmi_wd_deliver(vcpu); 791 - } 792 - 793 - void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) 794 - { 795 - struct kvm_pit *pit = vcpu->kvm->arch.vpit; 796 - struct kvm *kvm = vcpu->kvm; 797 - struct kvm_kpit_state *ps; 798 - 799 - if (pit) { 800 - int inject = 0; 801 - ps = &pit->pit_state; 802 - 803 - /* Try to inject pending interrupts when 804 - * last one has been acked. 
805 - */ 806 - raw_spin_lock(&ps->inject_lock); 807 - if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { 808 - ps->irq_ack = 0; 809 - inject = 1; 810 - } 811 - raw_spin_unlock(&ps->inject_lock); 812 - if (inject) 813 - __inject_pit_timer_intr(kvm); 814 } 815 }
··· 5 * Copyright (c) 2006 Intel Corporation 6 * Copyright (c) 2007 Keir Fraser, XenSource Inc 7 * Copyright (c) 2008 Intel Corporation 8 + * Copyright 2009 Red Hat, Inc. and/or its affilates. 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal ··· 33 34 #include <linux/kvm_host.h> 35 #include <linux/slab.h> 36 + #include <linux/workqueue.h> 37 38 #include "irq.h" 39 #include "i8254.h" ··· 243 { 244 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, 245 irq_ack_notifier); 246 + int value; 247 + 248 + spin_lock(&ps->inject_lock); 249 + value = atomic_dec_return(&ps->pit_timer.pending); 250 + if (value < 0) 251 + /* spurious acks can be generated if, for example, the 252 + * PIC is being reset. Handle it gracefully here 253 + */ 254 atomic_inc(&ps->pit_timer.pending); 255 + else if (value > 0) 256 + /* in this case, we had multiple outstanding pit interrupts 257 + * that we needed to inject. Reinject 258 + */ 259 + queue_work(ps->pit->wq, &ps->pit->expired); 260 ps->irq_ack = 1; 261 + spin_unlock(&ps->inject_lock); 262 } 263 264 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) ··· 263 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 264 } 265 266 + static void destroy_pit_timer(struct kvm_pit *pit) 267 { 268 + hrtimer_cancel(&pit->pit_state.pit_timer.timer); 269 + cancel_work_sync(&pit->expired); 270 } 271 272 static bool kpit_is_periodic(struct kvm_timer *ktimer) ··· 280 .is_periodic = kpit_is_periodic, 281 }; 282 283 + static void pit_do_work(struct work_struct *work) 284 + { 285 + struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); 286 + struct kvm *kvm = pit->kvm; 287 + struct kvm_vcpu *vcpu; 288 + int i; 289 + struct kvm_kpit_state *ps = &pit->pit_state; 290 + int inject = 0; 291 + 292 + /* Try to inject pending interrupts when 293 + * last one has been acked. 294 + */ 295 + spin_lock(&ps->inject_lock); 296 + if (ps->irq_ack) { 297 + ps->irq_ack = 0; 298 + inject = 1; 299 + } 300 + spin_unlock(&ps->inject_lock); 301 + if (inject) { 302 + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 303 + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 304 + 305 + /* 306 + * Provides NMI watchdog support via Virtual Wire mode. 307 + * The route is: PIT -> PIC -> LVT0 in NMI mode. 308 + * 309 + * Note: Our Virtual Wire implementation is simplified, only 310 + * propagating PIT interrupts to all VCPUs when they have set 311 + * LVT0 to NMI delivery. Other PIC interrupts are just sent to 312 + * VCPU0, and only if its LVT0 is in EXTINT mode. 
313 + */ 314 + if (kvm->arch.vapics_in_nmi_mode > 0) 315 + kvm_for_each_vcpu(i, vcpu, kvm) 316 + kvm_apic_nmi_wd_deliver(vcpu); 317 + } 318 + } 319 + 320 + static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) 321 + { 322 + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); 323 + struct kvm_pit *pt = ktimer->kvm->arch.vpit; 324 + 325 + if (ktimer->reinject || !atomic_read(&ktimer->pending)) { 326 + atomic_inc(&ktimer->pending); 327 + queue_work(pt->wq, &pt->expired); 328 + } 329 + 330 + if (ktimer->t_ops->is_periodic(ktimer)) { 331 + hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); 332 + return HRTIMER_RESTART; 333 + } else 334 + return HRTIMER_NORESTART; 335 + } 336 + 337 static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) 338 { 339 struct kvm_timer *pt = &ps->pit_timer; ··· 291 292 /* TODO The new value only affected after the retriggered */ 293 hrtimer_cancel(&pt->timer); 294 + cancel_work_sync(&ps->pit->expired); 295 pt->period = interval; 296 ps->is_periodic = is_period; 297 298 + pt->timer.function = pit_timer_fn; 299 pt->t_ops = &kpit_ops; 300 pt->kvm = ps->pit->kvm; 301 302 atomic_set(&pt->pending, 0); 303 ps->irq_ack = 1; ··· 346 } 347 break; 348 default: 349 + destroy_pit_timer(kvm->arch.vpit); 350 } 351 } 352 ··· 625 626 mutex_init(&pit->pit_state.lock); 627 mutex_lock(&pit->pit_state.lock); 628 + spin_lock_init(&pit->pit_state.inject_lock); 629 + 630 + pit->wq = create_singlethread_workqueue("kvm-pit-wq"); 631 + if (!pit->wq) { 632 + mutex_unlock(&pit->pit_state.lock); 633 + kfree(pit); 634 + return NULL; 635 + } 636 + INIT_WORK(&pit->expired, pit_do_work); 637 638 kvm->arch.vpit = pit; 639 pit->kvm = kvm; ··· 677 struct hrtimer *timer; 678 679 if (kvm->arch.vpit) { 680 + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev); 681 + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, 682 + &kvm->arch.vpit->speaker_dev); 683 kvm_unregister_irq_mask_notifier(kvm, 0, 684 &kvm->arch.vpit->mask_notifier); 685 kvm_unregister_irq_ack_notifier(kvm, ··· 684 mutex_lock(&kvm->arch.vpit->pit_state.lock); 685 timer = &kvm->arch.vpit->pit_state.pit_timer.timer; 686 hrtimer_cancel(timer); 687 + cancel_work_sync(&kvm->arch.vpit->expired); 688 kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); 689 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 690 + destroy_workqueue(kvm->arch.vpit->wq); 691 kfree(kvm->arch.vpit); 692 } 693 }
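
The i8254.c rework above stops injecting PIT ticks from the vcpu entry path: pit_timer_fn() only bumps the pending counter and queues work on the new kvm-pit-wq workqueue, pit_do_work() performs the kvm_set_irq() calls (and the NMI-watchdog fan-out) in process context, and every teardown path now pairs hrtimer_cancel() with cancel_work_sync(). Below is a minimal sketch of that hrtimer-to-workqueue handoff using an illustrative struct of my own rather than struct kvm_pit; the hrtimer and workqueue calls are standard kernel APIs of this era.

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/workqueue.h>
#include <linux/ktime.h>

struct periodic_source {
        struct hrtimer timer;
        struct workqueue_struct *wq;    /* singlethreaded, like kvm-pit-wq */
        struct work_struct expired;
        s64 period_ns;
};

/* The hrtimer callback runs in hard-irq context: just hand off to the wq. */
static enum hrtimer_restart tick_fn(struct hrtimer *t)
{
        struct periodic_source *src =
                container_of(t, struct periodic_source, timer);

        queue_work(src->wq, &src->expired);
        hrtimer_add_expires_ns(&src->timer, src->period_ns);
        return HRTIMER_RESTART;
}

/* The work item runs in process context, where delivering the tick (or
 * doing anything else that may sleep) is safe. */
static void expired_fn(struct work_struct *work)
{
        struct periodic_source *src =
                container_of(work, struct periodic_source, expired);
        (void)src;      /* deliver the tick here */
}

static int periodic_source_start(struct periodic_source *src, s64 period_ns)
{
        src->wq = create_singlethread_workqueue("sketch-wq");
        if (!src->wq)
                return -ENOMEM;
        INIT_WORK(&src->expired, expired_fn);
        src->period_ns = period_ns;
        hrtimer_init(&src->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        src->timer.function = tick_fn;
        hrtimer_start(&src->timer, ns_to_ktime(period_ns), HRTIMER_MODE_REL);
        return 0;
}

static void periodic_source_stop(struct periodic_source *src)
{
        hrtimer_cancel(&src->timer);       /* stop re-arming first ...     */
        cancel_work_sync(&src->expired);   /* ... then flush queued work   */
        destroy_workqueue(src->wq);
}
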
+3 -1
arch/x86/kvm/i8254.h
··· 27 u32 speaker_data_on; 28 struct mutex lock; 29 struct kvm_pit *pit; 30 - raw_spinlock_t inject_lock; 31 unsigned long irq_ack; 32 struct kvm_irq_ack_notifier irq_ack_notifier; 33 }; ··· 40 struct kvm_kpit_state pit_state; 41 int irq_source_id; 42 struct kvm_irq_mask_notifier mask_notifier; 43 }; 44 45 #define KVM_PIT_BASE_ADDRESS 0x40
··· 27 u32 speaker_data_on; 28 struct mutex lock; 29 struct kvm_pit *pit; 30 + spinlock_t inject_lock; 31 unsigned long irq_ack; 32 struct kvm_irq_ack_notifier irq_ack_notifier; 33 }; ··· 40 struct kvm_kpit_state pit_state; 41 int irq_source_id; 42 struct kvm_irq_mask_notifier mask_notifier; 43 + struct workqueue_struct *wq; 44 + struct work_struct expired; 45 }; 46 47 #define KVM_PIT_BASE_ADDRESS 0x40
+31 -17
arch/x86/kvm/i8259.c
··· 3 * 4 * Copyright (c) 2003-2004 Fabrice Bellard 5 * Copyright (c) 2007 Intel Corporation 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal ··· 34 #include <linux/kvm_host.h> 35 #include "trace.h" 36 37 static void pic_lock(struct kvm_pic *s) 38 __acquires(&s->lock) 39 { ··· 46 __releases(&s->lock) 47 { 48 bool wakeup = s->wakeup_needed; 49 - struct kvm_vcpu *vcpu; 50 51 s->wakeup_needed = false; 52 53 raw_spin_unlock(&s->lock); 54 55 if (wakeup) { 56 - vcpu = s->kvm->bsp_vcpu; 57 - if (vcpu) 58 - kvm_vcpu_kick(vcpu); 59 } 60 } 61 ··· 185 pic_set_irq1(&s->pics[0], 2, 0); 186 } 187 irq = pic_get_irq(&s->pics[0]); 188 - if (irq >= 0) 189 - s->irq_request(s->irq_request_opaque, 1); 190 - else 191 - s->irq_request(s->irq_request_opaque, 0); 192 } 193 194 void kvm_pic_update_irq(struct kvm_pic *s) ··· 270 void kvm_pic_reset(struct kvm_kpic_state *s) 271 { 272 int irq; 273 - struct kvm *kvm = s->pics_state->irq_request_opaque; 274 - struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; 275 u8 irr = s->irr, isr = s->imr; 276 277 s->last_irr = 0; ··· 309 /* 310 * deassert a pending interrupt 311 */ 312 - s->pics_state->irq_request(s->pics_state-> 313 - irq_request_opaque, 0); 314 s->init_state = 1; 315 s->init4 = val & 1; 316 if (val & 0x02) ··· 363 } 364 } else 365 switch (s->init_state) { 366 - case 0: /* normal mode */ 367 s->imr = val; 368 pic_update_irq(s->pics_state); 369 break; 370 case 1: 371 s->irq_base = val & 0xf8; 372 s->init_state = 2; ··· 535 /* 536 * callback when PIC0 irq status changed 537 */ 538 - static void pic_irq_request(void *opaque, int level) 539 { 540 - struct kvm *kvm = opaque; 541 struct kvm_vcpu *vcpu = kvm->bsp_vcpu; 542 struct kvm_pic *s = pic_irqchip(kvm); 543 int irq = pic_get_irq(&s->pics[0]); ··· 565 s->kvm = kvm; 566 s->pics[0].elcr_mask = 0xf8; 567 s->pics[1].elcr_mask = 0xde; 568 - s->irq_request = pic_irq_request; 569 - s->irq_request_opaque = kvm; 570 s->pics[0].pics_state = s; 571 s->pics[1].pics_state = s; 572
··· 3 * 4 * Copyright (c) 2003-2004 Fabrice Bellard 5 * Copyright (c) 2007 Intel Corporation 6 + * Copyright 2009 Red Hat, Inc. and/or its affilates. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a copy 9 * of this software and associated documentation files (the "Software"), to deal ··· 33 #include <linux/kvm_host.h> 34 #include "trace.h" 35 36 + static void pic_irq_request(struct kvm *kvm, int level); 37 + 38 static void pic_lock(struct kvm_pic *s) 39 __acquires(&s->lock) 40 { ··· 43 __releases(&s->lock) 44 { 45 bool wakeup = s->wakeup_needed; 46 + struct kvm_vcpu *vcpu, *found = NULL; 47 + int i; 48 49 s->wakeup_needed = false; 50 51 raw_spin_unlock(&s->lock); 52 53 if (wakeup) { 54 + kvm_for_each_vcpu(i, vcpu, s->kvm) { 55 + if (kvm_apic_accept_pic_intr(vcpu)) { 56 + found = vcpu; 57 + break; 58 + } 59 + } 60 + 61 + if (!found) 62 + found = s->kvm->bsp_vcpu; 63 + 64 + kvm_vcpu_kick(found); 65 } 66 } 67 ··· 173 pic_set_irq1(&s->pics[0], 2, 0); 174 } 175 irq = pic_get_irq(&s->pics[0]); 176 + pic_irq_request(s->kvm, irq >= 0); 177 } 178 179 void kvm_pic_update_irq(struct kvm_pic *s) ··· 261 void kvm_pic_reset(struct kvm_kpic_state *s) 262 { 263 int irq; 264 + struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu; 265 u8 irr = s->irr, isr = s->imr; 266 267 s->last_irr = 0; ··· 301 /* 302 * deassert a pending interrupt 303 */ 304 + pic_irq_request(s->pics_state->kvm, 0); 305 s->init_state = 1; 306 s->init4 = val & 1; 307 if (val & 0x02) ··· 356 } 357 } else 358 switch (s->init_state) { 359 + case 0: { /* normal mode */ 360 + u8 imr_diff = s->imr ^ val, 361 + off = (s == &s->pics_state->pics[0]) ? 0 : 8; 362 s->imr = val; 363 + for (irq = 0; irq < PIC_NUM_PINS/2; irq++) 364 + if (imr_diff & (1 << irq)) 365 + kvm_fire_mask_notifiers( 366 + s->pics_state->kvm, 367 + SELECT_PIC(irq + off), 368 + irq + off, 369 + !!(s->imr & (1 << irq))); 370 pic_update_irq(s->pics_state); 371 break; 372 + } 373 case 1: 374 s->irq_base = val & 0xf8; 375 s->init_state = 2; ··· 518 /* 519 * callback when PIC0 irq status changed 520 */ 521 + static void pic_irq_request(struct kvm *kvm, int level) 522 { 523 struct kvm_vcpu *vcpu = kvm->bsp_vcpu; 524 struct kvm_pic *s = pic_irqchip(kvm); 525 int irq = pic_get_irq(&s->pics[0]); ··· 549 s->kvm = kvm; 550 s->pics[0].elcr_mask = 0xf8; 551 s->pics[1].elcr_mask = 0xde; 552 s->pics[0].pics_state = s; 553 s->pics[1].pics_state = s; 554
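
Two things change in i8259.c above: the unlock path now kicks whichever vcpu can actually accept a PIC interrupt (falling back to the BSP), and a write to the interrupt mask register (OCW1) fires mask notifiers for exactly the pins whose mask bit flipped. The self-contained sketch below models only that second part, the XOR-diff loop; the notify() callback stands in for kvm_fire_mask_notifiers(), and the offset handling mirrors the master/slave split in the hunk.

#include <stdio.h>

/*
 * On an OCW1 write, only the pins whose mask bit actually changed need a
 * notification.  'off' is 0 for the master PIC and 8 for the slave.
 */
static void notify_mask_changes(unsigned char old_imr, unsigned char new_imr,
                                int off, void (*notify)(int gsi, int masked))
{
        unsigned char diff = old_imr ^ new_imr;
        int pin;

        for (pin = 0; pin < 8; pin++)
                if (diff & (1 << pin))
                        notify(pin + off, !!(new_imr & (1 << pin)));
}

static void print_change(int gsi, int masked)
{
        printf("gsi %d -> %s\n", gsi, masked ? "masked" : "unmasked");
}

int main(void)
{
        /* Guest unmasks IRQ0 and masks IRQ2 on the master PIC. */
        notify_mask_changes(0x01 /* old IMR */, 0x04 /* new IMR */, 0,
                            print_change);
        return 0;
}
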
+1 -1
arch/x86/kvm/irq.c
··· 1 /* 2 * irq.c: API for in kernel interrupt controller 3 * Copyright (c) 2007, Intel Corporation. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, ··· 90 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 91 { 92 kvm_inject_apic_timer_irqs(vcpu); 93 - kvm_inject_pit_timer_irqs(vcpu); 94 /* TODO: PIT, RTC etc. */ 95 } 96 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
··· 1 /* 2 * irq.c: API for in kernel interrupt controller 3 * Copyright (c) 2007, Intel Corporation. 4 + * Copyright 2009 Red Hat, Inc. and/or its affilates. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, ··· 89 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 90 { 91 kvm_inject_apic_timer_irqs(vcpu); 92 /* TODO: PIT, RTC etc. */ 93 } 94 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
-4
arch/x86/kvm/irq.h
··· 38 struct kvm; 39 struct kvm_vcpu; 40 41 - typedef void irq_request_func(void *opaque, int level); 42 - 43 struct kvm_kpic_state { 44 u8 last_irr; /* edge detection */ 45 u8 irr; /* interrupt request register */ ··· 65 unsigned pending_acks; 66 struct kvm *kvm; 67 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 68 - irq_request_func *irq_request; 69 - void *irq_request_opaque; 70 int output; /* intr from master PIC */ 71 struct kvm_io_device dev; 72 void (*ack_notifier)(void *opaque, int irq);
··· 38 struct kvm; 39 struct kvm_vcpu; 40 41 struct kvm_kpic_state { 42 u8 last_irr; /* edge detection */ 43 u8 irr; /* interrupt request register */ ··· 67 unsigned pending_acks; 68 struct kvm *kvm; 69 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 70 int output; /* intr from master PIC */ 71 struct kvm_io_device dev; 72 void (*ack_notifier)(void *opaque, int irq);
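
The fields removed from irq.h go hand in hand with the i8259.c change above: the PIC no longer reports its output line through a stored, untyped callback plus an opaque cookie, it simply calls the static pic_irq_request(kvm, level) in the same file. For contrast, the two shapes side by side (abbreviated, not the full struct kvm_pic):

/* Before: an untyped callback plus an opaque cookie stored in the PIC. */
typedef void irq_request_func(void *opaque, int level);

struct pic_callback_style {
        irq_request_func *irq_request;
        void *irq_request_opaque;       /* in practice the struct kvm * */
};

/* After: the PIC is built into kvm itself, so a direct, typed call
 * (pic_irq_request(struct kvm *kvm, int level)) replaces the indirection
 * and both fields disappear. */
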
+8
arch/x86/kvm/kvm_cache_regs.h
··· 36 37 static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) 38 { 39 if (!test_bit(VCPU_EXREG_PDPTR, 40 (unsigned long *)&vcpu->arch.regs_avail)) 41 kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); ··· 69 static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) 70 { 71 return kvm_read_cr4_bits(vcpu, ~0UL); 72 } 73 74 #endif
··· 36 37 static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) 38 { 39 + might_sleep(); /* on svm */ 40 + 41 if (!test_bit(VCPU_EXREG_PDPTR, 42 (unsigned long *)&vcpu->arch.regs_avail)) 43 kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); ··· 67 static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) 68 { 69 return kvm_read_cr4_bits(vcpu, ~0UL); 70 + } 71 + 72 + static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) 73 + { 74 + return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u) 75 + | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); 76 } 77 78 #endif
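
kvm_cache_regs.h gains two small things: a might_sleep() in kvm_pdptr_read(), documenting that refilling the PDPTR cache can sleep on SVM, and kvm_read_edx_eax(), which packs the guest's EDX:EAX pair into a single u64. Below is a sketch of how an MSR-write exit path might use the new helper; handle_wrmsr_sketch() and set_msr_sketch() are stand-in names, not the in-tree handlers.

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"

/* Stand-in for a real MSR write-back; always succeeds in this sketch. */
static int set_msr_sketch(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
        (void)vcpu; (void)msr; (void)data;
        return 0;
}

/* Hypothetical WRMSR exit handler: ECX selects the MSR, EDX:EAX is the value. */
static int handle_wrmsr_sketch(struct kvm_vcpu *vcpu)
{
        u32 msr  = kvm_register_read(vcpu, VCPU_REGS_RCX);
        u64 data = kvm_read_edx_eax(vcpu);      /* EDX:EAX as one 64-bit value */

        return set_msr_sketch(vcpu, msr, data);
}
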
+8 -9
arch/x86/kvm/lapic.c
··· 5 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2007 Novell 7 * Copyright (C) 2007 Intel 8 * 9 * Authors: 10 * Dor Laor <dor.laor@qumranet.com> ··· 329 "dest_mode 0x%x, short_hand 0x%x\n", 330 target, source, dest, dest_mode, short_hand); 331 332 - ASSERT(!target); 333 switch (short_hand) { 334 case APIC_DEST_NOSHORT: 335 if (dest_mode == 0) ··· 534 struct kvm_vcpu *vcpu = apic->vcpu; 535 struct kvm_run *run = vcpu->run; 536 537 - set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); 538 run->tpr_access.rip = kvm_rip_read(vcpu); 539 run->tpr_access.is_write = write; 540 } ··· 1107 u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); 1108 int r = 0; 1109 1110 - if (kvm_vcpu_is_bsp(vcpu)) { 1111 - if (!apic_hw_enabled(vcpu->arch.apic)) 1112 - r = 1; 1113 - if ((lvt0 & APIC_LVT_MASKED) == 0 && 1114 - GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) 1115 - r = 1; 1116 - } 1117 return r; 1118 } 1119
··· 5 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2007 Novell 7 * Copyright (C) 2007 Intel 8 + * Copyright 2009 Red Hat, Inc. and/or its affilates. 9 * 10 * Authors: 11 * Dor Laor <dor.laor@qumranet.com> ··· 328 "dest_mode 0x%x, short_hand 0x%x\n", 329 target, source, dest, dest_mode, short_hand); 330 331 + ASSERT(target); 332 switch (short_hand) { 333 case APIC_DEST_NOSHORT: 334 if (dest_mode == 0) ··· 533 struct kvm_vcpu *vcpu = apic->vcpu; 534 struct kvm_run *run = vcpu->run; 535 536 + kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); 537 run->tpr_access.rip = kvm_rip_read(vcpu); 538 run->tpr_access.is_write = write; 539 } ··· 1106 u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); 1107 int r = 0; 1108 1109 + if (!apic_hw_enabled(vcpu->arch.apic)) 1110 + r = 1; 1111 + if ((lvt0 & APIC_LVT_MASKED) == 0 && 1112 + GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) 1113 + r = 1; 1114 return r; 1115 } 1116
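
In lapic.c the inverted ASSERT is fixed, report_tpr_access() switches from a raw set_bit() on vcpu->requests to kvm_make_request(), and kvm_apic_accept_pic_intr() drops its BSP-only restriction, so any vcpu with a hardware-disabled APIC or LVT0 in ExtINT mode can take PIC interrupts, which is what the new wakeup loop in i8259.c relies on. The sketch below shows the producer/consumer shape of the request bits, assuming the kvm_make_request()/kvm_check_request() mini-API introduced elsewhere in this series; handle_tpr_report() is a stand-in name.

#include <linux/kvm_host.h>

/* Stand-in handler; the real consumer fills vcpu->run->tpr_access. */
static void handle_tpr_report(struct kvm_vcpu *vcpu)
{
        (void)vcpu;
}

/* Producer: flag deferred work for a vcpu.  The kick is only needed when
 * queuing from outside the target vcpu's own thread (the lapic hunk above
 * runs in vcpu context, so it does not kick). */
static void queue_tpr_report(struct kvm_vcpu *vcpu)
{
        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
        kvm_vcpu_kick(vcpu);
}

/* Consumer: in the vcpu run loop the bit is tested and cleared atomically. */
static void service_requests(struct kvm_vcpu *vcpu)
{
        if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu))
                handle_tpr_report(vcpu);
}
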
+499 -312
arch/x86/kvm/mmu.c
··· 7 * MMU support 8 * 9 * Copyright (C) 2006 Qumranet, Inc. 10 * 11 * Authors: 12 * Yaniv Kamay <yaniv@qumranet.com> ··· 33 #include <linux/compiler.h> 34 #include <linux/srcu.h> 35 #include <linux/slab.h> 36 37 #include <asm/page.h> 38 #include <asm/cmpxchg.h> ··· 91 92 #define PT_FIRST_AVAIL_BITS_SHIFT 9 93 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 94 - 95 - #define VALID_PAGE(x) ((x) != INVALID_PAGE) 96 97 #define PT64_LEVEL_BITS 9 98 ··· 173 shadow_walk_okay(&(_walker)); \ 174 shadow_walk_next(&(_walker))) 175 176 - typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); 177 178 static struct kmem_cache *pte_chain_cache; 179 static struct kmem_cache *rmap_desc_cache; ··· 288 #endif 289 } 290 291 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 292 struct kmem_cache *base_cache, int min) 293 { ··· 333 return 0; 334 } 335 336 - static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) 337 { 338 while (mc->nobjs) 339 - kfree(mc->objects[--mc->nobjs]); 340 } 341 342 static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, ··· 385 386 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) 387 { 388 - mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache); 389 - mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache); 390 mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); 391 - mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); 392 } 393 394 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, ··· 410 411 static void mmu_free_pte_chain(struct kvm_pte_chain *pc) 412 { 413 - kfree(pc); 414 } 415 416 static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) ··· 421 422 static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) 423 { 424 - kfree(rd); 425 } 426 427 /* ··· 450 { 451 unsigned long idx; 452 453 - idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - 454 - (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); 455 return &slot->lpage_info[level - 2][idx].write_count; 456 } 457 ··· 461 int *write_count; 462 int i; 463 464 - gfn = unalias_gfn(kvm, gfn); 465 - 466 - slot = gfn_to_memslot_unaliased(kvm, gfn); 467 for (i = PT_DIRECTORY_LEVEL; 468 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 469 write_count = slot_largepage_idx(gfn, slot, i); ··· 475 int *write_count; 476 int i; 477 478 - gfn = unalias_gfn(kvm, gfn); 479 - slot = gfn_to_memslot_unaliased(kvm, gfn); 480 for (i = PT_DIRECTORY_LEVEL; 481 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 482 write_count = slot_largepage_idx(gfn, slot, i); ··· 491 struct kvm_memory_slot *slot; 492 int *largepage_idx; 493 494 - gfn = unalias_gfn(kvm, gfn); 495 - slot = gfn_to_memslot_unaliased(kvm, gfn); 496 if (slot) { 497 largepage_idx = slot_largepage_idx(gfn, slot, level); 498 return *largepage_idx; ··· 544 545 /* 546 * Take gfn and return the reverse mapping to it. 
547 - * Note: gfn must be unaliased before this function get called 548 */ 549 550 static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) ··· 555 if (likely(level == PT_PAGE_TABLE_LEVEL)) 556 return &slot->rmap[gfn - slot->base_gfn]; 557 558 - idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - 559 - (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); 560 561 return &slot->lpage_info[level - 2][idx].rmap_pde; 562 } ··· 583 584 if (!is_rmap_spte(*spte)) 585 return count; 586 - gfn = unalias_gfn(vcpu->kvm, gfn); 587 sp = page_header(__pa(spte)); 588 - sp->gfns[spte - sp->spt] = gfn; 589 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 590 if (!*rmapp) { 591 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); ··· 641 struct kvm_rmap_desc *desc; 642 struct kvm_rmap_desc *prev_desc; 643 struct kvm_mmu_page *sp; 644 - pfn_t pfn; 645 unsigned long *rmapp; 646 int i; 647 648 - if (!is_rmap_spte(*spte)) 649 - return; 650 sp = page_header(__pa(spte)); 651 - pfn = spte_to_pfn(*spte); 652 - if (*spte & shadow_accessed_mask) 653 - kvm_set_pfn_accessed(pfn); 654 - if (is_writable_pte(*spte)) 655 - kvm_set_pfn_dirty(pfn); 656 - rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); 657 if (!*rmapp) { 658 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 659 BUG(); ··· 677 pr_err("rmap_remove: %p %llx many->many\n", spte, *spte); 678 BUG(); 679 } 680 } 681 682 static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) ··· 737 u64 *spte; 738 int i, write_protected = 0; 739 740 - gfn = unalias_gfn(kvm, gfn); 741 rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); 742 743 spte = rmap_next(kvm, rmapp, NULL); ··· 745 BUG_ON(!(*spte & PT_PRESENT_MASK)); 746 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 747 if (is_writable_pte(*spte)) { 748 - __set_spte(spte, *spte & ~PT_WRITABLE_MASK); 749 write_protected = 1; 750 } 751 spte = rmap_next(kvm, rmapp, spte); ··· 769 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); 770 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); 771 if (is_writable_pte(*spte)) { 772 - rmap_remove(kvm, spte); 773 --kvm->stat.lpages; 774 - __set_spte(spte, shadow_trap_nonpresent_pte); 775 spte = NULL; 776 write_protected = 1; 777 } ··· 791 while ((spte = rmap_next(kvm, rmapp, NULL))) { 792 BUG_ON(!(*spte & PT_PRESENT_MASK)); 793 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); 794 - rmap_remove(kvm, spte); 795 - __set_spte(spte, shadow_trap_nonpresent_pte); 796 need_tlb_flush = 1; 797 } 798 return need_tlb_flush; ··· 813 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); 814 need_flush = 1; 815 if (pte_write(*ptep)) { 816 - rmap_remove(kvm, spte); 817 - __set_spte(spte, shadow_trap_nonpresent_pte); 818 spte = rmap_next(kvm, rmapp, NULL); 819 } else { 820 new_spte = *spte &~ (PT64_BASE_ADDR_MASK); ··· 821 822 new_spte &= ~PT_WRITABLE_MASK; 823 new_spte &= ~SPTE_HOST_WRITEABLE; 824 - if (is_writable_pte(*spte)) 825 - kvm_set_pfn_dirty(spte_to_pfn(*spte)); 826 - __set_spte(spte, new_spte); 827 spte = rmap_next(kvm, rmapp, spte); 828 } 829 } ··· 856 ret = handler(kvm, &memslot->rmap[gfn_offset], data); 857 858 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 859 - int idx = gfn_offset; 860 - idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); 861 ret |= handler(kvm, 862 &memslot->lpage_info[j][idx].rmap_pde, 863 data); ··· 924 925 sp = page_header(__pa(spte)); 926 927 - gfn = unalias_gfn(vcpu->kvm, gfn); 928 rmapp = 
gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 929 930 kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); ··· 954 static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) 955 { 956 ASSERT(is_empty_shadow_page(sp->spt)); 957 list_del(&sp->link); 958 __free_page(virt_to_page(sp->spt)); 959 - __free_page(virt_to_page(sp->gfns)); 960 - kfree(sp); 961 ++kvm->arch.n_free_mmu_pages; 962 } 963 ··· 969 } 970 971 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, 972 - u64 *parent_pte) 973 { 974 struct kvm_mmu_page *sp; 975 976 sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); 977 sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 978 - sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 979 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 980 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 981 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); ··· 1062 BUG(); 1063 } 1064 1065 - 1066 static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) 1067 { 1068 struct kvm_pte_chain *pte_chain; ··· 1071 1072 if (!sp->multimapped && sp->parent_pte) { 1073 parent_sp = page_header(__pa(sp->parent_pte)); 1074 - fn(parent_sp); 1075 - mmu_parent_walk(parent_sp, fn); 1076 - return; 1077 - } 1078 - hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) 1079 - for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { 1080 - if (!pte_chain->parent_ptes[i]) 1081 - break; 1082 - parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); 1083 - fn(parent_sp); 1084 - mmu_parent_walk(parent_sp, fn); 1085 - } 1086 - } 1087 - 1088 - static void kvm_mmu_update_unsync_bitmap(u64 *spte) 1089 - { 1090 - unsigned int index; 1091 - struct kvm_mmu_page *sp = page_header(__pa(spte)); 1092 - 1093 - index = spte - sp->spt; 1094 - if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) 1095 - sp->unsync_children++; 1096 - WARN_ON(!sp->unsync_children); 1097 - } 1098 - 1099 - static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) 1100 - { 1101 - struct kvm_pte_chain *pte_chain; 1102 - struct hlist_node *node; 1103 - int i; 1104 - 1105 - if (!sp->parent_pte) 1106 - return; 1107 - 1108 - if (!sp->multimapped) { 1109 - kvm_mmu_update_unsync_bitmap(sp->parent_pte); 1110 return; 1111 } 1112 1113 hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) 1114 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { 1115 - if (!pte_chain->parent_ptes[i]) 1116 break; 1117 - kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]); 1118 } 1119 } 1120 1121 - static int unsync_walk_fn(struct kvm_mmu_page *sp) 1122 - { 1123 - kvm_mmu_update_parents_unsync(sp); 1124 - return 1; 1125 - } 1126 - 1127 static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) 1128 { 1129 - mmu_parent_walk(sp, unsync_walk_fn); 1130 - kvm_mmu_update_parents_unsync(sp); 1131 } 1132 1133 static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, ··· 1114 } 1115 1116 static int nonpaging_sync_page(struct kvm_vcpu *vcpu, 1117 - struct kvm_mmu_page *sp) 1118 { 1119 return 1; 1120 } ··· 1160 int i, ret, nr_unsync_leaf = 0; 1161 1162 for_each_unsync_children(sp->unsync_child_bitmap, i) { 1163 u64 ent = sp->spt[i]; 1164 1165 - if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { 1166 - struct kvm_mmu_page *child; 1167 - child = page_header(ent & PT64_BASE_ADDR_MASK); 1168 1169 - if (child->unsync_children) { 1170 - if (mmu_pages_add(pvec, child, i)) 1171 - return -ENOSPC; 1172 1173 - ret = __mmu_unsync_walk(child, pvec); 1174 - if 
(!ret) 1175 - __clear_bit(i, sp->unsync_child_bitmap); 1176 - else if (ret > 0) 1177 - nr_unsync_leaf += ret; 1178 - else 1179 - return ret; 1180 - } 1181 1182 - if (child->unsync) { 1183 - nr_unsync_leaf++; 1184 - if (mmu_pages_add(pvec, child, i)) 1185 - return -ENOSPC; 1186 - } 1187 - } 1188 } 1189 1190 - if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) 1191 - sp->unsync_children = 0; 1192 1193 return nr_unsync_leaf; 1194 } ··· 1208 return __mmu_unsync_walk(sp, pvec); 1209 } 1210 1211 - static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) 1212 - { 1213 - unsigned index; 1214 - struct hlist_head *bucket; 1215 - struct kvm_mmu_page *sp; 1216 - struct hlist_node *node; 1217 - 1218 - pgprintk("%s: looking for gfn %lx\n", __func__, gfn); 1219 - index = kvm_page_table_hashfn(gfn); 1220 - bucket = &kvm->arch.mmu_page_hash[index]; 1221 - hlist_for_each_entry(sp, node, bucket, hash_link) 1222 - if (sp->gfn == gfn && !sp->role.direct 1223 - && !sp->role.invalid) { 1224 - pgprintk("%s: found role %x\n", 1225 - __func__, sp->role.word); 1226 - return sp; 1227 - } 1228 - return NULL; 1229 - } 1230 - 1231 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1232 { 1233 WARN_ON(!sp->unsync); ··· 1216 --kvm->stat.mmu_unsync; 1217 } 1218 1219 - static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); 1220 1221 - static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1222 { 1223 if (sp->role.cr4_pae != !!is_pae(vcpu)) { 1224 - kvm_mmu_zap_page(vcpu->kvm, sp); 1225 return 1; 1226 } 1227 1228 - if (rmap_write_protect(vcpu->kvm, sp->gfn)) 1229 - kvm_flush_remote_tlbs(vcpu->kvm); 1230 - kvm_unlink_unsync_page(vcpu->kvm, sp); 1231 - if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1232 - kvm_mmu_zap_page(vcpu->kvm, sp); 1233 return 1; 1234 } 1235 1236 kvm_mmu_flush_tlb(vcpu); 1237 return 0; 1238 } 1239 1240 struct mmu_page_path { ··· 1365 struct kvm_mmu_page *sp; 1366 struct mmu_page_path parents; 1367 struct kvm_mmu_pages pages; 1368 1369 kvm_mmu_pages_init(parent, &parents, &pages); 1370 while (mmu_unsync_walk(parent, &pages)) { ··· 1378 kvm_flush_remote_tlbs(vcpu->kvm); 1379 1380 for_each_sp(pages, sp, parents, i) { 1381 - kvm_sync_page(vcpu, sp); 1382 mmu_pages_clear_parents(&parents); 1383 } 1384 cond_resched_lock(&vcpu->kvm->mmu_lock); 1385 kvm_mmu_pages_init(parent, &parents, &pages); 1386 } ··· 1396 u64 *parent_pte) 1397 { 1398 union kvm_mmu_page_role role; 1399 - unsigned index; 1400 unsigned quadrant; 1401 - struct hlist_head *bucket; 1402 struct kvm_mmu_page *sp; 1403 - struct hlist_node *node, *tmp; 1404 1405 role = vcpu->arch.mmu.base_role; 1406 role.level = level; ··· 1407 if (role.direct) 1408 role.cr4_pae = 0; 1409 role.access = access; 1410 - if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { 1411 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); 1412 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; 1413 role.quadrant = quadrant; 1414 } 1415 - index = kvm_page_table_hashfn(gfn); 1416 - bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1417 - hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link) 1418 - if (sp->gfn == gfn) { 1419 - if (sp->unsync) 1420 - if (kvm_sync_page(vcpu, sp)) 1421 - continue; 1422 1423 - if (sp->role.word != role.word) 1424 - continue; 1425 1426 - mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1427 - if (sp->unsync_children) { 1428 - set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); 1429 - kvm_mmu_mark_parents_unsync(sp); 1430 - } 1431 - trace_kvm_mmu_get_page(sp, 
false); 1432 - return sp; 1433 - } 1434 ++vcpu->kvm->stat.mmu_cache_miss; 1435 - sp = kvm_mmu_alloc_page(vcpu, parent_pte); 1436 if (!sp) 1437 return sp; 1438 sp->gfn = gfn; 1439 sp->role = role; 1440 - hlist_add_head(&sp->hash_link, bucket); 1441 if (!direct) { 1442 if (rmap_write_protect(vcpu->kvm, gfn)) 1443 kvm_flush_remote_tlbs(vcpu->kvm); 1444 account_shadowed(vcpu->kvm, gfn); 1445 } 1446 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) ··· 1492 --iterator->level; 1493 } 1494 1495 static void kvm_mmu_page_unlink_children(struct kvm *kvm, 1496 struct kvm_mmu_page *sp) 1497 { ··· 1553 } else { 1554 if (is_large_pte(ent)) 1555 --kvm->stat.lpages; 1556 - rmap_remove(kvm, &pt[i]); 1557 } 1558 } 1559 pt[i] = shadow_trap_nonpresent_pte; ··· 1596 } 1597 1598 static int mmu_zap_unsync_children(struct kvm *kvm, 1599 - struct kvm_mmu_page *parent) 1600 { 1601 int i, zapped = 0; 1602 struct mmu_page_path parents; ··· 1611 struct kvm_mmu_page *sp; 1612 1613 for_each_sp(pages, sp, parents, i) { 1614 - kvm_mmu_zap_page(kvm, sp); 1615 mmu_pages_clear_parents(&parents); 1616 zapped++; 1617 } ··· 1621 return zapped; 1622 } 1623 1624 - static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1625 { 1626 int ret; 1627 1628 - trace_kvm_mmu_zap_page(sp); 1629 ++kvm->stat.mmu_shadow_zapped; 1630 - ret = mmu_zap_unsync_children(kvm, sp); 1631 kvm_mmu_page_unlink_children(kvm, sp); 1632 kvm_mmu_unlink_parents(kvm, sp); 1633 - kvm_flush_remote_tlbs(kvm); 1634 if (!sp->role.invalid && !sp->role.direct) 1635 unaccount_shadowed(kvm, sp->gfn); 1636 if (sp->unsync) 1637 kvm_unlink_unsync_page(kvm, sp); 1638 if (!sp->root_count) { 1639 - hlist_del(&sp->hash_link); 1640 - kvm_mmu_free_page(kvm, sp); 1641 } else { 1642 - sp->role.invalid = 1; 1643 list_move(&sp->link, &kvm->arch.active_mmu_pages); 1644 kvm_reload_remote_mmus(kvm); 1645 } 1646 kvm_mmu_reset_last_pte_updated(kvm); 1647 return ret; 1648 } 1649 1650 /* ··· 1674 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) 1675 { 1676 int used_pages; 1677 1678 used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; 1679 used_pages = max(0, used_pages); ··· 1692 1693 page = container_of(kvm->arch.active_mmu_pages.prev, 1694 struct kvm_mmu_page, link); 1695 - used_pages -= kvm_mmu_zap_page(kvm, page); 1696 - used_pages--; 1697 } 1698 kvm_nr_mmu_pages = used_pages; 1699 kvm->arch.n_free_mmu_pages = 0; 1700 } ··· 1708 1709 static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) 1710 { 1711 - unsigned index; 1712 - struct hlist_head *bucket; 1713 struct kvm_mmu_page *sp; 1714 - struct hlist_node *node, *n; 1715 int r; 1716 1717 pgprintk("%s: looking for gfn %lx\n", __func__, gfn); 1718 r = 0; 1719 - index = kvm_page_table_hashfn(gfn); 1720 - bucket = &kvm->arch.mmu_page_hash[index]; 1721 - restart: 1722 - hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 1723 - if (sp->gfn == gfn && !sp->role.direct) { 1724 - pgprintk("%s: gfn %lx role %x\n", __func__, gfn, 1725 - sp->role.word); 1726 - r = 1; 1727 - if (kvm_mmu_zap_page(kvm, sp)) 1728 - goto restart; 1729 - } 1730 return r; 1731 } 1732 1733 static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) 1734 { 1735 - unsigned index; 1736 - struct hlist_head *bucket; 1737 struct kvm_mmu_page *sp; 1738 - struct hlist_node *node, *nn; 1739 1740 - index = kvm_page_table_hashfn(gfn); 1741 - bucket = &kvm->arch.mmu_page_hash[index]; 1742 - restart: 1743 - hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { 1744 - if (sp->gfn == gfn && 
!sp->role.direct 1745 - && !sp->role.invalid) { 1746 - pgprintk("%s: zap %lx %x\n", 1747 - __func__, gfn, sp->role.word); 1748 - if (kvm_mmu_zap_page(kvm, sp)) 1749 - goto restart; 1750 - } 1751 } 1752 } 1753 1754 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) ··· 1867 } 1868 EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); 1869 1870 - static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1871 { 1872 - unsigned index; 1873 - struct hlist_head *bucket; 1874 - struct kvm_mmu_page *s; 1875 - struct hlist_node *node, *n; 1876 - 1877 - index = kvm_page_table_hashfn(sp->gfn); 1878 - bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1879 - /* don't unsync if pagetable is shadowed with multiple roles */ 1880 - hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { 1881 - if (s->gfn != sp->gfn || s->role.direct) 1882 - continue; 1883 - if (s->role.word != sp->role.word) 1884 - return 1; 1885 - } 1886 trace_kvm_mmu_unsync_page(sp); 1887 ++vcpu->kvm->stat.mmu_unsync; 1888 sp->unsync = 1; 1889 1890 kvm_mmu_mark_parents_unsync(sp); 1891 - 1892 mmu_convert_notrap(sp); 1893 - return 0; 1894 } 1895 1896 static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, 1897 bool can_unsync) 1898 { 1899 - struct kvm_mmu_page *shadow; 1900 1901 - shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); 1902 - if (shadow) { 1903 - if (shadow->role.level != PT_PAGE_TABLE_LEVEL) 1904 return 1; 1905 - if (shadow->unsync) 1906 - return 0; 1907 - if (can_unsync && oos_shadow) 1908 - return kvm_unsync_page(vcpu, shadow); 1909 - return 1; 1910 } 1911 return 0; 1912 } 1913 ··· 1952 spte |= (u64)pfn << PAGE_SHIFT; 1953 1954 if ((pte_access & ACC_WRITE_MASK) 1955 - || (write_fault && !is_write_protection(vcpu) && !user_fault)) { 1956 1957 if (level > PT_PAGE_TABLE_LEVEL && 1958 has_wrprotected_page(vcpu->kvm, gfn, level)) { 1959 ret = 1; 1960 - spte = shadow_trap_nonpresent_pte; 1961 - goto set_pte; 1962 } 1963 1964 spte |= PT_WRITABLE_MASK; ··· 1990 mark_page_dirty(vcpu->kvm, gfn); 1991 1992 set_pte: 1993 - __set_spte(sptep, spte); 1994 return ret; 1995 } 1996 ··· 2005 bool reset_host_protection) 2006 { 2007 int was_rmapped = 0; 2008 - int was_writable = is_writable_pte(*sptep); 2009 int rmap_count; 2010 2011 pgprintk("%s: spte %llx access %x write_fault %d" ··· 2029 } else if (pfn != spte_to_pfn(*sptep)) { 2030 pgprintk("hfn old %lx new %lx\n", 2031 spte_to_pfn(*sptep), pfn); 2032 - rmap_remove(vcpu->kvm, sptep); 2033 - __set_spte(sptep, shadow_trap_nonpresent_pte); 2034 kvm_flush_remote_tlbs(vcpu->kvm); 2035 } else 2036 was_rmapped = 1; ··· 2040 reset_host_protection)) { 2041 if (write_fault) 2042 *ptwrite = 1; 2043 - kvm_x86_ops->tlb_flush(vcpu); 2044 } 2045 2046 pgprintk("%s: setting spte %llx\n", __func__, *sptep); ··· 2054 page_header_update_slot(vcpu->kvm, sptep, gfn); 2055 if (!was_rmapped) { 2056 rmap_count = rmap_add(vcpu, sptep, gfn); 2057 - kvm_release_pfn_clean(pfn); 2058 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 2059 rmap_recycle(vcpu, sptep, gfn); 2060 - } else { 2061 - if (was_writable) 2062 - kvm_release_pfn_dirty(pfn); 2063 - else 2064 - kvm_release_pfn_clean(pfn); 2065 } 2066 if (speculative) { 2067 vcpu->arch.last_pte_updated = sptep; 2068 vcpu->arch.last_pte_gfn = gfn; ··· 2086 } 2087 2088 if (*iterator.sptep == shadow_trap_nonpresent_pte) { 2089 - pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; 2090 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, 2091 iterator.level - 1, 2092 1, ACC_ALL, iterator.sptep); ··· 2106 } 2107 } 2108 
return pt_write; 2109 } 2110 2111 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) ··· 2154 pfn = gfn_to_pfn(vcpu->kvm, gfn); 2155 2156 /* mmio */ 2157 - if (is_error_pfn(pfn)) { 2158 - kvm_release_pfn_clean(pfn); 2159 - return 1; 2160 - } 2161 2162 spin_lock(&vcpu->kvm->mmu_lock); 2163 if (mmu_notifier_retry(vcpu, mmu_seq)) ··· 2178 { 2179 int i; 2180 struct kvm_mmu_page *sp; 2181 2182 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 2183 return; ··· 2188 2189 sp = page_header(root); 2190 --sp->root_count; 2191 - if (!sp->root_count && sp->role.invalid) 2192 - kvm_mmu_zap_page(vcpu->kvm, sp); 2193 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 2194 spin_unlock(&vcpu->kvm->mmu_lock); 2195 return; ··· 2204 sp = page_header(root); 2205 --sp->root_count; 2206 if (!sp->root_count && sp->role.invalid) 2207 - kvm_mmu_zap_page(vcpu->kvm, sp); 2208 } 2209 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 2210 } 2211 spin_unlock(&vcpu->kvm->mmu_lock); 2212 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 2213 } ··· 2219 int ret = 0; 2220 2221 if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { 2222 - set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 2223 ret = 1; 2224 } 2225 ··· 2247 root_gfn = 0; 2248 } 2249 spin_lock(&vcpu->kvm->mmu_lock); 2250 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 2251 PT64_ROOT_LEVEL, direct, 2252 ACC_ALL, NULL); ··· 2278 root_gfn = i << 30; 2279 } 2280 spin_lock(&vcpu->kvm->mmu_lock); 2281 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 2282 PT32_ROOT_LEVEL, direct, 2283 ACC_ALL, NULL); ··· 2374 mmu_seq = vcpu->kvm->mmu_notifier_seq; 2375 smp_rmb(); 2376 pfn = gfn_to_pfn(vcpu->kvm, gfn); 2377 - if (is_error_pfn(pfn)) { 2378 - kvm_release_pfn_clean(pfn); 2379 - return 1; 2380 - } 2381 spin_lock(&vcpu->kvm->mmu_lock); 2382 if (mmu_notifier_retry(vcpu, mmu_seq)) 2383 goto out_unlock; ··· 2417 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 2418 { 2419 ++vcpu->stat.tlb_flush; 2420 - kvm_x86_ops->tlb_flush(vcpu); 2421 } 2422 2423 static void paging_new_cr3(struct kvm_vcpu *vcpu) ··· 2631 static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 2632 { 2633 ASSERT(vcpu); 2634 - if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) { 2635 vcpu->arch.mmu.free(vcpu); 2636 - vcpu->arch.mmu.root_hpa = INVALID_PAGE; 2637 - } 2638 } 2639 2640 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) ··· 2650 r = mmu_topup_memory_caches(vcpu); 2651 if (r) 2652 goto out; 2653 - spin_lock(&vcpu->kvm->mmu_lock); 2654 - kvm_mmu_free_some_pages(vcpu); 2655 - spin_unlock(&vcpu->kvm->mmu_lock); 2656 r = mmu_alloc_roots(vcpu); 2657 spin_lock(&vcpu->kvm->mmu_lock); 2658 mmu_sync_roots(vcpu); ··· 2678 pte = *spte; 2679 if (is_shadow_present_pte(pte)) { 2680 if (is_last_spte(pte, sp->role.level)) 2681 - rmap_remove(vcpu->kvm, spte); 2682 else { 2683 child = page_header(pte & PT64_BASE_ADDR_MASK); 2684 mmu_page_remove_parent_pte(child, spte); ··· 2698 ++vcpu->kvm->stat.mmu_pde_zapped; 2699 return; 2700 } 2701 2702 ++vcpu->kvm->stat.mmu_pte_updated; 2703 if (!sp->role.cr4_pae) ··· 2722 return (old & ~new & PT64_PERM_MASK) != 0; 2723 } 2724 2725 - static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new) 2726 { 2727 - if (need_remote_flush(old, new)) 2728 kvm_flush_remote_tlbs(vcpu->kvm); 2729 - else 2730 kvm_mmu_flush_tlb(vcpu); 2731 } 2732 ··· 2780 bool guest_initiated) 2781 { 2782 gfn_t gfn = gpa >> PAGE_SHIFT; 2783 struct kvm_mmu_page *sp; 2784 - struct hlist_node *node, *n; 2785 - struct hlist_head *bucket; 2786 - unsigned index; 2787 u64 entry, gentry; 2788 u64 *spte; 2789 unsigned offset = offset_in_page(gpa); 
··· 2796 int npte; 2797 int r; 2798 int invlpg_counter; 2799 2800 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); 2801 ··· 2854 vcpu->arch.last_pte_updated = NULL; 2855 } 2856 } 2857 - index = kvm_page_table_hashfn(gfn); 2858 - bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2859 2860 - restart: 2861 - hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 2862 - if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) 2863 - continue; 2864 pte_size = sp->role.cr4_pae ? 8 : 4; 2865 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); 2866 misaligned |= bytes < 4; ··· 2873 */ 2874 pgprintk("misaligned: gpa %llx bytes %d role %x\n", 2875 gpa, bytes, sp->role.word); 2876 - if (kvm_mmu_zap_page(vcpu->kvm, sp)) 2877 - goto restart; 2878 ++vcpu->kvm->stat.mmu_flooded; 2879 continue; 2880 } ··· 2898 if (quadrant != sp->role.quadrant) 2899 continue; 2900 } 2901 spte = &sp->spt[page_offset / sizeof(*spte)]; 2902 while (npte--) { 2903 entry = *spte; 2904 mmu_pte_write_zap_pte(vcpu, sp, spte); 2905 - if (gentry) 2906 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); 2907 - mmu_pte_write_flush_tlb(vcpu, entry, *spte); 2908 ++spte; 2909 } 2910 } 2911 kvm_mmu_audit(vcpu, "post pte write"); 2912 spin_unlock(&vcpu->kvm->mmu_lock); 2913 if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { ··· 2941 2942 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 2943 { 2944 - while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES && 2945 !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { 2946 struct kvm_mmu_page *sp; 2947 2948 sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, 2949 struct kvm_mmu_page, link); 2950 - kvm_mmu_zap_page(vcpu->kvm, sp); 2951 ++vcpu->kvm->stat.mmu_recycled; 2952 } 2953 } 2954 2955 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) ··· 2983 return 1; 2984 case EMULATE_DO_MMIO: 2985 ++vcpu->stat.mmio_exits; 2986 - return 0; 2987 case EMULATE_FAIL: 2988 - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 2989 - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 2990 - vcpu->run->internal.ndata = 0; 2991 return 0; 2992 default: 2993 BUG(); ··· 3081 pt = sp->spt; 3082 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 3083 /* avoid RMW */ 3084 - if (pt[i] & PT_WRITABLE_MASK) 3085 pt[i] &= ~PT_WRITABLE_MASK; 3086 } 3087 kvm_flush_remote_tlbs(kvm); ··· 3090 void kvm_mmu_zap_all(struct kvm *kvm) 3091 { 3092 struct kvm_mmu_page *sp, *node; 3093 3094 spin_lock(&kvm->mmu_lock); 3095 restart: 3096 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) 3097 - if (kvm_mmu_zap_page(kvm, sp)) 3098 goto restart; 3099 3100 spin_unlock(&kvm->mmu_lock); 3101 - 3102 - kvm_flush_remote_tlbs(kvm); 3103 } 3104 3105 - static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) 3106 { 3107 struct kvm_mmu_page *page; 3108 3109 page = container_of(kvm->arch.active_mmu_pages.prev, 3110 struct kvm_mmu_page, link); 3111 - return kvm_mmu_zap_page(kvm, page) + 1; 3112 } 3113 3114 static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) ··· 3122 3123 list_for_each_entry(kvm, &vm_list, vm_list) { 3124 int npages, idx, freed_pages; 3125 3126 idx = srcu_read_lock(&kvm->srcu); 3127 spin_lock(&kvm->mmu_lock); ··· 3130 kvm->arch.n_free_mmu_pages; 3131 cache_count += npages; 3132 if (!kvm_freed && nr_to_scan > 0 && npages > 0) { 3133 - freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); 3134 cache_count -= freed_pages; 3135 kvm_freed = kvm; 3136 } 3137 nr_to_scan--; 3138 3139 spin_unlock(&kvm->mmu_lock); 3140 
srcu_read_unlock(&kvm->srcu, idx); 3141 } ··· 3263 3264 static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) 3265 { 3266 - kvm_set_cr3(vcpu, vcpu->arch.cr3); 3267 return 1; 3268 } 3269 ··· 3520 struct kvm_mmu_page *rev_sp; 3521 gfn_t gfn; 3522 3523 - if (*sptep & PT_WRITABLE_MASK) { 3524 rev_sp = page_header(__pa(sptep)); 3525 - gfn = rev_sp->gfns[sptep - rev_sp->spt]; 3526 3527 if (!gfn_to_memslot(kvm, gfn)) { 3528 if (!printk_ratelimit()) ··· 3536 return; 3537 } 3538 3539 - rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], 3540 - rev_sp->role.level); 3541 if (!*rmapp) { 3542 if (!printk_ratelimit()) 3543 return; ··· 3569 3570 if (!(ent & PT_PRESENT_MASK)) 3571 continue; 3572 - if (!(ent & PT_WRITABLE_MASK)) 3573 continue; 3574 inspect_spte_has_rmap(vcpu->kvm, &pt[i]); 3575 } ··· 3597 if (sp->unsync) 3598 continue; 3599 3600 - gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3601 - slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); 3602 rmapp = &slot->rmap[gfn - slot->base_gfn]; 3603 3604 spte = rmap_next(vcpu->kvm, rmapp, NULL); 3605 while (spte) { 3606 - if (*spte & PT_WRITABLE_MASK) 3607 printk(KERN_ERR "%s: (%s) shadow page has " 3608 "writable mappings: gfn %lx role %x\n", 3609 __func__, audit_msg, sp->gfn,
··· 7 * MMU support 8 * 9 * Copyright (C) 2006 Qumranet, Inc. 10 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 11 * 12 * Authors: 13 * Yaniv Kamay <yaniv@qumranet.com> ··· 32 #include <linux/compiler.h> 33 #include <linux/srcu.h> 34 #include <linux/slab.h> 35 + #include <linux/uaccess.h> 36 37 #include <asm/page.h> 38 #include <asm/cmpxchg.h> ··· 89 90 #define PT_FIRST_AVAIL_BITS_SHIFT 9 91 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 92 93 #define PT64_LEVEL_BITS 9 94 ··· 173 shadow_walk_okay(&(_walker)); \ 174 shadow_walk_next(&(_walker))) 175 176 + typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte); 177 178 static struct kmem_cache *pte_chain_cache; 179 static struct kmem_cache *rmap_desc_cache; ··· 288 #endif 289 } 290 291 + static u64 __xchg_spte(u64 *sptep, u64 new_spte) 292 + { 293 + #ifdef CONFIG_X86_64 294 + return xchg(sptep, new_spte); 295 + #else 296 + u64 old_spte; 297 + 298 + do { 299 + old_spte = *sptep; 300 + } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte); 301 + 302 + return old_spte; 303 + #endif 304 + } 305 + 306 + static void update_spte(u64 *sptep, u64 new_spte) 307 + { 308 + u64 old_spte; 309 + 310 + if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask) || 311 + !is_rmap_spte(*sptep)) 312 + __set_spte(sptep, new_spte); 313 + else { 314 + old_spte = __xchg_spte(sptep, new_spte); 315 + if (old_spte & shadow_accessed_mask) 316 + mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte))); 317 + } 318 + } 319 + 320 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 321 struct kmem_cache *base_cache, int min) 322 { ··· 304 return 0; 305 } 306 307 + static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, 308 + struct kmem_cache *cache) 309 { 310 while (mc->nobjs) 311 + kmem_cache_free(cache, mc->objects[--mc->nobjs]); 312 } 313 314 static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, ··· 355 356 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) 357 { 358 + mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache); 359 + mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache); 360 mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); 361 + mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, 362 + mmu_page_header_cache); 363 } 364 365 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, ··· 379 380 static void mmu_free_pte_chain(struct kvm_pte_chain *pc) 381 { 382 + kmem_cache_free(pte_chain_cache, pc); 383 } 384 385 static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) ··· 390 391 static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) 392 { 393 + kmem_cache_free(rmap_desc_cache, rd); 394 + } 395 + 396 + static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) 397 + { 398 + if (!sp->role.direct) 399 + return sp->gfns[index]; 400 + 401 + return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS)); 402 + } 403 + 404 + static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) 405 + { 406 + if (sp->role.direct) 407 + BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); 408 + else 409 + sp->gfns[index] = gfn; 410 } 411 412 /* ··· 403 { 404 unsigned long idx; 405 406 + idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - 407 + (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 408 return &slot->lpage_info[level - 2][idx].write_count; 409 } 410 ··· 414 int *write_count; 415 int i; 416 417 + slot = gfn_to_memslot(kvm, gfn); 418 for (i = PT_DIRECTORY_LEVEL; 419 i < PT_PAGE_TABLE_LEVEL + 
KVM_NR_PAGE_SIZES; ++i) { 420 write_count = slot_largepage_idx(gfn, slot, i); ··· 430 int *write_count; 431 int i; 432 433 + slot = gfn_to_memslot(kvm, gfn); 434 for (i = PT_DIRECTORY_LEVEL; 435 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 436 write_count = slot_largepage_idx(gfn, slot, i); ··· 447 struct kvm_memory_slot *slot; 448 int *largepage_idx; 449 450 + slot = gfn_to_memslot(kvm, gfn); 451 if (slot) { 452 largepage_idx = slot_largepage_idx(gfn, slot, level); 453 return *largepage_idx; ··· 501 502 /* 503 * Take gfn and return the reverse mapping to it. 504 */ 505 506 static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) ··· 513 if (likely(level == PT_PAGE_TABLE_LEVEL)) 514 return &slot->rmap[gfn - slot->base_gfn]; 515 516 + idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - 517 + (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 518 519 return &slot->lpage_info[level - 2][idx].rmap_pde; 520 } ··· 541 542 if (!is_rmap_spte(*spte)) 543 return count; 544 sp = page_header(__pa(spte)); 545 + kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); 546 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 547 if (!*rmapp) { 548 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); ··· 600 struct kvm_rmap_desc *desc; 601 struct kvm_rmap_desc *prev_desc; 602 struct kvm_mmu_page *sp; 603 + gfn_t gfn; 604 unsigned long *rmapp; 605 int i; 606 607 sp = page_header(__pa(spte)); 608 + gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); 609 + rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); 610 if (!*rmapp) { 611 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 612 BUG(); ··· 642 pr_err("rmap_remove: %p %llx many->many\n", spte, *spte); 643 BUG(); 644 } 645 + } 646 + 647 + static void set_spte_track_bits(u64 *sptep, u64 new_spte) 648 + { 649 + pfn_t pfn; 650 + u64 old_spte = *sptep; 651 + 652 + if (!shadow_accessed_mask || !is_shadow_present_pte(old_spte) || 653 + old_spte & shadow_accessed_mask) { 654 + __set_spte(sptep, new_spte); 655 + } else 656 + old_spte = __xchg_spte(sptep, new_spte); 657 + 658 + if (!is_rmap_spte(old_spte)) 659 + return; 660 + pfn = spte_to_pfn(old_spte); 661 + if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) 662 + kvm_set_pfn_accessed(pfn); 663 + if (is_writable_pte(old_spte)) 664 + kvm_set_pfn_dirty(pfn); 665 + } 666 + 667 + static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) 668 + { 669 + set_spte_track_bits(sptep, new_spte); 670 + rmap_remove(kvm, sptep); 671 } 672 673 static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) ··· 676 u64 *spte; 677 int i, write_protected = 0; 678 679 rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); 680 681 spte = rmap_next(kvm, rmapp, NULL); ··· 685 BUG_ON(!(*spte & PT_PRESENT_MASK)); 686 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 687 if (is_writable_pte(*spte)) { 688 + update_spte(spte, *spte & ~PT_WRITABLE_MASK); 689 write_protected = 1; 690 } 691 spte = rmap_next(kvm, rmapp, spte); ··· 709 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); 710 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); 711 if (is_writable_pte(*spte)) { 712 + drop_spte(kvm, spte, 713 + shadow_trap_nonpresent_pte); 714 --kvm->stat.lpages; 715 spte = NULL; 716 write_protected = 1; 717 } ··· 731 while ((spte = rmap_next(kvm, rmapp, NULL))) { 732 BUG_ON(!(*spte & PT_PRESENT_MASK)); 733 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); 734 + drop_spte(kvm, spte, shadow_trap_nonpresent_pte); 735 
need_tlb_flush = 1; 736 } 737 return need_tlb_flush; ··· 754 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); 755 need_flush = 1; 756 if (pte_write(*ptep)) { 757 + drop_spte(kvm, spte, shadow_trap_nonpresent_pte); 758 spte = rmap_next(kvm, rmapp, NULL); 759 } else { 760 new_spte = *spte &~ (PT64_BASE_ADDR_MASK); ··· 763 764 new_spte &= ~PT_WRITABLE_MASK; 765 new_spte &= ~SPTE_HOST_WRITEABLE; 766 + new_spte &= ~shadow_accessed_mask; 767 + set_spte_track_bits(spte, new_spte); 768 spte = rmap_next(kvm, rmapp, spte); 769 } 770 } ··· 799 ret = handler(kvm, &memslot->rmap[gfn_offset], data); 800 801 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 802 + unsigned long idx; 803 + int sh; 804 + 805 + sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j); 806 + idx = ((memslot->base_gfn+gfn_offset) >> sh) - 807 + (memslot->base_gfn >> sh); 808 ret |= handler(kvm, 809 &memslot->lpage_info[j][idx].rmap_pde, 810 data); ··· 863 864 sp = page_header(__pa(spte)); 865 866 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 867 868 kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); ··· 894 static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) 895 { 896 ASSERT(is_empty_shadow_page(sp->spt)); 897 + hlist_del(&sp->hash_link); 898 list_del(&sp->link); 899 __free_page(virt_to_page(sp->spt)); 900 + if (!sp->role.direct) 901 + __free_page(virt_to_page(sp->gfns)); 902 + kmem_cache_free(mmu_page_header_cache, sp); 903 ++kvm->arch.n_free_mmu_pages; 904 } 905 ··· 907 } 908 909 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, 910 + u64 *parent_pte, int direct) 911 { 912 struct kvm_mmu_page *sp; 913 914 sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); 915 sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 916 + if (!direct) 917 + sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, 918 + PAGE_SIZE); 919 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 920 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 921 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); ··· 998 BUG(); 999 } 1000 1001 static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) 1002 { 1003 struct kvm_pte_chain *pte_chain; ··· 1008 1009 if (!sp->multimapped && sp->parent_pte) { 1010 parent_sp = page_header(__pa(sp->parent_pte)); 1011 + fn(parent_sp, sp->parent_pte); 1012 return; 1013 } 1014 1015 hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) 1016 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { 1017 + u64 *spte = pte_chain->parent_ptes[i]; 1018 + 1019 + if (!spte) 1020 break; 1021 + parent_sp = page_header(__pa(spte)); 1022 + fn(parent_sp, spte); 1023 } 1024 } 1025 1026 + static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte); 1027 static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) 1028 { 1029 + mmu_parent_walk(sp, mark_unsync); 1030 + } 1031 + 1032 + static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte) 1033 + { 1034 + unsigned int index; 1035 + 1036 + index = spte - sp->spt; 1037 + if (__test_and_set_bit(index, sp->unsync_child_bitmap)) 1038 + return; 1039 + if (sp->unsync_children++) 1040 + return; 1041 + kvm_mmu_mark_parents_unsync(sp); 1042 } 1043 1044 static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, ··· 1077 } 1078 1079 static int nonpaging_sync_page(struct kvm_vcpu *vcpu, 1080 + struct kvm_mmu_page *sp, bool clear_unsync) 1081 { 1082 return 1; 1083 } ··· 1123 int i, ret, nr_unsync_leaf = 0; 1124 1125 for_each_unsync_children(sp->unsync_child_bitmap, i) { 1126 
+ struct kvm_mmu_page *child; 1127 u64 ent = sp->spt[i]; 1128 1129 + if (!is_shadow_present_pte(ent) || is_large_pte(ent)) 1130 + goto clear_child_bitmap; 1131 1132 + child = page_header(ent & PT64_BASE_ADDR_MASK); 1133 1134 + if (child->unsync_children) { 1135 + if (mmu_pages_add(pvec, child, i)) 1136 + return -ENOSPC; 1137 1138 + ret = __mmu_unsync_walk(child, pvec); 1139 + if (!ret) 1140 + goto clear_child_bitmap; 1141 + else if (ret > 0) 1142 + nr_unsync_leaf += ret; 1143 + else 1144 + return ret; 1145 + } else if (child->unsync) { 1146 + nr_unsync_leaf++; 1147 + if (mmu_pages_add(pvec, child, i)) 1148 + return -ENOSPC; 1149 + } else 1150 + goto clear_child_bitmap; 1151 + 1152 + continue; 1153 + 1154 + clear_child_bitmap: 1155 + __clear_bit(i, sp->unsync_child_bitmap); 1156 + sp->unsync_children--; 1157 + WARN_ON((int)sp->unsync_children < 0); 1158 } 1159 1160 1161 return nr_unsync_leaf; 1162 } ··· 1166 return __mmu_unsync_walk(sp, pvec); 1167 } 1168 1169 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1170 { 1171 WARN_ON(!sp->unsync); ··· 1194 --kvm->stat.mmu_unsync; 1195 } 1196 1197 + static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, 1198 + struct list_head *invalid_list); 1199 + static void kvm_mmu_commit_zap_page(struct kvm *kvm, 1200 + struct list_head *invalid_list); 1201 1202 + #define for_each_gfn_sp(kvm, sp, gfn, pos) \ 1203 + hlist_for_each_entry(sp, pos, \ 1204 + &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ 1205 + if ((sp)->gfn != (gfn)) {} else 1206 + 1207 + #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ 1208 + hlist_for_each_entry(sp, pos, \ 1209 + &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ 1210 + if ((sp)->gfn != (gfn) || (sp)->role.direct || \ 1211 + (sp)->role.invalid) {} else 1212 + 1213 + /* @sp->gfn should be write-protected at the call site */ 1214 + static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 1215 + struct list_head *invalid_list, bool clear_unsync) 1216 { 1217 if (sp->role.cr4_pae != !!is_pae(vcpu)) { 1218 + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); 1219 return 1; 1220 } 1221 1222 + if (clear_unsync) 1223 + kvm_unlink_unsync_page(vcpu->kvm, sp); 1224 + 1225 + if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { 1226 + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); 1227 return 1; 1228 } 1229 1230 kvm_mmu_flush_tlb(vcpu); 1231 return 0; 1232 + } 1233 + 1234 + static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, 1235 + struct kvm_mmu_page *sp) 1236 + { 1237 + LIST_HEAD(invalid_list); 1238 + int ret; 1239 + 1240 + ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); 1241 + if (ret) 1242 + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 1243 + 1244 + return ret; 1245 + } 1246 + 1247 + static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 1248 + struct list_head *invalid_list) 1249 + { 1250 + return __kvm_sync_page(vcpu, sp, invalid_list, true); 1251 + } 1252 + 1253 + /* @gfn should be write-protected at the call site */ 1254 + static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) 1255 + { 1256 + struct kvm_mmu_page *s; 1257 + struct hlist_node *node; 1258 + LIST_HEAD(invalid_list); 1259 + bool flush = false; 1260 + 1261 + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 1262 + if (!s->unsync) 1263 + continue; 1264 + 1265 + WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); 1266 + if ((s->role.cr4_pae != !!is_pae(vcpu)) || 1267 + 
(vcpu->arch.mmu.sync_page(vcpu, s, true))) { 1268 + kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); 1269 + continue; 1270 + } 1271 + kvm_unlink_unsync_page(vcpu->kvm, s); 1272 + flush = true; 1273 + } 1274 + 1275 + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 1276 + if (flush) 1277 + kvm_mmu_flush_tlb(vcpu); 1278 } 1279 1280 struct mmu_page_path { ··· 1281 struct kvm_mmu_page *sp; 1282 struct mmu_page_path parents; 1283 struct kvm_mmu_pages pages; 1284 + LIST_HEAD(invalid_list); 1285 1286 kvm_mmu_pages_init(parent, &parents, &pages); 1287 while (mmu_unsync_walk(parent, &pages)) { ··· 1293 kvm_flush_remote_tlbs(vcpu->kvm); 1294 1295 for_each_sp(pages, sp, parents, i) { 1296 + kvm_sync_page(vcpu, sp, &invalid_list); 1297 mmu_pages_clear_parents(&parents); 1298 } 1299 + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 1300 cond_resched_lock(&vcpu->kvm->mmu_lock); 1301 kvm_mmu_pages_init(parent, &parents, &pages); 1302 } ··· 1310 u64 *parent_pte) 1311 { 1312 union kvm_mmu_page_role role; 1313 unsigned quadrant; 1314 struct kvm_mmu_page *sp; 1315 + struct hlist_node *node; 1316 + bool need_sync = false; 1317 1318 role = vcpu->arch.mmu.base_role; 1319 role.level = level; ··· 1322 if (role.direct) 1323 role.cr4_pae = 0; 1324 role.access = access; 1325 + if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { 1326 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); 1327 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; 1328 role.quadrant = quadrant; 1329 } 1330 + for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { 1331 + if (!need_sync && sp->unsync) 1332 + need_sync = true; 1333 1334 + if (sp->role.word != role.word) 1335 + continue; 1336 1337 + if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) 1338 + break; 1339 + 1340 + mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1341 + if (sp->unsync_children) { 1342 + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); 1343 + kvm_mmu_mark_parents_unsync(sp); 1344 + } else if (sp->unsync) 1345 + kvm_mmu_mark_parents_unsync(sp); 1346 + 1347 + trace_kvm_mmu_get_page(sp, false); 1348 + return sp; 1349 + } 1350 ++vcpu->kvm->stat.mmu_cache_miss; 1351 + sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); 1352 if (!sp) 1353 return sp; 1354 sp->gfn = gfn; 1355 sp->role = role; 1356 + hlist_add_head(&sp->hash_link, 1357 + &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); 1358 if (!direct) { 1359 if (rmap_write_protect(vcpu->kvm, gfn)) 1360 kvm_flush_remote_tlbs(vcpu->kvm); 1361 + if (level > PT_PAGE_TABLE_LEVEL && need_sync) 1362 + kvm_sync_pages(vcpu, gfn); 1363 + 1364 account_shadowed(vcpu->kvm, gfn); 1365 } 1366 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) ··· 1402 --iterator->level; 1403 } 1404 1405 + static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) 1406 + { 1407 + u64 spte; 1408 + 1409 + spte = __pa(sp->spt) 1410 + | PT_PRESENT_MASK | PT_ACCESSED_MASK 1411 + | PT_WRITABLE_MASK | PT_USER_MASK; 1412 + __set_spte(sptep, spte); 1413 + } 1414 + 1415 + static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) 1416 + { 1417 + if (is_large_pte(*sptep)) { 1418 + drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); 1419 + kvm_flush_remote_tlbs(vcpu->kvm); 1420 + } 1421 + } 1422 + 1423 + static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, 1424 + unsigned direct_access) 1425 + { 1426 + if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) { 1427 + struct kvm_mmu_page *child; 1428 + 1429 + /* 1430 + * For the direct sp, if the guest pte's dirty bit 1431 + * changed 
form clean to dirty, it will corrupt the 1432 + * sp's access: allow writable in the read-only sp, 1433 + * so we should update the spte at this point to get 1434 + * a new sp with the correct access. 1435 + */ 1436 + child = page_header(*sptep & PT64_BASE_ADDR_MASK); 1437 + if (child->role.access == direct_access) 1438 + return; 1439 + 1440 + mmu_page_remove_parent_pte(child, sptep); 1441 + __set_spte(sptep, shadow_trap_nonpresent_pte); 1442 + kvm_flush_remote_tlbs(vcpu->kvm); 1443 + } 1444 + } 1445 + 1446 static void kvm_mmu_page_unlink_children(struct kvm *kvm, 1447 struct kvm_mmu_page *sp) 1448 { ··· 1422 } else { 1423 if (is_large_pte(ent)) 1424 --kvm->stat.lpages; 1425 + drop_spte(kvm, &pt[i], 1426 + shadow_trap_nonpresent_pte); 1427 } 1428 } 1429 pt[i] = shadow_trap_nonpresent_pte; ··· 1464 } 1465 1466 static int mmu_zap_unsync_children(struct kvm *kvm, 1467 + struct kvm_mmu_page *parent, 1468 + struct list_head *invalid_list) 1469 { 1470 int i, zapped = 0; 1471 struct mmu_page_path parents; ··· 1478 struct kvm_mmu_page *sp; 1479 1480 for_each_sp(pages, sp, parents, i) { 1481 + kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); 1482 mmu_pages_clear_parents(&parents); 1483 zapped++; 1484 } ··· 1488 return zapped; 1489 } 1490 1491 + static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, 1492 + struct list_head *invalid_list) 1493 { 1494 int ret; 1495 1496 + trace_kvm_mmu_prepare_zap_page(sp); 1497 ++kvm->stat.mmu_shadow_zapped; 1498 + ret = mmu_zap_unsync_children(kvm, sp, invalid_list); 1499 kvm_mmu_page_unlink_children(kvm, sp); 1500 kvm_mmu_unlink_parents(kvm, sp); 1501 if (!sp->role.invalid && !sp->role.direct) 1502 unaccount_shadowed(kvm, sp->gfn); 1503 if (sp->unsync) 1504 kvm_unlink_unsync_page(kvm, sp); 1505 if (!sp->root_count) { 1506 + /* Count self */ 1507 + ret++; 1508 + list_move(&sp->link, invalid_list); 1509 } else { 1510 list_move(&sp->link, &kvm->arch.active_mmu_pages); 1511 kvm_reload_remote_mmus(kvm); 1512 } 1513 + 1514 + sp->role.invalid = 1; 1515 kvm_mmu_reset_last_pte_updated(kvm); 1516 return ret; 1517 + } 1518 + 1519 + static void kvm_mmu_commit_zap_page(struct kvm *kvm, 1520 + struct list_head *invalid_list) 1521 + { 1522 + struct kvm_mmu_page *sp; 1523 + 1524 + if (list_empty(invalid_list)) 1525 + return; 1526 + 1527 + kvm_flush_remote_tlbs(kvm); 1528 + 1529 + do { 1530 + sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); 1531 + WARN_ON(!sp->role.invalid || sp->root_count); 1532 + kvm_mmu_free_page(kvm, sp); 1533 + } while (!list_empty(invalid_list)); 1534 + 1535 } 1536 1537 /* ··· 1521 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) 1522 { 1523 int used_pages; 1524 + LIST_HEAD(invalid_list); 1525 1526 used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; 1527 used_pages = max(0, used_pages); ··· 1538 1539 page = container_of(kvm->arch.active_mmu_pages.prev, 1540 struct kvm_mmu_page, link); 1541 + used_pages -= kvm_mmu_prepare_zap_page(kvm, page, 1542 + &invalid_list); 1543 } 1544 + kvm_mmu_commit_zap_page(kvm, &invalid_list); 1545 kvm_nr_mmu_pages = used_pages; 1546 kvm->arch.n_free_mmu_pages = 0; 1547 } ··· 1553 1554 static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) 1555 { 1556 struct kvm_mmu_page *sp; 1557 + struct hlist_node *node; 1558 + LIST_HEAD(invalid_list); 1559 int r; 1560 1561 pgprintk("%s: looking for gfn %lx\n", __func__, gfn); 1562 r = 0; 1563 + 1564 + for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { 1565 + pgprintk("%s: gfn %lx role %x\n", 
__func__, gfn, 1566 + sp->role.word); 1567 + r = 1; 1568 + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); 1569 + } 1570 + kvm_mmu_commit_zap_page(kvm, &invalid_list); 1571 return r; 1572 } 1573 1574 static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) 1575 { 1576 struct kvm_mmu_page *sp; 1577 + struct hlist_node *node; 1578 + LIST_HEAD(invalid_list); 1579 1580 + for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { 1581 + pgprintk("%s: zap %lx %x\n", 1582 + __func__, gfn, sp->role.word); 1583 + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); 1584 } 1585 + kvm_mmu_commit_zap_page(kvm, &invalid_list); 1586 } 1587 1588 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) ··· 1723 } 1724 EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); 1725 1726 + static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1727 { 1728 trace_kvm_mmu_unsync_page(sp); 1729 ++vcpu->kvm->stat.mmu_unsync; 1730 sp->unsync = 1; 1731 1732 kvm_mmu_mark_parents_unsync(sp); 1733 mmu_convert_notrap(sp); 1734 + } 1735 + 1736 + static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) 1737 + { 1738 + struct kvm_mmu_page *s; 1739 + struct hlist_node *node; 1740 + 1741 + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 1742 + if (s->unsync) 1743 + continue; 1744 + WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); 1745 + __kvm_unsync_page(vcpu, s); 1746 + } 1747 } 1748 1749 static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, 1750 bool can_unsync) 1751 { 1752 + struct kvm_mmu_page *s; 1753 + struct hlist_node *node; 1754 + bool need_unsync = false; 1755 1756 + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 1757 + if (!can_unsync) 1758 return 1; 1759 + 1760 + if (s->role.level != PT_PAGE_TABLE_LEVEL) 1761 + return 1; 1762 + 1763 + if (!need_unsync && !s->unsync) { 1764 + if (!oos_shadow) 1765 + return 1; 1766 + need_unsync = true; 1767 + } 1768 } 1769 + if (need_unsync) 1770 + kvm_unsync_pages(vcpu, gfn); 1771 return 0; 1772 } 1773 ··· 1804 spte |= (u64)pfn << PAGE_SHIFT; 1805 1806 if ((pte_access & ACC_WRITE_MASK) 1807 + || (!tdp_enabled && write_fault && !is_write_protection(vcpu) 1808 + && !user_fault)) { 1809 1810 if (level > PT_PAGE_TABLE_LEVEL && 1811 has_wrprotected_page(vcpu->kvm, gfn, level)) { 1812 ret = 1; 1813 + drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); 1814 + goto done; 1815 } 1816 1817 spte |= PT_WRITABLE_MASK; ··· 1841 mark_page_dirty(vcpu->kvm, gfn); 1842 1843 set_pte: 1844 + if (is_writable_pte(*sptep) && !is_writable_pte(spte)) 1845 + kvm_set_pfn_dirty(pfn); 1846 + update_spte(sptep, spte); 1847 + done: 1848 return ret; 1849 } 1850 ··· 1853 bool reset_host_protection) 1854 { 1855 int was_rmapped = 0; 1856 int rmap_count; 1857 1858 pgprintk("%s: spte %llx access %x write_fault %d" ··· 1878 } else if (pfn != spte_to_pfn(*sptep)) { 1879 pgprintk("hfn old %lx new %lx\n", 1880 spte_to_pfn(*sptep), pfn); 1881 + drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); 1882 kvm_flush_remote_tlbs(vcpu->kvm); 1883 } else 1884 was_rmapped = 1; ··· 1890 reset_host_protection)) { 1891 if (write_fault) 1892 *ptwrite = 1; 1893 + kvm_mmu_flush_tlb(vcpu); 1894 } 1895 1896 pgprintk("%s: setting spte %llx\n", __func__, *sptep); ··· 1904 page_header_update_slot(vcpu->kvm, sptep, gfn); 1905 if (!was_rmapped) { 1906 rmap_count = rmap_add(vcpu, sptep, gfn); 1907 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 1908 rmap_recycle(vcpu, sptep, gfn); 1909 } 1910 + kvm_release_pfn_clean(pfn); 1911 if (speculative) { 1912 vcpu->arch.last_pte_updated = 
sptep; 1913 vcpu->arch.last_pte_gfn = gfn; ··· 1941 } 1942 1943 if (*iterator.sptep == shadow_trap_nonpresent_pte) { 1944 + u64 base_addr = iterator.addr; 1945 + 1946 + base_addr &= PT64_LVL_ADDR_MASK(iterator.level); 1947 + pseudo_gfn = base_addr >> PAGE_SHIFT; 1948 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, 1949 iterator.level - 1, 1950 1, ACC_ALL, iterator.sptep); ··· 1958 } 1959 } 1960 return pt_write; 1961 + } 1962 + 1963 + static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn) 1964 + { 1965 + char buf[1]; 1966 + void __user *hva; 1967 + int r; 1968 + 1969 + /* Touch the page, so send SIGBUS */ 1970 + hva = (void __user *)gfn_to_hva(kvm, gfn); 1971 + r = copy_from_user(buf, hva, 1); 1972 + } 1973 + 1974 + static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) 1975 + { 1976 + kvm_release_pfn_clean(pfn); 1977 + if (is_hwpoison_pfn(pfn)) { 1978 + kvm_send_hwpoison_signal(kvm, gfn); 1979 + return 0; 1980 + } else if (is_fault_pfn(pfn)) 1981 + return -EFAULT; 1982 + 1983 + return 1; 1984 } 1985 1986 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) ··· 1983 pfn = gfn_to_pfn(vcpu->kvm, gfn); 1984 1985 /* mmio */ 1986 + if (is_error_pfn(pfn)) 1987 + return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); 1988 1989 spin_lock(&vcpu->kvm->mmu_lock); 1990 if (mmu_notifier_retry(vcpu, mmu_seq)) ··· 2009 { 2010 int i; 2011 struct kvm_mmu_page *sp; 2012 + LIST_HEAD(invalid_list); 2013 2014 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 2015 return; ··· 2018 2019 sp = page_header(root); 2020 --sp->root_count; 2021 + if (!sp->root_count && sp->role.invalid) { 2022 + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); 2023 + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 2024 + } 2025 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 2026 spin_unlock(&vcpu->kvm->mmu_lock); 2027 return; ··· 2032 sp = page_header(root); 2033 --sp->root_count; 2034 if (!sp->root_count && sp->role.invalid) 2035 + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, 2036 + &invalid_list); 2037 } 2038 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 2039 } 2040 + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 2041 spin_unlock(&vcpu->kvm->mmu_lock); 2042 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 2043 } ··· 2045 int ret = 0; 2046 2047 if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { 2048 + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 2049 ret = 1; 2050 } 2051 ··· 2073 root_gfn = 0; 2074 } 2075 spin_lock(&vcpu->kvm->mmu_lock); 2076 + kvm_mmu_free_some_pages(vcpu); 2077 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 2078 PT64_ROOT_LEVEL, direct, 2079 ACC_ALL, NULL); ··· 2103 root_gfn = i << 30; 2104 } 2105 spin_lock(&vcpu->kvm->mmu_lock); 2106 + kvm_mmu_free_some_pages(vcpu); 2107 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 2108 PT32_ROOT_LEVEL, direct, 2109 ACC_ALL, NULL); ··· 2198 mmu_seq = vcpu->kvm->mmu_notifier_seq; 2199 smp_rmb(); 2200 pfn = gfn_to_pfn(vcpu->kvm, gfn); 2201 + if (is_error_pfn(pfn)) 2202 + return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); 2203 spin_lock(&vcpu->kvm->mmu_lock); 2204 if (mmu_notifier_retry(vcpu, mmu_seq)) 2205 goto out_unlock; ··· 2243 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 2244 { 2245 ++vcpu->stat.tlb_flush; 2246 + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 2247 } 2248 2249 static void paging_new_cr3(struct kvm_vcpu *vcpu) ··· 2457 static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 2458 { 2459 ASSERT(vcpu); 2460 + if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) 2461 + /* mmu.free() should set root_hpa = INVALID_PAGE */ 2462 vcpu->arch.mmu.free(vcpu); 2463 } 
2464 2465 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) ··· 2477 r = mmu_topup_memory_caches(vcpu); 2478 if (r) 2479 goto out; 2480 r = mmu_alloc_roots(vcpu); 2481 spin_lock(&vcpu->kvm->mmu_lock); 2482 mmu_sync_roots(vcpu); ··· 2508 pte = *spte; 2509 if (is_shadow_present_pte(pte)) { 2510 if (is_last_spte(pte, sp->role.level)) 2511 + drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte); 2512 else { 2513 child = page_header(pte & PT64_BASE_ADDR_MASK); 2514 mmu_page_remove_parent_pte(child, spte); ··· 2528 ++vcpu->kvm->stat.mmu_pde_zapped; 2529 return; 2530 } 2531 + 2532 + if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) 2533 + return; 2534 2535 ++vcpu->kvm->stat.mmu_pte_updated; 2536 if (!sp->role.cr4_pae) ··· 2549 return (old & ~new & PT64_PERM_MASK) != 0; 2550 } 2551 2552 + static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, 2553 + bool remote_flush, bool local_flush) 2554 { 2555 + if (zap_page) 2556 + return; 2557 + 2558 + if (remote_flush) 2559 kvm_flush_remote_tlbs(vcpu->kvm); 2560 + else if (local_flush) 2561 kvm_mmu_flush_tlb(vcpu); 2562 } 2563 ··· 2603 bool guest_initiated) 2604 { 2605 gfn_t gfn = gpa >> PAGE_SHIFT; 2606 + union kvm_mmu_page_role mask = { .word = 0 }; 2607 struct kvm_mmu_page *sp; 2608 + struct hlist_node *node; 2609 + LIST_HEAD(invalid_list); 2610 u64 entry, gentry; 2611 u64 *spte; 2612 unsigned offset = offset_in_page(gpa); ··· 2619 int npte; 2620 int r; 2621 int invlpg_counter; 2622 + bool remote_flush, local_flush, zap_page; 2623 + 2624 + zap_page = remote_flush = local_flush = false; 2625 2626 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); 2627 ··· 2674 vcpu->arch.last_pte_updated = NULL; 2675 } 2676 } 2677 2678 + mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; 2679 + for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { 2680 pte_size = sp->role.cr4_pae ? 
8 : 4; 2681 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); 2682 misaligned |= bytes < 4; ··· 2697 */ 2698 pgprintk("misaligned: gpa %llx bytes %d role %x\n", 2699 gpa, bytes, sp->role.word); 2700 + zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, 2701 + &invalid_list); 2702 ++vcpu->kvm->stat.mmu_flooded; 2703 continue; 2704 } ··· 2722 if (quadrant != sp->role.quadrant) 2723 continue; 2724 } 2725 + local_flush = true; 2726 spte = &sp->spt[page_offset / sizeof(*spte)]; 2727 while (npte--) { 2728 entry = *spte; 2729 mmu_pte_write_zap_pte(vcpu, sp, spte); 2730 + if (gentry && 2731 + !((sp->role.word ^ vcpu->arch.mmu.base_role.word) 2732 + & mask.word)) 2733 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); 2734 + if (!remote_flush && need_remote_flush(entry, *spte)) 2735 + remote_flush = true; 2736 ++spte; 2737 } 2738 } 2739 + mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); 2740 + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 2741 kvm_mmu_audit(vcpu, "post pte write"); 2742 spin_unlock(&vcpu->kvm->mmu_lock); 2743 if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { ··· 2759 2760 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 2761 { 2762 + int free_pages; 2763 + LIST_HEAD(invalid_list); 2764 + 2765 + free_pages = vcpu->kvm->arch.n_free_mmu_pages; 2766 + while (free_pages < KVM_REFILL_PAGES && 2767 !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { 2768 struct kvm_mmu_page *sp; 2769 2770 sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, 2771 struct kvm_mmu_page, link); 2772 + free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp, 2773 + &invalid_list); 2774 ++vcpu->kvm->stat.mmu_recycled; 2775 } 2776 + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 2777 } 2778 2779 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) ··· 2795 return 1; 2796 case EMULATE_DO_MMIO: 2797 ++vcpu->stat.mmio_exits; 2798 + /* fall through */ 2799 case EMULATE_FAIL: 2800 return 0; 2801 default: 2802 BUG(); ··· 2896 pt = sp->spt; 2897 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 2898 /* avoid RMW */ 2899 + if (is_writable_pte(pt[i])) 2900 pt[i] &= ~PT_WRITABLE_MASK; 2901 } 2902 kvm_flush_remote_tlbs(kvm); ··· 2905 void kvm_mmu_zap_all(struct kvm *kvm) 2906 { 2907 struct kvm_mmu_page *sp, *node; 2908 + LIST_HEAD(invalid_list); 2909 2910 spin_lock(&kvm->mmu_lock); 2911 restart: 2912 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) 2913 + if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) 2914 goto restart; 2915 2916 + kvm_mmu_commit_zap_page(kvm, &invalid_list); 2917 spin_unlock(&kvm->mmu_lock); 2918 } 2919 2920 + static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, 2921 + struct list_head *invalid_list) 2922 { 2923 struct kvm_mmu_page *page; 2924 2925 page = container_of(kvm->arch.active_mmu_pages.prev, 2926 struct kvm_mmu_page, link); 2927 + return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); 2928 } 2929 2930 static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) ··· 2936 2937 list_for_each_entry(kvm, &vm_list, vm_list) { 2938 int npages, idx, freed_pages; 2939 + LIST_HEAD(invalid_list); 2940 2941 idx = srcu_read_lock(&kvm->srcu); 2942 spin_lock(&kvm->mmu_lock); ··· 2943 kvm->arch.n_free_mmu_pages; 2944 cache_count += npages; 2945 if (!kvm_freed && nr_to_scan > 0 && npages > 0) { 2946 + freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, 2947 + &invalid_list); 2948 cache_count -= freed_pages; 2949 kvm_freed = kvm; 2950 } 2951 nr_to_scan--; 2952 2953 + kvm_mmu_commit_zap_page(kvm, 
&invalid_list); 2954 spin_unlock(&kvm->mmu_lock); 2955 srcu_read_unlock(&kvm->srcu, idx); 2956 } ··· 3074 3075 static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) 3076 { 3077 + (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); 3078 return 1; 3079 } 3080 ··· 3331 struct kvm_mmu_page *rev_sp; 3332 gfn_t gfn; 3333 3334 + if (is_writable_pte(*sptep)) { 3335 rev_sp = page_header(__pa(sptep)); 3336 + gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); 3337 3338 if (!gfn_to_memslot(kvm, gfn)) { 3339 if (!printk_ratelimit()) ··· 3347 return; 3348 } 3349 3350 + rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); 3351 if (!*rmapp) { 3352 if (!printk_ratelimit()) 3353 return; ··· 3381 3382 if (!(ent & PT_PRESENT_MASK)) 3383 continue; 3384 + if (!is_writable_pte(ent)) 3385 continue; 3386 inspect_spte_has_rmap(vcpu->kvm, &pt[i]); 3387 } ··· 3409 if (sp->unsync) 3410 continue; 3411 3412 + slot = gfn_to_memslot(vcpu->kvm, sp->gfn); 3413 rmapp = &slot->rmap[gfn - slot->base_gfn]; 3414 3415 spte = rmap_next(vcpu->kvm, rmapp, NULL); 3416 while (spte) { 3417 + if (is_writable_pte(*spte)) 3418 printk(KERN_ERR "%s: (%s) shadow page has " 3419 "writable mappings: gfn %lx role %x\n", 3420 __func__, audit_msg, sp->gfn,
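The mmu.c hunks above replace the old single-step zap with a kvm_mmu_prepare_zap_page()/kvm_mmu_commit_zap_page() pair: zapped shadow pages are parked on a caller-provided invalid_list so a single remote TLB flush covers the whole batch before the pages are freed. Below is a minimal user-space sketch of that prepare/commit pattern only; the toy_* names and the plain singly linked list are invented stand-ins for the kernel's structures, not KVM code.

/* Toy sketch (not kernel code): batch "zapped" pages on a local list,
 * flush once for the whole batch, then free -- mirroring the
 * prepare_zap/commit_zap split in the hunks above. */
#include <stdio.h>
#include <stdlib.h>

struct toy_page {
	int gfn;
	struct toy_page *next;
};

struct toy_kvm {
	struct toy_page *active;	/* live shadow pages */
	int remote_flushes;		/* counts the expensive flushes */
};

/* "prepare": unlink from the active list, park on invalid_list */
static void prepare_zap(struct toy_kvm *kvm, struct toy_page *page,
			struct toy_page **invalid_list)
{
	struct toy_page **pp = &kvm->active;

	while (*pp && *pp != page)
		pp = &(*pp)->next;
	if (*pp) {
		*pp = page->next;
		page->next = *invalid_list;
		*invalid_list = page;
	}
}

/* "commit": one flush for the whole batch, then free everything */
static void commit_zap(struct toy_kvm *kvm, struct toy_page **invalid_list)
{
	if (!*invalid_list)
		return;

	kvm->remote_flushes++;	/* stands in for kvm_flush_remote_tlbs() */

	while (*invalid_list) {
		struct toy_page *page = *invalid_list;

		*invalid_list = page->next;
		free(page);
	}
}

int main(void)
{
	struct toy_kvm kvm = { NULL, 0 };
	struct toy_page *invalid_list = NULL;
	int i;

	for (i = 0; i < 4; i++) {
		struct toy_page *p = malloc(sizeof(*p));

		p->gfn = i;
		p->next = kvm.active;
		kvm.active = p;
	}

	while (kvm.active)
		prepare_zap(&kvm, kvm.active, &invalid_list);
	commit_zap(&kvm, &invalid_list);

	/* four pages zapped, but only one "remote flush" */
	printf("remote flushes: %d\n", kvm.remote_flushes);
	return 0;
}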
+1 -1
arch/x86/kvm/mmutrace.h
··· 190 TP_ARGS(sp) 191 ); 192 193 - DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, 194 TP_PROTO(struct kvm_mmu_page *sp), 195 196 TP_ARGS(sp)
··· 190 TP_ARGS(sp) 191 ); 192 193 + DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, 194 TP_PROTO(struct kvm_mmu_page *sp), 195 196 TP_ARGS(sp)
+147 -109
arch/x86/kvm/paging_tmpl.h
··· 7 * MMU support 8 * 9 * Copyright (C) 2006 Qumranet, Inc. 10 * 11 * Authors: 12 * Yaniv Kamay <yaniv@qumranet.com> ··· 119 { 120 pt_element_t pte; 121 gfn_t table_gfn; 122 - unsigned index, pt_access, pte_access; 123 gpa_t pte_gpa; 124 - int rsvd_fault = 0; 125 126 trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, 127 fetch_fault); 128 walk: 129 walker->level = vcpu->arch.mmu.root_level; 130 pte = vcpu->arch.cr3; 131 #if PTTYPE == 64 132 if (!is_long_mode(vcpu)) { 133 pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); 134 trace_kvm_mmu_paging_element(pte, walker->level); 135 - if (!is_present_gpte(pte)) 136 - goto not_present; 137 --walker->level; 138 } 139 #endif ··· 155 walker->table_gfn[walker->level - 1] = table_gfn; 156 walker->pte_gpa[walker->level - 1] = pte_gpa; 157 158 - if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) 159 - goto not_present; 160 161 trace_kvm_mmu_paging_element(pte, walker->level); 162 163 - if (!is_present_gpte(pte)) 164 - goto not_present; 165 166 - rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); 167 - if (rsvd_fault) 168 - goto access_error; 169 170 if (write_fault && !is_writable_pte(pte)) 171 if (user_fault || is_write_protection(vcpu)) 172 - goto access_error; 173 174 if (user_fault && !(pte & PT_USER_MASK)) 175 - goto access_error; 176 177 #if PTTYPE == 64 178 if (fetch_fault && (pte & PT64_NX_MASK)) 179 - goto access_error; 180 #endif 181 182 - if (!(pte & PT_ACCESSED_MASK)) { 183 trace_kvm_mmu_set_accessed_bit(table_gfn, index, 184 sizeof(pte)); 185 - mark_page_dirty(vcpu->kvm, table_gfn); 186 if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, 187 index, pte, pte|PT_ACCESSED_MASK)) 188 goto walk; 189 pte |= PT_ACCESSED_MASK; 190 } 191 ··· 223 --walker->level; 224 } 225 226 if (write_fault && !is_dirty_gpte(pte)) { 227 bool ret; 228 229 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); 230 - mark_page_dirty(vcpu->kvm, table_gfn); 231 ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, 232 pte|PT_DIRTY_MASK); 233 if (ret) 234 goto walk; 235 pte |= PT_DIRTY_MASK; 236 walker->ptes[walker->level - 1] = pte; 237 } ··· 242 walker->pt_access = pt_access; 243 walker->pte_access = pte_access; 244 pgprintk("%s: pte %llx pte_access %x pt_access %x\n", 245 - __func__, (u64)pte, pt_access, pte_access); 246 return 1; 247 248 - not_present: 249 walker->error_code = 0; 250 - goto err; 251 - 252 - access_error: 253 - walker->error_code = PFERR_PRESENT_MASK; 254 - 255 - err: 256 if (write_fault) 257 walker->error_code |= PFERR_WRITE_MASK; 258 if (user_fault) 259 walker->error_code |= PFERR_USER_MASK; 260 - if (fetch_fault) 261 walker->error_code |= PFERR_FETCH_MASK; 262 if (rsvd_fault) 263 walker->error_code |= PFERR_RSVD_MASK; ··· 261 return 0; 262 } 263 264 - static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, 265 u64 *spte, const void *pte) 266 { 267 pt_element_t gpte; ··· 272 gpte = *(const pt_element_t *)pte; 273 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { 274 if (!is_present_gpte(gpte)) { 275 - if (page->unsync) 276 new_spte = shadow_trap_nonpresent_pte; 277 else 278 new_spte = shadow_notrap_nonpresent_pte; ··· 281 return; 282 } 283 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); 284 - pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); 285 if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) 286 return; 287 pfn = vcpu->arch.update_pte.pfn; ··· 294 * we call mmu_set_spte() with reset_host_protection = true beacuse that 295 * vcpu->arch.update_pte.pfn was fetched 
from get_user_pages(write = 1). 296 */ 297 - mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 298 - gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, 299 gpte_to_gfn(gpte), pfn, true, true); 300 } 301 302 /* ··· 319 int *ptwrite, pfn_t pfn) 320 { 321 unsigned access = gw->pt_access; 322 - struct kvm_mmu_page *shadow_page; 323 - u64 spte, *sptep = NULL; 324 - int direct; 325 - gfn_t table_gfn; 326 - int r; 327 - int level; 328 - pt_element_t curr_pte; 329 - struct kvm_shadow_walk_iterator iterator; 330 331 if (!is_present_gpte(gw->ptes[gw->level - 1])) 332 return NULL; 333 334 - for_each_shadow_entry(vcpu, addr, iterator) { 335 - level = iterator.level; 336 - sptep = iterator.sptep; 337 - if (iterator.level == hlevel) { 338 - mmu_set_spte(vcpu, sptep, access, 339 - gw->pte_access & access, 340 - user_fault, write_fault, 341 - gw->ptes[gw->level-1] & PT_DIRTY_MASK, 342 - ptwrite, level, 343 - gw->gfn, pfn, false, true); 344 - break; 345 } 346 347 - if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) 348 - continue; 349 350 - if (is_large_pte(*sptep)) { 351 - rmap_remove(vcpu->kvm, sptep); 352 - __set_spte(sptep, shadow_trap_nonpresent_pte); 353 - kvm_flush_remote_tlbs(vcpu->kvm); 354 - } 355 - 356 - if (level <= gw->level) { 357 - int delta = level - gw->level + 1; 358 - direct = 1; 359 - if (!is_dirty_gpte(gw->ptes[level - delta])) 360 - access &= ~ACC_WRITE_MASK; 361 - table_gfn = gpte_to_gfn(gw->ptes[level - delta]); 362 - /* advance table_gfn when emulating 1gb pages with 4k */ 363 - if (delta == 0) 364 - table_gfn += PT_INDEX(addr, level); 365 - access &= gw->pte_access; 366 - } else { 367 - direct = 0; 368 - table_gfn = gw->table_gfn[level - 2]; 369 - } 370 - shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, 371 - direct, access, sptep); 372 - if (!direct) { 373 - r = kvm_read_guest_atomic(vcpu->kvm, 374 - gw->pte_gpa[level - 2], 375 - &curr_pte, sizeof(curr_pte)); 376 - if (r || curr_pte != gw->ptes[level - 2]) { 377 - kvm_mmu_put_page(shadow_page, sptep); 378 - kvm_release_pfn_clean(pfn); 379 - sptep = NULL; 380 - break; 381 - } 382 - } 383 - 384 - spte = __pa(shadow_page->spt) 385 - | PT_PRESENT_MASK | PT_ACCESSED_MASK 386 - | PT_WRITABLE_MASK | PT_USER_MASK; 387 - *sptep = spte; 388 } 389 390 - return sptep; 391 } 392 393 /* ··· 462 pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); 463 464 /* mmio */ 465 - if (is_error_pfn(pfn)) { 466 - pgprintk("gfn %lx is mmio\n", walker.gfn); 467 - kvm_release_pfn_clean(pfn); 468 - return 1; 469 - } 470 471 spin_lock(&vcpu->kvm->mmu_lock); 472 if (mmu_notifier_retry(vcpu, mmu_seq)) ··· 471 kvm_mmu_free_some_pages(vcpu); 472 sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 473 level, &write_pt, pfn); 474 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, 475 sptep, *sptep, write_pt); 476 ··· 493 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 494 { 495 struct kvm_shadow_walk_iterator iterator; 496 gpa_t pte_gpa = -1; 497 int level; 498 u64 *sptep; ··· 505 level = iterator.level; 506 sptep = iterator.sptep; 507 508 if (is_last_spte(*sptep, level)) { 509 - struct kvm_mmu_page *sp = page_header(__pa(sptep)); 510 int offset, shift; 511 512 shift = PAGE_SHIFT - 513 (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; ··· 520 pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); 521 522 if (is_shadow_present_pte(*sptep)) { 523 - rmap_remove(vcpu->kvm, sptep); 524 if (is_large_pte(*sptep)) 525 --vcpu->kvm->stat.lpages; 526 need_flush = 1; 527 - } 528 - __set_spte(sptep, shadow_trap_nonpresent_pte); 529 
break; 530 } 531 532 - if (!is_shadow_present_pte(*sptep)) 533 break; 534 } 535 ··· 604 * Using the cached information from sp->gfns is safe because: 605 * - The spte has a reference to the struct page, so the pfn for a given gfn 606 * can't change unless all sptes pointing to it are nuked first. 607 - * - Alias changes zap the entire shadow cache. 608 */ 609 - static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 610 { 611 int i, offset, nr_present; 612 bool reset_host_protection; 613 gpa_t first_pte_gpa; 614 615 offset = nr_present = 0; 616 617 if (PTTYPE == 32) 618 offset = sp->role.quadrant << PT64_LEVEL_BITS; ··· 626 unsigned pte_access; 627 pt_element_t gpte; 628 gpa_t pte_gpa; 629 - gfn_t gfn = sp->gfns[i]; 630 631 if (!is_shadow_present_pte(sp->spt[i])) 632 continue; ··· 637 sizeof(pt_element_t))) 638 return -EINVAL; 639 640 - if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) || 641 - !(gpte & PT_ACCESSED_MASK)) { 642 u64 nonpresent; 643 644 - rmap_remove(vcpu->kvm, &sp->spt[i]); 645 - if (is_present_gpte(gpte)) 646 nonpresent = shadow_trap_nonpresent_pte; 647 else 648 nonpresent = shadow_notrap_nonpresent_pte; 649 - __set_spte(&sp->spt[i], nonpresent); 650 continue; 651 } 652
··· 7 * MMU support 8 * 9 * Copyright (C) 2006 Qumranet, Inc. 10 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 11 * 12 * Authors: 13 * Yaniv Kamay <yaniv@qumranet.com> ··· 118 { 119 pt_element_t pte; 120 gfn_t table_gfn; 121 + unsigned index, pt_access, uninitialized_var(pte_access); 122 gpa_t pte_gpa; 123 + bool eperm, present, rsvd_fault; 124 125 trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, 126 fetch_fault); 127 walk: 128 + present = true; 129 + eperm = rsvd_fault = false; 130 walker->level = vcpu->arch.mmu.root_level; 131 pte = vcpu->arch.cr3; 132 #if PTTYPE == 64 133 if (!is_long_mode(vcpu)) { 134 pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); 135 trace_kvm_mmu_paging_element(pte, walker->level); 136 + if (!is_present_gpte(pte)) { 137 + present = false; 138 + goto error; 139 + } 140 --walker->level; 141 } 142 #endif ··· 150 walker->table_gfn[walker->level - 1] = table_gfn; 151 walker->pte_gpa[walker->level - 1] = pte_gpa; 152 153 + if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) { 154 + present = false; 155 + break; 156 + } 157 158 trace_kvm_mmu_paging_element(pte, walker->level); 159 160 + if (!is_present_gpte(pte)) { 161 + present = false; 162 + break; 163 + } 164 165 + if (is_rsvd_bits_set(vcpu, pte, walker->level)) { 166 + rsvd_fault = true; 167 + break; 168 + } 169 170 if (write_fault && !is_writable_pte(pte)) 171 if (user_fault || is_write_protection(vcpu)) 172 + eperm = true; 173 174 if (user_fault && !(pte & PT_USER_MASK)) 175 + eperm = true; 176 177 #if PTTYPE == 64 178 if (fetch_fault && (pte & PT64_NX_MASK)) 179 + eperm = true; 180 #endif 181 182 + if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { 183 trace_kvm_mmu_set_accessed_bit(table_gfn, index, 184 sizeof(pte)); 185 if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, 186 index, pte, pte|PT_ACCESSED_MASK)) 187 goto walk; 188 + mark_page_dirty(vcpu->kvm, table_gfn); 189 pte |= PT_ACCESSED_MASK; 190 } 191 ··· 213 --walker->level; 214 } 215 216 + if (!present || eperm || rsvd_fault) 217 + goto error; 218 + 219 if (write_fault && !is_dirty_gpte(pte)) { 220 bool ret; 221 222 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); 223 ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, 224 pte|PT_DIRTY_MASK); 225 if (ret) 226 goto walk; 227 + mark_page_dirty(vcpu->kvm, table_gfn); 228 pte |= PT_DIRTY_MASK; 229 walker->ptes[walker->level - 1] = pte; 230 } ··· 229 walker->pt_access = pt_access; 230 walker->pte_access = pte_access; 231 pgprintk("%s: pte %llx pte_access %x pt_access %x\n", 232 + __func__, (u64)pte, pte_access, pt_access); 233 return 1; 234 235 + error: 236 walker->error_code = 0; 237 + if (present) 238 + walker->error_code |= PFERR_PRESENT_MASK; 239 if (write_fault) 240 walker->error_code |= PFERR_WRITE_MASK; 241 if (user_fault) 242 walker->error_code |= PFERR_USER_MASK; 243 + if (fetch_fault && is_nx(vcpu)) 244 walker->error_code |= PFERR_FETCH_MASK; 245 if (rsvd_fault) 246 walker->error_code |= PFERR_RSVD_MASK; ··· 252 return 0; 253 } 254 255 + static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 256 u64 *spte, const void *pte) 257 { 258 pt_element_t gpte; ··· 263 gpte = *(const pt_element_t *)pte; 264 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { 265 if (!is_present_gpte(gpte)) { 266 + if (sp->unsync) 267 new_spte = shadow_trap_nonpresent_pte; 268 else 269 new_spte = shadow_notrap_nonpresent_pte; ··· 272 return; 273 } 274 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); 275 + pte_access = sp->role.access & 
FNAME(gpte_access)(vcpu, gpte); 276 if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) 277 return; 278 pfn = vcpu->arch.update_pte.pfn; ··· 285 * we call mmu_set_spte() with reset_host_protection = true beacuse that 286 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). 287 */ 288 + mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, 289 + is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, 290 gpte_to_gfn(gpte), pfn, true, true); 291 + } 292 + 293 + static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, 294 + struct guest_walker *gw, int level) 295 + { 296 + int r; 297 + pt_element_t curr_pte; 298 + 299 + r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1], 300 + &curr_pte, sizeof(curr_pte)); 301 + return r || curr_pte != gw->ptes[level - 1]; 302 } 303 304 /* ··· 299 int *ptwrite, pfn_t pfn) 300 { 301 unsigned access = gw->pt_access; 302 + struct kvm_mmu_page *sp = NULL; 303 + bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]); 304 + int top_level; 305 + unsigned direct_access; 306 + struct kvm_shadow_walk_iterator it; 307 308 if (!is_present_gpte(gw->ptes[gw->level - 1])) 309 return NULL; 310 311 + direct_access = gw->pt_access & gw->pte_access; 312 + if (!dirty) 313 + direct_access &= ~ACC_WRITE_MASK; 314 + 315 + top_level = vcpu->arch.mmu.root_level; 316 + if (top_level == PT32E_ROOT_LEVEL) 317 + top_level = PT32_ROOT_LEVEL; 318 + /* 319 + * Verify that the top-level gpte is still there. Since the page 320 + * is a root page, it is either write protected (and cannot be 321 + * changed from now on) or it is invalid (in which case, we don't 322 + * really care if it changes underneath us after this point). 323 + */ 324 + if (FNAME(gpte_changed)(vcpu, gw, top_level)) 325 + goto out_gpte_changed; 326 + 327 + for (shadow_walk_init(&it, vcpu, addr); 328 + shadow_walk_okay(&it) && it.level > gw->level; 329 + shadow_walk_next(&it)) { 330 + gfn_t table_gfn; 331 + 332 + drop_large_spte(vcpu, it.sptep); 333 + 334 + sp = NULL; 335 + if (!is_shadow_present_pte(*it.sptep)) { 336 + table_gfn = gw->table_gfn[it.level - 2]; 337 + sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, 338 + false, access, it.sptep); 339 } 340 341 + /* 342 + * Verify that the gpte in the page we've just write 343 + * protected is still there. 
344 + */ 345 + if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) 346 + goto out_gpte_changed; 347 348 + if (sp) 349 + link_shadow_page(it.sptep, sp); 350 } 351 352 + for (; 353 + shadow_walk_okay(&it) && it.level > hlevel; 354 + shadow_walk_next(&it)) { 355 + gfn_t direct_gfn; 356 + 357 + validate_direct_spte(vcpu, it.sptep, direct_access); 358 + 359 + drop_large_spte(vcpu, it.sptep); 360 + 361 + if (is_shadow_present_pte(*it.sptep)) 362 + continue; 363 + 364 + direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); 365 + 366 + sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, 367 + true, direct_access, it.sptep); 368 + link_shadow_page(it.sptep, sp); 369 + } 370 + 371 + mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, 372 + user_fault, write_fault, dirty, ptwrite, it.level, 373 + gw->gfn, pfn, false, true); 374 + 375 + return it.sptep; 376 + 377 + out_gpte_changed: 378 + if (sp) 379 + kvm_mmu_put_page(sp, it.sptep); 380 + kvm_release_pfn_clean(pfn); 381 + return NULL; 382 } 383 384 /* ··· 431 pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); 432 433 /* mmio */ 434 + if (is_error_pfn(pfn)) 435 + return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); 436 437 spin_lock(&vcpu->kvm->mmu_lock); 438 if (mmu_notifier_retry(vcpu, mmu_seq)) ··· 443 kvm_mmu_free_some_pages(vcpu); 444 sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 445 level, &write_pt, pfn); 446 + (void)sptep; 447 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, 448 sptep, *sptep, write_pt); 449 ··· 464 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 465 { 466 struct kvm_shadow_walk_iterator iterator; 467 + struct kvm_mmu_page *sp; 468 gpa_t pte_gpa = -1; 469 int level; 470 u64 *sptep; ··· 475 level = iterator.level; 476 sptep = iterator.sptep; 477 478 + sp = page_header(__pa(sptep)); 479 if (is_last_spte(*sptep, level)) { 480 int offset, shift; 481 + 482 + if (!sp->unsync) 483 + break; 484 485 shift = PAGE_SHIFT - 486 (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; ··· 487 pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); 488 489 if (is_shadow_present_pte(*sptep)) { 490 if (is_large_pte(*sptep)) 491 --vcpu->kvm->stat.lpages; 492 + drop_spte(vcpu->kvm, sptep, 493 + shadow_trap_nonpresent_pte); 494 need_flush = 1; 495 + } else 496 + __set_spte(sptep, shadow_trap_nonpresent_pte); 497 break; 498 } 499 500 + if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) 501 break; 502 } 503 ··· 570 * Using the cached information from sp->gfns is safe because: 571 * - The spte has a reference to the struct page, so the pfn for a given gfn 572 * can't change unless all sptes pointing to it are nuked first. 573 */ 574 + static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 575 + bool clear_unsync) 576 { 577 int i, offset, nr_present; 578 bool reset_host_protection; 579 gpa_t first_pte_gpa; 580 581 offset = nr_present = 0; 582 + 583 + /* direct kvm_mmu_page can not be unsync. 
*/ 584 + BUG_ON(sp->role.direct); 585 586 if (PTTYPE == 32) 587 offset = sp->role.quadrant << PT64_LEVEL_BITS; ··· 589 unsigned pte_access; 590 pt_element_t gpte; 591 gpa_t pte_gpa; 592 + gfn_t gfn; 593 594 if (!is_shadow_present_pte(sp->spt[i])) 595 continue; ··· 600 sizeof(pt_element_t))) 601 return -EINVAL; 602 603 + gfn = gpte_to_gfn(gpte); 604 + if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) 605 + || gfn != sp->gfns[i] || !is_present_gpte(gpte) 606 + || !(gpte & PT_ACCESSED_MASK)) { 607 u64 nonpresent; 608 609 + if (is_present_gpte(gpte) || !clear_unsync) 610 nonpresent = shadow_trap_nonpresent_pte; 611 else 612 nonpresent = shadow_notrap_nonpresent_pte; 613 + drop_spte(vcpu->kvm, &sp->spt[i], nonpresent); 614 continue; 615 } 616
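The walker rewrite above drops the not_present/access_error goto labels in favour of present/eperm/rsvd_fault flags that are folded into the page-fault error code at a single error: exit. The stand-alone sketch below shows only that folding step; it reuses the PFERR_* bit layout visible in the hunk, while walker_error_code() and its parameters are invented for illustration.

/* Toy sketch (not kernel code): assemble an x86-style page-fault error
 * code from the walker's flags, as the rewritten error path above does. */
#include <stdio.h>
#include <stdbool.h>

#define PFERR_PRESENT_MASK	(1U << 0)
#define PFERR_WRITE_MASK	(1U << 1)
#define PFERR_USER_MASK		(1U << 2)
#define PFERR_RSVD_MASK		(1U << 3)
#define PFERR_FETCH_MASK	(1U << 4)

static unsigned walker_error_code(bool present, bool rsvd_fault,
				  bool write_fault, bool user_fault,
				  bool fetch_fault, bool nx_enabled)
{
	unsigned error_code = 0;

	if (present)			/* faulted despite a present gpte */
		error_code |= PFERR_PRESENT_MASK;
	if (write_fault)
		error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		error_code |= PFERR_USER_MASK;
	if (fetch_fault && nx_enabled)	/* mirrors the is_nx() check */
		error_code |= PFERR_FETCH_MASK;
	if (rsvd_fault)
		error_code |= PFERR_RSVD_MASK;

	return error_code;
}

int main(void)
{
	/* user-mode write to a present but read-only pte -> 0x7 */
	printf("0x%x\n", walker_error_code(true, false, true, true,
					   false, true));
	return 0;
}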
+121 -17
arch/x86/kvm/svm.c
··· 4 * AMD SVM support 5 * 6 * Copyright (C) 2006 Qumranet, Inc. 7 * 8 * Authors: 9 * Yaniv Kamay <yaniv@qumranet.com> ··· 286 287 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) 288 { 289 if (!npt_enabled && !(efer & EFER_LMA)) 290 efer &= ~EFER_LME; 291 292 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; 293 - vcpu->arch.efer = efer; 294 } 295 296 static int is_external_interrupt(u32 info) ··· 641 642 if (nested) { 643 printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); 644 - kvm_enable_efer_bits(EFER_SVME); 645 } 646 647 for_each_possible_cpu(cpu) { ··· 807 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. 808 */ 809 svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 810 - kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); 811 812 save->cr4 = X86_CR4_PAE; 813 /* rdx = ?? */ ··· 904 svm->asid_generation = 0; 905 init_vmcb(svm); 906 907 - fx_init(&svm->vcpu); 908 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 909 if (kvm_vcpu_is_bsp(&svm->vcpu)) 910 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 911 912 return &svm->vcpu; 913 914 free_page3: 915 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); 916 free_page2: ··· 1494 */ 1495 pr_err("KVM: Guest triggered AMD Erratum 383\n"); 1496 1497 - set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests); 1498 1499 return; 1500 } ··· 1541 string = (io_info & SVM_IOIO_STR_MASK) != 0; 1542 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1543 if (string || in) 1544 - return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); 1545 1546 port = io_info >> 16; 1547 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; ··· 1963 svm->vmcb->save.cr3 = hsave->save.cr3; 1964 svm->vcpu.arch.cr3 = hsave->save.cr3; 1965 } else { 1966 - kvm_set_cr3(&svm->vcpu, hsave->save.cr3); 1967 } 1968 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); 1969 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); ··· 2086 svm->vmcb->save.cr3 = nested_vmcb->save.cr3; 2087 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; 2088 } else 2089 - kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); 2090 2091 /* Guest paging mode is active - reset mmu */ 2092 kvm_mmu_reset_context(&svm->vcpu); ··· 2392 2393 static int invlpg_interception(struct vcpu_svm *svm) 2394 { 2395 - if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) 2396 - pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); 2397 - return 1; 2398 } 2399 2400 static int emulate_on_interception(struct vcpu_svm *svm) 2401 { 2402 - if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) 2403 - pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); 2404 - return 1; 2405 } 2406 2407 static int cr8_write_interception(struct vcpu_svm *svm) ··· 2728 [SVM_EXIT_NPF] = pf_interception, 2729 }; 2730 2731 static int handle_exit(struct kvm_vcpu *vcpu) 2732 { 2733 struct vcpu_svm *svm = to_svm(vcpu); ··· 2865 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2866 kvm_run->fail_entry.hardware_entry_failure_reason 2867 = svm->vmcb->control.exit_code; 2868 return 0; 2869 } 2870 ··· 2923 { 2924 struct vmcb_control_area *control; 2925 2926 - trace_kvm_inj_virq(irq); 2927 - 2928 - ++svm->vcpu.stat.irq_injections; 2929 control = &svm->vmcb->control; 2930 control->int_vector = irq; 2931 control->int_ctl &= ~V_INTR_PRIO_MASK; ··· 2935 struct vcpu_svm *svm = to_svm(vcpu); 2936 2937 BUG_ON(!(gif_set(svm))); 2938 2939 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | 2940 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; ··· 3424 return false; 3425 } 3426 3427 static void 
svm_fpu_deactivate(struct kvm_vcpu *vcpu) 3428 { 3429 struct vcpu_svm *svm = to_svm(vcpu); ··· 3513 .rdtscp_supported = svm_rdtscp_supported, 3514 3515 .set_supported_cpuid = svm_set_supported_cpuid, 3516 }; 3517 3518 static int __init svm_init(void)
··· 4 * AMD SVM support 5 * 6 * Copyright (C) 2006 Qumranet, Inc. 7 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 8 * 9 * Authors: 10 * Yaniv Kamay <yaniv@qumranet.com> ··· 285 286 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) 287 { 288 + vcpu->arch.efer = efer; 289 if (!npt_enabled && !(efer & EFER_LMA)) 290 efer &= ~EFER_LME; 291 292 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; 293 } 294 295 static int is_external_interrupt(u32 info) ··· 640 641 if (nested) { 642 printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); 643 + kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); 644 } 645 646 for_each_possible_cpu(cpu) { ··· 806 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. 807 */ 808 svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 809 + (void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); 810 811 save->cr4 = X86_CR4_PAE; 812 /* rdx = ?? */ ··· 903 svm->asid_generation = 0; 904 init_vmcb(svm); 905 906 + err = fx_init(&svm->vcpu); 907 + if (err) 908 + goto free_page4; 909 + 910 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 911 if (kvm_vcpu_is_bsp(&svm->vcpu)) 912 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 913 914 return &svm->vcpu; 915 916 + free_page4: 917 + __free_page(hsave_page); 918 free_page3: 919 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); 920 free_page2: ··· 1488 */ 1489 pr_err("KVM: Guest triggered AMD Erratum 383\n"); 1490 1491 + kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu); 1492 1493 return; 1494 } ··· 1535 string = (io_info & SVM_IOIO_STR_MASK) != 0; 1536 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1537 if (string || in) 1538 + return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; 1539 1540 port = io_info >> 16; 1541 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; ··· 1957 svm->vmcb->save.cr3 = hsave->save.cr3; 1958 svm->vcpu.arch.cr3 = hsave->save.cr3; 1959 } else { 1960 + (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); 1961 } 1962 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); 1963 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); ··· 2080 svm->vmcb->save.cr3 = nested_vmcb->save.cr3; 2081 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; 2082 } else 2083 + (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); 2084 2085 /* Guest paging mode is active - reset mmu */ 2086 kvm_mmu_reset_context(&svm->vcpu); ··· 2386 2387 static int invlpg_interception(struct vcpu_svm *svm) 2388 { 2389 + return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; 2390 } 2391 2392 static int emulate_on_interception(struct vcpu_svm *svm) 2393 { 2394 + return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; 2395 } 2396 2397 static int cr8_write_interception(struct vcpu_svm *svm) ··· 2726 [SVM_EXIT_NPF] = pf_interception, 2727 }; 2728 2729 + void dump_vmcb(struct kvm_vcpu *vcpu) 2730 + { 2731 + struct vcpu_svm *svm = to_svm(vcpu); 2732 + struct vmcb_control_area *control = &svm->vmcb->control; 2733 + struct vmcb_save_area *save = &svm->vmcb->save; 2734 + 2735 + pr_err("VMCB Control Area:\n"); 2736 + pr_err("cr_read: %04x\n", control->intercept_cr_read); 2737 + pr_err("cr_write: %04x\n", control->intercept_cr_write); 2738 + pr_err("dr_read: %04x\n", control->intercept_dr_read); 2739 + pr_err("dr_write: %04x\n", control->intercept_dr_write); 2740 + pr_err("exceptions: %08x\n", control->intercept_exceptions); 2741 + pr_err("intercepts: %016llx\n", control->intercept); 2742 + pr_err("pause filter count: %d\n", control->pause_filter_count); 2743 + 
pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); 2744 + pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); 2745 + pr_err("tsc_offset: %016llx\n", control->tsc_offset); 2746 + pr_err("asid: %d\n", control->asid); 2747 + pr_err("tlb_ctl: %d\n", control->tlb_ctl); 2748 + pr_err("int_ctl: %08x\n", control->int_ctl); 2749 + pr_err("int_vector: %08x\n", control->int_vector); 2750 + pr_err("int_state: %08x\n", control->int_state); 2751 + pr_err("exit_code: %08x\n", control->exit_code); 2752 + pr_err("exit_info1: %016llx\n", control->exit_info_1); 2753 + pr_err("exit_info2: %016llx\n", control->exit_info_2); 2754 + pr_err("exit_int_info: %08x\n", control->exit_int_info); 2755 + pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); 2756 + pr_err("nested_ctl: %lld\n", control->nested_ctl); 2757 + pr_err("nested_cr3: %016llx\n", control->nested_cr3); 2758 + pr_err("event_inj: %08x\n", control->event_inj); 2759 + pr_err("event_inj_err: %08x\n", control->event_inj_err); 2760 + pr_err("lbr_ctl: %lld\n", control->lbr_ctl); 2761 + pr_err("next_rip: %016llx\n", control->next_rip); 2762 + pr_err("VMCB State Save Area:\n"); 2763 + pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", 2764 + save->es.selector, save->es.attrib, 2765 + save->es.limit, save->es.base); 2766 + pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", 2767 + save->cs.selector, save->cs.attrib, 2768 + save->cs.limit, save->cs.base); 2769 + pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", 2770 + save->ss.selector, save->ss.attrib, 2771 + save->ss.limit, save->ss.base); 2772 + pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", 2773 + save->ds.selector, save->ds.attrib, 2774 + save->ds.limit, save->ds.base); 2775 + pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", 2776 + save->fs.selector, save->fs.attrib, 2777 + save->fs.limit, save->fs.base); 2778 + pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", 2779 + save->gs.selector, save->gs.attrib, 2780 + save->gs.limit, save->gs.base); 2781 + pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", 2782 + save->gdtr.selector, save->gdtr.attrib, 2783 + save->gdtr.limit, save->gdtr.base); 2784 + pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", 2785 + save->ldtr.selector, save->ldtr.attrib, 2786 + save->ldtr.limit, save->ldtr.base); 2787 + pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", 2788 + save->idtr.selector, save->idtr.attrib, 2789 + save->idtr.limit, save->idtr.base); 2790 + pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", 2791 + save->tr.selector, save->tr.attrib, 2792 + save->tr.limit, save->tr.base); 2793 + pr_err("cpl: %d efer: %016llx\n", 2794 + save->cpl, save->efer); 2795 + pr_err("cr0: %016llx cr2: %016llx\n", 2796 + save->cr0, save->cr2); 2797 + pr_err("cr3: %016llx cr4: %016llx\n", 2798 + save->cr3, save->cr4); 2799 + pr_err("dr6: %016llx dr7: %016llx\n", 2800 + save->dr6, save->dr7); 2801 + pr_err("rip: %016llx rflags: %016llx\n", 2802 + save->rip, save->rflags); 2803 + pr_err("rsp: %016llx rax: %016llx\n", 2804 + save->rsp, save->rax); 2805 + pr_err("star: %016llx lstar: %016llx\n", 2806 + save->star, save->lstar); 2807 + pr_err("cstar: %016llx sfmask: %016llx\n", 2808 + save->cstar, save->sfmask); 2809 + pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", 2810 + save->kernel_gs_base, save->sysenter_cs); 2811 + pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", 2812 + save->sysenter_esp, save->sysenter_eip); 2813 + pr_err("gpat: %016llx dbgctl: %016llx\n", 2814 + save->g_pat, save->dbgctl); 2815 + pr_err("br_from: %016llx br_to: 
%016llx\n", 2816 + save->br_from, save->br_to); 2817 + pr_err("excp_from: %016llx excp_to: %016llx\n", 2818 + save->last_excp_from, save->last_excp_to); 2819 + 2820 + } 2821 + 2822 static int handle_exit(struct kvm_vcpu *vcpu) 2823 { 2824 struct vcpu_svm *svm = to_svm(vcpu); ··· 2770 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2771 kvm_run->fail_entry.hardware_entry_failure_reason 2772 = svm->vmcb->control.exit_code; 2773 + pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); 2774 + dump_vmcb(vcpu); 2775 return 0; 2776 } 2777 ··· 2826 { 2827 struct vmcb_control_area *control; 2828 2829 control = &svm->vmcb->control; 2830 control->int_vector = irq; 2831 control->int_ctl &= ~V_INTR_PRIO_MASK; ··· 2841 struct vcpu_svm *svm = to_svm(vcpu); 2842 2843 BUG_ON(!(gif_set(svm))); 2844 + 2845 + trace_kvm_inj_virq(vcpu->arch.interrupt.nr); 2846 + ++vcpu->stat.irq_injections; 2847 2848 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | 2849 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; ··· 3327 return false; 3328 } 3329 3330 + static bool svm_has_wbinvd_exit(void) 3331 + { 3332 + return true; 3333 + } 3334 + 3335 static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) 3336 { 3337 struct vcpu_svm *svm = to_svm(vcpu); ··· 3411 .rdtscp_supported = svm_rdtscp_supported, 3412 3413 .set_supported_cpuid = svm_set_supported_cpuid, 3414 + 3415 + .has_wbinvd_exit = svm_has_wbinvd_exit, 3416 }; 3417 3418 static int __init svm_init(void)
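
A pattern that recurs through this merge, visible in the svm.c hunk above and again in timer.c and x86.c below, is flagging deferred per-vcpu work by setting a bit in vcpu->requests, now done through the kvm_make_request() helper rather than open-coded set_bit(); the run loop tests and clears each bit before guest entry (see the vcpu_enter_guest() hunks further down). The following is a minimal userspace sketch of that request-bit idea, for illustration only: the toy_* names and the standalone program are invented here and are not kernel code.

/*
 * Toy model of the per-vcpu request bits.  Producers set one bit per
 * request; the run loop atomically tests and clears each bit before
 * entering the guest.  Illustration only, not the kernel implementation.
 */
#include <stdio.h>
#include <stdatomic.h>

#define REQ_TRIPLE_FAULT	0
#define REQ_PENDING_TIMER	1

struct toy_vcpu {
	atomic_ulong requests;		/* one bit per outstanding request */
};

/* roughly what kvm_make_request() expresses: flag deferred work */
static void toy_make_request(int req, struct toy_vcpu *v)
{
	atomic_fetch_or(&v->requests, 1UL << req);
}

/* roughly the test_and_clear_bit() check done before guest entry */
static int toy_check_request(int req, struct toy_vcpu *v)
{
	unsigned long bit = 1UL << req;

	return (atomic_fetch_and(&v->requests, ~bit) & bit) != 0;
}

int main(void)
{
	struct toy_vcpu v;

	atomic_init(&v.requests, 0);
	toy_make_request(REQ_PENDING_TIMER, &v);

	if (toy_check_request(REQ_TRIPLE_FAULT, &v))
		puts("shutdown requested");
	if (toy_check_request(REQ_PENDING_TIMER, &v))
		puts("timer tick pending, inject before guest entry");
	return 0;
}

One likely motivation for routing this through a helper, though the diff itself does not spell it out, is that a single named entry point is easier to audit than scattered set_bit() calls.
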
+15 -1
arch/x86/kvm/timer.c
··· 1 #include <linux/kvm_host.h> 2 #include <linux/kvm.h> 3 #include <linux/hrtimer.h> ··· 32 if (ktimer->reinject || !atomic_read(&ktimer->pending)) { 33 atomic_inc(&ktimer->pending); 34 /* FIXME: this code should not know anything about vcpus */ 35 - set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); 36 } 37 38 if (waitqueue_active(q))
··· 1 + /* 2 + * Kernel-based Virtual Machine driver for Linux 3 + * 4 + * This module enables machines with Intel VT-x extensions to run virtual 5 + * machines without emulation or binary translation. 6 + * 7 + * timer support 8 + * 9 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 10 + * 11 + * This work is licensed under the terms of the GNU GPL, version 2. See 12 + * the COPYING file in the top-level directory. 13 + */ 14 + 15 #include <linux/kvm_host.h> 16 #include <linux/kvm.h> 17 #include <linux/hrtimer.h> ··· 18 if (ktimer->reinject || !atomic_read(&ktimer->pending)) { 19 atomic_inc(&ktimer->pending); 20 /* FIXME: this code should not know anything about vcpus */ 21 + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); 22 } 23 24 if (waitqueue_active(q))
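
The timer.c hunk above keeps the tick coalescing rule: when reinjection is disabled, a tick that fires while an earlier one is still pending does not increment the pending count again. Below is a self-contained model of just that if-condition; the timer_state/timer_fired names are invented for the example and carry no kernel meaning.

/*
 * Sketch of tick coalescing.  With reinject off, back-to-back expiries
 * collapse into a single pending tick.  Illustration only.
 */
#include <stdio.h>
#include <stdatomic.h>
#include <stdbool.h>

struct timer_state {
	bool reinject;		/* re-deliver every missed tick? */
	atomic_int pending;	/* ticks not yet injected into the guest */
};

static void timer_fired(struct timer_state *t)
{
	if (t->reinject || atomic_load(&t->pending) == 0)
		atomic_fetch_add(&t->pending, 1);
	/* else: coalesce, the guest sees at most one pending tick */
}

int main(void)
{
	struct timer_state t = { .reinject = false };

	atomic_init(&t.pending, 0);
	for (int i = 0; i < 5; i++)
		timer_fired(&t);

	/* prints 1: four of the five expiries were coalesced */
	printf("pending ticks: %d\n", atomic_load(&t.pending));
	return 0;
}
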
+176 -79
arch/x86/kvm/vmx.c
··· 5 * machines without emulation or binary translation. 6 * 7 * Copyright (C) 2006 Qumranet, Inc. 8 * 9 * Authors: 10 * Avi Kivity <avi@qumranet.com> ··· 37 #include <asm/vmx.h> 38 #include <asm/virtext.h> 39 #include <asm/mce.h> 40 41 #include "trace.h" 42 ··· 65 66 static int __read_mostly emulate_invalid_guest_state = 0; 67 module_param(emulate_invalid_guest_state, bool, S_IRUGO); 68 69 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ 70 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) ··· 179 180 static int init_rmode(struct kvm *kvm); 181 static u64 construct_eptp(unsigned long root_hpa); 182 183 static DEFINE_PER_CPU(struct vmcs *, vmxarea); 184 static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 185 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); 186 187 static unsigned long *vmx_io_bitmap_a; 188 static unsigned long *vmx_io_bitmap_b; ··· 343 return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; 344 } 345 346 static inline bool cpu_has_vmx_invept_individual_addr(void) 347 { 348 return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; ··· 361 static inline bool cpu_has_vmx_invept_global(void) 362 { 363 return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; 364 } 365 366 static inline bool cpu_has_vmx_ept(void) ··· 411 static inline bool cpu_has_virtual_nmis(void) 412 { 413 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 414 } 415 416 static inline bool report_flexpriority(void) ··· 483 vmcs, phys_addr); 484 } 485 486 static void __vcpu_clear(void *arg) 487 { 488 struct vcpu_vmx *vmx = arg; ··· 518 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); 519 } 520 521 - static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) 522 { 523 if (vmx->vpid == 0) 524 return; 525 526 - __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); 527 } 528 529 static inline void ept_sync_global(void) ··· 870 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 871 } 872 #endif 873 } 874 875 static void vmx_load_host_state(struct vcpu_vmx *vmx) ··· 889 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 890 { 891 struct vcpu_vmx *vmx = to_vmx(vcpu); 892 - u64 phys_addr = __pa(vmx->vmcs); 893 u64 tsc_this, delta, new_offset; 894 895 - if (vcpu->cpu != cpu) { 896 vcpu_clear(vmx); 897 - kvm_migrate_timers(vcpu); 898 - set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); 899 - local_irq_disable(); 900 - list_add(&vmx->local_vcpus_link, 901 - &per_cpu(vcpus_on_cpu, cpu)); 902 - local_irq_enable(); 903 - } 904 905 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { 906 - u8 error; 907 - 908 per_cpu(current_vmcs, cpu) = vmx->vmcs; 909 - asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" 910 - : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) 911 - : "cc"); 912 - if (error) 913 - printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", 914 - vmx->vmcs, phys_addr); 915 } 916 917 if (vcpu->cpu != cpu) { 918 struct desc_ptr dt; 919 unsigned long sysenter_esp; 920 921 vcpu->cpu = cpu; 922 /* ··· 940 static void vmx_vcpu_put(struct kvm_vcpu *vcpu) 941 { 942 __vmx_load_host_state(to_vmx(vcpu)); 943 } 944 945 static void vmx_fpu_activate(struct kvm_vcpu *vcpu) ··· 1346 /* locked but not enabled */ 1347 } 1348 1349 static int hardware_enable(void *garbage) 1350 { 1351 int cpu = raw_smp_processor_id(); ··· 1375 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); 1376 } 1377 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ 1378 - asm volatile (ASM_VMX_VMXON_RAX 1379 - : : "a"(&phys_addr), "m"(phys_addr) 1380 - : "memory", "cc"); 1381 1382 - ept_sync_global(); 1383 1384 
return 0; 1385 } ··· 1403 static void kvm_cpu_vmxoff(void) 1404 { 1405 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 1406 - write_cr4(read_cr4() & ~X86_CR4_VMXE); 1407 } 1408 1409 static void hardware_disable(void *garbage) 1410 { 1411 - vmclear_local_vcpus(); 1412 - kvm_cpu_vmxoff(); 1413 } 1414 1415 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, ··· 1610 if (!cpu_has_vmx_vpid()) 1611 enable_vpid = 0; 1612 1613 - if (!cpu_has_vmx_ept()) { 1614 enable_ept = 0; 1615 enable_unrestricted_guest = 0; 1616 } ··· 1700 gfn_t base_gfn; 1701 1702 slots = kvm_memslots(kvm); 1703 - base_gfn = kvm->memslots->memslots[0].base_gfn + 1704 kvm->memslots->memslots[0].npages - 3; 1705 return base_gfn << PAGE_SHIFT; 1706 } ··· 1831 1832 static void vmx_flush_tlb(struct kvm_vcpu *vcpu) 1833 { 1834 - vpid_sync_vcpu_all(to_vmx(vcpu)); 1835 - if (enable_ept) 1836 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); 1837 } 1838 1839 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) ··· 2582 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); 2583 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ 2584 2585 - vmcs_writel(HOST_CR0, read_cr0()); /* 22.2.3 */ 2586 vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ 2587 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ 2588 ··· 2674 2675 static int init_rmode(struct kvm *kvm) 2676 { 2677 if (!init_rmode_tss(kvm)) 2678 - return 0; 2679 if (!init_rmode_identity_map(kvm)) 2680 - return 0; 2681 - return 1; 2682 } 2683 2684 static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) 2685 { 2686 struct vcpu_vmx *vmx = to_vmx(vcpu); 2687 u64 msr; 2688 - int ret, idx; 2689 2690 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 2691 - idx = srcu_read_lock(&vcpu->kvm->srcu); 2692 if (!init_rmode(vmx->vcpu.kvm)) { 2693 ret = -ENOMEM; 2694 goto out; ··· 2711 msr |= MSR_IA32_APICBASE_BSP; 2712 kvm_set_apic_base(&vmx->vcpu, msr); 2713 2714 - fx_init(&vmx->vcpu); 2715 2716 seg_setup(VCPU_SREG_CS); 2717 /* ··· 2796 vmx_fpu_activate(&vmx->vcpu); 2797 update_exception_bitmap(&vmx->vcpu); 2798 2799 - vpid_sync_vcpu_all(vmx); 2800 2801 ret = 0; 2802 ··· 2804 vmx->emulation_required = 0; 2805 2806 out: 2807 - srcu_read_unlock(&vcpu->kvm->srcu, idx); 2808 return ret; 2809 } 2810 ··· 2908 { 2909 if (!cpu_has_virtual_nmis()) 2910 return to_vmx(vcpu)->soft_vnmi_blocked; 2911 - else 2912 - return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 2913 - GUEST_INTR_STATE_NMI); 2914 } 2915 2916 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) ··· 3150 ++vcpu->stat.io_exits; 3151 3152 if (string || in) 3153 - return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); 3154 3155 port = exit_qualification >> 16; 3156 size = (exit_qualification & 7) + 1; ··· 3170 hypercall[2] = 0xc1; 3171 } 3172 3173 static int handle_cr(struct kvm_vcpu *vcpu) 3174 { 3175 unsigned long exit_qualification, val; 3176 int cr; 3177 int reg; 3178 3179 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3180 cr = exit_qualification & 15; ··· 3194 trace_kvm_cr_write(cr, val); 3195 switch (cr) { 3196 case 0: 3197 - kvm_set_cr0(vcpu, val); 3198 - skip_emulated_instruction(vcpu); 3199 return 1; 3200 case 3: 3201 - kvm_set_cr3(vcpu, val); 3202 - skip_emulated_instruction(vcpu); 3203 return 1; 3204 case 4: 3205 - kvm_set_cr4(vcpu, val); 3206 - skip_emulated_instruction(vcpu); 3207 return 1; 3208 case 8: { 3209 u8 cr8_prev = kvm_get_cr8(vcpu); ··· 3410 static int handle_wbinvd(struct kvm_vcpu *vcpu) 3411 { 3412 
skip_emulated_instruction(vcpu); 3413 - /* TODO: Add support for VT-d/pass-through device */ 3414 return 1; 3415 } 3416 3417 static int handle_apic_access(struct kvm_vcpu *vcpu) 3418 { 3419 - unsigned long exit_qualification; 3420 - enum emulation_result er; 3421 - unsigned long offset; 3422 - 3423 - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3424 - offset = exit_qualification & 0xffful; 3425 - 3426 - er = emulate_instruction(vcpu, 0, 0, 0); 3427 - 3428 - if (er != EMULATE_DONE) { 3429 - printk(KERN_ERR 3430 - "Fail to handle apic access vmexit! Offset is 0x%lx\n", 3431 - offset); 3432 - return -ENOEXEC; 3433 - } 3434 - return 1; 3435 } 3436 3437 static int handle_task_switch(struct kvm_vcpu *vcpu) ··· 3638 goto out; 3639 } 3640 3641 - if (err != EMULATE_DONE) { 3642 - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 3643 - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 3644 - vcpu->run->internal.ndata = 0; 3645 - ret = 0; 3646 - goto out; 3647 - } 3648 3649 if (signal_pending(current)) 3650 goto out; ··· 3702 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 3703 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 3704 [EXIT_REASON_WBINVD] = handle_wbinvd, 3705 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 3706 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, 3707 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, ··· 3735 * to sync with guest real CR3. */ 3736 if (enable_ept && is_paging(vcpu)) 3737 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); 3738 3739 if (unlikely(vmx->fail)) { 3740 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; ··· 3948 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 3949 vmx_set_interrupt_shadow(vcpu, 0); 3950 3951 - /* 3952 - * Loading guest fpu may have cleared host cr0.ts 3953 - */ 3954 - vmcs_writel(HOST_CR0, read_cr0()); 3955 - 3956 asm( 3957 /* Store host registers */ 3958 "push %%"R"dx; push %%"R"bp;" ··· 4083 kmem_cache_free(kvm_vcpu_cache, vmx); 4084 } 4085 4086 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) 4087 { 4088 int err; ··· 4121 if (!vmx->vmcs) 4122 goto free_msrs; 4123 4124 - vmcs_clear(vmx->vmcs); 4125 4126 cpu = get_cpu(); 4127 vmx_vcpu_load(&vmx->vcpu, cpu); ··· 4360 .rdtscp_supported = vmx_rdtscp_supported, 4361 4362 .set_supported_cpuid = vmx_set_supported_cpuid, 4363 }; 4364 4365 static int __init vmx_init(void)
··· 5 * machines without emulation or binary translation. 6 * 7 * Copyright (C) 2006 Qumranet, Inc. 8 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 9 * 10 * Authors: 11 * Avi Kivity <avi@qumranet.com> ··· 36 #include <asm/vmx.h> 37 #include <asm/virtext.h> 38 #include <asm/mce.h> 39 + #include <asm/i387.h> 40 + #include <asm/xcr.h> 41 42 #include "trace.h" 43 ··· 62 63 static int __read_mostly emulate_invalid_guest_state = 0; 64 module_param(emulate_invalid_guest_state, bool, S_IRUGO); 65 + 66 + static int __read_mostly vmm_exclusive = 1; 67 + module_param(vmm_exclusive, bool, S_IRUGO); 68 69 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ 70 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) ··· 173 174 static int init_rmode(struct kvm *kvm); 175 static u64 construct_eptp(unsigned long root_hpa); 176 + static void kvm_cpu_vmxon(u64 addr); 177 + static void kvm_cpu_vmxoff(void); 178 179 static DEFINE_PER_CPU(struct vmcs *, vmxarea); 180 static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 181 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); 182 + static DEFINE_PER_CPU(struct desc_ptr, host_gdt); 183 184 static unsigned long *vmx_io_bitmap_a; 185 static unsigned long *vmx_io_bitmap_b; ··· 334 return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; 335 } 336 337 + static inline bool cpu_has_vmx_ept_4levels(void) 338 + { 339 + return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; 340 + } 341 + 342 static inline bool cpu_has_vmx_invept_individual_addr(void) 343 { 344 return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; ··· 347 static inline bool cpu_has_vmx_invept_global(void) 348 { 349 return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; 350 + } 351 + 352 + static inline bool cpu_has_vmx_invvpid_single(void) 353 + { 354 + return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT; 355 + } 356 + 357 + static inline bool cpu_has_vmx_invvpid_global(void) 358 + { 359 + return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; 360 } 361 362 static inline bool cpu_has_vmx_ept(void) ··· 387 static inline bool cpu_has_virtual_nmis(void) 388 { 389 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 390 + } 391 + 392 + static inline bool cpu_has_vmx_wbinvd_exit(void) 393 + { 394 + return vmcs_config.cpu_based_2nd_exec_ctrl & 395 + SECONDARY_EXEC_WBINVD_EXITING; 396 } 397 398 static inline bool report_flexpriority(void) ··· 453 vmcs, phys_addr); 454 } 455 456 + static void vmcs_load(struct vmcs *vmcs) 457 + { 458 + u64 phys_addr = __pa(vmcs); 459 + u8 error; 460 + 461 + asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" 462 + : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) 463 + : "cc", "memory"); 464 + if (error) 465 + printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", 466 + vmcs, phys_addr); 467 + } 468 + 469 static void __vcpu_clear(void *arg) 470 { 471 struct vcpu_vmx *vmx = arg; ··· 475 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); 476 } 477 478 + static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) 479 { 480 if (vmx->vpid == 0) 481 return; 482 483 + if (cpu_has_vmx_invvpid_single()) 484 + __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); 485 + } 486 + 487 + static inline void vpid_sync_vcpu_global(void) 488 + { 489 + if (cpu_has_vmx_invvpid_global()) 490 + __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); 491 + } 492 + 493 + static inline void vpid_sync_context(struct vcpu_vmx *vmx) 494 + { 495 + if (cpu_has_vmx_invvpid_single()) 496 + vpid_sync_vcpu_single(vmx); 497 + else 498 + vpid_sync_vcpu_global(); 499 } 500 501 static 
inline void ept_sync_global(void) ··· 812 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 813 } 814 #endif 815 + if (current_thread_info()->status & TS_USEDFPU) 816 + clts(); 817 + load_gdt(&__get_cpu_var(host_gdt)); 818 } 819 820 static void vmx_load_host_state(struct vcpu_vmx *vmx) ··· 828 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 829 { 830 struct vcpu_vmx *vmx = to_vmx(vcpu); 831 u64 tsc_this, delta, new_offset; 832 + u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 833 834 + if (!vmm_exclusive) 835 + kvm_cpu_vmxon(phys_addr); 836 + else if (vcpu->cpu != cpu) 837 vcpu_clear(vmx); 838 839 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { 840 per_cpu(current_vmcs, cpu) = vmx->vmcs; 841 + vmcs_load(vmx->vmcs); 842 } 843 844 if (vcpu->cpu != cpu) { 845 struct desc_ptr dt; 846 unsigned long sysenter_esp; 847 + 848 + kvm_migrate_timers(vcpu); 849 + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 850 + local_irq_disable(); 851 + list_add(&vmx->local_vcpus_link, 852 + &per_cpu(vcpus_on_cpu, cpu)); 853 + local_irq_enable(); 854 855 vcpu->cpu = cpu; 856 /* ··· 884 static void vmx_vcpu_put(struct kvm_vcpu *vcpu) 885 { 886 __vmx_load_host_state(to_vmx(vcpu)); 887 + if (!vmm_exclusive) { 888 + __vcpu_clear(to_vmx(vcpu)); 889 + kvm_cpu_vmxoff(); 890 + } 891 } 892 893 static void vmx_fpu_activate(struct kvm_vcpu *vcpu) ··· 1286 /* locked but not enabled */ 1287 } 1288 1289 + static void kvm_cpu_vmxon(u64 addr) 1290 + { 1291 + asm volatile (ASM_VMX_VMXON_RAX 1292 + : : "a"(&addr), "m"(addr) 1293 + : "memory", "cc"); 1294 + } 1295 + 1296 static int hardware_enable(void *garbage) 1297 { 1298 int cpu = raw_smp_processor_id(); ··· 1308 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); 1309 } 1310 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ 1311 1312 + if (vmm_exclusive) { 1313 + kvm_cpu_vmxon(phys_addr); 1314 + ept_sync_global(); 1315 + } 1316 + 1317 + store_gdt(&__get_cpu_var(host_gdt)); 1318 1319 return 0; 1320 } ··· 1334 static void kvm_cpu_vmxoff(void) 1335 { 1336 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 1337 } 1338 1339 static void hardware_disable(void *garbage) 1340 { 1341 + if (vmm_exclusive) { 1342 + vmclear_local_vcpus(); 1343 + kvm_cpu_vmxoff(); 1344 + } 1345 + write_cr4(read_cr4() & ~X86_CR4_VMXE); 1346 } 1347 1348 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, ··· 1539 if (!cpu_has_vmx_vpid()) 1540 enable_vpid = 0; 1541 1542 + if (!cpu_has_vmx_ept() || 1543 + !cpu_has_vmx_ept_4levels()) { 1544 enable_ept = 0; 1545 enable_unrestricted_guest = 0; 1546 } ··· 1628 gfn_t base_gfn; 1629 1630 slots = kvm_memslots(kvm); 1631 + base_gfn = slots->memslots[0].base_gfn + 1632 kvm->memslots->memslots[0].npages - 3; 1633 return base_gfn << PAGE_SHIFT; 1634 } ··· 1759 1760 static void vmx_flush_tlb(struct kvm_vcpu *vcpu) 1761 { 1762 + vpid_sync_context(to_vmx(vcpu)); 1763 + if (enable_ept) { 1764 + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 1765 + return; 1766 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); 1767 + } 1768 } 1769 1770 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) ··· 2507 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); 2508 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ 2509 2510 + vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ 2511 vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ 2512 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ 2513 ··· 2599 2600 static int init_rmode(struct kvm *kvm) 2601 { 2602 + int idx, ret = 0; 2603 + 2604 + idx = 
srcu_read_lock(&kvm->srcu); 2605 if (!init_rmode_tss(kvm)) 2606 + goto exit; 2607 if (!init_rmode_identity_map(kvm)) 2608 + goto exit; 2609 + 2610 + ret = 1; 2611 + exit: 2612 + srcu_read_unlock(&kvm->srcu, idx); 2613 + return ret; 2614 } 2615 2616 static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) 2617 { 2618 struct vcpu_vmx *vmx = to_vmx(vcpu); 2619 u64 msr; 2620 + int ret; 2621 2622 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 2623 if (!init_rmode(vmx->vcpu.kvm)) { 2624 ret = -ENOMEM; 2625 goto out; ··· 2630 msr |= MSR_IA32_APICBASE_BSP; 2631 kvm_set_apic_base(&vmx->vcpu, msr); 2632 2633 + ret = fx_init(&vmx->vcpu); 2634 + if (ret != 0) 2635 + goto out; 2636 2637 seg_setup(VCPU_SREG_CS); 2638 /* ··· 2713 vmx_fpu_activate(&vmx->vcpu); 2714 update_exception_bitmap(&vmx->vcpu); 2715 2716 + vpid_sync_context(vmx); 2717 2718 ret = 0; 2719 ··· 2721 vmx->emulation_required = 0; 2722 2723 out: 2724 return ret; 2725 } 2726 ··· 2826 { 2827 if (!cpu_has_virtual_nmis()) 2828 return to_vmx(vcpu)->soft_vnmi_blocked; 2829 + return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; 2830 } 2831 2832 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) ··· 3070 ++vcpu->stat.io_exits; 3071 3072 if (string || in) 3073 + return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; 3074 3075 port = exit_qualification >> 16; 3076 size = (exit_qualification & 7) + 1; ··· 3090 hypercall[2] = 0xc1; 3091 } 3092 3093 + static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) 3094 + { 3095 + if (err) 3096 + kvm_inject_gp(vcpu, 0); 3097 + else 3098 + skip_emulated_instruction(vcpu); 3099 + } 3100 + 3101 static int handle_cr(struct kvm_vcpu *vcpu) 3102 { 3103 unsigned long exit_qualification, val; 3104 int cr; 3105 int reg; 3106 + int err; 3107 3108 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3109 cr = exit_qualification & 15; ··· 3105 trace_kvm_cr_write(cr, val); 3106 switch (cr) { 3107 case 0: 3108 + err = kvm_set_cr0(vcpu, val); 3109 + complete_insn_gp(vcpu, err); 3110 return 1; 3111 case 3: 3112 + err = kvm_set_cr3(vcpu, val); 3113 + complete_insn_gp(vcpu, err); 3114 return 1; 3115 case 4: 3116 + err = kvm_set_cr4(vcpu, val); 3117 + complete_insn_gp(vcpu, err); 3118 return 1; 3119 case 8: { 3120 u8 cr8_prev = kvm_get_cr8(vcpu); ··· 3321 static int handle_wbinvd(struct kvm_vcpu *vcpu) 3322 { 3323 skip_emulated_instruction(vcpu); 3324 + kvm_emulate_wbinvd(vcpu); 3325 + return 1; 3326 + } 3327 + 3328 + static int handle_xsetbv(struct kvm_vcpu *vcpu) 3329 + { 3330 + u64 new_bv = kvm_read_edx_eax(vcpu); 3331 + u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); 3332 + 3333 + if (kvm_set_xcr(vcpu, index, new_bv) == 0) 3334 + skip_emulated_instruction(vcpu); 3335 return 1; 3336 } 3337 3338 static int handle_apic_access(struct kvm_vcpu *vcpu) 3339 { 3340 + return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; 3341 } 3342 3343 static int handle_task_switch(struct kvm_vcpu *vcpu) ··· 3554 goto out; 3555 } 3556 3557 + if (err != EMULATE_DONE) 3558 + return 0; 3559 3560 if (signal_pending(current)) 3561 goto out; ··· 3623 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 3624 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 3625 [EXIT_REASON_WBINVD] = handle_wbinvd, 3626 + [EXIT_REASON_XSETBV] = handle_xsetbv, 3627 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 3628 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, 3629 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, ··· 3655 * to sync with guest real CR3. 
*/ 3656 if (enable_ept && is_paging(vcpu)) 3657 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); 3658 + 3659 + if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { 3660 + vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; 3661 + vcpu->run->fail_entry.hardware_entry_failure_reason 3662 + = exit_reason; 3663 + return 0; 3664 + } 3665 3666 if (unlikely(vmx->fail)) { 3667 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; ··· 3861 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 3862 vmx_set_interrupt_shadow(vcpu, 0); 3863 3864 asm( 3865 /* Store host registers */ 3866 "push %%"R"dx; push %%"R"bp;" ··· 4001 kmem_cache_free(kvm_vcpu_cache, vmx); 4002 } 4003 4004 + static inline void vmcs_init(struct vmcs *vmcs) 4005 + { 4006 + u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id())); 4007 + 4008 + if (!vmm_exclusive) 4009 + kvm_cpu_vmxon(phys_addr); 4010 + 4011 + vmcs_clear(vmcs); 4012 + 4013 + if (!vmm_exclusive) 4014 + kvm_cpu_vmxoff(); 4015 + } 4016 + 4017 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) 4018 { 4019 int err; ··· 4026 if (!vmx->vmcs) 4027 goto free_msrs; 4028 4029 + vmcs_init(vmx->vmcs); 4030 4031 cpu = get_cpu(); 4032 vmx_vcpu_load(&vmx->vcpu, cpu); ··· 4265 .rdtscp_supported = vmx_rdtscp_supported, 4266 4267 .set_supported_cpuid = vmx_set_supported_cpuid, 4268 + 4269 + .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, 4270 }; 4271 4272 static int __init vmx_init(void)
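
Among the new VM-exit handlers added above, handle_xsetbv() pulls the XCR index out of RCX and the 64-bit value out of EDX:EAX via kvm_read_edx_eax(), and only skips the instruction when kvm_set_xcr() accepts the value. The snippet below merely illustrates how an EDX:EAX pair composes into the 64-bit operand; compose_edx_eax() is an ad-hoc name for this example, not a kernel helper.

/*
 * XSETBV (like RDMSR/WRMSR-style interfaces) passes a 64-bit value
 * split across EDX (high half) and EAX (low half).
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t compose_edx_eax(uint32_t edx, uint32_t eax)
{
	return ((uint64_t)edx << 32) | eax;
}

int main(void)
{
	/* e.g. a guest enabling x87 | SSE | AVX state in XCR0 -> 0x7 */
	uint64_t xcr0 = compose_edx_eax(0x00000000, 0x00000007);

	printf("xcr0 = %#llx\n", (unsigned long long)xcr0);
	return 0;
}
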
+652 -528
arch/x86/kvm/x86.c
··· 6 * Copyright (C) 2006 Qumranet, Inc. 7 * Copyright (C) 2008 Qumranet, Inc. 8 * Copyright IBM Corporation, 2008 9 * 10 * Authors: 11 * Avi Kivity <avi@qumranet.com> ··· 42 #include <linux/srcu.h> 43 #include <linux/slab.h> 44 #include <linux/perf_event.h> 45 #include <trace/events/kvm.h> 46 47 #define CREATE_TRACE_POINTS 48 #include "trace.h" 49 50 #include <asm/debugreg.h> 51 - #include <asm/uaccess.h> 52 #include <asm/msr.h> 53 #include <asm/desc.h> 54 #include <asm/mtrr.h> 55 #include <asm/mce.h> 56 57 #define MAX_IO_MSRS 256 58 #define CR0_RESERVED_BITS \ ··· 65 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 66 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 67 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ 68 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 69 70 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) ··· 150 { "largepages", VM_STAT(lpages) }, 151 { NULL } 152 }; 153 154 static void kvm_on_user_return(struct user_return_notifier *urn) 155 { ··· 296 prev_nr = vcpu->arch.exception.nr; 297 if (prev_nr == DF_VECTOR) { 298 /* triple fault -> shutdown */ 299 - set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 300 return; 301 } 302 class1 = exception_class(prev_nr); ··· 425 return changed; 426 } 427 428 - void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 429 { 430 cr0 |= X86_CR0_ET; 431 432 #ifdef CONFIG_X86_64 433 - if (cr0 & 0xffffffff00000000UL) { 434 - kvm_inject_gp(vcpu, 0); 435 - return; 436 - } 437 #endif 438 439 cr0 &= ~CR0_RESERVED_BITS; 440 441 - if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 442 - kvm_inject_gp(vcpu, 0); 443 - return; 444 - } 445 446 - if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { 447 - kvm_inject_gp(vcpu, 0); 448 - return; 449 - } 450 451 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 452 #ifdef CONFIG_X86_64 453 if ((vcpu->arch.efer & EFER_LME)) { 454 int cs_db, cs_l; 455 456 - if (!is_pae(vcpu)) { 457 - kvm_inject_gp(vcpu, 0); 458 - return; 459 - } 460 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 461 - if (cs_l) { 462 - kvm_inject_gp(vcpu, 0); 463 - return; 464 - 465 - } 466 } else 467 #endif 468 - if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 469 - kvm_inject_gp(vcpu, 0); 470 - return; 471 - } 472 - 473 } 474 475 kvm_x86_ops->set_cr0(vcpu, cr0); 476 477 - kvm_mmu_reset_context(vcpu); 478 - return; 479 } 480 EXPORT_SYMBOL_GPL(kvm_set_cr0); 481 482 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 483 { 484 - kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); 485 } 486 EXPORT_SYMBOL_GPL(kvm_lmsw); 487 488 - void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 489 { 490 unsigned long old_cr4 = kvm_read_cr4(vcpu); 491 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 492 493 - if (cr4 & CR4_RESERVED_BITS) { 494 - kvm_inject_gp(vcpu, 0); 495 - return; 496 - } 497 498 if (is_long_mode(vcpu)) { 499 - if (!(cr4 & X86_CR4_PAE)) { 500 - kvm_inject_gp(vcpu, 0); 501 - return; 502 - } 503 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 504 && ((cr4 ^ old_cr4) & pdptr_bits) 505 - && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 506 - kvm_inject_gp(vcpu, 0); 507 - return; 508 - } 509 510 - if (cr4 & X86_CR4_VMXE) { 511 - kvm_inject_gp(vcpu, 0); 512 - return; 513 - } 514 kvm_x86_ops->set_cr4(vcpu, cr4); 515 - vcpu->arch.cr4 = cr4; 516 - kvm_mmu_reset_context(vcpu); 517 } 518 EXPORT_SYMBOL_GPL(kvm_set_cr4); 519 520 - void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 521 { 522 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 523 kvm_mmu_sync_roots(vcpu); 524 
kvm_mmu_flush_tlb(vcpu); 525 - return; 526 } 527 528 if (is_long_mode(vcpu)) { 529 - if (cr3 & CR3_L_MODE_RESERVED_BITS) { 530 - kvm_inject_gp(vcpu, 0); 531 - return; 532 - } 533 } else { 534 if (is_pae(vcpu)) { 535 - if (cr3 & CR3_PAE_RESERVED_BITS) { 536 - kvm_inject_gp(vcpu, 0); 537 - return; 538 - } 539 - if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { 540 - kvm_inject_gp(vcpu, 0); 541 - return; 542 - } 543 } 544 /* 545 * We don't check reserved bits in nonpae mode, because ··· 599 * to debug) behavior on the guest side. 600 */ 601 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) 602 - kvm_inject_gp(vcpu, 0); 603 - else { 604 - vcpu->arch.cr3 = cr3; 605 - vcpu->arch.mmu.new_cr3(vcpu); 606 - } 607 } 608 EXPORT_SYMBOL_GPL(kvm_set_cr3); 609 610 - void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 611 { 612 - if (cr8 & CR8_RESERVED_BITS) { 613 - kvm_inject_gp(vcpu, 0); 614 - return; 615 - } 616 if (irqchip_in_kernel(vcpu->kvm)) 617 kvm_lapic_set_tpr(vcpu, cr8); 618 else 619 vcpu->arch.cr8 = cr8; 620 } 621 EXPORT_SYMBOL_GPL(kvm_set_cr8); 622 ··· 633 } 634 EXPORT_SYMBOL_GPL(kvm_get_cr8); 635 636 - int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 637 { 638 switch (dr) { 639 case 0 ... 3: ··· 642 vcpu->arch.eff_db[dr] = val; 643 break; 644 case 4: 645 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 646 - kvm_queue_exception(vcpu, UD_VECTOR); 647 - return 1; 648 - } 649 /* fall through */ 650 case 6: 651 - if (val & 0xffffffff00000000ULL) { 652 - kvm_inject_gp(vcpu, 0); 653 - return 1; 654 - } 655 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 656 break; 657 case 5: 658 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 659 - kvm_queue_exception(vcpu, UD_VECTOR); 660 - return 1; 661 - } 662 /* fall through */ 663 default: /* 7 */ 664 - if (val & 0xffffffff00000000ULL) { 665 - kvm_inject_gp(vcpu, 0); 666 - return 1; 667 - } 668 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 669 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 670 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); ··· 667 668 return 0; 669 } 670 EXPORT_SYMBOL_GPL(kvm_set_dr); 671 672 - int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) 673 { 674 switch (dr) { 675 case 0 ... 
3: 676 *val = vcpu->arch.db[dr]; 677 break; 678 case 4: 679 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 680 - kvm_queue_exception(vcpu, UD_VECTOR); 681 return 1; 682 - } 683 /* fall through */ 684 case 6: 685 *val = vcpu->arch.dr6; 686 break; 687 case 5: 688 - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 689 - kvm_queue_exception(vcpu, UD_VECTOR); 690 return 1; 691 - } 692 /* fall through */ 693 default: /* 7 */ 694 *val = vcpu->arch.dr7; ··· 706 707 return 0; 708 } 709 - EXPORT_SYMBOL_GPL(kvm_get_dr); 710 711 - static inline u32 bit(int bitno) 712 { 713 - return 1 << (bitno & 31); 714 } 715 716 /* 717 * List of msr numbers which we expose to userspace through KVM_GET_MSRS ··· 744 745 static u32 emulated_msrs[] = { 746 MSR_IA32_MISC_ENABLE, 747 }; 748 749 static int set_efer(struct kvm_vcpu *vcpu, u64 efer) 750 { 751 if (efer & efer_reserved_bits) 752 return 1; 753 ··· 780 781 kvm_x86_ops->set_efer(vcpu, efer); 782 783 - vcpu->arch.efer = efer; 784 - 785 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 786 kvm_mmu_reset_context(vcpu); 787 788 return 0; 789 } ··· 950 951 if (!vcpu->time_page) 952 return 0; 953 - set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); 954 return 1; 955 } 956 ··· 1592 { 1593 int i, idx; 1594 1595 - vcpu_load(vcpu); 1596 - 1597 idx = srcu_read_lock(&vcpu->kvm->srcu); 1598 for (i = 0; i < msrs->nmsrs; ++i) 1599 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1600 break; 1601 srcu_read_unlock(&vcpu->kvm->srcu, idx); 1602 - 1603 - vcpu_put(vcpu); 1604 1605 return i; 1606 } ··· 1682 case KVM_CAP_PCI_SEGMENT: 1683 case KVM_CAP_DEBUGREGS: 1684 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1685 r = 1; 1686 break; 1687 case KVM_CAP_COALESCED_MMIO: ··· 1705 break; 1706 case KVM_CAP_MCE: 1707 r = KVM_MAX_MCE_BANKS; 1708 break; 1709 default: 1710 r = 0; ··· 1785 return r; 1786 } 1787 1788 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1789 { 1790 kvm_x86_ops->vcpu_load(vcpu, cpu); 1791 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { 1792 unsigned long khz = cpufreq_quick_get(cpu); ··· 1819 1820 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1821 { 1822 - kvm_put_guest_fpu(vcpu); 1823 kvm_x86_ops->vcpu_put(vcpu); 1824 } 1825 1826 static int is_efer_nx(void) ··· 1869 if (copy_from_user(cpuid_entries, entries, 1870 cpuid->nent * sizeof(struct kvm_cpuid_entry))) 1871 goto out_free; 1872 - vcpu_load(vcpu); 1873 for (i = 0; i < cpuid->nent; i++) { 1874 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; 1875 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; ··· 1886 r = 0; 1887 kvm_apic_set_version(vcpu); 1888 kvm_x86_ops->cpuid_update(vcpu); 1889 - vcpu_put(vcpu); 1890 1891 out_free: 1892 vfree(cpuid_entries); ··· 1907 if (copy_from_user(&vcpu->arch.cpuid_entries, entries, 1908 cpuid->nent * sizeof(struct kvm_cpuid_entry2))) 1909 goto out; 1910 - vcpu_load(vcpu); 1911 vcpu->arch.cpuid_nent = cpuid->nent; 1912 kvm_apic_set_version(vcpu); 1913 kvm_x86_ops->cpuid_update(vcpu); 1914 - vcpu_put(vcpu); 1915 return 0; 1916 1917 out: ··· 1923 { 1924 int r; 1925 1926 - vcpu_load(vcpu); 1927 r = -E2BIG; 1928 if (cpuid->nent < vcpu->arch.cpuid_nent) 1929 goto out; ··· 1934 1935 out: 1936 cpuid->nent = vcpu->arch.cpuid_nent; 1937 - vcpu_put(vcpu); 1938 return r; 1939 } 1940 ··· 1985 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1986 /* cpuid 1.ecx */ 1987 const u32 kvm_supported_word4_x86_features = 1988 - F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | 1989 0 /* DS-CPL, VMX, SMX, EST */ | 1990 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | 
1991 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1992 0 /* Reserved, DCA */ | F(XMM4_1) | 1993 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 1994 - 0 /* Reserved, XSAVE, OSXSAVE */; 1995 /* cpuid 0x80000001.ecx */ 1996 const u32 kvm_supported_word6_x86_features = 1997 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | ··· 2006 2007 switch (function) { 2008 case 0: 2009 - entry->eax = min(entry->eax, (u32)0xb); 2010 break; 2011 case 1: 2012 entry->edx &= kvm_supported_word0_x86_features; ··· 2056 for (i = 1; *nent < maxnent; ++i) { 2057 level_type = entry[i - 1].ecx & 0xff00; 2058 if (!level_type) 2059 break; 2060 do_cpuid_1_ent(&entry[i], function, i); 2061 entry[i].flags |= ··· 2179 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2180 struct kvm_lapic_state *s) 2181 { 2182 - vcpu_load(vcpu); 2183 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); 2184 - vcpu_put(vcpu); 2185 2186 return 0; 2187 } ··· 2187 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, 2188 struct kvm_lapic_state *s) 2189 { 2190 - vcpu_load(vcpu); 2191 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); 2192 kvm_apic_post_state_restore(vcpu); 2193 update_cr8_intercept(vcpu); 2194 - vcpu_put(vcpu); 2195 2196 return 0; 2197 } ··· 2201 return -EINVAL; 2202 if (irqchip_in_kernel(vcpu->kvm)) 2203 return -ENXIO; 2204 - vcpu_load(vcpu); 2205 2206 kvm_queue_interrupt(vcpu, irq->irq, false); 2207 - 2208 - vcpu_put(vcpu); 2209 2210 return 0; 2211 } 2212 2213 static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) 2214 { 2215 - vcpu_load(vcpu); 2216 kvm_inject_nmi(vcpu); 2217 - vcpu_put(vcpu); 2218 2219 return 0; 2220 } ··· 2229 int r; 2230 unsigned bank_num = mcg_cap & 0xff, bank; 2231 2232 - vcpu_load(vcpu); 2233 r = -EINVAL; 2234 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) 2235 goto out; ··· 2243 for (bank = 0; bank < bank_num; bank++) 2244 vcpu->arch.mce_banks[bank*4] = ~(u64)0; 2245 out: 2246 - vcpu_put(vcpu); 2247 return r; 2248 } 2249 ··· 2275 printk(KERN_DEBUG "kvm: set_mce: " 2276 "injects mce exception while " 2277 "previous one is in progress!\n"); 2278 - set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 2279 return 0; 2280 } 2281 if (banks[1] & MCI_STATUS_VAL) ··· 2300 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, 2301 struct kvm_vcpu_events *events) 2302 { 2303 - vcpu_load(vcpu); 2304 - 2305 events->exception.injected = 2306 vcpu->arch.exception.pending && 2307 !kvm_exception_is_soft(vcpu->arch.exception.nr); ··· 2324 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2325 | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2326 | KVM_VCPUEVENT_VALID_SHADOW); 2327 - 2328 - vcpu_put(vcpu); 2329 } 2330 2331 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, ··· 2333 | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2334 | KVM_VCPUEVENT_VALID_SHADOW)) 2335 return -EINVAL; 2336 - 2337 - vcpu_load(vcpu); 2338 2339 vcpu->arch.exception.pending = events->exception.injected; 2340 vcpu->arch.exception.nr = events->exception.nr; ··· 2356 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) 2357 vcpu->arch.sipi_vector = events->sipi_vector; 2358 2359 - vcpu_put(vcpu); 2360 - 2361 return 0; 2362 } 2363 2364 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2365 struct kvm_debugregs *dbgregs) 2366 { 2367 - vcpu_load(vcpu); 2368 - 2369 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2370 dbgregs->dr6 = vcpu->arch.dr6; 2371 dbgregs->dr7 = vcpu->arch.dr7; 2372 dbgregs->flags = 0; 2373 - 2374 - vcpu_put(vcpu); 2375 } 2376 2377 static int 
kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, ··· 2374 if (dbgregs->flags) 2375 return -EINVAL; 2376 2377 - vcpu_load(vcpu); 2378 - 2379 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 2380 vcpu->arch.dr6 = dbgregs->dr6; 2381 vcpu->arch.dr7 = dbgregs->dr7; 2382 2383 - vcpu_put(vcpu); 2384 - 2385 return 0; 2386 } 2387 2388 long kvm_arch_vcpu_ioctl(struct file *filp, ··· 2458 struct kvm_vcpu *vcpu = filp->private_data; 2459 void __user *argp = (void __user *)arg; 2460 int r; 2461 - struct kvm_lapic_state *lapic = NULL; 2462 2463 switch (ioctl) { 2464 case KVM_GET_LAPIC: { 2465 r = -EINVAL; 2466 if (!vcpu->arch.apic) 2467 goto out; 2468 - lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2469 2470 r = -ENOMEM; 2471 - if (!lapic) 2472 goto out; 2473 - r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); 2474 if (r) 2475 goto out; 2476 r = -EFAULT; 2477 - if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) 2478 goto out; 2479 r = 0; 2480 break; ··· 2489 r = -EINVAL; 2490 if (!vcpu->arch.apic) 2491 goto out; 2492 - lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2493 r = -ENOMEM; 2494 - if (!lapic) 2495 goto out; 2496 r = -EFAULT; 2497 - if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) 2498 goto out; 2499 - r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); 2500 if (r) 2501 goto out; 2502 r = 0; ··· 2612 r = -EFAULT; 2613 if (copy_from_user(&mce, argp, sizeof mce)) 2614 goto out; 2615 - vcpu_load(vcpu); 2616 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2617 - vcpu_put(vcpu); 2618 break; 2619 } 2620 case KVM_GET_VCPU_EVENTS: { ··· 2659 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); 2660 break; 2661 } 2662 default: 2663 r = -EINVAL; 2664 } 2665 out: 2666 - kfree(lapic); 2667 return r; 2668 } 2669 ··· 2760 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) 2761 { 2762 return kvm->arch.n_alloc_mmu_pages; 2763 - } 2764 - 2765 - gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) 2766 - { 2767 - int i; 2768 - struct kvm_mem_alias *alias; 2769 - struct kvm_mem_aliases *aliases; 2770 - 2771 - aliases = kvm_aliases(kvm); 2772 - 2773 - for (i = 0; i < aliases->naliases; ++i) { 2774 - alias = &aliases->aliases[i]; 2775 - if (alias->flags & KVM_ALIAS_INVALID) 2776 - continue; 2777 - if (gfn >= alias->base_gfn 2778 - && gfn < alias->base_gfn + alias->npages) 2779 - return alias->target_gfn + gfn - alias->base_gfn; 2780 - } 2781 - return gfn; 2782 - } 2783 - 2784 - gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 2785 - { 2786 - int i; 2787 - struct kvm_mem_alias *alias; 2788 - struct kvm_mem_aliases *aliases; 2789 - 2790 - aliases = kvm_aliases(kvm); 2791 - 2792 - for (i = 0; i < aliases->naliases; ++i) { 2793 - alias = &aliases->aliases[i]; 2794 - if (gfn >= alias->base_gfn 2795 - && gfn < alias->base_gfn + alias->npages) 2796 - return alias->target_gfn + gfn - alias->base_gfn; 2797 - } 2798 - return gfn; 2799 - } 2800 - 2801 - /* 2802 - * Set a new alias region. Aliases map a portion of physical memory into 2803 - * another portion. This is useful for memory windows, for example the PC 2804 - * VGA region. 
2805 - */ 2806 - static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, 2807 - struct kvm_memory_alias *alias) 2808 - { 2809 - int r, n; 2810 - struct kvm_mem_alias *p; 2811 - struct kvm_mem_aliases *aliases, *old_aliases; 2812 - 2813 - r = -EINVAL; 2814 - /* General sanity checks */ 2815 - if (alias->memory_size & (PAGE_SIZE - 1)) 2816 - goto out; 2817 - if (alias->guest_phys_addr & (PAGE_SIZE - 1)) 2818 - goto out; 2819 - if (alias->slot >= KVM_ALIAS_SLOTS) 2820 - goto out; 2821 - if (alias->guest_phys_addr + alias->memory_size 2822 - < alias->guest_phys_addr) 2823 - goto out; 2824 - if (alias->target_phys_addr + alias->memory_size 2825 - < alias->target_phys_addr) 2826 - goto out; 2827 - 2828 - r = -ENOMEM; 2829 - aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 2830 - if (!aliases) 2831 - goto out; 2832 - 2833 - mutex_lock(&kvm->slots_lock); 2834 - 2835 - /* invalidate any gfn reference in case of deletion/shrinking */ 2836 - memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); 2837 - aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; 2838 - old_aliases = kvm->arch.aliases; 2839 - rcu_assign_pointer(kvm->arch.aliases, aliases); 2840 - synchronize_srcu_expedited(&kvm->srcu); 2841 - kvm_mmu_zap_all(kvm); 2842 - kfree(old_aliases); 2843 - 2844 - r = -ENOMEM; 2845 - aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 2846 - if (!aliases) 2847 - goto out_unlock; 2848 - 2849 - memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); 2850 - 2851 - p = &aliases->aliases[alias->slot]; 2852 - p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 2853 - p->npages = alias->memory_size >> PAGE_SHIFT; 2854 - p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 2855 - p->flags &= ~(KVM_ALIAS_INVALID); 2856 - 2857 - for (n = KVM_ALIAS_SLOTS; n > 0; --n) 2858 - if (aliases->aliases[n - 1].npages) 2859 - break; 2860 - aliases->naliases = n; 2861 - 2862 - old_aliases = kvm->arch.aliases; 2863 - rcu_assign_pointer(kvm->arch.aliases, aliases); 2864 - synchronize_srcu_expedited(&kvm->srcu); 2865 - kfree(old_aliases); 2866 - r = 0; 2867 - 2868 - out_unlock: 2869 - mutex_unlock(&kvm->slots_lock); 2870 - out: 2871 - return r; 2872 } 2873 2874 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) ··· 2890 struct kvm_memory_slot *memslot; 2891 unsigned long n; 2892 unsigned long is_dirty = 0; 2893 - unsigned long *dirty_bitmap = NULL; 2894 2895 mutex_lock(&kvm->slots_lock); 2896 ··· 2904 2905 n = kvm_dirty_bitmap_bytes(memslot); 2906 2907 - r = -ENOMEM; 2908 - dirty_bitmap = vmalloc(n); 2909 - if (!dirty_bitmap) 2910 - goto out; 2911 - memset(dirty_bitmap, 0, n); 2912 - 2913 for (i = 0; !is_dirty && i < n/sizeof(long); i++) 2914 is_dirty = memslot->dirty_bitmap[i]; 2915 2916 /* If nothing is dirty, don't bother messing with page tables. 
*/ 2917 if (is_dirty) { 2918 struct kvm_memslots *slots, *old_slots; 2919 2920 spin_lock(&kvm->mmu_lock); 2921 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2922 spin_unlock(&kvm->mmu_lock); 2923 2924 - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 2925 - if (!slots) 2926 - goto out_free; 2927 2928 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 2929 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; 2930 ··· 2936 synchronize_srcu_expedited(&kvm->srcu); 2937 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; 2938 kfree(old_slots); 2939 } 2940 2941 r = 0; 2942 - if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) 2943 - r = -EFAULT; 2944 - out_free: 2945 - vfree(dirty_bitmap); 2946 out: 2947 mutex_unlock(&kvm->slots_lock); 2948 return r; ··· 2969 union { 2970 struct kvm_pit_state ps; 2971 struct kvm_pit_state2 ps2; 2972 - struct kvm_memory_alias alias; 2973 struct kvm_pit_config pit_config; 2974 } u; 2975 ··· 2989 goto out; 2990 break; 2991 } 2992 - case KVM_SET_MEMORY_REGION: { 2993 - struct kvm_memory_region kvm_mem; 2994 - struct kvm_userspace_memory_region kvm_userspace_mem; 2995 - 2996 - r = -EFAULT; 2997 - if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) 2998 - goto out; 2999 - kvm_userspace_mem.slot = kvm_mem.slot; 3000 - kvm_userspace_mem.flags = kvm_mem.flags; 3001 - kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; 3002 - kvm_userspace_mem.memory_size = kvm_mem.memory_size; 3003 - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); 3004 - if (r) 3005 - goto out; 3006 - break; 3007 - } 3008 case KVM_SET_NR_MMU_PAGES: 3009 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); 3010 if (r) ··· 2996 break; 2997 case KVM_GET_NR_MMU_PAGES: 2998 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); 2999 - break; 3000 - case KVM_SET_MEMORY_ALIAS: 3001 - r = -EFAULT; 3002 - if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) 3003 - goto out; 3004 - r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); 3005 - if (r) 3006 - goto out; 3007 break; 3008 case KVM_CREATE_IRQCHIP: { 3009 struct kvm_pic *vpic; ··· 3336 } 3337 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); 3338 if (ret < 0) { 3339 - r = X86EMUL_UNHANDLEABLE; 3340 goto out; 3341 } 3342 ··· 3392 } 3393 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); 3394 if (ret < 0) { 3395 - r = X86EMUL_UNHANDLEABLE; 3396 goto out; 3397 } 3398 ··· 3407 static int emulator_read_emulated(unsigned long addr, 3408 void *val, 3409 unsigned int bytes, 3410 struct kvm_vcpu *vcpu) 3411 { 3412 gpa_t gpa; 3413 - u32 error_code; 3414 3415 if (vcpu->mmio_read_completed) { 3416 memcpy(val, vcpu->mmio_data, bytes); ··· 3420 return X86EMUL_CONTINUE; 3421 } 3422 3423 - gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); 3424 3425 - if (gpa == UNMAPPED_GVA) { 3426 - kvm_inject_page_fault(vcpu, addr, error_code); 3427 return X86EMUL_PROPAGATE_FAULT; 3428 - } 3429 3430 /* For APIC access vmexit */ 3431 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ··· 3445 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 3446 3447 vcpu->mmio_needed = 1; 3448 - vcpu->mmio_phys_addr = gpa; 3449 - vcpu->mmio_size = bytes; 3450 - vcpu->mmio_is_write = 0; 3451 3452 - return X86EMUL_UNHANDLEABLE; 3453 } 3454 3455 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ··· 3468 static int emulator_write_emulated_onepage(unsigned long addr, 3469 const void *val, 3470 unsigned int bytes, 3471 struct kvm_vcpu *vcpu) 3472 { 3473 gpa_t gpa; 3474 - u32 error_code; 3475 3476 - gpa = 
kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); 3477 3478 - if (gpa == UNMAPPED_GVA) { 3479 - kvm_inject_page_fault(vcpu, addr, error_code); 3480 return X86EMUL_PROPAGATE_FAULT; 3481 - } 3482 3483 /* For APIC access vmexit */ 3484 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ··· 3494 return X86EMUL_CONTINUE; 3495 3496 vcpu->mmio_needed = 1; 3497 - vcpu->mmio_phys_addr = gpa; 3498 - vcpu->mmio_size = bytes; 3499 - vcpu->mmio_is_write = 1; 3500 - memcpy(vcpu->mmio_data, val, bytes); 3501 3502 return X86EMUL_CONTINUE; 3503 } ··· 3506 int emulator_write_emulated(unsigned long addr, 3507 const void *val, 3508 unsigned int bytes, 3509 struct kvm_vcpu *vcpu) 3510 { 3511 /* Crossing a page boundary? */ ··· 3514 int rc, now; 3515 3516 now = -addr & ~PAGE_MASK; 3517 - rc = emulator_write_emulated_onepage(addr, val, now, vcpu); 3518 if (rc != X86EMUL_CONTINUE) 3519 return rc; 3520 addr += now; 3521 val += now; 3522 bytes -= now; 3523 } 3524 - return emulator_write_emulated_onepage(addr, val, bytes, vcpu); 3525 } 3526 - EXPORT_SYMBOL_GPL(emulator_write_emulated); 3527 3528 #define CMPXCHG_TYPE(t, ptr, old, new) \ 3529 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) ··· 3540 const void *old, 3541 const void *new, 3542 unsigned int bytes, 3543 struct kvm_vcpu *vcpu) 3544 { 3545 gpa_t gpa; ··· 3562 goto emul_write; 3563 3564 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3565 3566 kaddr = kmap_atomic(page, KM_USER0); 3567 kaddr += offset_in_page(gpa); ··· 3598 emul_write: 3599 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3600 3601 - return emulator_write_emulated(addr, new, bytes, vcpu); 3602 } 3603 3604 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) ··· 3686 return X86EMUL_CONTINUE; 3687 } 3688 3689 int emulate_clts(struct kvm_vcpu *vcpu) 3690 { 3691 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); ··· 3708 return X86EMUL_CONTINUE; 3709 } 3710 3711 - int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3712 { 3713 - return kvm_get_dr(ctxt->vcpu, dr, dest); 3714 } 3715 3716 - int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3717 { 3718 - unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? 
~0ULL : ~0U; 3719 3720 - return kvm_set_dr(ctxt->vcpu, dr, value & mask); 3721 } 3722 - 3723 - void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3724 - { 3725 - u8 opcodes[4]; 3726 - unsigned long rip = kvm_rip_read(vcpu); 3727 - unsigned long rip_linear; 3728 - 3729 - if (!printk_ratelimit()) 3730 - return; 3731 - 3732 - rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 3733 - 3734 - kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); 3735 - 3736 - printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 3737 - context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 3738 - } 3739 - EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3740 3741 static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3742 { ··· 3752 return value; 3753 } 3754 3755 - static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) 3756 { 3757 switch (cr) { 3758 case 0: 3759 - kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); 3760 break; 3761 case 2: 3762 vcpu->arch.cr2 = val; 3763 break; 3764 case 3: 3765 - kvm_set_cr3(vcpu, val); 3766 break; 3767 case 4: 3768 - kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); 3769 break; 3770 case 8: 3771 - kvm_set_cr8(vcpu, val & 0xfUL); 3772 break; 3773 default: 3774 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3775 } 3776 } 3777 3778 static int emulator_get_cpl(struct kvm_vcpu *vcpu) ··· 3788 static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) 3789 { 3790 kvm_x86_ops->get_gdt(vcpu, dt); 3791 } 3792 3793 static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, ··· 3868 kvm_set_segment(vcpu, &kvm_seg, seg); 3869 } 3870 3871 - static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 3872 - { 3873 - kvm_x86_ops->set_rflags(vcpu, rflags); 3874 - } 3875 - 3876 static struct x86_emulate_ops emulate_ops = { 3877 .read_std = kvm_read_guest_virt_system, 3878 .write_std = kvm_write_guest_virt_system, ··· 3881 .set_cached_descriptor = emulator_set_cached_descriptor, 3882 .get_segment_selector = emulator_get_segment_selector, 3883 .set_segment_selector = emulator_set_segment_selector, 3884 .get_gdt = emulator_get_gdt, 3885 .get_cr = emulator_get_cr, 3886 .set_cr = emulator_set_cr, 3887 .cpl = emulator_get_cpl, 3888 - .set_rflags = emulator_set_rflags, 3889 }; 3890 3891 static void cache_all_regs(struct kvm_vcpu *vcpu) ··· 3900 vcpu->arch.regs_dirty = ~0; 3901 } 3902 3903 int emulate_instruction(struct kvm_vcpu *vcpu, 3904 unsigned long cr2, 3905 u16 error_code, 3906 int emulation_type) 3907 { 3908 - int r, shadow_mask; 3909 - struct decode_cache *c; 3910 - struct kvm_run *run = vcpu->run; 3911 3912 kvm_clear_exception_queue(vcpu); 3913 vcpu->arch.mmio_fault_cr2 = cr2; ··· 3979 * for example. 3980 */ 3981 cache_all_regs(vcpu); 3982 - 3983 - vcpu->mmio_is_write = 0; 3984 3985 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3986 int cs_db, cs_l; ··· 3993 ? X86EMUL_MODE_VM86 : cs_l 3994 ? X86EMUL_MODE_PROT64 : cs_db 3995 ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3996 3997 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3998 trace_kvm_emulate_insn_start(vcpu); 3999 4000 /* Only allow emulation of specific instructions on #UD 4001 * (namely VMMCALL, sysenter, sysexit, syscall)*/ 4002 - c = &vcpu->arch.emulate_ctxt.decode; 4003 if (emulation_type & EMULTYPE_TRAP_UD) { 4004 if (!c->twobyte) 4005 return EMULATE_FAIL; ··· 4030 4031 ++vcpu->stat.insn_emulation; 4032 if (r) { 4033 - ++vcpu->stat.insn_emulation_fail; 4034 - trace_kvm_emulate_insn_failed(vcpu); 4035 - if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 4036 return EMULATE_DONE; 4037 - return EMULATE_FAIL; 4038 } 4039 } 4040 ··· 4043 return EMULATE_DONE; 4044 } 4045 4046 restart: 4047 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 4048 - shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; 4049 4050 - if (r == 0) 4051 - kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); 4052 4053 if (vcpu->arch.pio.count) { 4054 if (!vcpu->arch.pio.in) ··· 4073 return EMULATE_DO_MMIO; 4074 } 4075 4076 - if (r || vcpu->mmio_is_write) { 4077 - run->exit_reason = KVM_EXIT_MMIO; 4078 - run->mmio.phys_addr = vcpu->mmio_phys_addr; 4079 - memcpy(run->mmio.data, vcpu->mmio_data, 8); 4080 - run->mmio.len = vcpu->mmio_size; 4081 - run->mmio.is_write = vcpu->mmio_is_write; 4082 - } 4083 - 4084 - if (r) { 4085 - if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 4086 - goto done; 4087 - if (!vcpu->mmio_needed) { 4088 - ++vcpu->stat.insn_emulation_fail; 4089 - trace_kvm_emulate_insn_failed(vcpu); 4090 - kvm_report_emulation_failure(vcpu, "mmio"); 4091 - return EMULATE_FAIL; 4092 - } 4093 return EMULATE_DO_MMIO; 4094 } 4095 - 4096 - if (vcpu->mmio_is_write) { 4097 - vcpu->mmio_needed = 0; 4098 - return EMULATE_DO_MMIO; 4099 - } 4100 - 4101 - done: 4102 - if (vcpu->arch.exception.pending) 4103 - vcpu->arch.emulate_ctxt.restart = false; 4104 4105 if (vcpu->arch.emulate_ctxt.restart) 4106 goto restart; ··· 4252 4253 perf_register_guest_info_callbacks(&kvm_guest_cbs); 4254 4255 return 0; 4256 4257 out: ··· 4417 4418 kvm_x86_ops->patch_hypercall(vcpu, instruction); 4419 4420 - return emulator_write_emulated(rip, instruction, 3, vcpu); 4421 } 4422 4423 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) ··· 4653 } 4654 } 4655 4656 static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 4657 { 4658 int r; 4659 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 4660 vcpu->run->request_interrupt_window; 4661 4662 - if (vcpu->requests) 4663 - if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 4664 - kvm_mmu_unload(vcpu); 4665 - 4666 - r = kvm_mmu_reload(vcpu); 4667 - if (unlikely(r)) 4668 - goto out; 4669 - 4670 if (vcpu->requests) { 4671 - if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 4672 __kvm_migrate_timers(vcpu); 4673 - if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) 4674 kvm_write_guest_time(vcpu); 4675 - if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) 4676 kvm_mmu_sync_roots(vcpu); 4677 - if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 4678 kvm_x86_ops->tlb_flush(vcpu); 4679 - if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 4680 - &vcpu->requests)) { 4681 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; 4682 r = 0; 4683 goto out; 4684 } 4685 - if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { 4686 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; 4687 r = 0; 4688 goto out; 4689 } 4690 - if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { 4691 
vcpu->fpu_active = 0; 4692 kvm_x86_ops->fpu_deactivate(vcpu); 4693 } 4694 } 4695 4696 preempt_disable(); 4697 4698 kvm_x86_ops->prepare_guest_switch(vcpu); 4699 if (vcpu->fpu_active) 4700 kvm_load_guest_fpu(vcpu); 4701 4702 local_irq_disable(); 4703 4704 - clear_bit(KVM_REQ_KICK, &vcpu->requests); 4705 - smp_mb__after_clear_bit(); 4706 - 4707 - if (vcpu->requests || need_resched() || signal_pending(current)) { 4708 - set_bit(KVM_REQ_KICK, &vcpu->requests); 4709 local_irq_enable(); 4710 preempt_enable(); 4711 r = 1; ··· 4769 if (hw_breakpoint_active()) 4770 hw_breakpoint_restore(); 4771 4772 - set_bit(KVM_REQ_KICK, &vcpu->requests); 4773 local_irq_enable(); 4774 4775 ++vcpu->stat.exits; ··· 4832 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4833 kvm_vcpu_block(vcpu); 4834 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4835 - if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4836 { 4837 switch(vcpu->arch.mp_state) { 4838 case KVM_MP_STATE_HALTED: ··· 4884 int r; 4885 sigset_t sigsaved; 4886 4887 - vcpu_load(vcpu); 4888 - 4889 if (vcpu->sigset_active) 4890 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 4891 ··· 4908 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4909 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); 4910 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4911 - if (r == EMULATE_DO_MMIO) { 4912 r = 0; 4913 goto out; 4914 } ··· 4924 if (vcpu->sigset_active) 4925 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 4926 4927 - vcpu_put(vcpu); 4928 return r; 4929 } 4930 4931 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4932 { 4933 - vcpu_load(vcpu); 4934 - 4935 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4936 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); 4937 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); ··· 4951 regs->rip = kvm_rip_read(vcpu); 4952 regs->rflags = kvm_get_rflags(vcpu); 4953 4954 - vcpu_put(vcpu); 4955 - 4956 return 0; 4957 } 4958 4959 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4960 { 4961 - vcpu_load(vcpu); 4962 - 4963 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); 4964 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); 4965 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); ··· 4980 4981 vcpu->arch.exception.pending = false; 4982 4983 - vcpu_put(vcpu); 4984 - 4985 return 0; 4986 } 4987 ··· 4997 struct kvm_sregs *sregs) 4998 { 4999 struct desc_ptr dt; 5000 - 5001 - vcpu_load(vcpu); 5002 5003 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 5004 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); ··· 5029 set_bit(vcpu->arch.interrupt.nr, 5030 (unsigned long *)sregs->interrupt_bitmap); 5031 5032 - vcpu_put(vcpu); 5033 - 5034 return 0; 5035 } 5036 5037 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 5038 struct kvm_mp_state *mp_state) 5039 { 5040 - vcpu_load(vcpu); 5041 mp_state->mp_state = vcpu->arch.mp_state; 5042 - vcpu_put(vcpu); 5043 return 0; 5044 } 5045 5046 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 5047 struct kvm_mp_state *mp_state) 5048 { 5049 - vcpu_load(vcpu); 5050 vcpu->arch.mp_state = mp_state->mp_state; 5051 - vcpu_put(vcpu); 5052 return 0; 5053 } 5054 5055 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 5056 bool has_error_code, u32 error_code) 5057 { 5058 int cs_db, cs_l, ret; 5059 cache_all_regs(vcpu); 5060 ··· 5064 ? X86EMUL_MODE_VM86 : cs_l 5065 ? X86EMUL_MODE_PROT64 : cs_db 5066 ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 5067 5068 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, 5069 tss_selector, reason, has_error_code, ··· 5074 if (ret) 5075 return EMULATE_FAIL; 5076 5077 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 5078 return EMULATE_DONE; 5079 } ··· 5087 int mmu_reset_needed = 0; 5088 int pending_vec, max_bits; 5089 struct desc_ptr dt; 5090 - 5091 - vcpu_load(vcpu); 5092 5093 dt.size = sregs->idt.limit; 5094 dt.address = sregs->idt.base; ··· 5147 !is_protmode(vcpu)) 5148 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5149 5150 - vcpu_put(vcpu); 5151 - 5152 return 0; 5153 } 5154 ··· 5156 unsigned long rflags; 5157 int i, r; 5158 5159 - vcpu_load(vcpu); 5160 - 5161 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { 5162 r = -EBUSY; 5163 if (vcpu->arch.exception.pending) 5164 - goto unlock_out; 5165 if (dbg->control & KVM_GUESTDBG_INJECT_DB) 5166 kvm_queue_exception(vcpu, DB_VECTOR); 5167 else ··· 5201 5202 r = 0; 5203 5204 - unlock_out: 5205 - vcpu_put(vcpu); 5206 5207 return r; 5208 } 5209 - 5210 - /* 5211 - * fxsave fpu state. Taken from x86_64/processor.h. To be killed when 5212 - * we have asm/x86/processor.h 5213 - */ 5214 - struct fxsave { 5215 - u16 cwd; 5216 - u16 swd; 5217 - u16 twd; 5218 - u16 fop; 5219 - u64 rip; 5220 - u64 rdp; 5221 - u32 mxcsr; 5222 - u32 mxcsr_mask; 5223 - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ 5224 - #ifdef CONFIG_X86_64 5225 - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ 5226 - #else 5227 - u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ 5228 - #endif 5229 - }; 5230 5231 /* 5232 * Translate a guest virtual address to a guest physical address. ··· 5216 gpa_t gpa; 5217 int idx; 5218 5219 - vcpu_load(vcpu); 5220 idx = srcu_read_lock(&vcpu->kvm->srcu); 5221 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); 5222 srcu_read_unlock(&vcpu->kvm->srcu, idx); ··· 5223 tr->valid = gpa != UNMAPPED_GVA; 5224 tr->writeable = 1; 5225 tr->usermode = 0; 5226 - vcpu_put(vcpu); 5227 5228 return 0; 5229 } 5230 5231 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5232 { 5233 - struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; 5234 - 5235 - vcpu_load(vcpu); 5236 5237 memcpy(fpu->fpr, fxsave->st_space, 128); 5238 fpu->fcw = fxsave->cwd; ··· 5241 fpu->last_dp = fxsave->rdp; 5242 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); 5243 5244 - vcpu_put(vcpu); 5245 - 5246 return 0; 5247 } 5248 5249 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5250 { 5251 - struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; 5252 - 5253 - vcpu_load(vcpu); 5254 5255 memcpy(fxsave->st_space, fpu->fpr, 128); 5256 fxsave->cwd = fpu->fcw; ··· 5258 fxsave->rdp = fpu->last_dp; 5259 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); 5260 5261 - vcpu_put(vcpu); 5262 - 5263 return 0; 5264 } 5265 5266 - void fx_init(struct kvm_vcpu *vcpu) 5267 { 5268 - unsigned after_mxcsr_mask; 5269 5270 /* 5271 - * Touch the fpu the first time in non atomic context as if 5272 - * this is the first fpu instruction the exception handler 5273 - * will fire before the instruction returns and it'll have to 5274 - * allocate ram with GFP_KERNEL. 
5275 */ 5276 - if (!used_math()) 5277 - kvm_fx_save(&vcpu->arch.host_fx_image); 5278 - 5279 - /* Initialize guest FPU by resetting ours and saving into guest's */ 5280 - preempt_disable(); 5281 - kvm_fx_save(&vcpu->arch.host_fx_image); 5282 - kvm_fx_finit(); 5283 - kvm_fx_save(&vcpu->arch.guest_fx_image); 5284 - kvm_fx_restore(&vcpu->arch.host_fx_image); 5285 - preempt_enable(); 5286 5287 vcpu->arch.cr0 |= X86_CR0_ET; 5288 - after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); 5289 - vcpu->arch.guest_fx_image.mxcsr = 0x1f80; 5290 - memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask, 5291 - 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); 5292 } 5293 EXPORT_SYMBOL_GPL(fx_init); 5294 5295 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5296 { 5297 if (vcpu->guest_fpu_loaded) 5298 return; 5299 5300 vcpu->guest_fpu_loaded = 1; 5301 - kvm_fx_save(&vcpu->arch.host_fx_image); 5302 - kvm_fx_restore(&vcpu->arch.guest_fx_image); 5303 trace_kvm_fpu(1); 5304 } 5305 5306 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5307 { 5308 if (!vcpu->guest_fpu_loaded) 5309 return; 5310 5311 vcpu->guest_fpu_loaded = 0; 5312 - kvm_fx_save(&vcpu->arch.guest_fx_image); 5313 - kvm_fx_restore(&vcpu->arch.host_fx_image); 5314 ++vcpu->stat.fpu_reload; 5315 - set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); 5316 trace_kvm_fpu(0); 5317 } 5318 ··· 5325 vcpu->arch.time_page = NULL; 5326 } 5327 5328 kvm_x86_ops->vcpu_free(vcpu); 5329 } 5330 ··· 5339 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 5340 { 5341 int r; 5342 - 5343 - /* We do fxsave: this must be aligned. */ 5344 - BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); 5345 5346 vcpu->arch.mtrr_state.have_fixed = 1; 5347 vcpu_load(vcpu); ··· 5361 kvm_mmu_unload(vcpu); 5362 vcpu_put(vcpu); 5363 5364 kvm_x86_ops->vcpu_free(vcpu); 5365 } 5366 ··· 5455 } 5456 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; 5457 5458 return 0; 5459 fail_free_lapic: 5460 kvm_free_lapic(vcpu); 5461 fail_mmu_destroy: ··· 5489 5490 if (!kvm) 5491 return ERR_PTR(-ENOMEM); 5492 - 5493 - kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 5494 - if (!kvm->arch.aliases) { 5495 - kfree(kvm); 5496 - return ERR_PTR(-ENOMEM); 5497 - } 5498 5499 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5500 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); ··· 5532 void kvm_arch_sync_events(struct kvm *kvm) 5533 { 5534 kvm_free_all_assigned_devices(kvm); 5535 } 5536 5537 void kvm_arch_destroy_vm(struct kvm *kvm) 5538 { 5539 kvm_iommu_unmap_guest(kvm); 5540 - kvm_free_pit(kvm); 5541 kfree(kvm->arch.vpic); 5542 kfree(kvm->arch.vioapic); 5543 kvm_free_vcpus(kvm); ··· 5547 if (kvm->arch.ept_identity_pagetable) 5548 put_page(kvm->arch.ept_identity_pagetable); 5549 cleanup_srcu_struct(&kvm->srcu); 5550 - kfree(kvm->arch.aliases); 5551 kfree(kvm); 5552 } 5553 ··· 5557 int user_alloc) 5558 { 5559 int npages = memslot->npages; 5560 5561 /*To keep backward compatibility with older userspace, 5562 *x86 needs to hanlde !user_alloc case. ··· 5574 userspace_addr = do_mmap(NULL, 0, 5575 npages * PAGE_SIZE, 5576 PROT_READ | PROT_WRITE, 5577 - MAP_PRIVATE | MAP_ANONYMOUS, 5578 0); 5579 up_write(&current->mm->mmap_sem); 5580 ··· 5647 5648 me = get_cpu(); 5649 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) 5650 - if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) 5651 smp_send_reschedule(cpu); 5652 put_cpu(); 5653 }
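When the in-kernel emulator cannot complete a memory access it fills vcpu->run->mmio and returns EMULATE_DO_MMIO, as in the hunk above, leaving the access to userspace. A minimal sketch of the userspace half of that contract; it assumes vcpu_fd is an open vcpu descriptor, kvm_run is its mmap()ed struct kvm_run, and device_write() is a hypothetical device-model hook, not anything from this series:

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static void vcpu_loop(int vcpu_fd, struct kvm_run *kvm_run)
    {
            for (;;) {
                    if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                            return;

                    switch (kvm_run->exit_reason) {
                    case KVM_EXIT_MMIO:
                            if (kvm_run->mmio.is_write) {
                                    /* hand the bytes to the device model:
                                     * device_write(kvm_run->mmio.phys_addr,
                                     *              kvm_run->mmio.data,
                                     *              kvm_run->mmio.len); */
                            } else {
                                    /* a read: data must be filled in before
                                     * the next KVM_RUN resumes the guest */
                                    memset(kvm_run->mmio.data, 0,
                                           kvm_run->mmio.len);
                            }
                            break;
                    default:
                            return; /* other exit reasons not shown */
                    }
            }
    }

Only the MMIO exit is sketched here; a real VMM dispatches on every exit_reason it can receive.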
··· 6 * Copyright (C) 2006 Qumranet, Inc. 7 * Copyright (C) 2008 Qumranet, Inc. 8 * Copyright IBM Corporation, 2008 9 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 10 * 11 * Authors: 12 * Avi Kivity <avi@qumranet.com> ··· 41 #include <linux/srcu.h> 42 #include <linux/slab.h> 43 #include <linux/perf_event.h> 44 + #include <linux/uaccess.h> 45 #include <trace/events/kvm.h> 46 47 #define CREATE_TRACE_POINTS 48 #include "trace.h" 49 50 #include <asm/debugreg.h> 51 #include <asm/msr.h> 52 #include <asm/desc.h> 53 #include <asm/mtrr.h> 54 #include <asm/mce.h> 55 + #include <asm/i387.h> 56 + #include <asm/xcr.h> 57 58 #define MAX_IO_MSRS 256 59 #define CR0_RESERVED_BITS \ ··· 62 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 63 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 64 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ 65 + | X86_CR4_OSXSAVE \ 66 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 67 68 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) ··· 146 { "largepages", VM_STAT(lpages) }, 147 { NULL } 148 }; 149 + 150 + u64 __read_mostly host_xcr0; 151 + 152 + static inline u32 bit(int bitno) 153 + { 154 + return 1 << (bitno & 31); 155 + } 156 157 static void kvm_on_user_return(struct user_return_notifier *urn) 158 { ··· 285 prev_nr = vcpu->arch.exception.nr; 286 if (prev_nr == DF_VECTOR) { 287 /* triple fault -> shutdown */ 288 + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 289 return; 290 } 291 class1 = exception_class(prev_nr); ··· 414 return changed; 415 } 416 417 + int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 418 { 419 + unsigned long old_cr0 = kvm_read_cr0(vcpu); 420 + unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | 421 + X86_CR0_CD | X86_CR0_NW; 422 + 423 cr0 |= X86_CR0_ET; 424 425 #ifdef CONFIG_X86_64 426 + if (cr0 & 0xffffffff00000000UL) 427 + return 1; 428 #endif 429 430 cr0 &= ~CR0_RESERVED_BITS; 431 432 + if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) 433 + return 1; 434 435 + if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) 436 + return 1; 437 438 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 439 #ifdef CONFIG_X86_64 440 if ((vcpu->arch.efer & EFER_LME)) { 441 int cs_db, cs_l; 442 443 + if (!is_pae(vcpu)) 444 + return 1; 445 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 446 + if (cs_l) 447 + return 1; 448 } else 449 #endif 450 + if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) 451 + return 1; 452 } 453 454 kvm_x86_ops->set_cr0(vcpu, cr0); 455 456 + if ((cr0 ^ old_cr0) & update_bits) 457 + kvm_mmu_reset_context(vcpu); 458 + return 0; 459 } 460 EXPORT_SYMBOL_GPL(kvm_set_cr0); 461 462 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 463 { 464 + (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); 465 } 466 EXPORT_SYMBOL_GPL(kvm_lmsw); 467 468 + int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 469 + { 470 + u64 xcr0; 471 + 472 + /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ 473 + if (index != XCR_XFEATURE_ENABLED_MASK) 474 + return 1; 475 + xcr0 = xcr; 476 + if (kvm_x86_ops->get_cpl(vcpu) != 0) 477 + return 1; 478 + if (!(xcr0 & XSTATE_FP)) 479 + return 1; 480 + if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) 481 + return 1; 482 + if (xcr0 & ~host_xcr0) 483 + return 1; 484 + vcpu->arch.xcr0 = xcr0; 485 + vcpu->guest_xcr0_loaded = 0; 486 + return 0; 487 + } 488 + 489 + int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 490 + { 491 + if (__kvm_set_xcr(vcpu, index, xcr)) { 492 + kvm_inject_gp(vcpu, 0); 493 + return 1; 494 + } 495 + return 0; 496 + } 497 + 
EXPORT_SYMBOL_GPL(kvm_set_xcr); 498 + 499 + static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) 500 + { 501 + struct kvm_cpuid_entry2 *best; 502 + 503 + best = kvm_find_cpuid_entry(vcpu, 1, 0); 504 + return best && (best->ecx & bit(X86_FEATURE_XSAVE)); 505 + } 506 + 507 + static void update_cpuid(struct kvm_vcpu *vcpu) 508 + { 509 + struct kvm_cpuid_entry2 *best; 510 + 511 + best = kvm_find_cpuid_entry(vcpu, 1, 0); 512 + if (!best) 513 + return; 514 + 515 + /* Update OSXSAVE bit */ 516 + if (cpu_has_xsave && best->function == 0x1) { 517 + best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); 518 + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) 519 + best->ecx |= bit(X86_FEATURE_OSXSAVE); 520 + } 521 + } 522 + 523 + int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 524 { 525 unsigned long old_cr4 = kvm_read_cr4(vcpu); 526 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 527 528 + if (cr4 & CR4_RESERVED_BITS) 529 + return 1; 530 + 531 + if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) 532 + return 1; 533 534 if (is_long_mode(vcpu)) { 535 + if (!(cr4 & X86_CR4_PAE)) 536 + return 1; 537 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 538 && ((cr4 ^ old_cr4) & pdptr_bits) 539 + && !load_pdptrs(vcpu, vcpu->arch.cr3)) 540 + return 1; 541 542 + if (cr4 & X86_CR4_VMXE) 543 + return 1; 544 + 545 kvm_x86_ops->set_cr4(vcpu, cr4); 546 + 547 + if ((cr4 ^ old_cr4) & pdptr_bits) 548 + kvm_mmu_reset_context(vcpu); 549 + 550 + if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) 551 + update_cpuid(vcpu); 552 + 553 + return 0; 554 } 555 EXPORT_SYMBOL_GPL(kvm_set_cr4); 556 557 + int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 558 { 559 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 560 kvm_mmu_sync_roots(vcpu); 561 kvm_mmu_flush_tlb(vcpu); 562 + return 0; 563 } 564 565 if (is_long_mode(vcpu)) { 566 + if (cr3 & CR3_L_MODE_RESERVED_BITS) 567 + return 1; 568 } else { 569 if (is_pae(vcpu)) { 570 + if (cr3 & CR3_PAE_RESERVED_BITS) 571 + return 1; 572 + if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) 573 + return 1; 574 } 575 /* 576 * We don't check reserved bits in nonpae mode, because ··· 546 * to debug) behavior on the guest side. 547 */ 548 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) 549 + return 1; 550 + vcpu->arch.cr3 = cr3; 551 + vcpu->arch.mmu.new_cr3(vcpu); 552 + return 0; 553 } 554 EXPORT_SYMBOL_GPL(kvm_set_cr3); 555 556 + int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 557 { 558 + if (cr8 & CR8_RESERVED_BITS) 559 + return 1; 560 if (irqchip_in_kernel(vcpu->kvm)) 561 kvm_lapic_set_tpr(vcpu, cr8); 562 else 563 vcpu->arch.cr8 = cr8; 564 + return 0; 565 + } 566 + 567 + void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 568 + { 569 + if (__kvm_set_cr8(vcpu, cr8)) 570 + kvm_inject_gp(vcpu, 0); 571 } 572 EXPORT_SYMBOL_GPL(kvm_set_cr8); 573 ··· 576 } 577 EXPORT_SYMBOL_GPL(kvm_get_cr8); 578 579 + static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 580 { 581 switch (dr) { 582 case 0 ... 
3: ··· 585 vcpu->arch.eff_db[dr] = val; 586 break; 587 case 4: 588 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 589 + return 1; /* #UD */ 590 /* fall through */ 591 case 6: 592 + if (val & 0xffffffff00000000ULL) 593 + return -1; /* #GP */ 594 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 595 break; 596 case 5: 597 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 598 + return 1; /* #UD */ 599 /* fall through */ 600 default: /* 7 */ 601 + if (val & 0xffffffff00000000ULL) 602 + return -1; /* #GP */ 603 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 604 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 605 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); ··· 618 619 return 0; 620 } 621 + 622 + int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 623 + { 624 + int res; 625 + 626 + res = __kvm_set_dr(vcpu, dr, val); 627 + if (res > 0) 628 + kvm_queue_exception(vcpu, UD_VECTOR); 629 + else if (res < 0) 630 + kvm_inject_gp(vcpu, 0); 631 + 632 + return res; 633 + } 634 EXPORT_SYMBOL_GPL(kvm_set_dr); 635 636 + static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) 637 { 638 switch (dr) { 639 case 0 ... 3: 640 *val = vcpu->arch.db[dr]; 641 break; 642 case 4: 643 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 644 return 1; 645 /* fall through */ 646 case 6: 647 *val = vcpu->arch.dr6; 648 break; 649 case 5: 650 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 651 return 1; 652 /* fall through */ 653 default: /* 7 */ 654 *val = vcpu->arch.dr7; ··· 648 649 return 0; 650 } 651 652 + int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) 653 { 654 + if (_kvm_get_dr(vcpu, dr, val)) { 655 + kvm_queue_exception(vcpu, UD_VECTOR); 656 + return 1; 657 + } 658 + return 0; 659 } 660 + EXPORT_SYMBOL_GPL(kvm_get_dr); 661 662 /* 663 * List of msr numbers which we expose to userspace through KVM_GET_MSRS ··· 682 683 static u32 emulated_msrs[] = { 684 MSR_IA32_MISC_ENABLE, 685 + MSR_IA32_MCG_STATUS, 686 + MSR_IA32_MCG_CTL, 687 }; 688 689 static int set_efer(struct kvm_vcpu *vcpu, u64 efer) 690 { 691 + u64 old_efer = vcpu->arch.efer; 692 + 693 if (efer & efer_reserved_bits) 694 return 1; 695 ··· 714 715 kvm_x86_ops->set_efer(vcpu, efer); 716 717 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 718 kvm_mmu_reset_context(vcpu); 719 + 720 + /* Update reserved bits */ 721 + if ((efer ^ old_efer) & EFER_NX) 722 + kvm_mmu_reset_context(vcpu); 723 724 return 0; 725 } ··· 882 883 if (!vcpu->time_page) 884 return 0; 885 + kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v); 886 return 1; 887 } 888 ··· 1524 { 1525 int i, idx; 1526 1527 idx = srcu_read_lock(&vcpu->kvm->srcu); 1528 for (i = 0; i < msrs->nmsrs; ++i) 1529 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1530 break; 1531 srcu_read_unlock(&vcpu->kvm->srcu, idx); 1532 1533 return i; 1534 } ··· 1618 case KVM_CAP_PCI_SEGMENT: 1619 case KVM_CAP_DEBUGREGS: 1620 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1621 + case KVM_CAP_XSAVE: 1622 r = 1; 1623 break; 1624 case KVM_CAP_COALESCED_MMIO: ··· 1640 break; 1641 case KVM_CAP_MCE: 1642 r = KVM_MAX_MCE_BANKS; 1643 + break; 1644 + case KVM_CAP_XCRS: 1645 + r = cpu_has_xsave; 1646 break; 1647 default: 1648 r = 0; ··· 1717 return r; 1718 } 1719 1720 + static void wbinvd_ipi(void *garbage) 1721 + { 1722 + wbinvd(); 1723 + } 1724 + 1725 + static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) 1726 + { 1727 + return vcpu->kvm->arch.iommu_domain && 1728 + !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); 1729 + } 1730 + 1731 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1732 { 
1733 + /* Address WBINVD may be executed by guest */ 1734 + if (need_emulate_wbinvd(vcpu)) { 1735 + if (kvm_x86_ops->has_wbinvd_exit()) 1736 + cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); 1737 + else if (vcpu->cpu != -1 && vcpu->cpu != cpu) 1738 + smp_call_function_single(vcpu->cpu, 1739 + wbinvd_ipi, NULL, 1); 1740 + } 1741 + 1742 kvm_x86_ops->vcpu_load(vcpu, cpu); 1743 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { 1744 unsigned long khz = cpufreq_quick_get(cpu); ··· 1731 1732 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1733 { 1734 kvm_x86_ops->vcpu_put(vcpu); 1735 + kvm_put_guest_fpu(vcpu); 1736 } 1737 1738 static int is_efer_nx(void) ··· 1781 if (copy_from_user(cpuid_entries, entries, 1782 cpuid->nent * sizeof(struct kvm_cpuid_entry))) 1783 goto out_free; 1784 for (i = 0; i < cpuid->nent; i++) { 1785 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; 1786 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; ··· 1799 r = 0; 1800 kvm_apic_set_version(vcpu); 1801 kvm_x86_ops->cpuid_update(vcpu); 1802 + update_cpuid(vcpu); 1803 1804 out_free: 1805 vfree(cpuid_entries); ··· 1820 if (copy_from_user(&vcpu->arch.cpuid_entries, entries, 1821 cpuid->nent * sizeof(struct kvm_cpuid_entry2))) 1822 goto out; 1823 vcpu->arch.cpuid_nent = cpuid->nent; 1824 kvm_apic_set_version(vcpu); 1825 kvm_x86_ops->cpuid_update(vcpu); 1826 + update_cpuid(vcpu); 1827 return 0; 1828 1829 out: ··· 1837 { 1838 int r; 1839 1840 r = -E2BIG; 1841 if (cpuid->nent < vcpu->arch.cpuid_nent) 1842 goto out; ··· 1849 1850 out: 1851 cpuid->nent = vcpu->arch.cpuid_nent; 1852 return r; 1853 } 1854 ··· 1901 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1902 /* cpuid 1.ecx */ 1903 const u32 kvm_supported_word4_x86_features = 1904 + F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | 1905 0 /* DS-CPL, VMX, SMX, EST */ | 1906 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | 1907 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1908 0 /* Reserved, DCA */ | F(XMM4_1) | 1909 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 1910 + 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); 1911 /* cpuid 0x80000001.ecx */ 1912 const u32 kvm_supported_word6_x86_features = 1913 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | ··· 1922 1923 switch (function) { 1924 case 0: 1925 + entry->eax = min(entry->eax, (u32)0xd); 1926 break; 1927 case 1: 1928 entry->edx &= kvm_supported_word0_x86_features; ··· 1972 for (i = 1; *nent < maxnent; ++i) { 1973 level_type = entry[i - 1].ecx & 0xff00; 1974 if (!level_type) 1975 + break; 1976 + do_cpuid_1_ent(&entry[i], function, i); 1977 + entry[i].flags |= 1978 + KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1979 + ++*nent; 1980 + } 1981 + break; 1982 + } 1983 + case 0xd: { 1984 + int i; 1985 + 1986 + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1987 + for (i = 1; *nent < maxnent; ++i) { 1988 + if (entry[i - 1].eax == 0 && i != 2) 1989 break; 1990 do_cpuid_1_ent(&entry[i], function, i); 1991 entry[i].flags |= ··· 2081 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2082 struct kvm_lapic_state *s) 2083 { 2084 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); 2085 2086 return 0; 2087 } ··· 2091 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, 2092 struct kvm_lapic_state *s) 2093 { 2094 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); 2095 kvm_apic_post_state_restore(vcpu); 2096 update_cr8_intercept(vcpu); 2097 2098 return 0; 2099 } ··· 2107 return -EINVAL; 2108 if (irqchip_in_kernel(vcpu->kvm)) 2109 return -ENXIO; 2110 2111 
kvm_queue_interrupt(vcpu, irq->irq, false); 2112 2113 return 0; 2114 } 2115 2116 static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) 2117 { 2118 kvm_inject_nmi(vcpu); 2119 2120 return 0; 2121 } ··· 2140 int r; 2141 unsigned bank_num = mcg_cap & 0xff, bank; 2142 2143 r = -EINVAL; 2144 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) 2145 goto out; ··· 2155 for (bank = 0; bank < bank_num; bank++) 2156 vcpu->arch.mce_banks[bank*4] = ~(u64)0; 2157 out: 2158 return r; 2159 } 2160 ··· 2188 printk(KERN_DEBUG "kvm: set_mce: " 2189 "injects mce exception while " 2190 "previous one is in progress!\n"); 2191 + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 2192 return 0; 2193 } 2194 if (banks[1] & MCI_STATUS_VAL) ··· 2213 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, 2214 struct kvm_vcpu_events *events) 2215 { 2216 events->exception.injected = 2217 vcpu->arch.exception.pending && 2218 !kvm_exception_is_soft(vcpu->arch.exception.nr); ··· 2239 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2240 | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2241 | KVM_VCPUEVENT_VALID_SHADOW); 2242 } 2243 2244 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, ··· 2250 | KVM_VCPUEVENT_VALID_SIPI_VECTOR 2251 | KVM_VCPUEVENT_VALID_SHADOW)) 2252 return -EINVAL; 2253 2254 vcpu->arch.exception.pending = events->exception.injected; 2255 vcpu->arch.exception.nr = events->exception.nr; ··· 2275 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) 2276 vcpu->arch.sipi_vector = events->sipi_vector; 2277 2278 return 0; 2279 } 2280 2281 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2282 struct kvm_debugregs *dbgregs) 2283 { 2284 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2285 dbgregs->dr6 = vcpu->arch.dr6; 2286 dbgregs->dr7 = vcpu->arch.dr7; 2287 dbgregs->flags = 0; 2288 } 2289 2290 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, ··· 2299 if (dbgregs->flags) 2300 return -EINVAL; 2301 2302 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 2303 vcpu->arch.dr6 = dbgregs->dr6; 2304 vcpu->arch.dr7 = dbgregs->dr7; 2305 2306 return 0; 2307 + } 2308 + 2309 + static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, 2310 + struct kvm_xsave *guest_xsave) 2311 + { 2312 + if (cpu_has_xsave) 2313 + memcpy(guest_xsave->region, 2314 + &vcpu->arch.guest_fpu.state->xsave, 2315 + sizeof(struct xsave_struct)); 2316 + else { 2317 + memcpy(guest_xsave->region, 2318 + &vcpu->arch.guest_fpu.state->fxsave, 2319 + sizeof(struct i387_fxsave_struct)); 2320 + *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = 2321 + XSTATE_FPSSE; 2322 + } 2323 + } 2324 + 2325 + static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, 2326 + struct kvm_xsave *guest_xsave) 2327 + { 2328 + u64 xstate_bv = 2329 + *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; 2330 + 2331 + if (cpu_has_xsave) 2332 + memcpy(&vcpu->arch.guest_fpu.state->xsave, 2333 + guest_xsave->region, sizeof(struct xsave_struct)); 2334 + else { 2335 + if (xstate_bv & ~XSTATE_FPSSE) 2336 + return -EINVAL; 2337 + memcpy(&vcpu->arch.guest_fpu.state->fxsave, 2338 + guest_xsave->region, sizeof(struct i387_fxsave_struct)); 2339 + } 2340 + return 0; 2341 + } 2342 + 2343 + static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, 2344 + struct kvm_xcrs *guest_xcrs) 2345 + { 2346 + if (!cpu_has_xsave) { 2347 + guest_xcrs->nr_xcrs = 0; 2348 + return; 2349 + } 2350 + 2351 + guest_xcrs->nr_xcrs = 1; 2352 + guest_xcrs->flags = 0; 2353 + guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; 
2354 + guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; 2355 + } 2356 + 2357 + static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, 2358 + struct kvm_xcrs *guest_xcrs) 2359 + { 2360 + int i, r = 0; 2361 + 2362 + if (!cpu_has_xsave) 2363 + return -EINVAL; 2364 + 2365 + if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) 2366 + return -EINVAL; 2367 + 2368 + for (i = 0; i < guest_xcrs->nr_xcrs; i++) 2369 + /* Only support XCR0 currently */ 2370 + if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { 2371 + r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, 2372 + guest_xcrs->xcrs[0].value); 2373 + break; 2374 + } 2375 + if (r) 2376 + r = -EINVAL; 2377 + return r; 2378 } 2379 2380 long kvm_arch_vcpu_ioctl(struct file *filp, ··· 2316 struct kvm_vcpu *vcpu = filp->private_data; 2317 void __user *argp = (void __user *)arg; 2318 int r; 2319 + union { 2320 + struct kvm_lapic_state *lapic; 2321 + struct kvm_xsave *xsave; 2322 + struct kvm_xcrs *xcrs; 2323 + void *buffer; 2324 + } u; 2325 2326 + u.buffer = NULL; 2327 switch (ioctl) { 2328 case KVM_GET_LAPIC: { 2329 r = -EINVAL; 2330 if (!vcpu->arch.apic) 2331 goto out; 2332 + u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2333 2334 r = -ENOMEM; 2335 + if (!u.lapic) 2336 goto out; 2337 + r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic); 2338 if (r) 2339 goto out; 2340 r = -EFAULT; 2341 + if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state))) 2342 goto out; 2343 r = 0; 2344 break; ··· 2341 r = -EINVAL; 2342 if (!vcpu->arch.apic) 2343 goto out; 2344 + u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2345 r = -ENOMEM; 2346 + if (!u.lapic) 2347 goto out; 2348 r = -EFAULT; 2349 + if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state))) 2350 goto out; 2351 + r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); 2352 if (r) 2353 goto out; 2354 r = 0; ··· 2464 r = -EFAULT; 2465 if (copy_from_user(&mce, argp, sizeof mce)) 2466 goto out; 2467 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2468 break; 2469 } 2470 case KVM_GET_VCPU_EVENTS: { ··· 2513 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); 2514 break; 2515 } 2516 + case KVM_GET_XSAVE: { 2517 + u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); 2518 + r = -ENOMEM; 2519 + if (!u.xsave) 2520 + break; 2521 + 2522 + kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); 2523 + 2524 + r = -EFAULT; 2525 + if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) 2526 + break; 2527 + r = 0; 2528 + break; 2529 + } 2530 + case KVM_SET_XSAVE: { 2531 + u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); 2532 + r = -ENOMEM; 2533 + if (!u.xsave) 2534 + break; 2535 + 2536 + r = -EFAULT; 2537 + if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave))) 2538 + break; 2539 + 2540 + r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); 2541 + break; 2542 + } 2543 + case KVM_GET_XCRS: { 2544 + u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); 2545 + r = -ENOMEM; 2546 + if (!u.xcrs) 2547 + break; 2548 + 2549 + kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); 2550 + 2551 + r = -EFAULT; 2552 + if (copy_to_user(argp, u.xcrs, 2553 + sizeof(struct kvm_xcrs))) 2554 + break; 2555 + r = 0; 2556 + break; 2557 + } 2558 + case KVM_SET_XCRS: { 2559 + u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); 2560 + r = -ENOMEM; 2561 + if (!u.xcrs) 2562 + break; 2563 + 2564 + r = -EFAULT; 2565 + if (copy_from_user(u.xcrs, argp, 2566 + sizeof(struct kvm_xcrs))) 2567 + break; 2568 + 2569 + r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); 2570 + break; 2571 + } 2572 default: 2573 r = 
-EINVAL; 2574 } 2575 out: 2576 + kfree(u.buffer); 2577 return r; 2578 } 2579 ··· 2558 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) 2559 { 2560 return kvm->arch.n_alloc_mmu_pages; 2561 } 2562 2563 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) ··· 2797 struct kvm_memory_slot *memslot; 2798 unsigned long n; 2799 unsigned long is_dirty = 0; 2800 2801 mutex_lock(&kvm->slots_lock); 2802 ··· 2812 2813 n = kvm_dirty_bitmap_bytes(memslot); 2814 2815 for (i = 0; !is_dirty && i < n/sizeof(long); i++) 2816 is_dirty = memslot->dirty_bitmap[i]; 2817 2818 /* If nothing is dirty, don't bother messing with page tables. */ 2819 if (is_dirty) { 2820 struct kvm_memslots *slots, *old_slots; 2821 + unsigned long *dirty_bitmap; 2822 2823 spin_lock(&kvm->mmu_lock); 2824 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2825 spin_unlock(&kvm->mmu_lock); 2826 2827 + r = -ENOMEM; 2828 + dirty_bitmap = vmalloc(n); 2829 + if (!dirty_bitmap) 2830 + goto out; 2831 + memset(dirty_bitmap, 0, n); 2832 2833 + r = -ENOMEM; 2834 + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 2835 + if (!slots) { 2836 + vfree(dirty_bitmap); 2837 + goto out; 2838 + } 2839 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 2840 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; 2841 ··· 2841 synchronize_srcu_expedited(&kvm->srcu); 2842 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; 2843 kfree(old_slots); 2844 + 2845 + r = -EFAULT; 2846 + if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { 2847 + vfree(dirty_bitmap); 2848 + goto out; 2849 + } 2850 + vfree(dirty_bitmap); 2851 + } else { 2852 + r = -EFAULT; 2853 + if (clear_user(log->dirty_bitmap, n)) 2854 + goto out; 2855 } 2856 2857 r = 0; 2858 out: 2859 mutex_unlock(&kvm->slots_lock); 2860 return r; ··· 2867 union { 2868 struct kvm_pit_state ps; 2869 struct kvm_pit_state2 ps2; 2870 struct kvm_pit_config pit_config; 2871 } u; 2872 ··· 2888 goto out; 2889 break; 2890 } 2891 case KVM_SET_NR_MMU_PAGES: 2892 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); 2893 if (r) ··· 2911 break; 2912 case KVM_GET_NR_MMU_PAGES: 2913 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); 2914 break; 2915 case KVM_CREATE_IRQCHIP: { 2916 struct kvm_pic *vpic; ··· 3259 } 3260 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); 3261 if (ret < 0) { 3262 + r = X86EMUL_IO_NEEDED; 3263 goto out; 3264 } 3265 ··· 3315 } 3316 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); 3317 if (ret < 0) { 3318 + r = X86EMUL_IO_NEEDED; 3319 goto out; 3320 } 3321 ··· 3330 static int emulator_read_emulated(unsigned long addr, 3331 void *val, 3332 unsigned int bytes, 3333 + unsigned int *error_code, 3334 struct kvm_vcpu *vcpu) 3335 { 3336 gpa_t gpa; 3337 3338 if (vcpu->mmio_read_completed) { 3339 memcpy(val, vcpu->mmio_data, bytes); ··· 3343 return X86EMUL_CONTINUE; 3344 } 3345 3346 + gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); 3347 3348 + if (gpa == UNMAPPED_GVA) 3349 return X86EMUL_PROPAGATE_FAULT; 3350 3351 /* For APIC access vmexit */ 3352 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ··· 3370 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 3371 3372 vcpu->mmio_needed = 1; 3373 + vcpu->run->exit_reason = KVM_EXIT_MMIO; 3374 + vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; 3375 + vcpu->run->mmio.len = vcpu->mmio_size = bytes; 3376 + vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; 3377 3378 + return X86EMUL_IO_NEEDED; 3379 } 3380 3381 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ··· 3392 static int 
emulator_write_emulated_onepage(unsigned long addr, 3393 const void *val, 3394 unsigned int bytes, 3395 + unsigned int *error_code, 3396 struct kvm_vcpu *vcpu) 3397 { 3398 gpa_t gpa; 3399 3400 + gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); 3401 3402 + if (gpa == UNMAPPED_GVA) 3403 return X86EMUL_PROPAGATE_FAULT; 3404 3405 /* For APIC access vmexit */ 3406 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ··· 3420 return X86EMUL_CONTINUE; 3421 3422 vcpu->mmio_needed = 1; 3423 + vcpu->run->exit_reason = KVM_EXIT_MMIO; 3424 + vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; 3425 + vcpu->run->mmio.len = vcpu->mmio_size = bytes; 3426 + vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; 3427 + memcpy(vcpu->run->mmio.data, val, bytes); 3428 3429 return X86EMUL_CONTINUE; 3430 } ··· 3431 int emulator_write_emulated(unsigned long addr, 3432 const void *val, 3433 unsigned int bytes, 3434 + unsigned int *error_code, 3435 struct kvm_vcpu *vcpu) 3436 { 3437 /* Crossing a page boundary? */ ··· 3438 int rc, now; 3439 3440 now = -addr & ~PAGE_MASK; 3441 + rc = emulator_write_emulated_onepage(addr, val, now, error_code, 3442 + vcpu); 3443 if (rc != X86EMUL_CONTINUE) 3444 return rc; 3445 addr += now; 3446 val += now; 3447 bytes -= now; 3448 } 3449 + return emulator_write_emulated_onepage(addr, val, bytes, error_code, 3450 + vcpu); 3451 } 3452 3453 #define CMPXCHG_TYPE(t, ptr, old, new) \ 3454 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) ··· 3463 const void *old, 3464 const void *new, 3465 unsigned int bytes, 3466 + unsigned int *error_code, 3467 struct kvm_vcpu *vcpu) 3468 { 3469 gpa_t gpa; ··· 3484 goto emul_write; 3485 3486 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3487 + if (is_error_page(page)) { 3488 + kvm_release_page_clean(page); 3489 + goto emul_write; 3490 + } 3491 3492 kaddr = kmap_atomic(page, KM_USER0); 3493 kaddr += offset_in_page(gpa); ··· 3516 emul_write: 3517 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3518 3519 + return emulator_write_emulated(addr, new, bytes, error_code, vcpu); 3520 } 3521 3522 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) ··· 3604 return X86EMUL_CONTINUE; 3605 } 3606 3607 + int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) 3608 + { 3609 + if (!need_emulate_wbinvd(vcpu)) 3610 + return X86EMUL_CONTINUE; 3611 + 3612 + if (kvm_x86_ops->has_wbinvd_exit()) { 3613 + smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, 3614 + wbinvd_ipi, NULL, 1); 3615 + cpumask_clear(vcpu->arch.wbinvd_dirty_mask); 3616 + } 3617 + wbinvd(); 3618 + return X86EMUL_CONTINUE; 3619 + } 3620 + EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); 3621 + 3622 int emulate_clts(struct kvm_vcpu *vcpu) 3623 { 3624 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); ··· 3611 return X86EMUL_CONTINUE; 3612 } 3613 3614 + int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) 3615 { 3616 + return _kvm_get_dr(vcpu, dr, dest); 3617 } 3618 3619 + int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) 3620 { 3621 3622 + return __kvm_set_dr(vcpu, dr, value); 3623 } 3624 3625 static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3626 { ··· 3674 return value; 3675 } 3676 3677 + static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) 3678 { 3679 + int res = 0; 3680 + 3681 switch (cr) { 3682 case 0: 3683 + res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); 3684 break; 3685 case 2: 3686 vcpu->arch.cr2 = val; 3687 break; 3688 case 3: 3689 + res = kvm_set_cr3(vcpu, val); 3690 break; 3691 case 4: 3692 + 
res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); 3693 break; 3694 case 8: 3695 + res = __kvm_set_cr8(vcpu, val & 0xfUL); 3696 break; 3697 default: 3698 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3699 + res = -1; 3700 } 3701 + 3702 + return res; 3703 } 3704 3705 static int emulator_get_cpl(struct kvm_vcpu *vcpu) ··· 3705 static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) 3706 { 3707 kvm_x86_ops->get_gdt(vcpu, dt); 3708 + } 3709 + 3710 + static unsigned long emulator_get_cached_segment_base(int seg, 3711 + struct kvm_vcpu *vcpu) 3712 + { 3713 + return get_segment_base(vcpu, seg); 3714 } 3715 3716 static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, ··· 3779 kvm_set_segment(vcpu, &kvm_seg, seg); 3780 } 3781 3782 static struct x86_emulate_ops emulate_ops = { 3783 .read_std = kvm_read_guest_virt_system, 3784 .write_std = kvm_write_guest_virt_system, ··· 3797 .set_cached_descriptor = emulator_set_cached_descriptor, 3798 .get_segment_selector = emulator_get_segment_selector, 3799 .set_segment_selector = emulator_set_segment_selector, 3800 + .get_cached_segment_base = emulator_get_cached_segment_base, 3801 .get_gdt = emulator_get_gdt, 3802 .get_cr = emulator_get_cr, 3803 .set_cr = emulator_set_cr, 3804 .cpl = emulator_get_cpl, 3805 + .get_dr = emulator_get_dr, 3806 + .set_dr = emulator_set_dr, 3807 + .set_msr = kvm_set_msr, 3808 + .get_msr = kvm_get_msr, 3809 }; 3810 3811 static void cache_all_regs(struct kvm_vcpu *vcpu) ··· 3812 vcpu->arch.regs_dirty = ~0; 3813 } 3814 3815 + static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) 3816 + { 3817 + u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); 3818 + /* 3819 + * an sti; sti; sequence only disable interrupts for the first 3820 + * instruction. So, if the last instruction, be it emulated or 3821 + * not, left the system with the INT_STI flag enabled, it 3822 + * means that the last instruction is an sti. We should not 3823 + * leave the flag on in this case. The same goes for mov ss 3824 + */ 3825 + if (!(int_shadow & mask)) 3826 + kvm_x86_ops->set_interrupt_shadow(vcpu, mask); 3827 + } 3828 + 3829 + static void inject_emulated_exception(struct kvm_vcpu *vcpu) 3830 + { 3831 + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 3832 + if (ctxt->exception == PF_VECTOR) 3833 + kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code); 3834 + else if (ctxt->error_code_valid) 3835 + kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); 3836 + else 3837 + kvm_queue_exception(vcpu, ctxt->exception); 3838 + } 3839 + 3840 + static int handle_emulation_failure(struct kvm_vcpu *vcpu) 3841 + { 3842 + ++vcpu->stat.insn_emulation_fail; 3843 + trace_kvm_emulate_insn_failed(vcpu); 3844 + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 3845 + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 3846 + vcpu->run->internal.ndata = 0; 3847 + kvm_queue_exception(vcpu, UD_VECTOR); 3848 + return EMULATE_FAIL; 3849 + } 3850 + 3851 + static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) 3852 + { 3853 + gpa_t gpa; 3854 + 3855 + if (tdp_enabled) 3856 + return false; 3857 + 3858 + /* 3859 + * if emulation was due to access to shadowed page table 3860 + * and it failed try to unshadow page and re-entetr the 3861 + * guest to let CPU execute the instruction. 
3862 + */ 3863 + if (kvm_mmu_unprotect_page_virt(vcpu, gva)) 3864 + return true; 3865 + 3866 + gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); 3867 + 3868 + if (gpa == UNMAPPED_GVA) 3869 + return true; /* let cpu generate fault */ 3870 + 3871 + if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT))) 3872 + return true; 3873 + 3874 + return false; 3875 + } 3876 + 3877 int emulate_instruction(struct kvm_vcpu *vcpu, 3878 unsigned long cr2, 3879 u16 error_code, 3880 int emulation_type) 3881 { 3882 + int r; 3883 + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; 3884 3885 kvm_clear_exception_queue(vcpu); 3886 vcpu->arch.mmio_fault_cr2 = cr2; ··· 3830 * for example. 3831 */ 3832 cache_all_regs(vcpu); 3833 3834 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3835 int cs_db, cs_l; ··· 3846 ? X86EMUL_MODE_VM86 : cs_l 3847 ? X86EMUL_MODE_PROT64 : cs_db 3848 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3849 + memset(c, 0, sizeof(struct decode_cache)); 3850 + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 3851 + vcpu->arch.emulate_ctxt.interruptibility = 0; 3852 + vcpu->arch.emulate_ctxt.exception = -1; 3853 3854 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3855 trace_kvm_emulate_insn_start(vcpu); 3856 3857 /* Only allow emulation of specific instructions on #UD 3858 * (namely VMMCALL, sysenter, sysexit, syscall)*/ 3859 if (emulation_type & EMULTYPE_TRAP_UD) { 3860 if (!c->twobyte) 3861 return EMULATE_FAIL; ··· 3880 3881 ++vcpu->stat.insn_emulation; 3882 if (r) { 3883 + if (reexecute_instruction(vcpu, cr2)) 3884 return EMULATE_DONE; 3885 + if (emulation_type & EMULTYPE_SKIP) 3886 + return EMULATE_FAIL; 3887 + return handle_emulation_failure(vcpu); 3888 } 3889 } 3890 ··· 3893 return EMULATE_DONE; 3894 } 3895 3896 + /* this is needed for vmware backdor interface to work since it 3897 + changes registers values during IO operation */ 3898 + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 3899 + 3900 restart: 3901 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3902 3903 + if (r) { /* emulation failed */ 3904 + if (reexecute_instruction(vcpu, cr2)) 3905 + return EMULATE_DONE; 3906 + 3907 + return handle_emulation_failure(vcpu); 3908 + } 3909 + 3910 + toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); 3911 + kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 3912 + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 3913 + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); 3914 + 3915 + if (vcpu->arch.emulate_ctxt.exception >= 0) { 3916 + inject_emulated_exception(vcpu); 3917 + return EMULATE_DONE; 3918 + } 3919 3920 if (vcpu->arch.pio.count) { 3921 if (!vcpu->arch.pio.in) ··· 3906 return EMULATE_DO_MMIO; 3907 } 3908 3909 + if (vcpu->mmio_needed) { 3910 + if (vcpu->mmio_is_write) 3911 + vcpu->mmio_needed = 0; 3912 return EMULATE_DO_MMIO; 3913 } 3914 3915 if (vcpu->arch.emulate_ctxt.restart) 3916 goto restart; ··· 4108 4109 perf_register_guest_info_callbacks(&kvm_guest_cbs); 4110 4111 + if (cpu_has_xsave) 4112 + host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 4113 + 4114 return 0; 4115 4116 out: ··· 4270 4271 kvm_x86_ops->patch_hypercall(vcpu, instruction); 4272 4273 + return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); 4274 } 4275 4276 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) ··· 4506 } 4507 } 4508 4509 + static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) 4510 + { 4511 + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && 4512 + !vcpu->guest_xcr0_loaded) { 4513 + /* kvm_set_xcr() 
also depends on this */ 4514 + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); 4515 + vcpu->guest_xcr0_loaded = 1; 4516 + } 4517 + } 4518 + 4519 + static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) 4520 + { 4521 + if (vcpu->guest_xcr0_loaded) { 4522 + if (vcpu->arch.xcr0 != host_xcr0) 4523 + xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); 4524 + vcpu->guest_xcr0_loaded = 0; 4525 + } 4526 + } 4527 + 4528 static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 4529 { 4530 int r; 4531 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 4532 vcpu->run->request_interrupt_window; 4533 4534 if (vcpu->requests) { 4535 + if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) 4536 + kvm_mmu_unload(vcpu); 4537 + if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) 4538 __kvm_migrate_timers(vcpu); 4539 + if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) 4540 kvm_write_guest_time(vcpu); 4541 + if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) 4542 kvm_mmu_sync_roots(vcpu); 4543 + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) 4544 kvm_x86_ops->tlb_flush(vcpu); 4545 + if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { 4546 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; 4547 r = 0; 4548 goto out; 4549 } 4550 + if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { 4551 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; 4552 r = 0; 4553 goto out; 4554 } 4555 + if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) { 4556 vcpu->fpu_active = 0; 4557 kvm_x86_ops->fpu_deactivate(vcpu); 4558 } 4559 } 4560 + 4561 + r = kvm_mmu_reload(vcpu); 4562 + if (unlikely(r)) 4563 + goto out; 4564 4565 preempt_disable(); 4566 4567 kvm_x86_ops->prepare_guest_switch(vcpu); 4568 if (vcpu->fpu_active) 4569 kvm_load_guest_fpu(vcpu); 4570 + kvm_load_guest_xcr0(vcpu); 4571 + 4572 + atomic_set(&vcpu->guest_mode, 1); 4573 + smp_wmb(); 4574 4575 local_irq_disable(); 4576 4577 + if (!atomic_read(&vcpu->guest_mode) || vcpu->requests 4578 + || need_resched() || signal_pending(current)) { 4579 + atomic_set(&vcpu->guest_mode, 0); 4580 + smp_wmb(); 4581 local_irq_enable(); 4582 preempt_enable(); 4583 r = 1; ··· 4603 if (hw_breakpoint_active()) 4604 hw_breakpoint_restore(); 4605 4606 + atomic_set(&vcpu->guest_mode, 0); 4607 + smp_wmb(); 4608 local_irq_enable(); 4609 4610 ++vcpu->stat.exits; ··· 4665 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4666 kvm_vcpu_block(vcpu); 4667 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4668 + if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) 4669 { 4670 switch(vcpu->arch.mp_state) { 4671 case KVM_MP_STATE_HALTED: ··· 4717 int r; 4718 sigset_t sigsaved; 4719 4720 if (vcpu->sigset_active) 4721 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 4722 ··· 4743 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4744 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); 4745 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4746 + if (r != EMULATE_DONE) { 4747 r = 0; 4748 goto out; 4749 } ··· 4759 if (vcpu->sigset_active) 4760 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 4761 4762 return r; 4763 } 4764 4765 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4766 { 4767 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4768 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); 4769 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); ··· 4789 regs->rip = kvm_rip_read(vcpu); 4790 regs->rflags = kvm_get_rflags(vcpu); 4791 4792 return 0; 4793 } 4794 4795 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4796 { 4797 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); 4798 
kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); 4799 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); ··· 4822 4823 vcpu->arch.exception.pending = false; 4824 4825 return 0; 4826 } 4827 ··· 4841 struct kvm_sregs *sregs) 4842 { 4843 struct desc_ptr dt; 4844 4845 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 4846 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); ··· 4875 set_bit(vcpu->arch.interrupt.nr, 4876 (unsigned long *)sregs->interrupt_bitmap); 4877 4878 return 0; 4879 } 4880 4881 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 4882 struct kvm_mp_state *mp_state) 4883 { 4884 mp_state->mp_state = vcpu->arch.mp_state; 4885 return 0; 4886 } 4887 4888 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 4889 struct kvm_mp_state *mp_state) 4890 { 4891 vcpu->arch.mp_state = mp_state->mp_state; 4892 return 0; 4893 } 4894 4895 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 4896 bool has_error_code, u32 error_code) 4897 { 4898 + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; 4899 int cs_db, cs_l, ret; 4900 cache_all_regs(vcpu); 4901 ··· 4915 ? X86EMUL_MODE_VM86 : cs_l 4916 ? X86EMUL_MODE_PROT64 : cs_db 4917 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 4918 + memset(c, 0, sizeof(struct decode_cache)); 4919 + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 4920 4921 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, 4922 tss_selector, reason, has_error_code, ··· 4923 if (ret) 4924 return EMULATE_FAIL; 4925 4926 + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 4927 + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); 4928 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 4929 return EMULATE_DONE; 4930 } ··· 4934 int mmu_reset_needed = 0; 4935 int pending_vec, max_bits; 4936 struct desc_ptr dt; 4937 4938 dt.size = sregs->idt.limit; 4939 dt.address = sregs->idt.base; ··· 4996 !is_protmode(vcpu)) 4997 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4998 4999 return 0; 5000 } 5001 ··· 5007 unsigned long rflags; 5008 int i, r; 5009 5010 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { 5011 r = -EBUSY; 5012 if (vcpu->arch.exception.pending) 5013 + goto out; 5014 if (dbg->control & KVM_GUESTDBG_INJECT_DB) 5015 kvm_queue_exception(vcpu, DB_VECTOR); 5016 else ··· 5054 5055 r = 0; 5056 5057 + out: 5058 5059 return r; 5060 } 5061 5062 /* 5063 * Translate a guest virtual address to a guest physical address. 
··· 5091 gpa_t gpa; 5092 int idx; 5093 5094 idx = srcu_read_lock(&vcpu->kvm->srcu); 5095 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); 5096 srcu_read_unlock(&vcpu->kvm->srcu, idx); ··· 5099 tr->valid = gpa != UNMAPPED_GVA; 5100 tr->writeable = 1; 5101 tr->usermode = 0; 5102 5103 return 0; 5104 } 5105 5106 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5107 { 5108 + struct i387_fxsave_struct *fxsave = 5109 + &vcpu->arch.guest_fpu.state->fxsave; 5110 5111 memcpy(fpu->fpr, fxsave->st_space, 128); 5112 fpu->fcw = fxsave->cwd; ··· 5119 fpu->last_dp = fxsave->rdp; 5120 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); 5121 5122 return 0; 5123 } 5124 5125 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5126 { 5127 + struct i387_fxsave_struct *fxsave = 5128 + &vcpu->arch.guest_fpu.state->fxsave; 5129 5130 memcpy(fxsave->st_space, fpu->fpr, 128); 5131 fxsave->cwd = fpu->fcw; ··· 5139 fxsave->rdp = fpu->last_dp; 5140 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); 5141 5142 return 0; 5143 } 5144 5145 + int fx_init(struct kvm_vcpu *vcpu) 5146 { 5147 + int err; 5148 + 5149 + err = fpu_alloc(&vcpu->arch.guest_fpu); 5150 + if (err) 5151 + return err; 5152 + 5153 + fpu_finit(&vcpu->arch.guest_fpu); 5154 5155 /* 5156 + * Ensure guest xcr0 is valid for loading 5157 */ 5158 + vcpu->arch.xcr0 = XSTATE_FP; 5159 5160 vcpu->arch.cr0 |= X86_CR0_ET; 5161 + 5162 + return 0; 5163 } 5164 EXPORT_SYMBOL_GPL(fx_init); 5165 + 5166 + static void fx_free(struct kvm_vcpu *vcpu) 5167 + { 5168 + fpu_free(&vcpu->arch.guest_fpu); 5169 + } 5170 5171 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5172 { 5173 if (vcpu->guest_fpu_loaded) 5174 return; 5175 5176 + /* 5177 + * Restore all possible states in the guest, 5178 + * and assume host would use all available bits. 5179 + * Guest xcr0 would be loaded later. 
5180 + */ 5181 + kvm_put_guest_xcr0(vcpu); 5182 vcpu->guest_fpu_loaded = 1; 5183 + unlazy_fpu(current); 5184 + fpu_restore_checking(&vcpu->arch.guest_fpu); 5185 trace_kvm_fpu(1); 5186 } 5187 5188 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5189 { 5190 + kvm_put_guest_xcr0(vcpu); 5191 + 5192 if (!vcpu->guest_fpu_loaded) 5193 return; 5194 5195 vcpu->guest_fpu_loaded = 0; 5196 + fpu_save_init(&vcpu->arch.guest_fpu); 5197 ++vcpu->stat.fpu_reload; 5198 + kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); 5199 trace_kvm_fpu(0); 5200 } 5201 ··· 5204 vcpu->arch.time_page = NULL; 5205 } 5206 5207 + free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); 5208 + fx_free(vcpu); 5209 kvm_x86_ops->vcpu_free(vcpu); 5210 } 5211 ··· 5216 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 5217 { 5218 int r; 5219 5220 vcpu->arch.mtrr_state.have_fixed = 1; 5221 vcpu_load(vcpu); ··· 5241 kvm_mmu_unload(vcpu); 5242 vcpu_put(vcpu); 5243 5244 + fx_free(vcpu); 5245 kvm_x86_ops->vcpu_free(vcpu); 5246 } 5247 ··· 5334 } 5335 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; 5336 5337 + if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) 5338 + goto fail_free_mce_banks; 5339 + 5340 return 0; 5341 + fail_free_mce_banks: 5342 + kfree(vcpu->arch.mce_banks); 5343 fail_free_lapic: 5344 kvm_free_lapic(vcpu); 5345 fail_mmu_destroy: ··· 5363 5364 if (!kvm) 5365 return ERR_PTR(-ENOMEM); 5366 5367 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5368 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); ··· 5412 void kvm_arch_sync_events(struct kvm *kvm) 5413 { 5414 kvm_free_all_assigned_devices(kvm); 5415 + kvm_free_pit(kvm); 5416 } 5417 5418 void kvm_arch_destroy_vm(struct kvm *kvm) 5419 { 5420 kvm_iommu_unmap_guest(kvm); 5421 kfree(kvm->arch.vpic); 5422 kfree(kvm->arch.vioapic); 5423 kvm_free_vcpus(kvm); ··· 5427 if (kvm->arch.ept_identity_pagetable) 5428 put_page(kvm->arch.ept_identity_pagetable); 5429 cleanup_srcu_struct(&kvm->srcu); 5430 kfree(kvm); 5431 } 5432 ··· 5438 int user_alloc) 5439 { 5440 int npages = memslot->npages; 5441 + int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; 5442 + 5443 + /* Prevent internal slot pages from being moved by fork()/COW. */ 5444 + if (memslot->id >= KVM_MEMORY_SLOTS) 5445 + map_flags = MAP_SHARED | MAP_ANONYMOUS; 5446 5447 /*To keep backward compatibility with older userspace, 5448 *x86 needs to hanlde !user_alloc case. ··· 5450 userspace_addr = do_mmap(NULL, 0, 5451 npages * PAGE_SIZE, 5452 PROT_READ | PROT_WRITE, 5453 + map_flags, 5454 0); 5455 up_write(&current->mm->mmap_sem); 5456 ··· 5523 5524 me = get_cpu(); 5525 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) 5526 + if (atomic_xchg(&vcpu->guest_mode, 0)) 5527 smp_send_reschedule(cpu); 5528 put_cpu(); 5529 }
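The xcr0 checks in __kvm_set_xcr() earlier in the x86.c hunk reduce to three value rules. A standalone restatement that compiles on its own; the XSTATE_* values are spelled out here following the XSAVE feature-bit layout rather than taken from kernel headers, and the index and CPL checks of the kernel version are omitted because they need vcpu state:

    #include <stdint.h>
    #include <stdio.h>

    #define XSTATE_FP       (1ULL << 0)     /* x87 state, always required     */
    #define XSTATE_SSE      (1ULL << 1)     /* SSE state                      */
    #define XSTATE_YMM      (1ULL << 2)     /* AVX state, valid only with SSE */

    /* mirrors the value checks __kvm_set_xcr() applies to a guest xcr0 */
    static int xcr0_valid(uint64_t xcr0, uint64_t host_xcr0)
    {
            if (!(xcr0 & XSTATE_FP))
                    return 0;               /* bit 0 may never be cleared  */
            if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
                    return 0;               /* YMM depends on SSE          */
            if (xcr0 & ~host_xcr0)
                    return 0;               /* cannot exceed host_xcr0     */
            return 1;
    }

    int main(void)
    {
            uint64_t host = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;

            printf("%d %d\n",
                   xcr0_valid(XSTATE_FP | XSTATE_SSE, host),    /* 1 */
                   xcr0_valid(XSTATE_FP | XSTATE_YMM, host));   /* 0 */
            return 0;
    }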
-7
arch/x86/kvm/x86.h
··· 65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 66 } 67 68 - static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) 69 - { 70 - return rcu_dereference_check(kvm->arch.aliases, 71 - srcu_read_lock_held(&kvm->srcu) 72 - || lockdep_is_held(&kvm->slots_lock)); 73 - } 74 - 75 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 76 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 77
··· 65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 66 } 67 68 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 69 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 70
+13
include/linux/kvm.h
··· 524 #define KVM_CAP_PPC_OSI 52 525 #define KVM_CAP_PPC_UNSET_IRQ 53 526 #define KVM_CAP_ENABLE_CAP 54 527 528 #ifdef KVM_CAP_IRQ_ROUTING 529 ··· 619 */ 620 #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) 621 #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) 622 #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) 623 #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) 624 #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) ··· 721 #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) 722 #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) 723 #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) 724 725 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 726
··· 524 #define KVM_CAP_PPC_OSI 52 525 #define KVM_CAP_PPC_UNSET_IRQ 53 526 #define KVM_CAP_ENABLE_CAP 54 527 + #ifdef __KVM_HAVE_XSAVE 528 + #define KVM_CAP_XSAVE 55 529 + #endif 530 + #ifdef __KVM_HAVE_XCRS 531 + #define KVM_CAP_XCRS 56 532 + #endif 533 534 #ifdef KVM_CAP_IRQ_ROUTING 535 ··· 613 */ 614 #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) 615 #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) 616 + /* KVM_SET_MEMORY_ALIAS is obsolete: */ 617 #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) 618 #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) 619 #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) ··· 714 #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) 715 #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) 716 #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) 717 + /* Available with KVM_CAP_XSAVE */ 718 + #define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) 719 + #define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave) 720 + /* Available with KVM_CAP_XCRS */ 721 + #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) 722 + #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) 723 724 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 725
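The XSAVE/XCR state ioctls defined above are plain vcpu ioctls gated on their capability bits. A minimal userspace sketch of a save/restore round trip, with error handling for VM and vcpu creation trimmed; it treats struct kvm_xsave as opaque and assumes nothing beyond <linux/kvm.h>:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            struct kvm_xsave xsave;
            int kvm, vm, vcpu;

            kvm = open("/dev/kvm", O_RDWR);
            if (kvm < 0 || ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE) <= 0) {
                    fprintf(stderr, "KVM_CAP_XSAVE not available\n");
                    return 1;
            }

            vm   = ioctl(kvm, KVM_CREATE_VM, 0);    /* error handling trimmed */
            vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

            if (ioctl(vcpu, KVM_GET_XSAVE, &xsave) < 0) {   /* save ...    */
                    perror("KVM_GET_XSAVE");
                    return 1;
            }
            if (ioctl(vcpu, KVM_SET_XSAVE, &xsave) < 0) {   /* ... restore */
                    perror("KVM_SET_XSAVE");
                    return 1;
            }
            return 0;
    }

KVM_GET_XCRS and KVM_SET_XCRS follow the same pattern with struct kvm_xcrs, gated on KVM_CAP_XCRS.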
+27 -8
include/linux/kvm_host.h
··· 81 int vcpu_id; 82 struct mutex mutex; 83 int cpu; 84 struct kvm_run *run; 85 unsigned long requests; 86 unsigned long guest_debug; 87 int srcu_idx; 88 89 int fpu_active; 90 - int guest_fpu_loaded; 91 wait_queue_head_t wq; 92 int sigset_active; 93 sigset_t sigset; ··· 124 } *lpage_info[KVM_NR_PAGE_SIZES - 1]; 125 unsigned long userspace_addr; 126 int user_alloc; 127 }; 128 129 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) ··· 268 269 int is_error_page(struct page *page); 270 int is_error_pfn(pfn_t pfn); 271 int kvm_is_error_hva(unsigned long addr); 272 int kvm_set_memory_region(struct kvm *kvm, 273 struct kvm_userspace_memory_region *mem, ··· 288 int user_alloc); 289 void kvm_disable_largepages(void); 290 void kvm_arch_flush_shadow(struct kvm *kvm); 291 - gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); 292 - gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn); 293 294 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); 295 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); ··· 447 struct kvm_irq_mask_notifier *kimn); 448 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, 449 struct kvm_irq_mask_notifier *kimn); 450 - void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); 451 452 #ifdef __KVM_HAVE_IOAPIC 453 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, ··· 565 } 566 #endif 567 568 - #ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION 569 - #define unalias_gfn_instantiation unalias_gfn 570 - #endif 571 - 572 #ifdef CONFIG_HAVE_KVM_IRQCHIP 573 574 #define KVM_MAX_IRQ_ROUTES 1024 ··· 626 } 627 628 #endif 629 630 #endif 631
··· 81 int vcpu_id; 82 struct mutex mutex; 83 int cpu; 84 + atomic_t guest_mode; 85 struct kvm_run *run; 86 unsigned long requests; 87 unsigned long guest_debug; 88 int srcu_idx; 89 90 int fpu_active; 91 + int guest_fpu_loaded, guest_xcr0_loaded; 92 wait_queue_head_t wq; 93 int sigset_active; 94 sigset_t sigset; ··· 123 } *lpage_info[KVM_NR_PAGE_SIZES - 1]; 124 unsigned long userspace_addr; 125 int user_alloc; 126 + int id; 127 }; 128 129 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) ··· 266 267 int is_error_page(struct page *page); 268 int is_error_pfn(pfn_t pfn); 269 + int is_hwpoison_pfn(pfn_t pfn); 270 + int is_fault_pfn(pfn_t pfn); 271 int kvm_is_error_hva(unsigned long addr); 272 int kvm_set_memory_region(struct kvm *kvm, 273 struct kvm_userspace_memory_region *mem, ··· 284 int user_alloc); 285 void kvm_disable_largepages(void); 286 void kvm_arch_flush_shadow(struct kvm *kvm); 287 288 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); 289 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); ··· 445 struct kvm_irq_mask_notifier *kimn); 446 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, 447 struct kvm_irq_mask_notifier *kimn); 448 + void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, 449 + bool mask); 450 451 #ifdef __KVM_HAVE_IOAPIC 452 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, ··· 562 } 563 #endif 564 565 #ifdef CONFIG_HAVE_KVM_IRQCHIP 566 567 #define KVM_MAX_IRQ_ROUTES 1024 ··· 627 } 628 629 #endif 630 + 631 + static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) 632 + { 633 + set_bit(req, &vcpu->requests); 634 + } 635 + 636 + static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu) 637 + { 638 + return test_and_set_bit(req, &vcpu->requests); 639 + } 640 + 641 + static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) 642 + { 643 + if (test_bit(req, &vcpu->requests)) { 644 + clear_bit(req, &vcpu->requests); 645 + return true; 646 + } else { 647 + return false; 648 + } 649 + } 650 651 #endif 652
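
The three request helpers added at the end wrap the open-coded set_bit()/test_and_set_bit()/test_bit()+clear_bit() patterns on vcpu->requests. For instance, the KVM_REQ_DEACTIVATE_FPU request raised by kvm_put_guest_fpu() in the x86 hunk above would be consumed on the next entry roughly as in this sketch (loop details omitted, example_* name made up):

static void example_service_requests(struct kvm_vcpu *vcpu)
{
	if (!vcpu->requests)
		return;

	/* test-and-clear in one step, so the handler runs exactly once */
	if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu))
		vcpu->fpu_active = 0;
}
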
+2 -2
include/linux/kvm_types.h
··· 32 33 typedef unsigned long gva_t; 34 typedef u64 gpa_t; 35 - typedef unsigned long gfn_t; 36 37 typedef unsigned long hva_t; 38 typedef u64 hpa_t; 39 - typedef unsigned long hfn_t; 40 41 typedef hfn_t pfn_t; 42
··· 32 33 typedef unsigned long gva_t; 34 typedef u64 gpa_t; 35 + typedef u64 gfn_t; 36 37 typedef unsigned long hva_t; 38 typedef u64 hpa_t; 39 + typedef u64 hfn_t; 40 41 typedef hfn_t pfn_t; 42
+8
include/linux/mm.h
··· 1465 extern void shake_page(struct page *p, int access); 1466 extern atomic_long_t mce_bad_pages; 1467 extern int soft_offline_page(struct page *page, int flags); 1468 1469 extern void dump_page(struct page *page); 1470
··· 1465 extern void shake_page(struct page *p, int access); 1466 extern atomic_long_t mce_bad_pages; 1467 extern int soft_offline_page(struct page *page, int flags); 1468 + #ifdef CONFIG_MEMORY_FAILURE 1469 + int is_hwpoison_address(unsigned long addr); 1470 + #else 1471 + static inline int is_hwpoison_address(unsigned long addr) 1472 + { 1473 + return 0; 1474 + } 1475 + #endif 1476 1477 extern void dump_page(struct page *page); 1478
+33
mm/memory-failure.c
··· 45 #include <linux/page-isolation.h> 46 #include <linux/suspend.h> 47 #include <linux/slab.h> 48 #include "internal.h" 49 50 int sysctl_memory_failure_early_kill __read_mostly = 0; ··· 1297 /* keep elevated page count for bad page */ 1298 return ret; 1299 }
··· 45 #include <linux/page-isolation.h> 46 #include <linux/suspend.h> 47 #include <linux/slab.h> 48 + #include <linux/swapops.h> 49 #include "internal.h" 50 51 int sysctl_memory_failure_early_kill __read_mostly = 0; ··· 1296 /* keep elevated page count for bad page */ 1297 return ret; 1298 } 1299 + 1300 + /* 1301 + * The caller must hold current->mm->mmap_sem in read mode. 1302 + */ 1303 + int is_hwpoison_address(unsigned long addr) 1304 + { 1305 + pgd_t *pgdp; 1306 + pud_t pud, *pudp; 1307 + pmd_t pmd, *pmdp; 1308 + pte_t pte, *ptep; 1309 + swp_entry_t entry; 1310 + 1311 + pgdp = pgd_offset(current->mm, addr); 1312 + if (!pgd_present(*pgdp)) 1313 + return 0; 1314 + pudp = pud_offset(pgdp, addr); 1315 + pud = *pudp; 1316 + if (!pud_present(pud) || pud_large(pud)) 1317 + return 0; 1318 + pmdp = pmd_offset(pudp, addr); 1319 + pmd = *pmdp; 1320 + if (!pmd_present(pmd) || pmd_large(pmd)) 1321 + return 0; 1322 + ptep = pte_offset_map(pmdp, addr); 1323 + pte = *ptep; 1324 + pte_unmap(ptep); 1325 + if (!is_swap_pte(pte)) 1326 + return 0; 1327 + entry = pte_to_swp_entry(pte); 1328 + return is_hwpoison_entry(entry); 1329 + } 1330 + EXPORT_SYMBOL_GPL(is_hwpoison_address);
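
As its comment says, is_hwpoison_address() walks current->mm and therefore needs mmap_sem held for read; the kvm_main.c hunk further below calls it exactly this way while translating a host virtual address to a pfn. The caller pattern, for reference (the example_* wrapper is made up):

static int example_addr_is_poisoned(unsigned long addr)
{
	int poisoned;

	down_read(&current->mm->mmap_sem);
	poisoned = is_hwpoison_address(addr);
	up_read(&current->mm->mmap_sem);

	return poisoned;
}
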
+1 -6
virt/kvm/assigned-dev.c
··· 1 /* 2 * Kernel-based Virtual Machine - device assignment support 3 * 4 - * Copyright (C) 2006-9 Red Hat, Inc 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. ··· 58 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) 59 { 60 struct kvm_assigned_dev_kernel *assigned_dev; 61 - struct kvm *kvm; 62 int i; 63 64 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, 65 interrupt_work); 66 - kvm = assigned_dev->kvm; 67 68 spin_lock_irq(&assigned_dev->assigned_dev_lock); 69 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { ··· 445 int r = -EINVAL; 446 struct kvm_assigned_dev_kernel *match; 447 unsigned long host_irq_type, guest_irq_type; 448 - 449 - if (!capable(CAP_SYS_RAWIO)) 450 - return -EPERM; 451 452 if (!irqchip_in_kernel(kvm)) 453 return r;
··· 1 /* 2 * Kernel-based Virtual Machine - device assignment support 3 * 4 + * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. ··· 58 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) 59 { 60 struct kvm_assigned_dev_kernel *assigned_dev; 61 int i; 62 63 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, 64 interrupt_work); 65 66 spin_lock_irq(&assigned_dev->assigned_dev_lock); 67 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { ··· 447 int r = -EINVAL; 448 struct kvm_assigned_dev_kernel *match; 449 unsigned long host_irq_type, guest_irq_type; 450 451 if (!irqchip_in_kernel(kvm)) 452 return r;
+1
virt/kvm/coalesced_mmio.c
··· 2 * KVM coalesced MMIO 3 * 4 * Copyright (c) 2008 Bull S.A.S. 5 * 6 * Author: Laurent Vivier <Laurent.Vivier@bull.net> 7 *
··· 2 * KVM coalesced MMIO 3 * 4 * Copyright (c) 2008 Bull S.A.S. 5 + * Copyright 2009 Red Hat, Inc. and/or its affiliates. 6 * 7 * Author: Laurent Vivier <Laurent.Vivier@bull.net> 8 *
+1
virt/kvm/eventfd.c
··· 2 * kvm eventfd support - use eventfd objects to signal various KVM events 3 * 4 * Copyright 2009 Novell. All Rights Reserved. 5 * 6 * Author: 7 * Gregory Haskins <ghaskins@novell.com>
··· 2 * kvm eventfd support - use eventfd objects to signal various KVM events 3 * 4 * Copyright 2009 Novell. All Rights Reserved. 5 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. 6 * 7 * Author: 8 * Gregory Haskins <ghaskins@novell.com>
+2 -1
virt/kvm/ioapic.c
··· 1 /* 2 * Copyright (C) 2001 MandrakeSoft S.A. 3 * 4 * MandrakeSoft S.A. 5 * 43, rue d'Aboukir ··· 152 update_handled_vectors(ioapic); 153 mask_after = e->fields.mask; 154 if (mask_before != mask_after) 155 - kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); 156 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG 157 && ioapic->irr & (1 << index)) 158 ioapic_service(ioapic, index);
··· 1 /* 2 * Copyright (C) 2001 MandrakeSoft S.A. 3 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. 4 * 5 * MandrakeSoft S.A. 6 * 43, rue d'Aboukir ··· 151 update_handled_vectors(ioapic); 152 mask_after = e->fields.mask; 153 if (mask_before != mask_after) 154 + kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); 155 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG 156 && ioapic->irr & (1 << index)) 157 ioapic_service(ioapic, index);
+9 -3
virt/kvm/iommu.c
··· 16 * 17 * Copyright (C) 2006-2008 Intel Corporation 18 * Copyright IBM Corporation, 2008 19 * Author: Allen M. Kay <allen.m.kay@intel.com> 20 * Author: Weidong Han <weidong.han@intel.com> 21 * Author: Ben-Ami Yassour <benami@il.ibm.com> ··· 108 get_order(page_size), flags); 109 if (r) { 110 printk(KERN_ERR "kvm_iommu_map_address:" 111 - "iommu failed to map pfn=%lx\n", pfn); 112 goto unmap_pages; 113 } 114 ··· 126 127 static int kvm_iommu_map_memslots(struct kvm *kvm) 128 { 129 - int i, r = 0; 130 struct kvm_memslots *slots; 131 132 slots = kvm_memslots(kvm); 133 134 for (i = 0; i < slots->nmemslots; i++) { ··· 137 if (r) 138 break; 139 } 140 141 return r; 142 } ··· 287 288 static int kvm_iommu_unmap_memslots(struct kvm *kvm) 289 { 290 - int i; 291 struct kvm_memslots *slots; 292 293 slots = kvm_memslots(kvm); 294 295 for (i = 0; i < slots->nmemslots; i++) { 296 kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, 297 slots->memslots[i].npages); 298 } 299 300 return 0; 301 }
··· 16 * 17 * Copyright (C) 2006-2008 Intel Corporation 18 * Copyright IBM Corporation, 2008 19 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. 20 + * 21 * Author: Allen M. Kay <allen.m.kay@intel.com> 22 * Author: Weidong Han <weidong.han@intel.com> 23 * Author: Ben-Ami Yassour <benami@il.ibm.com> ··· 106 get_order(page_size), flags); 107 if (r) { 108 printk(KERN_ERR "kvm_iommu_map_address:" 109 + "iommu failed to map pfn=%llx\n", pfn); 110 goto unmap_pages; 111 } 112 ··· 124 125 static int kvm_iommu_map_memslots(struct kvm *kvm) 126 { 127 + int i, idx, r = 0; 128 struct kvm_memslots *slots; 129 130 + idx = srcu_read_lock(&kvm->srcu); 131 slots = kvm_memslots(kvm); 132 133 for (i = 0; i < slots->nmemslots; i++) { ··· 134 if (r) 135 break; 136 } 137 + srcu_read_unlock(&kvm->srcu, idx); 138 139 return r; 140 } ··· 283 284 static int kvm_iommu_unmap_memslots(struct kvm *kvm) 285 { 286 + int i, idx; 287 struct kvm_memslots *slots; 288 289 + idx = srcu_read_lock(&kvm->srcu); 290 slots = kvm_memslots(kvm); 291 292 for (i = 0; i < slots->nmemslots; i++) { 293 kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, 294 slots->memslots[i].npages); 295 } 296 + srcu_read_unlock(&kvm->srcu, idx); 297 298 return 0; 299 }
+10 -5
virt/kvm/irq_comm.c
··· 17 * Authors: 18 * Yaozu (Eddie) Dong <Eddie.dong@intel.com> 19 * 20 */ 21 22 #include <linux/kvm_host.h> ··· 100 if (r < 0) 101 r = 0; 102 r += kvm_apic_set_irq(vcpu, irq); 103 - } else { 104 if (!lowest) 105 lowest = vcpu; 106 else if (kvm_apic_compare_prio(vcpu, lowest) < 0) ··· 279 synchronize_rcu(); 280 } 281 282 - void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) 283 { 284 struct kvm_irq_mask_notifier *kimn; 285 struct hlist_node *n; 286 287 rcu_read_lock(); 288 - hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) 289 - if (kimn->irq == irq) 290 - kimn->func(kimn, mask); 291 rcu_read_unlock(); 292 } 293
··· 17 * Authors: 18 * Yaozu (Eddie) Dong <Eddie.dong@intel.com> 19 * 20 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 21 */ 22 23 #include <linux/kvm_host.h> ··· 99 if (r < 0) 100 r = 0; 101 r += kvm_apic_set_irq(vcpu, irq); 102 + } else if (kvm_lapic_enabled(vcpu)) { 103 if (!lowest) 104 lowest = vcpu; 105 else if (kvm_apic_compare_prio(vcpu, lowest) < 0) ··· 278 synchronize_rcu(); 279 } 280 281 + void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, 282 + bool mask) 283 { 284 struct kvm_irq_mask_notifier *kimn; 285 struct hlist_node *n; 286 + int gsi; 287 288 rcu_read_lock(); 289 + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; 290 + if (gsi != -1) 291 + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) 292 + if (kimn->irq == gsi) 293 + kimn->func(kimn, mask); 294 rcu_read_unlock(); 295 } 296
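
kvm_fire_mask_notifiers() now takes an (irqchip, pin) pair and resolves it to a GSI through the irq routing table before matching registered notifiers, which lets the PIC report mask changes as well as the IOAPIC (the PIC call site is in the x86 part of this merge). Registration itself is unchanged; a notifier is hooked up roughly as below, in the style of the in-kernel PIT (everything prefixed example_ is made up):

static void example_mask_notify(struct kvm_irq_mask_notifier *kimn, bool mask)
{
	/* invoked under rcu_read_lock() by kvm_fire_mask_notifiers() */
	pr_debug("GSI %d %smasked\n", kimn->irq, mask ? "" : "un");
}

static struct kvm_irq_mask_notifier example_kimn = {
	.func	= example_mask_notify,
};

static void example_hook_gsi0(struct kvm *kvm)
{
	/* kimn->irq is filled in with the GSI by the registration helper */
	kvm_register_irq_mask_notifier(kvm, 0, &example_kimn);
}
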
+79 -27
virt/kvm/kvm_main.c
··· 5 * machines without emulation or binary translation. 6 * 7 * Copyright (C) 2006 Qumranet, Inc. 8 * 9 * Authors: 10 * Avi Kivity <avi@qumranet.com> ··· 93 94 static bool largepages_enabled = true; 95 96 inline int kvm_is_mmio_pfn(pfn_t pfn) 97 { 98 if (pfn_valid(pfn)) { ··· 148 raw_spin_lock(&kvm->requests_lock); 149 me = smp_processor_id(); 150 kvm_for_each_vcpu(i, vcpu, kvm) { 151 - if (test_and_set_bit(req, &vcpu->requests)) 152 continue; 153 cpu = vcpu->cpu; 154 if (cpus != NULL && cpu != -1 && cpu != me) ··· 573 574 new = old = *memslot; 575 576 new.base_gfn = base_gfn; 577 new.npages = npages; 578 new.flags = mem->flags; ··· 604 /* Allocate if a slot is being created */ 605 #ifndef CONFIG_S390 606 if (npages && !new.rmap) { 607 - new.rmap = vmalloc(npages * sizeof(struct page *)); 608 609 if (!new.rmap) 610 goto out_free; ··· 629 if (new.lpage_info[i]) 630 continue; 631 632 - lpages = 1 + (base_gfn + npages - 1) / 633 - KVM_PAGES_PER_HPAGE(level); 634 - lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level); 635 636 new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); 637 ··· 641 memset(new.lpage_info[i], 0, 642 lpages * sizeof(*new.lpage_info[i])); 643 644 - if (base_gfn % KVM_PAGES_PER_HPAGE(level)) 645 new.lpage_info[i][0].write_count = 1; 646 - if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level)) 647 new.lpage_info[i][lpages - 1].write_count = 1; 648 ugfn = new.userspace_addr >> PAGE_SHIFT; 649 /* ··· 818 819 int is_error_page(struct page *page) 820 { 821 - return page == bad_page; 822 } 823 EXPORT_SYMBOL_GPL(is_error_page); 824 825 int is_error_pfn(pfn_t pfn) 826 { 827 - return pfn == bad_pfn; 828 } 829 EXPORT_SYMBOL_GPL(is_error_pfn); 830 831 static inline unsigned long bad_hva(void) 832 { ··· 851 } 852 EXPORT_SYMBOL_GPL(kvm_is_error_hva); 853 854 - struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) 855 { 856 int i; 857 struct kvm_memslots *slots = kvm_memslots(kvm); ··· 865 } 866 return NULL; 867 } 868 - EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased); 869 - 870 - struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 871 - { 872 - gfn = unalias_gfn(kvm, gfn); 873 - return gfn_to_memslot_unaliased(kvm, gfn); 874 - } 875 876 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 877 { 878 int i; 879 struct kvm_memslots *slots = kvm_memslots(kvm); 880 881 - gfn = unalias_gfn_instantiation(kvm, gfn); 882 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 883 struct kvm_memory_slot *memslot = &slots->memslots[i]; 884 ··· 916 struct kvm_memslots *slots = kvm_memslots(kvm); 917 struct kvm_memory_slot *memslot = NULL; 918 919 - gfn = unalias_gfn(kvm, gfn); 920 for (i = 0; i < slots->nmemslots; ++i) { 921 memslot = &slots->memslots[i]; 922 ··· 936 { 937 struct kvm_memory_slot *slot; 938 939 - gfn = unalias_gfn_instantiation(kvm, gfn); 940 - slot = gfn_to_memslot_unaliased(kvm, gfn); 941 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 942 return bad_hva(); 943 return gfn_to_hva_memslot(slot, gfn); ··· 957 struct vm_area_struct *vma; 958 959 down_read(&current->mm->mmap_sem); 960 vma = find_vma(current->mm, addr); 961 962 if (vma == NULL || addr < vma->vm_start || 963 !(vma->vm_flags & VM_PFNMAP)) { 964 up_read(&current->mm->mmap_sem); 965 - get_page(bad_page); 966 - return page_to_pfn(bad_page); 967 } 968 969 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; ··· 1204 { 1205 struct kvm_memory_slot *memslot; 1206 1207 - gfn = unalias_gfn(kvm, gfn); 1208 - memslot = gfn_to_memslot_unaliased(kvm, gfn); 1209 if (memslot && memslot->dirty_bitmap) { 
1210 unsigned long rel_gfn = gfn - memslot->base_gfn; 1211 ··· 1223 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1224 1225 if (kvm_arch_vcpu_runnable(vcpu)) { 1226 - set_bit(KVM_REQ_UNHALT, &vcpu->requests); 1227 break; 1228 } 1229 if (kvm_cpu_has_pending_timer(vcpu)) ··· 1394 1395 if (vcpu->kvm->mm != current->mm) 1396 return -EIO; 1397 switch (ioctl) { 1398 case KVM_RUN: 1399 r = -EINVAL; ··· 1548 goto out; 1549 p = &sigset; 1550 } 1551 - r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 1552 break; 1553 } 1554 case KVM_GET_FPU: { ··· 1583 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); 1584 } 1585 out: 1586 kfree(fpu); 1587 kfree(kvm_sregs); 1588 return r; ··· 2226 2227 bad_pfn = page_to_pfn(bad_page); 2228 2229 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { 2230 r = -ENOMEM; 2231 goto out_free_0; ··· 2316 out_free_0a: 2317 free_cpumask_var(cpus_hardware_enabled); 2318 out_free_0: 2319 __free_page(bad_page); 2320 out: 2321 kvm_arch_exit(); ··· 2341 kvm_arch_hardware_unsetup(); 2342 kvm_arch_exit(); 2343 free_cpumask_var(cpus_hardware_enabled); 2344 __free_page(bad_page); 2345 } 2346 EXPORT_SYMBOL_GPL(kvm_exit);
··· 5 * machines without emulation or binary translation. 6 * 7 * Copyright (C) 2006 Qumranet, Inc. 8 + * Copyright 2010 Red Hat, Inc. and/or its affilates. 9 * 10 * Authors: 11 * Avi Kivity <avi@qumranet.com> ··· 92 93 static bool largepages_enabled = true; 94 95 + static struct page *hwpoison_page; 96 + static pfn_t hwpoison_pfn; 97 + 98 + static struct page *fault_page; 99 + static pfn_t fault_pfn; 100 + 101 inline int kvm_is_mmio_pfn(pfn_t pfn) 102 { 103 if (pfn_valid(pfn)) { ··· 141 raw_spin_lock(&kvm->requests_lock); 142 me = smp_processor_id(); 143 kvm_for_each_vcpu(i, vcpu, kvm) { 144 + if (kvm_make_check_request(req, vcpu)) 145 continue; 146 cpu = vcpu->cpu; 147 if (cpus != NULL && cpu != -1 && cpu != me) ··· 566 567 new = old = *memslot; 568 569 + new.id = mem->slot; 570 new.base_gfn = base_gfn; 571 new.npages = npages; 572 new.flags = mem->flags; ··· 596 /* Allocate if a slot is being created */ 597 #ifndef CONFIG_S390 598 if (npages && !new.rmap) { 599 + new.rmap = vmalloc(npages * sizeof(*new.rmap)); 600 601 if (!new.rmap) 602 goto out_free; ··· 621 if (new.lpage_info[i]) 622 continue; 623 624 + lpages = 1 + ((base_gfn + npages - 1) 625 + >> KVM_HPAGE_GFN_SHIFT(level)); 626 + lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); 627 628 new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); 629 ··· 633 memset(new.lpage_info[i], 0, 634 lpages * sizeof(*new.lpage_info[i])); 635 636 + if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) 637 new.lpage_info[i][0].write_count = 1; 638 + if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) 639 new.lpage_info[i][lpages - 1].write_count = 1; 640 ugfn = new.userspace_addr >> PAGE_SHIFT; 641 /* ··· 810 811 int is_error_page(struct page *page) 812 { 813 + return page == bad_page || page == hwpoison_page || page == fault_page; 814 } 815 EXPORT_SYMBOL_GPL(is_error_page); 816 817 int is_error_pfn(pfn_t pfn) 818 { 819 + return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn; 820 } 821 EXPORT_SYMBOL_GPL(is_error_pfn); 822 + 823 + int is_hwpoison_pfn(pfn_t pfn) 824 + { 825 + return pfn == hwpoison_pfn; 826 + } 827 + EXPORT_SYMBOL_GPL(is_hwpoison_pfn); 828 + 829 + int is_fault_pfn(pfn_t pfn) 830 + { 831 + return pfn == fault_pfn; 832 + } 833 + EXPORT_SYMBOL_GPL(is_fault_pfn); 834 835 static inline unsigned long bad_hva(void) 836 { ··· 831 } 832 EXPORT_SYMBOL_GPL(kvm_is_error_hva); 833 834 + struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 835 { 836 int i; 837 struct kvm_memslots *slots = kvm_memslots(kvm); ··· 845 } 846 return NULL; 847 } 848 + EXPORT_SYMBOL_GPL(gfn_to_memslot); 849 850 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 851 { 852 int i; 853 struct kvm_memslots *slots = kvm_memslots(kvm); 854 855 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 856 struct kvm_memory_slot *memslot = &slots->memslots[i]; 857 ··· 903 struct kvm_memslots *slots = kvm_memslots(kvm); 904 struct kvm_memory_slot *memslot = NULL; 905 906 for (i = 0; i < slots->nmemslots; ++i) { 907 memslot = &slots->memslots[i]; 908 ··· 924 { 925 struct kvm_memory_slot *slot; 926 927 + slot = gfn_to_memslot(kvm, gfn); 928 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 929 return bad_hva(); 930 return gfn_to_hva_memslot(slot, gfn); ··· 946 struct vm_area_struct *vma; 947 948 down_read(&current->mm->mmap_sem); 949 + if (is_hwpoison_address(addr)) { 950 + up_read(&current->mm->mmap_sem); 951 + get_page(hwpoison_page); 952 + return page_to_pfn(hwpoison_page); 953 + } 954 + 955 vma = find_vma(current->mm, addr); 956 957 if (vma == NULL || addr < 
vma->vm_start || 958 !(vma->vm_flags & VM_PFNMAP)) { 959 up_read(&current->mm->mmap_sem); 960 + get_page(fault_page); 961 + return page_to_pfn(fault_page); 962 } 963 964 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; ··· 1187 { 1188 struct kvm_memory_slot *memslot; 1189 1190 + memslot = gfn_to_memslot(kvm, gfn); 1191 if (memslot && memslot->dirty_bitmap) { 1192 unsigned long rel_gfn = gfn - memslot->base_gfn; 1193 ··· 1207 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1208 1209 if (kvm_arch_vcpu_runnable(vcpu)) { 1210 + kvm_make_request(KVM_REQ_UNHALT, vcpu); 1211 break; 1212 } 1213 if (kvm_cpu_has_pending_timer(vcpu)) ··· 1378 1379 if (vcpu->kvm->mm != current->mm) 1380 return -EIO; 1381 + 1382 + #if defined(CONFIG_S390) || defined(CONFIG_PPC) 1383 + /* 1384 + * Special cases: vcpu ioctls that are asynchronous to vcpu execution, 1385 + * so vcpu_load() would break it. 1386 + */ 1387 + if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) 1388 + return kvm_arch_vcpu_ioctl(filp, ioctl, arg); 1389 + #endif 1390 + 1391 + 1392 + vcpu_load(vcpu); 1393 switch (ioctl) { 1394 case KVM_RUN: 1395 r = -EINVAL; ··· 1520 goto out; 1521 p = &sigset; 1522 } 1523 + r = kvm_vcpu_ioctl_set_sigmask(vcpu, p); 1524 break; 1525 } 1526 case KVM_GET_FPU: { ··· 1555 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); 1556 } 1557 out: 1558 + vcpu_put(vcpu); 1559 kfree(fpu); 1560 kfree(kvm_sregs); 1561 return r; ··· 2197 2198 bad_pfn = page_to_pfn(bad_page); 2199 2200 + hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 2201 + 2202 + if (hwpoison_page == NULL) { 2203 + r = -ENOMEM; 2204 + goto out_free_0; 2205 + } 2206 + 2207 + hwpoison_pfn = page_to_pfn(hwpoison_page); 2208 + 2209 + fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 2210 + 2211 + if (fault_page == NULL) { 2212 + r = -ENOMEM; 2213 + goto out_free_0; 2214 + } 2215 + 2216 + fault_pfn = page_to_pfn(fault_page); 2217 + 2218 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { 2219 r = -ENOMEM; 2220 goto out_free_0; ··· 2269 out_free_0a: 2270 free_cpumask_var(cpus_hardware_enabled); 2271 out_free_0: 2272 + if (fault_page) 2273 + __free_page(fault_page); 2274 + if (hwpoison_page) 2275 + __free_page(hwpoison_page); 2276 __free_page(bad_page); 2277 out: 2278 kvm_arch_exit(); ··· 2290 kvm_arch_hardware_unsetup(); 2291 kvm_arch_exit(); 2292 free_cpumask_var(cpus_hardware_enabled); 2293 + __free_page(hwpoison_page); 2294 __free_page(bad_page); 2295 } 2296 EXPORT_SYMBOL_GPL(kvm_exit);
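
Besides bad_page there are now dedicated hwpoison and fault pages, and the predicates exported above let arch code tell the three error classes apart; note that is_error_pfn() matches all of them, so the specific checks have to come first. A purely illustrative classifier (example_* name made up):

static const char *example_pfn_error_name(pfn_t pfn)
{
	if (is_hwpoison_pfn(pfn))
		return "hwpoison";	/* backing page took an uncorrected memory error */
	if (is_fault_pfn(pfn))
		return "fault";		/* host address not backed by a usable mapping */
	if (is_error_pfn(pfn))
		return "error";		/* generic bad page */
	return "ok";
}
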