Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kvm: selftests: add API testing infrastructure

Testsuite contributed by Google and cleaned up by myself for
inclusion in Linux.

Signed-off-by: Ken Hofsass <hofsass@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

+5813
+1
tools/testing/selftests/Makefile
@@ -14,6 +14,7 @@
 TARGETS += intel_pstate
 TARGETS += ipc
 TARGETS += kcmp
+TARGETS += kvm
 TARGETS += lib
 TARGETS += membarrier
 TARGETS += memfd
+38
tools/testing/selftests/kvm/Makefile
all:

top_srcdir = ../../../../
UNAME_M := $(shell uname -m)

LIBKVM = lib/assert.c lib/kvm_util.c lib/sparsebit.c
LIBKVM_x86_64 = lib/x86.c

TEST_GEN_PROGS_x86_64 = set_sregs_test

TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))

INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)

# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
include ../lib.mk

STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)

x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@

$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
	$(AR) crs $@ $^

$(LINUX_HDR_PATH):
	make -C $(top_srcdir) headers_install

all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
$(TEST_GEN_PROGS): $(STATIC_LIBS)
$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
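With this Makefile in place the suite builds like any other selftest target; from the top of a kernel tree, something along the lines of `make -C tools/testing/selftests TARGETS=kvm` should produce set_sregs_test on x86_64. Note the header rule runs `make headers_install` in the source tree the first time, so the tree must be writable.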
+139
tools/testing/selftests/kvm/include/kvm_util.h
/*
 * tools/testing/selftests/kvm/include/kvm_util.h
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 */
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H 1

#include "test_util.h"

#include "asm/kvm.h"
#include "linux/kvm.h"
#include <sys/ioctl.h>

#include "sparsebit.h"

/*
 * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
 * created. Only applies to VMs using EPT.
 */
#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul


/* Callers of kvm_util only have an incomplete/opaque description of the
 * structure kvm_util is using to maintain the state of a VM.
 */
struct kvm_vm;

typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */

/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000

#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
#define DEFAULT_STACK_PGS 5

enum vm_guest_mode {
	VM_MODE_FLAT48PG,
};

enum vm_mem_backing_src_type {
	VM_MEM_SRC_ANONYMOUS,
	VM_MEM_SRC_ANONYMOUS_THP,
	VM_MEM_SRC_ANONYMOUS_HUGETLB,
};

int kvm_check_cap(long cap);

struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);

int kvm_memcmp_hva_gva(void *hva,
	struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);

void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void vcpu_dump(FILE *stream, struct kvm_vm *vm,
	uint32_t vcpuid, uint8_t indent);

void vm_create_irqchip(struct kvm_vm *vm);

void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags);

void vcpu_ioctl(struct kvm_vm *vm,
	uint32_t vcpuid, unsigned long ioctl, void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
	uint32_t data_memslot, uint32_t pgd_memslot);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);

struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
	struct kvm_mp_state *mp_state);
void vcpu_regs_get(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_regs_set(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
void vcpu_sregs_get(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_sregs *sregs);
void vcpu_sregs_set(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_sregs *sregs);
int _vcpu_sregs_set(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_sregs *sregs);
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
	struct kvm_vcpu_events *events);
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
	struct kvm_vcpu_events *events);

const char *exit_reason_str(unsigned int exit_reason);

void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	uint32_t pgd_memslot);
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
	vm_paddr_t paddr_min, uint32_t memslot);

void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid);
void vcpu_set_cpuid(
	struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);

struct kvm_cpuid2 *allocate_kvm_cpuid2(void);
struct kvm_cpuid_entry2 *
find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
	uint32_t index);

static inline struct kvm_cpuid_entry2 *
find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function)
{
	return find_cpuid_index_entry(cpuid, function, 0);
}

struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);

struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
	uint64_t end);

struct kvm_dirty_log *
allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);

int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);

#endif /* SELFTEST_KVM_UTIL_H */
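The intended flow is visible from the declarations above: create a VM with a default memslot and one vCPU pointed at a guest_code function, then drive it with vcpu_run() and inspect kvm_run. A minimal sketch of that flow (hypothetical test, not part of this commit; VCPU_ID and the port number are arbitrary, and inw() comes from the x86.h header below):

#include "kvm_util.h"
#include "test_util.h"
#include "x86.h"

#define VCPU_ID 5

static void guest_code(void)
{
	/* Any port access forces a KVM_EXIT_IO back to the test. */
	for (;;)
		inw(0x80);
}

int main(void)
{
	struct kvm_vm *vm;
	struct kvm_run *run;

	/* Create a VM with default-sized memory and one vCPU at guest_code. */
	vm = vm_create_default(VCPU_ID, guest_code);
	run = vcpu_state(vm, VCPU_ID);

	vcpu_run(vm, VCPU_ID);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Unexpected exit reason: %u (%s)",
		    run->exit_reason, exit_reason_str(run->exit_reason));

	kvm_vm_free(vm);
	return 0;
}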
+75
tools/testing/selftests/kvm/include/sparsebit.h
/*
 * tools/testing/selftests/kvm/include/sparsebit.h
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 *
 * Header file that describes API to the sparsebit library.
 * This library provides a memory efficient means of storing
 * the settings of bits indexed via a uint64_t. Memory usage
 * is reasonable, significantly less than (2^64 / 8) bytes, as
 * long as bits that are mostly set or mostly cleared are close
 * to each other. This library is efficient in memory usage
 * even in the case where most bits are set.
 */

#ifndef _TEST_SPARSEBIT_H_
#define _TEST_SPARSEBIT_H_

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

struct sparsebit;
typedef uint64_t sparsebit_idx_t;
typedef uint64_t sparsebit_num_t;

struct sparsebit *sparsebit_alloc(void);
void sparsebit_free(struct sparsebit **sbitp);
void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);

bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
bool sparsebit_is_set_num(struct sparsebit *sbit,
	sparsebit_idx_t idx, sparsebit_num_t num);
bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
bool sparsebit_is_clear_num(struct sparsebit *sbit,
	sparsebit_idx_t idx, sparsebit_num_t num);
sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
bool sparsebit_any_set(struct sparsebit *sbit);
bool sparsebit_any_clear(struct sparsebit *sbit);
bool sparsebit_all_set(struct sparsebit *sbit);
bool sparsebit_all_clear(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
	sparsebit_idx_t start, sparsebit_num_t num);
sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
	sparsebit_idx_t start, sparsebit_num_t num);

void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
	sparsebit_num_t num);
void sparsebit_set_all(struct sparsebit *sbitp);

void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
void sparsebit_clear_num(struct sparsebit *sbitp,
	sparsebit_idx_t start, sparsebit_num_t num);
void sparsebit_clear_all(struct sparsebit *sbitp);

void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
	unsigned int indent);
void sparsebit_validate_internal(struct sparsebit *sbit);

#ifdef __cplusplus
}
#endif

#endif /* _TEST_SPARSEBIT_H_ */
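kvm_util uses sparsebits to track which guest pages are valid and mapped (see vpages_valid/vpages_mapped in kvm_util.c below). On its own the API works roughly like this (standalone sketch; the page numbers are arbitrary):

#include <assert.h>
#include "sparsebit.h"

int main(void)
{
	struct sparsebit *pages = sparsebit_alloc();

	/* Mark a 512-page range as available, then take one page. */
	sparsebit_set_num(pages, 0, 512);
	sparsebit_clear(pages, 5);

	assert(sparsebit_is_set(pages, 4));
	assert(sparsebit_is_clear(pages, 5));
	assert(sparsebit_any_clear(pages));

	sparsebit_free(&pages);	/* frees the structure and NULLs the pointer */
	return 0;
}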
+45
tools/testing/selftests/kvm/include/test_util.h
/*
 * tools/testing/selftests/kvm/include/test_util.h
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 */

#ifndef TEST_UTIL_H
#define TEST_UTIL_H 1

#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>

ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
int test_seq_read(const char *path, char **bufp, size_t *sizep);

void test_assert(bool exp, const char *exp_str,
	const char *file, unsigned int line, const char *fmt, ...);

#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

#define TEST_ASSERT(e, fmt, ...) \
	test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)

#define ASSERT_EQ(a, b) do { \
	typeof(a) __a = (a); \
	typeof(b) __b = (b); \
	TEST_ASSERT(__a == __b, \
		    "ASSERT_EQ(%s, %s) failed.\n" \
		    "\t%s is %#lx\n" \
		    "\t%s is %#lx", \
		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
} while (0)

#endif /* TEST_UTIL_H */
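Both macros format a message only on failure, and TEST_ASSERT stringifies the expression itself into the report. Typical call sites look like this (fragment; vm_fd and run are assumed to come from surrounding test code):

int ret = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION failed, rc: %i errno: %i",
	    ret, errno);
ASSERT_EQ(run->exit_reason, KVM_EXIT_IO);	/* run: a struct kvm_run * */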
+1043
tools/testing/selftests/kvm/include/x86.h
/*
 * tools/testing/selftests/kvm/include/x86.h
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 */

#ifndef SELFTEST_KVM_X86_H
#define SELFTEST_KVM_X86_H

#include <assert.h>
#include <stdint.h>

#define X86_EFLAGS_FIXED (1u << 1)

#define X86_CR4_VME (1ul << 0)
#define X86_CR4_PVI (1ul << 1)
#define X86_CR4_TSD (1ul << 2)
#define X86_CR4_DE (1ul << 3)
#define X86_CR4_PSE (1ul << 4)
#define X86_CR4_PAE (1ul << 5)
#define X86_CR4_MCE (1ul << 6)
#define X86_CR4_PGE (1ul << 7)
#define X86_CR4_PCE (1ul << 8)
#define X86_CR4_OSFXSR (1ul << 9)
#define X86_CR4_OSXMMEXCPT (1ul << 10)
#define X86_CR4_UMIP (1ul << 11)
#define X86_CR4_VMXE (1ul << 13)
#define X86_CR4_SMXE (1ul << 14)
#define X86_CR4_FSGSBASE (1ul << 16)
#define X86_CR4_PCIDE (1ul << 17)
#define X86_CR4_OSXSAVE (1ul << 18)
#define X86_CR4_SMEP (1ul << 20)
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)

/* The enum values match the instruction encoding of each register */
enum x86_register {
	RAX = 0,
	RCX,
	RDX,
	RBX,
	RSP,
	RBP,
	RSI,
	RDI,
	R8,
	R9,
	R10,
	R11,
	R12,
	R13,
	R14,
	R15,
};

struct desc64 {
	uint16_t limit0;
	uint16_t base0;
	unsigned base1:8, type:5, dpl:2, p:1;
	unsigned limit1:4, zero0:3, g:1, base2:8;
	uint32_t base3;
	uint32_t zero1;
} __attribute__((packed));

struct desc_ptr {
	uint16_t size;
	uint64_t address;
} __attribute__((packed));

static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
	return ((uint64_t)desc->base3 << 32) |
		(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
}

static inline uint64_t rdtsc(void)
{
	uint32_t eax, edx;

	/*
	 * The lfence is to wait (on Intel CPUs) until all previous
	 * instructions have been executed.
	 */
	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
	return ((uint64_t)edx) << 32 | eax;
}

static inline uint64_t rdtscp(uint32_t *aux)
{
	uint32_t eax, edx;

	__asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
	return ((uint64_t)edx) << 32 | eax;
}

static inline uint64_t rdmsr(uint32_t msr)
{
	uint32_t a, d;

	__asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");

	return a | ((uint64_t) d << 32);
}

static inline void wrmsr(uint32_t msr, uint64_t value)
{
	uint32_t a = value;
	uint32_t d = value >> 32;

	__asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
}


static inline uint16_t inw(uint16_t port)
{
	uint16_t tmp;

	__asm__ __volatile__("in %%dx, %%ax"
		: /* output */ "=a" (tmp)
		: /* input */ "d" (port));

	return tmp;
}

static inline uint16_t get_es(void)
{
	uint16_t es;

	__asm__ __volatile__("mov %%es, %[es]"
		: /* output */ [es]"=rm"(es));
	return es;
}

static inline uint16_t get_cs(void)
{
	uint16_t cs;

	__asm__ __volatile__("mov %%cs, %[cs]"
		: /* output */ [cs]"=rm"(cs));
	return cs;
}

static inline uint16_t get_ss(void)
{
	uint16_t ss;

	__asm__ __volatile__("mov %%ss, %[ss]"
		: /* output */ [ss]"=rm"(ss));
	return ss;
}

static inline uint16_t get_ds(void)
{
	uint16_t ds;

	__asm__ __volatile__("mov %%ds, %[ds]"
		: /* output */ [ds]"=rm"(ds));
	return ds;
}

static inline uint16_t get_fs(void)
{
	uint16_t fs;

	__asm__ __volatile__("mov %%fs, %[fs]"
		: /* output */ [fs]"=rm"(fs));
	return fs;
}

static inline uint16_t get_gs(void)
{
	uint16_t gs;

	__asm__ __volatile__("mov %%gs, %[gs]"
		: /* output */ [gs]"=rm"(gs));
	return gs;
}

static inline uint16_t get_tr(void)
{
	uint16_t tr;

	__asm__ __volatile__("str %[tr]"
		: /* output */ [tr]"=rm"(tr));
	return tr;
}

static inline uint64_t get_cr0(void)
{
	uint64_t cr0;

	__asm__ __volatile__("mov %%cr0, %[cr0]"
		: /* output */ [cr0]"=r"(cr0));
	return cr0;
}

static inline uint64_t get_cr3(void)
{
	uint64_t cr3;

	__asm__ __volatile__("mov %%cr3, %[cr3]"
		: /* output */ [cr3]"=r"(cr3));
	return cr3;
}

static inline uint64_t get_cr4(void)
{
	uint64_t cr4;

	__asm__ __volatile__("mov %%cr4, %[cr4]"
		: /* output */ [cr4]"=r"(cr4));
	return cr4;
}

static inline void set_cr4(uint64_t val)
{
	__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}

static inline uint64_t get_gdt_base(void)
{
	struct desc_ptr gdt;
	__asm__ __volatile__("sgdt %[gdt]"
		: /* output */ [gdt]"=m"(gdt));
	return gdt.address;
}

static inline uint64_t get_idt_base(void)
{
	struct desc_ptr idt;
	__asm__ __volatile__("sidt %[idt]"
		: /* output */ [idt]"=m"(idt));
	return idt.address;
}

#define SET_XMM(__var, __xmm) \
	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)

static inline void set_xmm(int n, unsigned long val)
{
	switch (n) {
	case 0:
		SET_XMM(val, xmm0);
		break;
	case 1:
		SET_XMM(val, xmm1);
		break;
	case 2:
		SET_XMM(val, xmm2);
		break;
	case 3:
		SET_XMM(val, xmm3);
		break;
	case 4:
		SET_XMM(val, xmm4);
		break;
	case 5:
		SET_XMM(val, xmm5);
		break;
	case 6:
		SET_XMM(val, xmm6);
		break;
	case 7:
		SET_XMM(val, xmm7);
		break;
	}
}

typedef unsigned long v1di __attribute__ ((vector_size (8)));
static inline unsigned long get_xmm(int n)
{
	assert(n >= 0 && n <= 7);

	register v1di xmm0 __asm__("%xmm0");
	register v1di xmm1 __asm__("%xmm1");
	register v1di xmm2 __asm__("%xmm2");
	register v1di xmm3 __asm__("%xmm3");
	register v1di xmm4 __asm__("%xmm4");
	register v1di xmm5 __asm__("%xmm5");
	register v1di xmm6 __asm__("%xmm6");
	register v1di xmm7 __asm__("%xmm7");
	switch (n) {
	case 0:
		return (unsigned long)xmm0;
	case 1:
		return (unsigned long)xmm1;
	case 2:
		return (unsigned long)xmm2;
	case 3:
		return (unsigned long)xmm3;
	case 4:
		return (unsigned long)xmm4;
	case 5:
		return (unsigned long)xmm5;
	case 6:
		return (unsigned long)xmm6;
	case 7:
		return (unsigned long)xmm7;
	}
	return 0;
}

/*
 * Basic CPU control in CR0
 */
#define X86_CR0_PE (1UL<<0) /* Protection Enable */
#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
#define X86_CR0_EM (1UL<<2) /* Emulation */
#define X86_CR0_TS (1UL<<3) /* Task Switched */
#define X86_CR0_ET (1UL<<4) /* Extension Type */
#define X86_CR0_NE (1UL<<5) /* Numeric Error */
#define X86_CR0_WP (1UL<<16) /* Write Protect */
#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
#define X86_CR0_NW (1UL<<29) /* Not Write-through */
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */

/*
 * CPU model specific register (MSR) numbers.
 */

/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */

/* EFER bits: */
#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
#define EFER_LME (1<<8) /* Long mode enable */
#define EFER_LMA (1<<10) /* Long mode active (read-only) */
#define EFER_NX (1<<11) /* No execute enable */
#define EFER_SVME (1<<12) /* Enable virtualization */
#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */

/* Intel MSRs. Some also available on other CPUs */

#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f

#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)

#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)

#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e

#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176

#define MSR_IA32_MCG_CAP 0x00000179
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
#define MSR_IA32_MCG_EXT_CTL 0x000004d0

#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
#define MSR_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af

#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060

#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
#define LBR_INFO_CYCLES 0xffff

#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6

#define MSR_IA32_RTIT_CTL 0x00000570
#define MSR_IA32_RTIT_STATUS 0x00000571
#define MSR_IA32_RTIT_ADDR0_A 0x00000580
#define MSR_IA32_RTIT_ADDR0_B 0x00000581
#define MSR_IA32_RTIT_ADDR1_A 0x00000582
#define MSR_IA32_RTIT_ADDR1_B 0x00000583
#define MSR_IA32_RTIT_ADDR2_A 0x00000584
#define MSR_IA32_RTIT_ADDR2_B 0x00000585
#define MSR_IA32_RTIT_ADDR3_A 0x00000586
#define MSR_IA32_RTIT_ADDR3_B 0x00000587
#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561

#define MSR_MTRRfix64K_00000 0x00000250
#define MSR_MTRRfix16K_80000 0x00000258
#define MSR_MTRRfix16K_A0000 0x00000259
#define MSR_MTRRfix4K_C0000 0x00000268
#define MSR_MTRRfix4K_C8000 0x00000269
#define MSR_MTRRfix4K_D0000 0x0000026a
#define MSR_MTRRfix4K_D8000 0x0000026b
#define MSR_MTRRfix4K_E0000 0x0000026c
#define MSR_MTRRfix4K_E8000 0x0000026d
#define MSR_MTRRfix4K_F0000 0x0000026e
#define MSR_MTRRfix4K_F8000 0x0000026f
#define MSR_MTRRdefType 0x000002ff

#define MSR_IA32_CR_PAT 0x00000277

#define MSR_IA32_DEBUGCTLMSR 0x000001d9
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de

/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
#define DEBUGCTLMSR_BTINT (1UL << 8)
#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)

#define MSR_PEBS_FRONTEND 0x000003f7

#define MSR_IA32_POWER_CTL 0x000001fc

#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403

/* C-state Residency Counters */
#define MSR_PKG_C3_RESIDENCY 0x000003f8
#define MSR_PKG_C6_RESIDENCY 0x000003f9
#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
#define MSR_PKG_C7_RESIDENCY 0x000003fa
#define MSR_CORE_C3_RESIDENCY 0x000003fc
#define MSR_CORE_C6_RESIDENCY 0x000003fd
#define MSR_CORE_C7_RESIDENCY 0x000003fe
#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
#define MSR_PKG_C2_RESIDENCY 0x0000060d
#define MSR_PKG_C8_RESIDENCY 0x00000630
#define MSR_PKG_C9_RESIDENCY 0x00000631
#define MSR_PKG_C10_RESIDENCY 0x00000632

/* Interrupt Response Limit */
#define MSR_PKGC3_IRTL 0x0000060a
#define MSR_PKGC6_IRTL 0x0000060b
#define MSR_PKGC7_IRTL 0x0000060c
#define MSR_PKGC8_IRTL 0x00000633
#define MSR_PKGC9_IRTL 0x00000634
#define MSR_PKGC10_IRTL 0x00000635

/* Run Time Average Power Limiting (RAPL) Interface */

#define MSR_RAPL_POWER_UNIT 0x00000606

#define MSR_PKG_POWER_LIMIT 0x00000610
#define MSR_PKG_ENERGY_STATUS 0x00000611
#define MSR_PKG_PERF_STATUS 0x00000613
#define MSR_PKG_POWER_INFO 0x00000614

#define MSR_DRAM_POWER_LIMIT 0x00000618
#define MSR_DRAM_ENERGY_STATUS 0x00000619
#define MSR_DRAM_PERF_STATUS 0x0000061b
#define MSR_DRAM_POWER_INFO 0x0000061c

#define MSR_PP0_POWER_LIMIT 0x00000638
#define MSR_PP0_ENERGY_STATUS 0x00000639
#define MSR_PP0_POLICY 0x0000063a
#define MSR_PP0_PERF_STATUS 0x0000063b

#define MSR_PP1_POWER_LIMIT 0x00000640
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642

/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
#define MSR_CONFIG_TDP_CONTROL 0x0000064B
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C

#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D

#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B

#define MSR_CORE_C1_RES 0x00000660
#define MSR_MODULE_C6_RES_MS 0x00000664

#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669

#define MSR_ATOM_CORE_RATIOS 0x0000066a
#define MSR_ATOM_CORE_VIDS 0x0000066b
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d

#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1

/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
#define MSR_PM_ENABLE 0x00000770
#define MSR_HWP_CAPABILITIES 0x00000771
#define MSR_HWP_REQUEST_PKG 0x00000772
#define MSR_HWP_INTERRUPT 0x00000773
#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_STATUS 0x00000777

/* CPUID.6.EAX */
#define HWP_BASE_BIT (1<<7)
#define HWP_NOTIFICATIONS_BIT (1<<8)
#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)

/* IA32_HWP_CAPABILITIES */
#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)

/* IA32_HWP_REQUEST */
#define HWP_MIN_PERF(x) (x & 0xff)
#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
#define HWP_EPP_PERFORMANCE 0x00
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
#define HWP_EPP_POWERSAVE 0xFF
#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)

/* IA32_HWP_STATUS */
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)

/* IA32_HWP_INTERRUPT */
#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)

#define MSR_AMD64_MC0_MASK 0xc0010044

#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))

#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))

/* These are consecutive and not in the normal four-register MCE bank block */
#define MSR_IA32_MC0_CTL2 0x00000280
#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))

#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186
#define MSR_P6_EVNTSEL1 0x00000187

#define MSR_KNC_PERFCTR0 0x00000020
#define MSR_KNC_PERFCTR1 0x00000021
#define MSR_KNC_EVNTSEL0 0x00000028
#define MSR_KNC_EVNTSEL1 0x00000029

/* Alternative perfctr range with full access. */
#define MSR_IA32_PMC0 0x000004c1

/* AMD64 MSRs. Not complete. See the architecture manual for a more
   complete list. */

#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
#define MSR_AMD64_IBSFETCH_REG_COUNT 3
#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
#define MSR_AMD64_IBSOPCTL 0xc0011033
#define MSR_AMD64_IBSOPRIP 0xc0011034
#define MSR_AMD64_IBSOPDATA 0xc0011035
#define MSR_AMD64_IBSOPDATA2 0xc0011036
#define MSR_AMD64_IBSOPDATA3 0xc0011037
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSOP_REG_COUNT 7
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)

/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9

/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
#define MSR_F16H_DR0_ADDR_MASK 0xc0011027

/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
#define MSR_F15H_PTSC 0xc0010280
#define MSR_F15H_IC_CFG 0xc0011021

/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
#define FAM10H_MMIO_CONF_ENABLE (1<<0)
#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
#define MSR_F10H_DECFG 0xc0011029
#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)

/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_K8_SYSCFG 0xc0010010
#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
#define MSR_K8_TSEG_ADDR 0xc0010112
#define MSR_K8_TSEG_MASK 0xc0010113
#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */

/* K7 MSRs */
#define MSR_K7_EVNTSEL0 0xc0010000
#define MSR_K7_PERFCTR0 0xc0010004
#define MSR_K7_EVNTSEL1 0xc0010001
#define MSR_K7_PERFCTR1 0xc0010005
#define MSR_K7_EVNTSEL2 0xc0010002
#define MSR_K7_PERFCTR2 0xc0010006
#define MSR_K7_EVNTSEL3 0xc0010003
#define MSR_K7_PERFCTR3 0xc0010007
#define MSR_K7_CLK_CTL 0xc001001b
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042

/* K6 MSRs */
#define MSR_K6_WHCR 0xc0000082
#define MSR_K6_UWCCR 0xc0000085
#define MSR_K6_EPMR 0xc0000086
#define MSR_K6_PSOR 0xc0000087
#define MSR_K6_PFIR 0xc0000088

/* Centaur-Hauls/IDT defined MSRs. */
#define MSR_IDT_FCR1 0x00000107
#define MSR_IDT_FCR2 0x00000108
#define MSR_IDT_FCR3 0x00000109
#define MSR_IDT_FCR4 0x0000010a

#define MSR_IDT_MCR0 0x00000110
#define MSR_IDT_MCR1 0x00000111
#define MSR_IDT_MCR2 0x00000112
#define MSR_IDT_MCR3 0x00000113
#define MSR_IDT_MCR4 0x00000114
#define MSR_IDT_MCR5 0x00000115
#define MSR_IDT_MCR6 0x00000116
#define MSR_IDT_MCR7 0x00000117
#define MSR_IDT_MCR_CTRL 0x00000120

/* VIA Cyrix defined MSRs */
#define MSR_VIA_FCR 0x00001107
#define MSR_VIA_LONGHAUL 0x0000110a
#define MSR_VIA_RNG 0x0000110b
#define MSR_VIA_BCR2 0x00001147

/* Transmeta defined MSRs */
#define MSR_TMTA_LONGRUN_CTRL 0x80868010
#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
#define MSR_TMTA_LRTI_READOUT 0x80868018
#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a

/* Intel defined MSRs. */
#define MSR_IA32_P5_MC_ADDR 0x00000000
#define MSR_IA32_P5_MC_TYPE 0x00000001
#define MSR_IA32_TSC 0x00000010
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90

#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc

#define MSR_IA32_XSS 0x00000da0

#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
#define FEATURE_CONTROL_LMCE (1<<20)

#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)

#define MSR_IA32_TSCDEADLINE 0x000006e0

#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b

#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
#define MSR_IA32_SMBASE 0x0000009e

#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD_PERF_STATUS 0xc0010063
#define MSR_AMD_PERF_CTL 0xc0010062

#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8

#define MSR_IA32_THERM_CONTROL 0x0000019a
#define MSR_IA32_THERM_INTERRUPT 0x0000019b

#define THERM_INT_HIGH_ENABLE (1 << 0)
#define THERM_INT_LOW_ENABLE (1 << 1)
#define THERM_INT_PLN_ENABLE (1 << 24)

#define MSR_IA32_THERM_STATUS 0x0000019c

#define THERM_STATUS_PROCHOT (1 << 0)
#define THERM_STATUS_POWER_LIMIT (1 << 10)

#define MSR_THERM2_CTL 0x0000019d

#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)

#define MSR_IA32_MISC_ENABLE 0x000001a0

#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2

#define MSR_MISC_FEATURE_CONTROL 0x000001a4
#define MSR_MISC_PWR_MGMT 0x000001aa

#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
#define ENERGY_PERF_BIAS_PERFORMANCE 0
#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
#define ENERGY_PERF_BIAS_NORMAL 6
#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
#define ENERGY_PERF_BIAS_POWERSAVE 15

#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1

#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)

#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2

#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)

/* Thermal Thresholds Support */
#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
#define THERM_SHIFT_THRESHOLD0 8
#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
#define THERM_SHIFT_THRESHOLD1 16
#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
#define THERM_STATUS_THRESHOLD0 (1 << 6)
#define THERM_LOG_THRESHOLD0 (1 << 7)
#define THERM_STATUS_THRESHOLD1 (1 << 8)
#define THERM_LOG_THRESHOLD1 (1 << 9)

/* MISC_ENABLE bits: architectural */
#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)

/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)

/* MISC_FEATURES_ENABLES non-architectural features */
#define MSR_MISC_FEATURES_ENABLES 0x00000140

#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1

#define MSR_IA32_TSC_DEADLINE 0x000006E0

/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
#define MSR_IA32_MCG_EBX 0x00000181
#define MSR_IA32_MCG_ECX 0x00000182
#define MSR_IA32_MCG_EDX 0x00000183
#define MSR_IA32_MCG_ESI 0x00000184
#define MSR_IA32_MCG_EDI 0x00000185
#define MSR_IA32_MCG_EBP 0x00000186
#define MSR_IA32_MCG_ESP 0x00000187
#define MSR_IA32_MCG_EFLAGS 0x00000188
#define MSR_IA32_MCG_EIP 0x00000189
#define MSR_IA32_MCG_RESERVED 0x0000018a

/* Pentium IV performance counter MSRs */
#define MSR_P4_BPU_PERFCTR0 0x00000300
#define MSR_P4_BPU_PERFCTR1 0x00000301
#define MSR_P4_BPU_PERFCTR2 0x00000302
#define MSR_P4_BPU_PERFCTR3 0x00000303
#define MSR_P4_MS_PERFCTR0 0x00000304
#define MSR_P4_MS_PERFCTR1 0x00000305
#define MSR_P4_MS_PERFCTR2 0x00000306
#define MSR_P4_MS_PERFCTR3 0x00000307
#define MSR_P4_FLAME_PERFCTR0 0x00000308
#define MSR_P4_FLAME_PERFCTR1 0x00000309
#define MSR_P4_FLAME_PERFCTR2 0x0000030a
#define MSR_P4_FLAME_PERFCTR3 0x0000030b
#define MSR_P4_IQ_PERFCTR0 0x0000030c
#define MSR_P4_IQ_PERFCTR1 0x0000030d
#define MSR_P4_IQ_PERFCTR2 0x0000030e
#define MSR_P4_IQ_PERFCTR3 0x0000030f
#define MSR_P4_IQ_PERFCTR4 0x00000310
#define MSR_P4_IQ_PERFCTR5 0x00000311
#define MSR_P4_BPU_CCCR0 0x00000360
#define MSR_P4_BPU_CCCR1 0x00000361
#define MSR_P4_BPU_CCCR2 0x00000362
#define MSR_P4_BPU_CCCR3 0x00000363
#define MSR_P4_MS_CCCR0 0x00000364
#define MSR_P4_MS_CCCR1 0x00000365
#define MSR_P4_MS_CCCR2 0x00000366
#define MSR_P4_MS_CCCR3 0x00000367
#define MSR_P4_FLAME_CCCR0 0x00000368
#define MSR_P4_FLAME_CCCR1 0x00000369
#define MSR_P4_FLAME_CCCR2 0x0000036a
#define MSR_P4_FLAME_CCCR3 0x0000036b
#define MSR_P4_IQ_CCCR0 0x0000036c
#define MSR_P4_IQ_CCCR1 0x0000036d
#define MSR_P4_IQ_CCCR2 0x0000036e
#define MSR_P4_IQ_CCCR3 0x0000036f
#define MSR_P4_IQ_CCCR4 0x00000370
#define MSR_P4_IQ_CCCR5 0x00000371
#define MSR_P4_ALF_ESCR0 0x000003ca
#define MSR_P4_ALF_ESCR1 0x000003cb
#define MSR_P4_BPU_ESCR0 0x000003b2
#define MSR_P4_BPU_ESCR1 0x000003b3
#define MSR_P4_BSU_ESCR0 0x000003a0
#define MSR_P4_BSU_ESCR1 0x000003a1
#define MSR_P4_CRU_ESCR0 0x000003b8
#define MSR_P4_CRU_ESCR1 0x000003b9
#define MSR_P4_CRU_ESCR2 0x000003cc
#define MSR_P4_CRU_ESCR3 0x000003cd
#define MSR_P4_CRU_ESCR4 0x000003e0
#define MSR_P4_CRU_ESCR5 0x000003e1
#define MSR_P4_DAC_ESCR0 0x000003a8
#define MSR_P4_DAC_ESCR1 0x000003a9
#define MSR_P4_FIRM_ESCR0 0x000003a4
#define MSR_P4_FIRM_ESCR1 0x000003a5
#define MSR_P4_FLAME_ESCR0 0x000003a6
#define MSR_P4_FLAME_ESCR1 0x000003a7
#define MSR_P4_FSB_ESCR0 0x000003a2
#define MSR_P4_FSB_ESCR1 0x000003a3
#define MSR_P4_IQ_ESCR0 0x000003ba
#define MSR_P4_IQ_ESCR1 0x000003bb
#define MSR_P4_IS_ESCR0 0x000003b4
#define MSR_P4_IS_ESCR1 0x000003b5
#define MSR_P4_ITLB_ESCR0 0x000003b6
#define MSR_P4_ITLB_ESCR1 0x000003b7
#define MSR_P4_IX_ESCR0 0x000003c8
#define MSR_P4_IX_ESCR1 0x000003c9
#define MSR_P4_MOB_ESCR0 0x000003aa
#define MSR_P4_MOB_ESCR1 0x000003ab
#define MSR_P4_MS_ESCR0 0x000003c0
#define MSR_P4_MS_ESCR1 0x000003c1
#define MSR_P4_PMH_ESCR0 0x000003ac
#define MSR_P4_PMH_ESCR1 0x000003ad
#define MSR_P4_RAT_ESCR0 0x000003bc
#define MSR_P4_RAT_ESCR1 0x000003bd
#define MSR_P4_SAAT_ESCR0 0x000003ae
#define MSR_P4_SAAT_ESCR1 0x000003af
#define MSR_P4_SSU_ESCR0 0x000003be
#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */

#define MSR_P4_TBPU_ESCR0 0x000003c2
#define MSR_P4_TBPU_ESCR1 0x000003c3
#define MSR_P4_TC_ESCR0 0x000003c4
#define MSR_P4_TC_ESCR1 0x000003c5
#define MSR_P4_U2L_ESCR0 0x000003b0
#define MSR_P4_U2L_ESCR1 0x000003b1

#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2

/* Intel Core-based CPU performance counters */
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390

/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900

/* Intel VT MSRs */
#define MSR_IA32_VMX_BASIC 0x00000480
#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
#define MSR_IA32_VMX_MISC 0x00000485
#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
#define MSR_IA32_VMX_VMFUNC 0x00000491

/* VMX_BASIC bits and bitmasks */
#define VMX_BASIC_VMCS_SIZE_SHIFT 32
#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
#define VMX_BASIC_64 0x0001000000000000LLU
#define VMX_BASIC_MEM_TYPE_SHIFT 50
#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU

/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F

/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117

#endif /* !SELFTEST_KVM_X86_H */
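Everything in this header executes inside the guest at CPL0, so the helpers issue privileged instructions directly. A guest-side fragment might look like this (hypothetical; a real test would report results back to the host via port I/O):

static void guest_code(void)
{
	uint64_t apic_base;

	/* Privileged reads/writes; these run at CPL0 inside the guest. */
	apic_base = rdmsr(MSR_IA32_APICBASE);
	set_cr4(get_cr4() | X86_CR4_OSFXSR);

	if (apic_base & MSR_IA32_APICBASE_ENABLE) {
		/* ... exercise whatever state the test cares about ... */
	}
}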
+87
tools/testing/selftests/kvm/lib/assert.c
/*
 * tools/testing/selftests/kvm/lib/assert.c
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#define _GNU_SOURCE /* for getline(3) and strchrnul(3) */

#include "test_util.h"

#include <execinfo.h>
#include <sys/syscall.h>

/* Dumps the current stack trace to stderr. */
static void __attribute__((noinline)) test_dump_stack(void);
static void test_dump_stack(void)
{
	/*
	 * Build and run this command:
	 *
	 *	addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
	 *		grep -v test_dump_stack | cat -n 1>&2
	 *
	 * Note that the spacing is different and there's no newline.
	 */
	size_t i;
	size_t n = 20;
	void *stack[n];
	const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
	const char *pipeline = "|cat -n 1>&2";
	char cmd[strlen(addr2line) + strlen(pipeline) +
		 /* N bytes per addr * 2 digits per byte + 1 space per addr: */
		 n * (((sizeof(void *)) * 2) + 1) +
		 /* Null terminator: */
		 1];
	char *c;

	n = backtrace(stack, n);
	c = &cmd[0];
	c += sprintf(c, "%s", addr2line);
	/*
	 * Skip the first 3 frames: backtrace, test_dump_stack, and
	 * test_assert. We hope that backtrace isn't inlined and the other two
	 * we've declared noinline.
	 */
	for (i = 2; i < n; i++)
		c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
	c += sprintf(c, "%s", pipeline);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
	system(cmd);
#pragma GCC diagnostic pop
}

static pid_t gettid(void)
{
	return syscall(SYS_gettid);
}

void __attribute__((noinline))
test_assert(bool exp, const char *exp_str,
	const char *file, unsigned int line, const char *fmt, ...)
{
	va_list ap;

	if (!(exp)) {
		va_start(ap, fmt);

		fprintf(stderr, "==== Test Assertion Failure ====\n"
			"  %s:%u: %s\n"
			"  pid=%d tid=%d\n",
			file, line, exp_str, getpid(), gettid());
		test_dump_stack();
		if (fmt) {
			fputs("  ", stderr);
			vfprintf(stderr, fmt, ap);
			fputs("\n", stderr);
		}
		va_end(ap);

		exit(254);
	}

	return;
}
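Given the format strings above, a failing TEST_ASSERT prints a report along these lines (illustrative only; the file, pid, and address are made up, and the numbered line comes from the addr2line|cat -n pipeline):

==== Test Assertion Failure ====
  set_sregs_test.c:53: ret == 0
  pid=4211 tid=4211
     1	0x0000000000401234: main at set_sregs_test.c:53
  KVM_SET_SREGS IOCTL failed, rc: -1 errno: 22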
+1480
tools/testing/selftests/kvm/lib/kvm_util.c
··· 1 + /* 2 + * tools/testing/selftests/kvm/lib/kvm_util.c 3 + * 4 + * Copyright (C) 2018, Google LLC. 5 + * 6 + * This work is licensed under the terms of the GNU GPL, version 2. 7 + */ 8 + 9 + #include "test_util.h" 10 + #include "kvm_util.h" 11 + #include "kvm_util_internal.h" 12 + 13 + #include <assert.h> 14 + #include <sys/mman.h> 15 + #include <sys/types.h> 16 + #include <sys/stat.h> 17 + 18 + #define KVM_DEV_PATH "/dev/kvm" 19 + 20 + #define KVM_UTIL_PGS_PER_HUGEPG 512 21 + #define KVM_UTIL_MIN_PADDR 0x2000 22 + 23 + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ 24 + static void *align(void *x, size_t size) 25 + { 26 + size_t mask = size - 1; 27 + TEST_ASSERT(size != 0 && !(size & (size - 1)), 28 + "size not a power of 2: %lu", size); 29 + return (void *) (((size_t) x + mask) & ~mask); 30 + } 31 + 32 + /* Capability 33 + * 34 + * Input Args: 35 + * cap - Capability 36 + * 37 + * Output Args: None 38 + * 39 + * Return: 40 + * On success, the Value corresponding to the capability (KVM_CAP_*) 41 + * specified by the value of cap. On failure a TEST_ASSERT failure 42 + * is produced. 43 + * 44 + * Looks up and returns the value corresponding to the capability 45 + * (KVM_CAP_*) given by cap. 46 + */ 47 + int kvm_check_cap(long cap) 48 + { 49 + int ret; 50 + int kvm_fd; 51 + 52 + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); 53 + TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", 54 + KVM_DEV_PATH, kvm_fd, errno); 55 + 56 + ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); 57 + TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" 58 + " rc: %i errno: %i", ret, errno); 59 + 60 + close(kvm_fd); 61 + 62 + return ret; 63 + } 64 + 65 + /* VM Create 66 + * 67 + * Input Args: 68 + * mode - VM Mode (e.g. VM_MODE_FLAT48PG) 69 + * phy_pages - Physical memory pages 70 + * perm - permission 71 + * 72 + * Output Args: None 73 + * 74 + * Return: 75 + * Pointer to opaque structure that describes the created VM. 76 + * 77 + * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG). 78 + * When phy_pages is non-zero, a memory region of phy_pages physical pages 79 + * is created and mapped starting at guest physical address 0. The file 80 + * descriptor to control the created VM is created with the permissions 81 + * given by perm (e.g. O_RDWR). 82 + */ 83 + struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) 84 + { 85 + struct kvm_vm *vm; 86 + int kvm_fd; 87 + 88 + /* Allocate memory. */ 89 + vm = calloc(1, sizeof(*vm)); 90 + TEST_ASSERT(vm != NULL, "Insufficent Memory"); 91 + 92 + vm->mode = mode; 93 + kvm_fd = open(KVM_DEV_PATH, perm); 94 + TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", 95 + KVM_DEV_PATH, kvm_fd, errno); 96 + 97 + /* Create VM. */ 98 + vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); 99 + TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " 100 + "rc: %i errno: %i", vm->fd, errno); 101 + 102 + close(kvm_fd); 103 + 104 + /* Setup mode specific traits. */ 105 + switch (vm->mode) { 106 + case VM_MODE_FLAT48PG: 107 + vm->page_size = 0x1000; 108 + vm->page_shift = 12; 109 + 110 + /* Limit to 48-bit canonical virtual addresses. */ 111 + vm->vpages_valid = sparsebit_alloc(); 112 + sparsebit_set_num(vm->vpages_valid, 113 + 0, (1ULL << (48 - 1)) >> vm->page_shift); 114 + sparsebit_set_num(vm->vpages_valid, 115 + (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift, 116 + (1ULL << (48 - 1)) >> vm->page_shift); 117 + 118 + /* Limit physical addresses to 52-bits. 
*/ 119 + vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1; 120 + break; 121 + 122 + default: 123 + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); 124 + } 125 + 126 + /* Allocate and setup memory for guest. */ 127 + vm->vpages_mapped = sparsebit_alloc(); 128 + if (phy_pages != 0) 129 + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 130 + 0, 0, phy_pages, 0); 131 + 132 + return vm; 133 + } 134 + 135 + /* Userspace Memory Region Find 136 + * 137 + * Input Args: 138 + * vm - Virtual Machine 139 + * start - Starting VM physical address 140 + * end - Ending VM physical address, inclusive. 141 + * 142 + * Output Args: None 143 + * 144 + * Return: 145 + * Pointer to overlapping region, NULL if no such region. 146 + * 147 + * Searches for a region with any physical memory that overlaps with 148 + * any portion of the guest physical addresses from start to end 149 + * inclusive. If multiple overlapping regions exist, a pointer to any 150 + * of the regions is returned. Null is returned only when no overlapping 151 + * region exists. 152 + */ 153 + static struct userspace_mem_region *userspace_mem_region_find( 154 + struct kvm_vm *vm, uint64_t start, uint64_t end) 155 + { 156 + struct userspace_mem_region *region; 157 + 158 + for (region = vm->userspace_mem_region_head; region; 159 + region = region->next) { 160 + uint64_t existing_start = region->region.guest_phys_addr; 161 + uint64_t existing_end = region->region.guest_phys_addr 162 + + region->region.memory_size - 1; 163 + if (start <= existing_end && end >= existing_start) 164 + return region; 165 + } 166 + 167 + return NULL; 168 + } 169 + 170 + /* KVM Userspace Memory Region Find 171 + * 172 + * Input Args: 173 + * vm - Virtual Machine 174 + * start - Starting VM physical address 175 + * end - Ending VM physical address, inclusive. 176 + * 177 + * Output Args: None 178 + * 179 + * Return: 180 + * Pointer to overlapping region, NULL if no such region. 181 + * 182 + * Public interface to userspace_mem_region_find. Allows tests to look up 183 + * the memslot datastructure for a given range of guest physical memory. 184 + */ 185 + struct kvm_userspace_memory_region * 186 + kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, 187 + uint64_t end) 188 + { 189 + struct userspace_mem_region *region; 190 + 191 + region = userspace_mem_region_find(vm, start, end); 192 + if (!region) 193 + return NULL; 194 + 195 + return &region->region; 196 + } 197 + 198 + /* VCPU Find 199 + * 200 + * Input Args: 201 + * vm - Virtual Machine 202 + * vcpuid - VCPU ID 203 + * 204 + * Output Args: None 205 + * 206 + * Return: 207 + * Pointer to VCPU structure 208 + * 209 + * Locates a vcpu structure that describes the VCPU specified by vcpuid and 210 + * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU 211 + * for the specified vcpuid. 212 + */ 213 + struct vcpu *vcpu_find(struct kvm_vm *vm, 214 + uint32_t vcpuid) 215 + { 216 + struct vcpu *vcpup; 217 + 218 + for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) { 219 + if (vcpup->id == vcpuid) 220 + return vcpup; 221 + } 222 + 223 + return NULL; 224 + } 225 + 226 + /* VM VCPU Remove 227 + * 228 + * Input Args: 229 + * vm - Virtual Machine 230 + * vcpuid - VCPU ID 231 + * 232 + * Output Args: None 233 + * 234 + * Return: None, TEST_ASSERT failures for all error conditions 235 + * 236 + * Within the VM specified by vm, removes the VCPU given by vcpuid. 
237 + */ 238 + static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid) 239 + { 240 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 241 + 242 + int ret = close(vcpu->fd); 243 + TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " 244 + "errno: %i", ret, errno); 245 + 246 + if (vcpu->next) 247 + vcpu->next->prev = vcpu->prev; 248 + if (vcpu->prev) 249 + vcpu->prev->next = vcpu->next; 250 + else 251 + vm->vcpu_head = vcpu->next; 252 + free(vcpu); 253 + } 254 + 255 + 256 + /* Destroys and frees the VM pointed to by vmp. 257 + */ 258 + void kvm_vm_free(struct kvm_vm *vmp) 259 + { 260 + int ret; 261 + 262 + if (vmp == NULL) 263 + return; 264 + 265 + /* Free userspace_mem_regions. */ 266 + while (vmp->userspace_mem_region_head) { 267 + struct userspace_mem_region *region 268 + = vmp->userspace_mem_region_head; 269 + 270 + region->region.memory_size = 0; 271 + ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, 272 + &region->region); 273 + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, " 274 + "rc: %i errno: %i", ret, errno); 275 + 276 + vmp->userspace_mem_region_head = region->next; 277 + sparsebit_free(&region->unused_phy_pages); 278 + ret = munmap(region->mmap_start, region->mmap_size); 279 + TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", 280 + ret, errno); 281 + 282 + free(region); 283 + } 284 + 285 + /* Free VCPUs. */ 286 + while (vmp->vcpu_head) 287 + vm_vcpu_rm(vmp, vmp->vcpu_head->id); 288 + 289 + /* Free sparsebit arrays. */ 290 + sparsebit_free(&vmp->vpages_valid); 291 + sparsebit_free(&vmp->vpages_mapped); 292 + 293 + /* Close file descriptor for the VM. */ 294 + ret = close(vmp->fd); 295 + TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" 296 + " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); 297 + 298 + /* Free the structure describing the VM. */ 299 + free(vmp); 300 + } 301 + 302 + /* Memory Compare, host virtual to guest virtual 303 + * 304 + * Input Args: 305 + * hva - Starting host virtual address 306 + * vm - Virtual Machine 307 + * gva - Starting guest virtual address 308 + * len - number of bytes to compare 309 + * 310 + * Output Args: None 311 + * 312 + * Input/Output Args: None 313 + * 314 + * Return: 315 + * Returns 0 if the bytes starting at hva for a length of len 316 + * are equal to the guest virtual bytes starting at gva. Returns 317 + * a value < 0 if the bytes at hva are less than those at gva. 318 + * Otherwise a value > 0 is returned. 319 + * 320 + * Compares the bytes starting at the host virtual address hva, for 321 + * a length of len, to the guest bytes starting at the guest virtual 322 + * address given by gva. 323 + */ 324 + int kvm_memcmp_hva_gva(void *hva, 325 + struct kvm_vm *vm, vm_vaddr_t gva, size_t len) 326 + { 327 + size_t amt; 328 + 329 + /* Compare a batch of bytes until either a mismatch is found 330 + * or all the bytes have been compared. 331 + */ 332 + for (uintptr_t offset = 0; offset < len; offset += amt) { 333 + uintptr_t ptr1 = (uintptr_t)hva + offset; 334 + 335 + /* Determine host address for guest virtual address 336 + * at offset. 337 + */ 338 + uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); 339 + 340 + /* Determine amount to compare on this pass. 341 + * Don't allow the comparison to cross a page boundary. 342 + */ 343 + amt = len - offset; 344 + if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) 345 + amt = vm->page_size - (ptr1 % vm->page_size); 346 + if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) 347 + amt = vm->page_size - (ptr2 % vm->page_size); 348 + 349 + assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); 350 + assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); 351 + 352 + /* Perform the comparison. If there is a difference 353 + * return that result to the caller, otherwise need 354 + * to continue on looking for a mismatch. 355 + */ 356 + int ret = memcmp((void *)ptr1, (void *)ptr2, amt); 357 + if (ret != 0) 358 + return ret; 359 + } 360 + 361 + /* No mismatch found. Let the caller know the two memory 362 + * areas are equal. 363 + */ 364 + return 0; 365 + } 366 + 
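/* [Editorial sketch, not part of the patch] Typical use of
 * kvm_memcmp_hva_gva(): assert that guest memory still matches the host
 * buffer it was seeded from. The helper name and arguments are hypothetical.
 */
static void assert_guest_matches(struct kvm_vm *vm, void *host_buf,
				 vm_vaddr_t guest_va, size_t len)
{
	TEST_ASSERT(kvm_memcmp_hva_gva(host_buf, vm, guest_va, len) == 0,
		    "guest memory diverged from the host copy");
}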
367 + /* Allocate an instance of struct kvm_cpuid2 368 + * 369 + * Input Args: None 370 + * 371 + * Output Args: None 372 + * 373 + * Return: A pointer to the allocated struct. The caller is responsible 374 + * for freeing this struct. 375 + * 376 + * Since kvm_cpuid2 uses a 0-length array to allow the size of the 377 + * array to be decided at allocation time, allocation is slightly 378 + * complicated. This function uses a reasonable default length for 379 + * the array and performs the appropriate allocation. 380 + */ 381 + struct kvm_cpuid2 *allocate_kvm_cpuid2(void) 382 + { 383 + struct kvm_cpuid2 *cpuid; 384 + int nent = 100; 385 + size_t size; 386 + 387 + size = sizeof(*cpuid); 388 + size += nent * sizeof(struct kvm_cpuid_entry2); 389 + cpuid = malloc(size); 390 + if (!cpuid) { 391 + perror("malloc"); 392 + abort(); 393 + } 394 + 395 + cpuid->nent = nent; 396 + 397 + return cpuid; 398 + } 399 + 400 + /* KVM Supported CPUID Get 401 + * 402 + * Input Args: None 403 + * 404 + * Output Args: 405 + * cpuid - The supported KVM CPUID 406 + * 407 + * Return: void 408 + * 409 + * Get the guest CPUID supported by KVM. 410 + */ 411 + void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) 412 + { 413 + int ret; 414 + int kvm_fd; 415 + 416 + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); 417 + TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", 418 + KVM_DEV_PATH, kvm_fd, errno); 419 + 420 + ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); 421 + TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", 422 + ret, errno); 423 + 424 + close(kvm_fd); 425 + } 426 + 427 + /* Locate a cpuid entry. 428 + * 429 + * Input Args: 430 + * cpuid: The cpuid. 431 + * function, index: The function and index of the cpuid entry to find. 432 + * 433 + * Output Args: None 434 + * 435 + * Return: A pointer to the cpuid entry. Never returns NULL. 436 + */ 437 + struct kvm_cpuid_entry2 * 438 + find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, 439 + uint32_t index) 440 + { 441 + struct kvm_cpuid_entry2 *entry = NULL; 442 + int i; 443 + 444 + for (i = 0; i < cpuid->nent; i++) { 445 + if (cpuid->entries[i].function == function && 446 + cpuid->entries[i].index == index) { 447 + entry = &cpuid->entries[i]; 448 + break; 449 + } 450 + } 451 + 452 + TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", 453 + function, index); 454 + return entry; 455 + } 456 + 
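/* [Editorial sketch, not part of the patch] The three CPUID helpers above
 * compose as follows; leaf 1, index 0 is only an example, and the caller
 * owns the allocation returned by allocate_kvm_cpuid2().
 */
static uint32_t example_leaf1_ecx(void)
{
	struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2();
	struct kvm_cpuid_entry2 *entry;
	uint32_t ecx;

	kvm_get_supported_cpuid(cpuid);
	entry = find_cpuid_index_entry(cpuid, 1, 0); /* asserts if absent */
	ecx = entry->ecx;
	free(cpuid);
	return ecx;
}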
457 + /* VM Userspace Memory Region Add 458 + * 459 + * Input Args: 460 + * vm - Virtual Machine 461 + * src_type - Storage source for this region. 462 + * Use VM_MEM_SRC_ANONYMOUS for ordinary anonymous memory. 463 + * guest_paddr - Starting guest physical address 464 + * slot - KVM region slot 465 + * npages - Number of physical pages 466 + * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) 467 + * 468 + * Output Args: None 469 + * 470 + * Return: None 471 + * 472 + * Allocates a memory area of the number of pages specified by npages 473 + * and maps it to the VM specified by vm, at a starting physical address 474 + * given by guest_paddr. The region is created with a KVM region slot 475 + * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The 476 + * region is created with the flags given by flags. 477 + */ 478 + void vm_userspace_mem_region_add(struct kvm_vm *vm, 479 + enum vm_mem_backing_src_type src_type, 480 + uint64_t guest_paddr, uint32_t slot, uint64_t npages, 481 + uint32_t flags) 482 + { 483 + int ret; 484 + unsigned long pmem_size = 0; 485 + struct userspace_mem_region *region; 486 + size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; 487 + 488 + TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " 489 + "address not on a page boundary.\n" 490 + " guest_paddr: 0x%lx vm->page_size: 0x%x", 491 + guest_paddr, vm->page_size); 492 + TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) 493 + <= vm->max_gfn, "Physical range beyond maximum " 494 + "supported physical address,\n" 495 + " guest_paddr: 0x%lx npages: 0x%lx\n" 496 + " vm->max_gfn: 0x%lx vm->page_size: 0x%x", 497 + guest_paddr, npages, vm->max_gfn, vm->page_size); 498 + 499 + /* Confirm a mem region with an overlapping address doesn't 500 + * already exist. 501 + */ 502 + region = (struct userspace_mem_region *) userspace_mem_region_find( 503 + vm, guest_paddr, guest_paddr + npages * vm->page_size); 504 + if (region != NULL) 505 + TEST_ASSERT(false, "overlapping userspace_mem_region already " 506 + "exists\n" 507 + " requested guest_paddr: 0x%lx npages: 0x%lx " 508 + "page_size: 0x%x\n" 509 + " existing guest_paddr: 0x%lx size: 0x%lx", 510 + guest_paddr, npages, vm->page_size, 511 + (uint64_t) region->region.guest_phys_addr, 512 + (uint64_t) region->region.memory_size); 513 + 514 + /* Confirm no region with the requested slot already exists. */ 515 + for (region = vm->userspace_mem_region_head; region; 516 + region = region->next) { 517 + if (region->region.slot == slot) 518 + break; 519 + if ((guest_paddr <= (region->region.guest_phys_addr 520 + + region->region.memory_size)) 521 + && ((guest_paddr + npages * vm->page_size) 522 + >= region->region.guest_phys_addr)) 523 + break; 524 + } 525 + if (region != NULL) 526 + TEST_ASSERT(false, "A mem region with the requested slot " 527 + "or overlapping physical memory range already exists.\n" 528 + " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" 529 + " existing slot: %u paddr: 0x%lx size: 0x%lx", 530 + slot, guest_paddr, npages, 531 + region->region.slot, 532 + (uint64_t) region->region.guest_phys_addr, 533 + (uint64_t) region->region.memory_size); 534 + 535 + /* Allocate and initialize new mem region structure. */ 536 + region = calloc(1, sizeof(*region)); 537 + TEST_ASSERT(region != NULL, "Insufficient Memory"); 538 + region->mmap_size = npages * vm->page_size; 539 + 540 + /* Enough memory to align up to a huge page. */ 541 + if (src_type == VM_MEM_SRC_ANONYMOUS_THP) 542 + region->mmap_size += huge_page_size; 543 + region->mmap_start = mmap(NULL, region->mmap_size, 544 + PROT_READ | PROT_WRITE, 545 + MAP_PRIVATE | MAP_ANONYMOUS 546 + | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0), 547 + -1, 0); 548 + TEST_ASSERT(region->mmap_start != MAP_FAILED, 549 + "mmap failed, mmap_start: %p errno: %i", 550 + region->mmap_start, errno); 551 + 552 + /* Align THP allocation up to start of a huge page. */ 553 + region->host_mem = align(region->mmap_start, 554 + src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1); 555 + 556 + /* As needed perform madvise */ 557 + if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { 558 + ret = madvise(region->host_mem, npages * vm->page_size, 559 + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); 560 + TEST_ASSERT(ret == 0, "madvise failed,\n" 561 + " addr: %p\n" 562 + " length: 0x%lx\n" 563 + " src_type: %x", 564 + region->host_mem, npages * vm->page_size, src_type); 565 + } 566 + 567 + region->unused_phy_pages = sparsebit_alloc(); 568 + sparsebit_set_num(region->unused_phy_pages, 569 + guest_paddr >> vm->page_shift, npages); 570 + region->region.slot = slot; 571 + region->region.flags = flags; 572 + region->region.guest_phys_addr = guest_paddr; 573 + region->region.memory_size = npages * vm->page_size; 574 + region->region.userspace_addr = (uintptr_t) region->host_mem; 575 + ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region); 576 + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 577 + " rc: %i errno: %i\n" 578 + " slot: %u flags: 0x%x\n" 579 + " guest_phys_addr: 0x%lx size: 0x%lx", 580 + ret, errno, slot, flags, 581 + guest_paddr, (uint64_t) region->region.memory_size); 582 + 583 + /* Add to linked-list of memory regions. */ 584 + if (vm->userspace_mem_region_head) 585 + vm->userspace_mem_region_head->prev = region; 586 + region->next = vm->userspace_mem_region_head; 587 + vm->userspace_mem_region_head = region; 588 + } 589 + 
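/* [Editorial sketch, not part of the patch] Adding an extra, dirty-log
 * enabled memslot beyond the default one created by vm_create(); the GPA,
 * slot number and page count are arbitrary examples and must not overlap
 * an existing region.
 */
static void add_test_slot(struct kvm_vm *vm)
{
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    0x10000000,	/* guest_paddr */
				    1,		/* slot */
				    64,		/* npages */
				    KVM_MEM_LOG_DIRTY_PAGES);
}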
590 + /* Memslot to region 591 + * 592 + * Input Args: 593 + * vm - Virtual Machine 594 + * memslot - KVM memory slot ID 595 + * 596 + * Output Args: None 597 + * 598 + * Return: 599 + * Pointer to memory region structure that describes the memory region 600 + * using kvm memory slot ID given by memslot. TEST_ASSERT failure 601 + * on error (e.g. currently no memory region using memslot as a KVM 602 + * memory slot ID). 603 + */ 604 + static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, 605 + uint32_t memslot) 606 + { 607 + struct userspace_mem_region *region; 608 + 609 + for (region = vm->userspace_mem_region_head; region; 610 + region = region->next) { 611 + if (region->region.slot == memslot) 612 + break; 613 + } 614 + if (region == NULL) { 615 + fprintf(stderr, "No mem region with the requested slot found,\n" 616 + " requested slot: %u\n", memslot); 617 + fputs("---- vm dump ----\n", stderr); 618 + vm_dump(stderr, vm, 2); 619 + TEST_ASSERT(false, "Mem region not found"); 620 + } 621 + 622 + return region; 623 + } 624 + 625 + /* VM Memory Region Flags Set 626 + * 627 + * Input Args: 628 + * vm - Virtual Machine 629 + * slot, flags - Memory slot ID and the flags to set for it 630 + * 631 + * Output Args: None 632 + * 633 + * Return: None 634 + * 635 + * Sets the flags of the memory region specified by the value of slot, 636 + * to the values given by flags. 637 + */ 638 + void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) 639 + { 640 + int ret; 641 + struct userspace_mem_region *region; 642 + 643 + /* Locate memory region. 
*/ 644 + region = memslot2region(vm, slot); 645 + 646 + region->region.flags = flags; 647 + 648 + ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region); 649 + 650 + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 651 + " rc: %i errno: %i slot: %u flags: 0x%x", 652 + ret, errno, slot, flags); 653 + } 654 + 655 + /* VCPU mmap Size 656 + * 657 + * Input Args: None 658 + * 659 + * Output Args: None 660 + * 661 + * Return: 662 + * Size of VCPU state 663 + * 664 + * Returns the size of the structure pointed to by the return value 665 + * of vcpu_state(). 666 + */ 667 + static int vcpu_mmap_sz(void) 668 + { 669 + int dev_fd, ret; 670 + 671 + dev_fd = open(KVM_DEV_PATH, O_RDONLY); 672 + TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", 673 + __func__, KVM_DEV_PATH, dev_fd, errno); 674 + 675 + ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); 676 + TEST_ASSERT(ret >= sizeof(struct kvm_run), 677 + "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i", 678 + __func__, ret, errno); 679 + 680 + close(dev_fd); 681 + 682 + return ret; 683 + } 684 + 685 + /* VM VCPU Add 686 + * 687 + * Input Args: 688 + * vm - Virtual Machine 689 + * vcpuid - VCPU ID 690 + * 691 + * Output Args: None 692 + * 693 + * Return: None 694 + * 695 + * Creates and adds to the VM specified by vm a virtual CPU with 696 + * the ID given by vcpuid. 697 + */ 698 + void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) 699 + { 700 + struct vcpu *vcpu; 701 + 702 + /* Confirm a vcpu with the specified id doesn't already exist. */ 703 + vcpu = vcpu_find(vm, vcpuid); 704 + if (vcpu != NULL) 705 + TEST_ASSERT(false, "vcpu with the specified id " 706 + "already exists,\n" 707 + " requested vcpuid: %u\n" 708 + " existing vcpuid: %u state: %p", 709 + vcpuid, vcpu->id, vcpu->state); 710 + 711 + /* Allocate and initialize new vcpu structure. */ 712 + vcpu = calloc(1, sizeof(*vcpu)); 713 + TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); 714 + vcpu->id = vcpuid; 715 + vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid); 716 + TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i", 717 + vcpu->fd, errno); 718 + 719 + TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " 720 + "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", 721 + vcpu_mmap_sz(), sizeof(*vcpu->state)); 722 + vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), 723 + PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); 724 + TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " 725 + "vcpu id: %u errno: %i", vcpuid, errno); 726 + 727 + /* Add to linked-list of VCPUs. */ 728 + if (vm->vcpu_head) 729 + vm->vcpu_head->prev = vcpu; 730 + vcpu->next = vm->vcpu_head; 731 + vm->vcpu_head = vcpu; 732 + 733 + vcpu_setup(vm, vcpuid); 734 + } 735 + 
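/* [Editorial sketch, not part of the patch] vm_vcpu_add() both creates the
 * vCPU and mmaps its shared kvm_run area, so vcpu_state() (declared in
 * include/kvm_util.h) immediately gives a test access to that state.
 */
static void add_vcpu_example(struct kvm_vm *vm)
{
	struct kvm_run *run;

	vm_vcpu_add(vm, 0);
	run = vcpu_state(vm, 0);
	/* run->exit_reason and friends are now directly readable. */
	(void) run;
}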
754 + */ 755 + static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, 756 + vm_vaddr_t vaddr_min) 757 + { 758 + uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; 759 + 760 + /* Determine lowest permitted virtual page index. */ 761 + uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; 762 + if ((pgidx_start * vm->page_size) < vaddr_min) 763 + goto no_va_found; 764 + 765 + /* Loop over section with enough valid virtual page indexes. */ 766 + if (!sparsebit_is_set_num(vm->vpages_valid, 767 + pgidx_start, pages)) 768 + pgidx_start = sparsebit_next_set_num(vm->vpages_valid, 769 + pgidx_start, pages); 770 + do { 771 + /* 772 + * Are there enough unused virtual pages available at 773 + * the currently proposed starting virtual page index. 774 + * If not, adjust proposed starting index to next 775 + * possible. 776 + */ 777 + if (sparsebit_is_clear_num(vm->vpages_mapped, 778 + pgidx_start, pages)) 779 + goto va_found; 780 + pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, 781 + pgidx_start, pages); 782 + if (pgidx_start == 0) 783 + goto no_va_found; 784 + 785 + /* 786 + * If needed, adjust proposed starting virtual address, 787 + * to next range of valid virtual addresses. 788 + */ 789 + if (!sparsebit_is_set_num(vm->vpages_valid, 790 + pgidx_start, pages)) { 791 + pgidx_start = sparsebit_next_set_num( 792 + vm->vpages_valid, pgidx_start, pages); 793 + if (pgidx_start == 0) 794 + goto no_va_found; 795 + } 796 + } while (pgidx_start != 0); 797 + 798 + no_va_found: 799 + TEST_ASSERT(false, "No vaddr of specified pages available, " 800 + "pages: 0x%lx", pages); 801 + 802 + /* NOT REACHED */ 803 + return -1; 804 + 805 + va_found: 806 + TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, 807 + pgidx_start, pages), 808 + "Unexpected, invalid virtual page index range,\n" 809 + " pgidx_start: 0x%lx\n" 810 + " pages: 0x%lx", 811 + pgidx_start, pages); 812 + TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, 813 + pgidx_start, pages), 814 + "Unexpected, pages already mapped,\n" 815 + " pgidx_start: 0x%lx\n" 816 + " pages: 0x%lx", 817 + pgidx_start, pages); 818 + 819 + return pgidx_start * vm->page_size; 820 + } 821 + 822 + /* VM Virtual Address Allocate 823 + * 824 + * Input Args: 825 + * vm - Virtual Machine 826 + * sz - Size in bytes 827 + * vaddr_min - Minimum starting virtual address 828 + * data_memslot - Memory region slot for data pages 829 + * pgd_memslot - Memory region slot for new virtual translation tables 830 + * 831 + * Output Args: None 832 + * 833 + * Return: 834 + * Starting guest virtual address 835 + * 836 + * Allocates at least sz bytes within the virtual address space of the vm 837 + * given by vm. The allocated bytes are mapped to a virtual address >= 838 + * the address given by vaddr_min. Note that each allocation uses a 839 + * a unique set of pages, with the minimum real allocation being at least 840 + * a page. 841 + */ 842 + vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, 843 + uint32_t data_memslot, uint32_t pgd_memslot) 844 + { 845 + uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); 846 + 847 + virt_pgd_alloc(vm, pgd_memslot); 848 + 849 + /* Find an unused range of virtual page addresses of at least 850 + * pages in length. 851 + */ 852 + vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); 853 + 854 + /* Map the virtual pages. 
*/ 855 + for (vm_vaddr_t vaddr = vaddr_start; pages > 0; 856 + pages--, vaddr += vm->page_size) { 857 + vm_paddr_t paddr; 858 + 859 + paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot); 860 + 861 + virt_pg_map(vm, vaddr, paddr, pgd_memslot); 862 + 863 + sparsebit_set(vm->vpages_mapped, 864 + vaddr >> vm->page_shift); 865 + } 866 + 867 + return vaddr_start; 868 + } 869 + 870 + /* Address VM Physical to Host Virtual 871 + * 872 + * Input Args: 873 + * vm - Virtual Machine 874 + * gpa - VM physical address 875 + * 876 + * Output Args: None 877 + * 878 + * Return: 879 + * Equivalent host virtual address 880 + * 881 + * Locates the memory region containing the VM physical address given 882 + * by gpa, within the VM given by vm. When found, the host virtual 883 + * address providing the memory to the vm physical address is returned. 884 + * A TEST_ASSERT failure occurs if no region containing gpa exists. 885 + */ 886 + void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) 887 + { 888 + struct userspace_mem_region *region; 889 + for (region = vm->userspace_mem_region_head; region; 890 + region = region->next) { 891 + if ((gpa >= region->region.guest_phys_addr) 892 + && (gpa <= (region->region.guest_phys_addr 893 + + region->region.memory_size - 1))) 894 + return (void *) ((uintptr_t) region->host_mem 895 + + (gpa - region->region.guest_phys_addr)); 896 + } 897 + 898 + TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa); 899 + return NULL; 900 + } 901 + 902 + /* Address Host Virtual to VM Physical 903 + * 904 + * Input Args: 905 + * vm - Virtual Machine 906 + * hva - Host virtual address 907 + * 908 + * Output Args: None 909 + * 910 + * Return: 911 + * Equivalent VM physical address 912 + * 913 + * Locates the memory region containing the host virtual address given 914 + * by hva, within the VM given by vm. When found, the equivalent 915 + * VM physical address is returned. A TEST_ASSERT failure occurs if no 916 + * region containing hva exists. 917 + */ 918 + vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) 919 + { 920 + struct userspace_mem_region *region; 921 + for (region = vm->userspace_mem_region_head; region; 922 + region = region->next) { 923 + if ((hva >= region->host_mem) 924 + && (hva <= (region->host_mem 925 + + region->region.memory_size - 1))) 926 + return (vm_paddr_t) ((uintptr_t) 927 + region->region.guest_phys_addr 928 + + (hva - (uintptr_t) region->host_mem)); 929 + } 930 + 931 + TEST_ASSERT(false, "No mapping to a guest physical address, " 932 + "hva: %p", hva); 933 + return -1; 934 + } 935 + 936 + /* VM Create IRQ Chip 937 + * 938 + * Input Args: 939 + * vm - Virtual Machine 940 + * 941 + * Output Args: None 942 + * 943 + * Return: None 944 + * 945 + * Creates an interrupt controller chip for the VM specified by vm. 946 + */ 947 + void vm_create_irqchip(struct kvm_vm *vm) 948 + { 949 + int ret; 950 + 951 + ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0); 952 + TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, " 953 + "rc: %i errno: %i", ret, errno); 954 + } 955 + 956 + /* VM VCPU State 957 + * 958 + * Input Args: 959 + * vm - Virtual Machine 960 + * vcpuid - VCPU ID 961 + * 962 + * Output Args: None 963 + * 964 + * Return: 965 + * Pointer to structure that describes the state of the VCPU. 966 + * 967 + * Locates and returns a pointer to a structure that describes the 968 + * state of the VCPU with the given vcpuid. 
969 + */ 970 + struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) 971 + { 972 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 973 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 974 + 975 + return vcpu->state; 976 + } 977 + 978 + /* VM VCPU Run 979 + * 980 + * Input Args: 981 + * vm - Virtual Machine 982 + * vcpuid - VCPU ID 983 + * 984 + * Output Args: None 985 + * 986 + * Return: None 987 + * 988 + * Switch to executing the code for the VCPU given by vcpuid, within the VM 989 + * given by vm. 990 + */ 991 + void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) 992 + { 993 + int ret = _vcpu_run(vm, vcpuid); 994 + TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " 995 + "rc: %i errno: %i", ret, errno); 996 + } 997 + 998 + int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) 999 + { 1000 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1001 + int rc; 1002 + 1003 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1004 + do { 1005 + rc = ioctl(vcpu->fd, KVM_RUN, NULL); 1006 + } while (rc == -1 && errno == EINTR); 1007 + return rc; 1008 + } 1009 + 1010 + /* VM VCPU Set MP State 1011 + * 1012 + * Input Args: 1013 + * vm - Virtual Machine 1014 + * vcpuid - VCPU ID 1015 + * mp_state - mp_state to be set 1016 + * 1017 + * Output Args: None 1018 + * 1019 + * Return: None 1020 + * 1021 + * Sets the MP state of the VCPU given by vcpuid, to the state given 1022 + * by mp_state. 1023 + */ 1024 + void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, 1025 + struct kvm_mp_state *mp_state) 1026 + { 1027 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1028 + int ret; 1029 + 1030 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1031 + 1032 + ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); 1033 + TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, " 1034 + "rc: %i errno: %i", ret, errno); 1035 + } 1036 + 1037 + /* VM VCPU Regs Get 1038 + * 1039 + * Input Args: 1040 + * vm - Virtual Machine 1041 + * vcpuid - VCPU ID 1042 + * 1043 + * Output Args: 1044 + * regs - current state of VCPU regs 1045 + * 1046 + * Return: None 1047 + * 1048 + * Obtains the current register state for the VCPU specified by vcpuid 1049 + * and stores it at the location given by regs. 1050 + */ 1051 + void vcpu_regs_get(struct kvm_vm *vm, 1052 + uint32_t vcpuid, struct kvm_regs *regs) 1053 + { 1054 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1055 + int ret; 1056 + 1057 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1058 + 1059 + /* Get the regs. */ 1060 + ret = ioctl(vcpu->fd, KVM_GET_REGS, regs); 1061 + TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i", 1062 + ret, errno); 1063 + } 1064 + 1065 + /* VM VCPU Regs Set 1066 + * 1067 + * Input Args: 1068 + * vm - Virtual Machine 1069 + * vcpuid - VCPU ID 1070 + * regs - Values to set VCPU regs to 1071 + * 1072 + * Output Args: None 1073 + * 1074 + * Return: None 1075 + * 1076 + * Sets the regs of the VCPU specified by vcpuid to the values 1077 + * given by regs. 1078 + */ 1079 + void vcpu_regs_set(struct kvm_vm *vm, 1080 + uint32_t vcpuid, struct kvm_regs *regs) 1081 + { 1082 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1083 + int ret; 1084 + 1085 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1086 + 1087 + /* Set the regs. 
*/ 1088 + ret = ioctl(vcpu->fd, KVM_SET_REGS, regs); 1089 + TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i", 1090 + ret, errno); 1091 + } 1092 + 1093 + void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, 1094 + struct kvm_vcpu_events *events) 1095 + { 1096 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1097 + int ret; 1098 + 1099 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1100 + 1101 + /* Get the events. */ 1102 + ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events); 1103 + TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS failed, rc: %i errno: %i", 1104 + ret, errno); 1105 + } 1106 + 1107 + void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, 1108 + struct kvm_vcpu_events *events) 1109 + { 1110 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1111 + int ret; 1112 + 1113 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1114 + 1115 + /* Set the events. */ 1116 + ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events); 1117 + TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS failed, rc: %i errno: %i", 1118 + ret, errno); 1119 + } 1120 + 1121 + /* VM VCPU Args Set 1122 + * 1123 + * Input Args: 1124 + * vm - Virtual Machine 1125 + * vcpuid - VCPU ID 1126 + * num - number of arguments 1127 + * ... - arguments, each of type uint64_t 1128 + * 1129 + * Output Args: None 1130 + * 1131 + * Return: None 1132 + * 1133 + * Sets the first num function input arguments to the values 1134 + * given as variable args. Each of the variable args is expected to 1135 + * be of type uint64_t. 1136 + */ 1137 + void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) 1138 + { 1139 + va_list ap; 1140 + struct kvm_regs regs; 1141 + 1142 + TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" 1143 + " num: %u\n", 1144 + num); 1145 + 1146 + va_start(ap, num); 1147 + vcpu_regs_get(vm, vcpuid, &regs); 1148 + 1149 + if (num >= 1) 1150 + regs.rdi = va_arg(ap, uint64_t); 1151 + 1152 + if (num >= 2) 1153 + regs.rsi = va_arg(ap, uint64_t); 1154 + 1155 + if (num >= 3) 1156 + regs.rdx = va_arg(ap, uint64_t); 1157 + 1158 + if (num >= 4) 1159 + regs.rcx = va_arg(ap, uint64_t); 1160 + 1161 + if (num >= 5) 1162 + regs.r8 = va_arg(ap, uint64_t); 1163 + 1164 + if (num >= 6) 1165 + regs.r9 = va_arg(ap, uint64_t); 1166 + 1167 + vcpu_regs_set(vm, vcpuid, &regs); 1168 + va_end(ap); 1169 + } 1170 + 
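/* [Editorial sketch, not part of the patch] vcpu_args_set() follows the
 * x86-64 SysV calling convention, so a guest function entered as
 * guest_code(a, b) receives these two values in %rdi and %rsi.
 */
static void pass_two_guest_args(struct kvm_vm *vm)
{
	vcpu_args_set(vm, 0, 2, (uint64_t) 1, (uint64_t) 2);
}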
1171 + /* VM VCPU System Regs Get 1172 + * 1173 + * Input Args: 1174 + * vm - Virtual Machine 1175 + * vcpuid - VCPU ID 1176 + * 1177 + * Output Args: 1178 + * sregs - current state of VCPU system regs 1179 + * 1180 + * Return: None 1181 + * 1182 + * Obtains the current system register state for the VCPU specified by 1183 + * vcpuid and stores it at the location given by sregs. 1184 + */ 1185 + void vcpu_sregs_get(struct kvm_vm *vm, 1186 + uint32_t vcpuid, struct kvm_sregs *sregs) 1187 + { 1188 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1189 + int ret; 1190 + 1191 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1192 + 1193 + /* Get the current system registers into the 1194 + * caller-provided buffer. */ 1195 + ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs); 1196 + TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i", 1197 + ret, errno); 1198 + } 1199 + 1200 + /* VM VCPU System Regs Set 1201 + * 1202 + * Input Args: 1203 + * vm - Virtual Machine 1204 + * vcpuid - VCPU ID 1205 + * sregs - Values to set VCPU system regs to 1206 + * 1207 + * Output Args: None 1208 + * 1209 + * Return: None 1210 + * 1211 + * Sets the system regs of the VCPU specified by vcpuid to the values 1212 + * given by sregs. 1213 + */ 1214 + void vcpu_sregs_set(struct kvm_vm *vm, 1215 + uint32_t vcpuid, struct kvm_sregs *sregs) 1216 + { 1217 + int ret = _vcpu_sregs_set(vm, vcpuid, sregs); 1218 + TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, " 1219 + "rc: %i errno: %i", ret, errno); 1220 + } 1221 + 1222 + int _vcpu_sregs_set(struct kvm_vm *vm, 1223 + uint32_t vcpuid, struct kvm_sregs *sregs) 1224 + { 1225 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1226 + int ret; 1227 + 1228 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1229 + 1230 + /* Set the sregs. */ 1231 + return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); 1232 + } 1233 + 1234 + /* VCPU Ioctl 1235 + * 1236 + * Input Args: 1237 + * vm - Virtual Machine 1238 + * vcpuid - VCPU ID 1239 + * cmd - Ioctl number 1240 + * arg - Argument to pass to the ioctl 1241 + * 1242 + * Return: None 1243 + * 1244 + * Issues an arbitrary ioctl on a VCPU fd. 1245 + */ 1246 + void vcpu_ioctl(struct kvm_vm *vm, 1247 + uint32_t vcpuid, unsigned long cmd, void *arg) 1248 + { 1249 + struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1250 + int ret; 1251 + 1252 + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1253 + 1254 + ret = ioctl(vcpu->fd, cmd, arg); 1255 + TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)", 1256 + cmd, ret, errno, strerror(errno)); 1257 + } 1258 + 1259 + /* VM Ioctl 1260 + * 1261 + * Input Args: 1262 + * vm - Virtual Machine 1263 + * cmd - Ioctl number 1264 + * arg - Argument to pass to the ioctl 1265 + * 1266 + * Return: None 1267 + * 1268 + * Issues an arbitrary ioctl on a VM fd. 1269 + */ 1270 + void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) 1271 + { 1272 + int ret; 1273 + 1274 + ret = ioctl(vm->fd, cmd, arg); 1275 + TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", 1276 + cmd, ret, errno, strerror(errno)); 1277 + } 1278 + 1279 + /* VM Dump 1280 + * 1281 + * Input Args: 1282 + * vm - Virtual Machine 1283 + * indent - Left margin indent amount 1284 + * 1285 + * Output Args: 1286 + * stream - Output FILE stream 1287 + * 1288 + * Return: None 1289 + * 1290 + * Dumps the current state of the VM given by vm, to the FILE stream 1291 + * given by stream. 
1292 + */ 1293 + void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 1294 + { 1295 + struct userspace_mem_region *region; 1296 + struct vcpu *vcpu; 1297 + 1298 + fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); 1299 + fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); 1300 + fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); 1301 + fprintf(stream, "%*sMem Regions:\n", indent, ""); 1302 + for (region = vm->userspace_mem_region_head; region; 1303 + region = region->next) { 1304 + fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " 1305 + "host_virt: %p\n", indent + 2, "", 1306 + (uint64_t) region->region.guest_phys_addr, 1307 + (uint64_t) region->region.memory_size, 1308 + region->host_mem); 1309 + fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); 1310 + sparsebit_dump(stream, region->unused_phy_pages, 0); 1311 + } 1312 + fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); 1313 + sparsebit_dump(stream, vm->vpages_mapped, indent + 2); 1314 + fprintf(stream, "%*spgd_created: %u\n", indent, "", 1315 + vm->pgd_created); 1316 + if (vm->pgd_created) { 1317 + fprintf(stream, "%*sVirtual Translation Tables:\n", 1318 + indent + 2, ""); 1319 + virt_dump(stream, vm, indent + 4); 1320 + } 1321 + fprintf(stream, "%*sVCPUs:\n", indent, ""); 1322 + for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next) 1323 + vcpu_dump(stream, vm, vcpu->id, indent + 2); 1324 + } 1325 + 1326 + /* VM VCPU Dump 1327 + * 1328 + * Input Args: 1329 + * vm - Virtual Machine 1330 + * vcpuid - VCPU ID 1331 + * indent - Left margin indent amount 1332 + * 1333 + * Output Args: 1334 + * stream - Output FILE stream 1335 + * 1336 + * Return: None 1337 + * 1338 + * Dumps the current state of the VCPU specified by vcpuid, within the VM 1339 + * given by vm, to the FILE stream given by stream. 
1340 + */ 1341 + void vcpu_dump(FILE *stream, struct kvm_vm *vm, 1342 + uint32_t vcpuid, uint8_t indent) 1343 + { 1344 + struct kvm_regs regs; 1345 + struct kvm_sregs sregs; 1346 + 1347 + fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); 1348 + 1349 + fprintf(stream, "%*sregs:\n", indent + 2, ""); 1350 + vcpu_regs_get(vm, vcpuid, &regs); 1351 + regs_dump(stream, &regs, indent + 4); 1352 + 1353 + fprintf(stream, "%*ssregs:\n", indent + 2, ""); 1354 + vcpu_sregs_get(vm, vcpuid, &sregs); 1355 + sregs_dump(stream, &sregs, indent + 4); 1356 + } 1357 + 1358 + /* Known KVM exit reasons */ 1359 + static struct exit_reason { 1360 + unsigned int reason; 1361 + const char *name; 1362 + } exit_reasons_known[] = { 1363 + {KVM_EXIT_UNKNOWN, "UNKNOWN"}, 1364 + {KVM_EXIT_EXCEPTION, "EXCEPTION"}, 1365 + {KVM_EXIT_IO, "IO"}, 1366 + {KVM_EXIT_HYPERCALL, "HYPERCALL"}, 1367 + {KVM_EXIT_DEBUG, "DEBUG"}, 1368 + {KVM_EXIT_HLT, "HLT"}, 1369 + {KVM_EXIT_MMIO, "MMIO"}, 1370 + {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, 1371 + {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, 1372 + {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, 1373 + {KVM_EXIT_INTR, "INTR"}, 1374 + {KVM_EXIT_SET_TPR, "SET_TPR"}, 1375 + {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, 1376 + {KVM_EXIT_S390_SIEIC, "S390_SIEIC"}, 1377 + {KVM_EXIT_S390_RESET, "S390_RESET"}, 1378 + {KVM_EXIT_DCR, "DCR"}, 1379 + {KVM_EXIT_NMI, "NMI"}, 1380 + {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, 1381 + {KVM_EXIT_OSI, "OSI"}, 1382 + {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, 1383 + #ifdef KVM_EXIT_MEMORY_NOT_PRESENT 1384 + {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, 1385 + #endif 1386 + }; 1387 + 1388 + /* Exit Reason String 1389 + * 1390 + * Input Args: 1391 + * exit_reason - Exit reason 1392 + * 1393 + * Output Args: None 1394 + * 1395 + * Return: 1396 + * Constant string pointer describing the exit reason. 1397 + * 1398 + * Locates and returns a constant string that describes the KVM exit 1399 + * reason given by exit_reason. If no such string is found, a constant 1400 + * string of "Unknown" is returned. 1401 + */ 1402 + const char *exit_reason_str(unsigned int exit_reason) 1403 + { 1404 + unsigned int n1; 1405 + 1406 + for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { 1407 + if (exit_reason == exit_reasons_known[n1].reason) 1408 + return exit_reasons_known[n1].name; 1409 + } 1410 + 1411 + return "Unknown"; 1412 + } 1413 + 
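/* [Editorial sketch, not part of the patch] A typical run loop pairs
 * vcpu_run() with exit_reason_str() for readable failure messages;
 * KVM_EXIT_IO is merely the exit this hypothetical test expects.
 */
static void run_and_expect_io(struct kvm_vm *vm)
{
	struct kvm_run *run = vcpu_state(vm, 0);

	vcpu_run(vm, 0);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "unexpected exit, got: %s",
		    exit_reason_str(run->exit_reason));
}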
1414 + /* Physical Page Allocate 1415 + * 1416 + * Input Args: 1417 + * vm - Virtual Machine 1418 + * paddr_min - Physical address minimum 1419 + * memslot - Memory region to allocate page from 1420 + * 1421 + * Output Args: None 1422 + * 1423 + * Return: 1424 + * Starting physical address 1425 + * 1426 + * Within the VM specified by vm, locates an available physical page 1427 + * at or above paddr_min. If found, the page is marked as in use 1428 + * and its address is returned. A TEST_ASSERT failure occurs if no 1429 + * page is available at or above paddr_min. 1430 + */ 1431 + vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, 1432 + vm_paddr_t paddr_min, uint32_t memslot) 1433 + { 1434 + struct userspace_mem_region *region; 1435 + sparsebit_idx_t pg; 1436 + 1437 + TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " 1438 + "not divisible by page size.\n" 1439 + " paddr_min: 0x%lx page_size: 0x%x", 1440 + paddr_min, vm->page_size); 1441 + 1442 + /* Locate memory region. */ 1443 + region = memslot2region(vm, memslot); 1444 + 1445 + /* Locate next available physical page at or above paddr_min. */ 1446 + pg = paddr_min >> vm->page_shift; 1447 + 1448 + if (!sparsebit_is_set(region->unused_phy_pages, pg)) { 1449 + pg = sparsebit_next_set(region->unused_phy_pages, pg); 1450 + if (pg == 0) { 1451 + fprintf(stderr, "No guest physical page available, " 1452 + "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", 1453 + paddr_min, vm->page_size, memslot); 1454 + fputs("---- vm dump ----\n", stderr); 1455 + vm_dump(stderr, vm, 2); 1456 + abort(); 1457 + } 1458 + } 1459 + 1460 + /* Specify page as in use and return its address. */ 1461 + sparsebit_clear(region->unused_phy_pages, pg); 1462 + 1463 + return pg * vm->page_size; 1464 + } 1465 + 1466 + /* Address Guest Virtual to Host Virtual 1467 + * 1468 + * Input Args: 1469 + * vm - Virtual Machine 1470 + * gva - VM virtual address 1471 + * 1472 + * Output Args: None 1473 + * 1474 + * Return: 1475 + * Equivalent host virtual address 1476 + */ 1477 + void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) 1478 + { 1479 + return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); 1480 + }
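/* [Editorial sketch, not part of the patch] Skeleton of a selftest built on
 * this file; loading guest code and registers is architecture specific
 * (see lib/x86.c) and elided here.
 */
#include <fcntl.h>
#include "kvm_util.h"

int main(void)
{
	struct kvm_vm *vm;

	vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
	vm_vcpu_add(vm, 0);	/* also runs the arch vcpu_setup() */
	/* ... place guest code, set registers, run and check exits ... */
	kvm_vm_free(vm);
	return 0;
}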
+67
tools/testing/selftests/kvm/lib/kvm_util_internal.h
··· 1 + /* 2 + * tools/testing/selftests/kvm/lib/kvm_util_internal.h 3 + * 4 + * Copyright (C) 2018, Google LLC. 5 + * 6 + * This work is licensed under the terms of the GNU GPL, version 2. 7 + */ 8 + 9 + #ifndef KVM_UTIL_INTERNAL_H 10 + #define KVM_UTIL_INTERNAL_H 1 11 + 12 + #include "sparsebit.h" 13 + 14 + #ifndef BITS_PER_BYTE 15 + #define BITS_PER_BYTE 8 16 + #endif 17 + 18 + #ifndef BITS_PER_LONG 19 + #define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) 20 + #endif 21 + 22 + #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) 23 + #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) 24 + 25 + /* Concrete definition of struct kvm_vm. */ 26 + struct userspace_mem_region { 27 + struct userspace_mem_region *next, *prev; 28 + struct kvm_userspace_memory_region region; 29 + struct sparsebit *unused_phy_pages; 30 + int fd; 31 + off_t offset; 32 + void *host_mem; 33 + void *mmap_start; 34 + size_t mmap_size; 35 + }; 36 + 37 + struct vcpu { 38 + struct vcpu *next, *prev; 39 + uint32_t id; 40 + int fd; 41 + struct kvm_run *state; 42 + }; 43 + 44 + struct kvm_vm { 45 + int mode; 46 + int fd; 47 + unsigned int page_size; 48 + unsigned int page_shift; 49 + uint64_t max_gfn; 50 + struct vcpu *vcpu_head; 51 + struct userspace_mem_region *userspace_mem_region_head; 52 + struct sparsebit *vpages_valid; 53 + struct sparsebit *vpages_mapped; 54 + bool pgd_created; 55 + vm_paddr_t pgd; 56 + }; 57 + 58 + struct vcpu *vcpu_find(struct kvm_vm *vm, 59 + uint32_t vcpuid); 60 + void vcpu_setup(struct kvm_vm *vm, int vcpuid); 61 + void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); 62 + void regs_dump(FILE *stream, struct kvm_regs *regs, 63 + uint8_t indent); 64 + void sregs_dump(FILE *stream, struct kvm_sregs *sregs, 65 + uint8_t indent); 66 + 67 + #endif
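/* [Editorial sketch, not part of the patch] The structures above are plain
 * doubly-linked lists; a hypothetical library-internal helper would walk
 * them like this.
 */
static inline uint32_t vcpu_count(struct kvm_vm *vm)
{
	uint32_t n = 0;
	struct vcpu *vcpu;

	for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
		n++;
	return n;
}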
+2087
tools/testing/selftests/kvm/lib/sparsebit.c
··· 1 + /* 2 + * Sparse bit array 3 + * 4 + * Copyright (C) 2018, Google LLC. 5 + * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver) 6 + * 7 + * This work is licensed under the terms of the GNU GPL, version 2. 8 + * 9 + * This library provides functions to support a memory efficient bit array, 10 + * with an index size of 2^64. A sparsebit array is allocated through 11 + * the use of sparsebit_alloc() and freed via sparsebit_free(), 12 + * such as in the following: 13 + * 14 + * struct sparsebit *s; 15 + * s = sparsebit_alloc(); 16 + * sparsebit_free(&s); 17 + * 18 + * The allocated struct sparsebit is opaque to the caller. 19 + * Note that sparsebit_free() takes a pointer to the caller's sparsebit 20 + * pointer. This is so that sparsebit_free() is able to poison 21 + * that pointer (e.g. set it to NULL) before 22 + * returning to the caller. 23 + * 24 + * Between the return of sparsebit_alloc() and the call of 25 + * sparsebit_free(), there are multiple query and modifying operations 26 + * that can be performed on the allocated sparsebit array. All of 27 + * these operations take as a parameter the value returned from 28 + * sparsebit_alloc() and most also take a bit index. Frequently 29 + * used routines include: 30 + * 31 + * ---- Query Operations 32 + * sparsebit_is_set(s, idx) 33 + * sparsebit_is_clear(s, idx) 34 + * sparsebit_any_set(s) 35 + * sparsebit_first_set(s) 36 + * sparsebit_next_set(s, prev_idx) 37 + * 38 + * ---- Modifying Operations 39 + * sparsebit_set(s, idx) 40 + * sparsebit_clear(s, idx) 41 + * sparsebit_set_num(s, idx, num); 42 + * sparsebit_clear_num(s, idx, num); 43 + * 44 + * A common operation is to iterate over all the bits set in a 45 + * sparsebit array. This can be done via code with the following structure: 46 + * 47 + * sparsebit_idx_t idx; 48 + * if (sparsebit_any_set(s)) { 49 + * idx = sparsebit_first_set(s); 50 + * do { 51 + * ... 52 + * idx = sparsebit_next_set(s, idx); 53 + * } while (idx != 0); 54 + * } 55 + * 56 + * The index of the first bit set needs to be obtained via 57 + * sparsebit_first_set(), because sparsebit_next_set() needs 58 + * the index of the previous set bit. The sparsebit_idx_t type is 59 + * unsigned, so there is no index before 0 available to pass in. 60 + * Also, the call to sparsebit_first_set() is not made unless there 61 + * is at least 1 bit in the array set. This is because sparsebit_first_set() 62 + * aborts if called with no bits set. 63 + * It is the caller's responsibility to ensure that the 64 + * sparsebit array has at least a single bit set before calling 65 + * sparsebit_first_set(). 66 + * 67 + * ==== Implementation Overview ==== 68 + * For the most part the internal implementation of sparsebit is 69 + * opaque to the caller. One important implementation detail that the 70 + * caller may need to be aware of is the spatial complexity of the 71 + * implementation. This implementation of a sparsebit array is not 72 + * only sparse, in that it uses memory proportional to the number of bits 73 + * set. It is also efficient in memory usage when most of the bits are 74 + * set. 
75 + * 76 + * At a high level the state of the bit settings is maintained through 77 + * the use of a binary-search tree, where each node contains at least 78 + * the following members: 79 + * 80 + * typedef uint64_t sparsebit_idx_t; 81 + * typedef uint64_t sparsebit_num_t; 82 + * 83 + * sparsebit_idx_t idx; 84 + * uint32_t mask; 85 + * sparsebit_num_t num_after; 86 + * 87 + * The idx member contains the bit index of the first bit described by this 88 + * node, while the mask member stores the setting of the first 32 bits. 89 + * The setting of the bit at idx + n, where 0 <= n < 32, is located in the 90 + * mask member at 1 << n. 91 + * 92 + * Nodes are sorted by idx and the bits described by two nodes will never 93 + * overlap. The idx member is always aligned to the mask size, i.e. a 94 + * multiple of 32. 95 + * 96 + * Beyond a typical implementation, the nodes in this implementation also 97 + * contain a member named num_after. The num_after member holds the 98 + * number of bits immediately after the mask bits that are contiguously set. 99 + * The use of the num_after member allows this implementation to efficiently 100 + * represent cases where most bits are set. For example, the case of all 101 + * but the last two bits set is represented by the following two nodes: 102 + * 103 + * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0 104 + * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0 105 + * 106 + * ==== Invariants ==== 107 + * This implementation uses the following invariants: 108 + * 109 + * + Nodes are only used to represent bits that are set. 110 + * Nodes with a mask of 0 and num_after of 0 are not allowed. 111 + * 112 + * + Sum of bits set in all the nodes is equal to the value of 113 + * the struct sparsebit num_set member. 114 + * 115 + * + The setting of at least one bit is always described in a node's 116 + * mask (mask >= 1). 117 + * 118 + * + A node with all mask bits set only occurs when the last bit 119 + * described by the previous node is not equal to this node's 120 + * starting index - 1. All such occurrences of this condition are 121 + * avoided by moving the setting of the node's mask bits into 122 + * the previous node's num_after setting. 123 + * 124 + * + A node's starting index is evenly divisible by the number of bits 125 + * within its mask member. 126 + * 127 + * + Nodes never represent a range of bits that wrap around the 128 + * highest supported index. 129 + * 130 + * (idx + MASK_BITS + num_after - 1) <= (((sparsebit_idx_t) 0) - 1) 131 + * 132 + * As a consequence of the above, the num_after member of a node 133 + * will always be <=: 134 + * 135 + * maximum_index - nodes_starting_index - number_of_mask_bits 136 + * 137 + * + Nodes within the binary search tree are sorted based on each 138 + * node's starting index. 139 + * 140 + * + The ranges of bits described by any two nodes do not overlap. The 141 + * range of bits described by a single node is: 142 + * 143 + * start: node->idx 144 + * end (inclusive): node->idx + MASK_BITS + node->num_after - 1; 145 + * 146 + * Note, at times these invariants are temporarily violated for a 147 + * specific portion of the code. For example, when setting a mask 148 + * bit, there is a small delay between when the mask bit is set and the 149 + * value in the struct sparsebit num_set member is updated. 
Other 150 + * temporary violations occur when node_split() is called with a specified 151 + * index, to assure that a node exists whose mask represents the bit 152 + * at the specified index. At times to do this node_split() 153 + * must split an existing node into two nodes or create a node that 154 + * has no bits set. Such temporary violations must be corrected before 155 + * returning to the caller. These corrections are typically performed 156 + * by the local function node_reduce(). 157 + */ 158 + 159 + #include "test_util.h" 160 + #include "sparsebit.h" 161 + #include <limits.h> 162 + #include <assert.h> 163 + 164 + #define DUMP_LINE_MAX 100 /* Does not include indent amount */ 165 + 166 + typedef uint32_t mask_t; 167 + #define MASK_BITS (sizeof(mask_t) * CHAR_BIT) 168 + 169 + struct node { 170 + struct node *parent; 171 + struct node *left; 172 + struct node *right; 173 + sparsebit_idx_t idx; /* index of least-significant bit in mask */ 174 + sparsebit_num_t num_after; /* num contiguously set after mask */ 175 + mask_t mask; 176 + }; 177 + 178 + struct sparsebit { 179 + /* 180 + * Points to root node of the binary search 181 + * tree. Equal to NULL when no bits are set in 182 + * the entire sparsebit array. 183 + */ 184 + struct node *root; 185 + 186 + /* 187 + * A redundant count of the total number of bits set. Used for 188 + * diagnostic purposes and to change the time complexity of 189 + * sparsebit_num_set() from O(n) to O(1). 190 + * Note: Due to overflow, a value of 0 means none or all set. 191 + */ 192 + sparsebit_num_t num_set; 193 + }; 194 + 195 + /* Returns the number of set bits described by the settings 196 + * of the node pointed to by nodep. 197 + */ 198 + static sparsebit_num_t node_num_set(struct node *nodep) 199 + { 200 + return nodep->num_after + __builtin_popcount(nodep->mask); 201 + } 202 + 203 + /* Returns a pointer to the node that describes the 204 + * lowest bit index. 205 + */ 206 + static struct node *node_first(struct sparsebit *s) 207 + { 208 + struct node *nodep; 209 + 210 + for (nodep = s->root; nodep && nodep->left; nodep = nodep->left) 211 + ; 212 + 213 + return nodep; 214 + } 215 + 216 + /* Returns a pointer to the node that describes the 217 + * lowest bit index > the index of the node pointed to by np. 218 + * Returns NULL if no node with a higher index exists. 219 + */ 220 + static struct node *node_next(struct sparsebit *s, struct node *np) 221 + { 222 + struct node *nodep = np; 223 + 224 + /* 225 + * If current node has a right child, next node is the left-most 226 + * of the right child. 227 + */ 228 + if (nodep->right) { 229 + for (nodep = nodep->right; nodep->left; nodep = nodep->left) 230 + ; 231 + return nodep; 232 + } 233 + 234 + /* 235 + * No right child. Go up until node is left child of a parent. 236 + * That parent is then the next node. 237 + */ 238 + while (nodep->parent && nodep == nodep->parent->right) 239 + nodep = nodep->parent; 240 + 241 + return nodep->parent; 242 + } 243 + 244 + /* Searches for and returns a pointer to the node that describes the 245 + * highest index < the index of the node pointed to by np. 246 + * Returns NULL if no node with a lower index exists. 247 + */ 248 + static struct node *node_prev(struct sparsebit *s, struct node *np) 249 + { 250 + struct node *nodep = np; 251 + 252 + /* 253 + * If current node has a left child, previous node is the right-most 254 + * of the left child. 
255 + */ 256 + if (nodep->left) { 257 + for (nodep = nodep->left; nodep->right; nodep = nodep->right) 258 + ; 259 + return (struct node *) nodep; 260 + } 261 + 262 + /* 263 + * No left child. Go up until node is right child of a parent. 264 + * That parent is then the previous node. 265 + */ 266 + while (nodep->parent && nodep == nodep->parent->left) 267 + nodep = nodep->parent; 268 + 269 + return (struct node *) nodep->parent; 270 + } 271 + 272 + 273 + /* Allocates space to hold a copy of the node sub-tree pointed to by 274 + * subtree and duplicates the bit settings to the newly allocated nodes. 275 + * Returns the newly allocated copy of subtree. 276 + */ 277 + static struct node *node_copy_subtree(struct node *subtree) 278 + { 279 + struct node *root; 280 + 281 + /* Duplicate the node at the root of the subtree */ 282 + root = calloc(1, sizeof(*root)); 283 + if (!root) { 284 + perror("calloc"); 285 + abort(); 286 + } 287 + 288 + root->idx = subtree->idx; 289 + root->mask = subtree->mask; 290 + root->num_after = subtree->num_after; 291 + 292 + /* As needed, recursively duplicate the left and right subtrees */ 293 + if (subtree->left) { 294 + root->left = node_copy_subtree(subtree->left); 295 + root->left->parent = root; 296 + } 297 + 298 + if (subtree->right) { 299 + root->right = node_copy_subtree(subtree->right); 300 + root->right->parent = root; 301 + } 302 + 303 + return root; 304 + } 305 + 306 + /* Searches for and returns a pointer to the node that describes the setting 307 + * of the bit given by idx. A node describes the setting of a bit if its 308 + * index is within the bits described by the mask bits or the number of 309 + * contiguous bits set after the mask. Returns NULL if there is no such node. 310 + */ 311 + static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx) 312 + { 313 + struct node *nodep; 314 + 315 + /* Find the node that describes the setting of the bit at idx */ 316 + for (nodep = s->root; nodep; 317 + nodep = nodep->idx > idx ? nodep->left : nodep->right) { 318 + if (idx >= nodep->idx && 319 + idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) 320 + break; 321 + } 322 + 323 + return nodep; 324 + } 325 + 
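/* [Editorial worked example, not part of the patch] With MASK_BITS == 32,
 * a node { idx = 0x40, mask = 0xf, num_after = 8 } describes set bits
 * 0x40-0x43 (via mask) and 0x60-0x67 (via num_after). node_find() returns
 * this node for any index in 0x40-0x67, even one whose mask bit is clear,
 * e.g. node_find(s, 0x50).
 */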
326 + /* Entry Requirements: 327 + * + A node that describes the setting of idx is not already present. 328 + * 329 + * Adds a new node to describe the setting of the bit at the index given 330 + * by idx. Returns a pointer to the newly added node. 331 + * 332 + * TODO(lhuemill): Degenerate cases cause the tree to get unbalanced. 333 + */ 334 + static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx) 335 + { 336 + struct node *nodep, *parentp, *prev; 337 + 338 + /* Allocate and initialize the new node. */ 339 + nodep = calloc(1, sizeof(*nodep)); 340 + if (!nodep) { 341 + perror("calloc"); 342 + abort(); 343 + } 344 + 345 + nodep->idx = idx & -MASK_BITS; 346 + 347 + /* If no nodes, set it up as the root node. */ 348 + if (!s->root) { 349 + s->root = nodep; 350 + return nodep; 351 + } 352 + 353 + /* 354 + * Find the parent where the new node should be attached 355 + * and add the node there. 356 + */ 357 + parentp = s->root; 358 + while (true) { 359 + if (idx < parentp->idx) { 360 + if (!parentp->left) { 361 + parentp->left = nodep; 362 + nodep->parent = parentp; 363 + break; 364 + } 365 + parentp = parentp->left; 366 + } else { 367 + assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1); 368 + if (!parentp->right) { 369 + parentp->right = nodep; 370 + nodep->parent = parentp; 371 + break; 372 + } 373 + parentp = parentp->right; 374 + } 375 + } 376 + 377 + /* 378 + * Do the num_after bits of the previous node overlap with the mask 379 + * of the new node? If so, set the bits in the new node's mask 380 + * and reduce the previous node's num_after. 381 + */ 382 + prev = node_prev(s, nodep); 383 + while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) { 384 + unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1) 385 + - nodep->idx; 386 + assert(prev->num_after > 0); 387 + assert(n1 < MASK_BITS); 388 + assert(!(nodep->mask & (1 << n1))); 389 + nodep->mask |= (1 << n1); 390 + prev->num_after--; 391 + } 392 + 393 + return nodep; 394 + } 395 + 396 + /* Returns whether all the bits in the sparsebit array are set. */ 397 + bool sparsebit_all_set(struct sparsebit *s) 398 + { 399 + /* 400 + * If any nodes exist, there must be at least one bit set. The only case 401 + * where a bit is set and the total num set is 0 is when all bits 402 + * are set. 403 + */ 404 + return s->root && s->num_set == 0; 405 + } 406 + 407 + /* Clears all bits described by the node pointed to by nodep, then 408 + * removes the node. 409 + */ 410 + static void node_rm(struct sparsebit *s, struct node *nodep) 411 + { 412 + struct node *tmp; 413 + sparsebit_num_t num_set; 414 + 415 + num_set = node_num_set(nodep); 416 + assert(s->num_set >= num_set || sparsebit_all_set(s)); 417 + s->num_set -= node_num_set(nodep); 418 + 419 + /* Have both left and right child */ 420 + if (nodep->left && nodep->right) { 421 + /* 422 + * Move the left subtree to the leftmost leaf node 423 + * of the right child. 
424 + */ 425 + for (tmp = nodep->right; tmp->left; tmp = tmp->left) 426 + ; 427 + tmp->left = nodep->left; 428 + nodep->left = NULL; 429 + tmp->left->parent = tmp; 430 + } 431 + 432 + /* Left only child */ 433 + if (nodep->left) { 434 + if (!nodep->parent) { 435 + s->root = nodep->left; 436 + nodep->left->parent = NULL; 437 + } else { 438 + nodep->left->parent = nodep->parent; 439 + if (nodep == nodep->parent->left) 440 + nodep->parent->left = nodep->left; 441 + else { 442 + assert(nodep == nodep->parent->right); 443 + nodep->parent->right = nodep->left; 444 + } 445 + } 446 + 447 + nodep->parent = nodep->left = nodep->right = NULL; 448 + free(nodep); 449 + 450 + return; 451 + } 452 + 453 + 454 + /* Right only child */ 455 + if (nodep->right) { 456 + if (!nodep->parent) { 457 + s->root = nodep->right; 458 + nodep->right->parent = NULL; 459 + } else { 460 + nodep->right->parent = nodep->parent; 461 + if (nodep == nodep->parent->left) 462 + nodep->parent->left = nodep->right; 463 + else { 464 + assert(nodep == nodep->parent->right); 465 + nodep->parent->right = nodep->right; 466 + } 467 + } 468 + 469 + nodep->parent = nodep->left = nodep->right = NULL; 470 + free(nodep); 471 + 472 + return; 473 + } 474 + 475 + /* Leaf Node */ 476 + if (!nodep->parent) { 477 + s->root = NULL; 478 + } else { 479 + if (nodep->parent->left == nodep) 480 + nodep->parent->left = NULL; 481 + else { 482 + assert(nodep == nodep->parent->right); 483 + nodep->parent->right = NULL; 484 + } 485 + } 486 + 487 + nodep->parent = nodep->left = nodep->right = NULL; 488 + free(nodep); 489 + 490 + return; 491 + } 492 + 493 + /* Splits the node containing the bit at idx so that there is a node 494 + * that starts at the specified index. If no such node exists, a new 495 + * node at the specified index is created. Returns the new node. 496 + * 497 + * idx must be aligned to a mask boundary. 498 + */ 499 + static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) 500 + { 501 + struct node *nodep1, *nodep2; 502 + sparsebit_idx_t offset; 503 + sparsebit_num_t orig_num_after; 504 + 505 + assert(!(idx % MASK_BITS)); 506 + 507 + /* 508 + * Is there a node that describes the setting of idx? 509 + * If not, add it. 510 + */ 511 + nodep1 = node_find(s, idx); 512 + if (!nodep1) 513 + return node_add(s, idx); 514 + 515 + /* 516 + * All done if the starting index of the node is where the 517 + * split should occur. 518 + */ 519 + if (nodep1->idx == idx) 520 + return nodep1; 521 + 522 + /* 523 + * Split point not at start of mask, so it must be part of 524 + * bits described by num_after. 525 + */ 526 + 527 + /* 528 + * Calculate offset within num_after for where the split is 529 + * to occur. 530 + */ 531 + offset = idx - (nodep1->idx + MASK_BITS); 532 + orig_num_after = nodep1->num_after; 533 + 534 + /* 535 + * Add a new node to describe the bits starting at 536 + * the split point. 537 + */ 538 + nodep1->num_after = offset; 539 + nodep2 = node_add(s, idx); 540 + 541 + /* Move bits after the split point into the new node */ 542 + nodep2->num_after = orig_num_after - offset; 543 + if (nodep2->num_after >= MASK_BITS) { 544 + nodep2->mask = ~(mask_t) 0; 545 + nodep2->num_after -= MASK_BITS; 546 + } else { 547 + nodep2->mask = (1 << nodep2->num_after) - 1; 548 + nodep2->num_after = 0; 549 + } 550 + 551 + return nodep2; 552 + } 553 + 
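/* [Editorial worked example, not part of the patch] With MASK_BITS == 32,
 * splitting { idx = 0, mask = ~0, num_after = 0x40 } (bits 0-0x5f set) at
 * idx 0x40 gives offset = 0x40 - (0 + 32) = 32, so the original node keeps
 * num_after = 32 (bits 0-0x3f) and node_split() returns the new node
 * { idx = 0x40, mask = ~0, num_after = 0 } covering bits 0x40-0x5f.
 */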
For example, a node with a mask with 556 + * all bits set adjacent to a previous node will get combined into a 557 + * single node with an increased num_after setting. 558 + * 559 + * After each reduction, a further check is made to see if additional 560 + * reductions are possible with the new previous and next nodes. Note, 561 + * a search for a reduction is only done across the nodes nearest nodep 562 + * and those that became part of a reduction. Reductions beyond nodep 563 + * and the adjacent nodes that are reduced are not discovered. It is the 564 + * responsibility of the caller to pass a nodep that is within one node 565 + * of each possible reduction. 566 + * 567 + * This function does not fix the temporary violation of all invariants. 568 + * For example, it does not fix the case where the bit settings described 569 + * by two or more nodes overlap. Such a violation introduces the potential 570 + * complication of a bit setting for a specific index having different settings 571 + * in different nodes. This would then introduce the further complication 572 + * of which node has the correct setting of the bit and thus such conditions 573 + * are not allowed. 574 + * 575 + * This function is designed to fix invariant violations that are introduced 576 + * by node_split() and by changes to the nodes mask or num_after members. 577 + * For example, when setting a bit within a nodes mask, the function that 578 + * sets the bit doesn't have to worry about whether the setting of that 579 + * bit caused the mask to have leading only or trailing only bits set. 580 + * Instead, the function can call node_reduce(), with nodep equal to the 581 + * node address that it set a mask bit in, and node_reduce() will notice 582 + * the cases of leading or trailing only bits and that there is an 583 + * adjacent node that the bit settings could be merged into. 584 + * 585 + * This implementation specifically detects and corrects violations of the 586 + * following invariants: 587 + * 588 + * + Nodes are only used to represent bits that are set. 589 + * Nodes with a mask of 0 and num_after of 0 are not allowed. 590 + * 591 + * + The setting of at least one bit is always described in a nodes 592 + * mask (mask >= 1). 593 + * 594 + * + A node with all mask bits set only occurs when the last bit 595 + * described by the previous node is not equal to this nodes 596 + * starting index - 1. All such occurrences of this condition are 597 + * avoided by moving the setting of the nodes mask bits into 598 + * the previous nodes num_after setting. 599 + */ 600 + static void node_reduce(struct sparsebit *s, struct node *nodep) 601 + { 602 + bool reduction_performed; 603 + 604 + do { 605 + reduction_performed = false; 606 + struct node *prev, *next, *tmp; 607 + 608 + /* 1) Potential reductions within the current node. */ 609 + 610 + /* Nodes with all bits cleared may be removed. */ 611 + if (nodep->mask == 0 && nodep->num_after == 0) { 612 + /* 613 + * About to remove the node pointed to by 614 + * nodep, which normally would cause a problem 615 + * for the next pass through the reduction loop, 616 + * because the node at the starting point no longer 617 + * exists. This potential problem is handled 618 + * by first remembering the location of the next 619 + * or previous nodes. Doesn't matter which, because 620 + * once the node at nodep is removed, there will be 621 + * no other nodes between prev and next.
622 + * 623 + * Note, the checks performed on nodep against 624 + * both prev and next check for an adjacent 625 + * node that can be reduced into a single node. As 626 + * such, after removing the node at nodep, it doesn't 627 + * matter whether the nodep for the next pass 628 + * through the loop is equal to the previous pass 629 + * prev or next node. Either way, on the next pass 630 + * the one not selected will become either the 631 + * prev or next node. 632 + */ 633 + tmp = node_next(s, nodep); 634 + if (!tmp) 635 + tmp = node_prev(s, nodep); 636 + 637 + node_rm(s, nodep); 638 + nodep = NULL; 639 + 640 + nodep = tmp; 641 + reduction_performed = true; 642 + continue; 643 + } 644 + 645 + /* 646 + * When the mask is 0, can reduce the amount of num_after 647 + * bits by moving the initial num_after bits into the mask. 648 + */ 649 + if (nodep->mask == 0) { 650 + assert(nodep->num_after != 0); 651 + assert(nodep->idx + MASK_BITS > nodep->idx); 652 + 653 + nodep->idx += MASK_BITS; 654 + 655 + if (nodep->num_after >= MASK_BITS) { 656 + nodep->mask = ~0; 657 + nodep->num_after -= MASK_BITS; 658 + } else { 659 + nodep->mask = (1u << nodep->num_after) - 1; 660 + nodep->num_after = 0; 661 + } 662 + 663 + reduction_performed = true; 664 + continue; 665 + } 666 + 667 + /* 668 + * 2) Potential reductions between the current and 669 + * previous nodes. 670 + */ 671 + prev = node_prev(s, nodep); 672 + if (prev) { 673 + sparsebit_idx_t prev_highest_bit; 674 + 675 + /* Nodes with no bits set can be removed. */ 676 + if (prev->mask == 0 && prev->num_after == 0) { 677 + node_rm(s, prev); 678 + 679 + reduction_performed = true; 680 + continue; 681 + } 682 + 683 + /* 684 + * All mask bits set and previous node has 685 + * adjacent index. 686 + */ 687 + if (nodep->mask + 1 == 0 && 688 + prev->idx + MASK_BITS == nodep->idx) { 689 + prev->num_after += MASK_BITS + nodep->num_after; 690 + nodep->mask = 0; 691 + nodep->num_after = 0; 692 + 693 + reduction_performed = true; 694 + continue; 695 + } 696 + 697 + /* 698 + * Is node adjacent to previous node and the node 699 + * contains a single contiguous range of bits 700 + * starting from the beginning of the mask? 701 + */ 702 + prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after; 703 + if (prev_highest_bit + 1 == nodep->idx && 704 + (nodep->mask | (nodep->mask >> 1)) == nodep->mask) { 705 + /* 706 + * How many contiguous bits are there? 707 + * Is equal to the total number of set 708 + * bits, due to an earlier check that 709 + * there is a single contiguous range of 710 + * set bits. 711 + */ 712 + unsigned int num_contiguous 713 + = __builtin_popcount(nodep->mask); 714 + assert((num_contiguous > 0) && 715 + ((1ULL << num_contiguous) - 1) == nodep->mask); 716 + 717 + prev->num_after += num_contiguous; 718 + nodep->mask = 0; 719 + 720 + /* 721 + * For predictable performance, handle special 722 + * case where all mask bits are set and there 723 + * is a non-zero num_after setting. This code 724 + * is functionally correct without the following 725 + * conditionalized statements, but without them 726 + * the value of num_after is only reduced by 727 + * the number of mask bits per pass. There are 728 + * cases where num_after can be close to 2^64. 729 + * Without this code it could take nearly 730 + * (2^64) / 32 passes to perform the full 731 + * reduction.
732 + */ 733 + if (num_contiguous == MASK_BITS) { 734 + prev->num_after += nodep->num_after; 735 + nodep->num_after = 0; 736 + } 737 + 738 + reduction_performed = true; 739 + continue; 740 + } 741 + } 742 + 743 + /* 744 + * 3) Potential reductions between the current and 745 + * next nodes. 746 + */ 747 + next = node_next(s, nodep); 748 + if (next) { 749 + /* Nodes with no bits set can be removed. */ 750 + if (next->mask == 0 && next->num_after == 0) { 751 + node_rm(s, next); 752 + reduction_performed = true; 753 + continue; 754 + } 755 + 756 + /* 757 + * Is next node index adjacent to current node 758 + * and has a mask with all bits set? 759 + */ 760 + if (next->idx == nodep->idx + MASK_BITS + nodep->num_after && 761 + next->mask == ~(mask_t) 0) { 762 + nodep->num_after += MASK_BITS; 763 + next->mask = 0; 764 + nodep->num_after += next->num_after; 765 + next->num_after = 0; 766 + 767 + node_rm(s, next); 768 + next = NULL; 769 + 770 + reduction_performed = true; 771 + continue; 772 + } 773 + } 774 + } while (nodep && reduction_performed); 775 + } 776 + 777 + /* Returns whether the bit at the index given by idx, within the 778 + * sparsebit array is set or not. 779 + */ 780 + bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx) 781 + { 782 + struct node *nodep; 783 + 784 + /* Find the node that describes the setting of the bit at idx */ 785 + for (nodep = s->root; nodep; 786 + nodep = nodep->idx > idx ? nodep->left : nodep->right) 787 + if (idx >= nodep->idx && 788 + idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) 789 + goto have_node; 790 + 791 + return false; 792 + 793 + have_node: 794 + /* Bit is set if it is any of the bits described by num_after */ 795 + if (nodep->num_after && idx >= nodep->idx + MASK_BITS) 796 + return true; 797 + 798 + /* Is the corresponding mask bit set */ 799 + assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS); 800 + return !!(nodep->mask & (1 << (idx - nodep->idx))); 801 + } 802 + 803 + /* Within the sparsebit array pointed to by s, sets the bit 804 + * at the index given by idx. 805 + */ 806 + static void bit_set(struct sparsebit *s, sparsebit_idx_t idx) 807 + { 808 + struct node *nodep; 809 + 810 + /* Skip bits that are already set */ 811 + if (sparsebit_is_set(s, idx)) 812 + return; 813 + 814 + /* 815 + * Get a node where the bit at idx is described by the mask. 816 + * The node_split will also create a node, if there isn't 817 + * already a node that describes the setting of bit. 818 + */ 819 + nodep = node_split(s, idx & -MASK_BITS); 820 + 821 + /* Set the bit within the nodes mask */ 822 + assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); 823 + assert(!(nodep->mask & (1 << (idx - nodep->idx)))); 824 + nodep->mask |= 1 << (idx - nodep->idx); 825 + s->num_set++; 826 + 827 + node_reduce(s, nodep); 828 + } 829 + 830 + /* Within the sparsebit array pointed to by s, clears the bit 831 + * at the index given by idx. 832 + */ 833 + static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx) 834 + { 835 + struct node *nodep; 836 + 837 + /* Skip bits that are already cleared */ 838 + if (!sparsebit_is_set(s, idx)) 839 + return; 840 + 841 + /* Is there a node that describes the setting of this bit? */ 842 + nodep = node_find(s, idx); 843 + if (!nodep) 844 + return; 845 + 846 + /* 847 + * If a num_after bit, split the node, so that the bit is 848 + * part of a node mask. 
849 + */ 850 + if (idx >= nodep->idx + MASK_BITS) 851 + nodep = node_split(s, idx & -MASK_BITS); 852 + 853 + /* 854 + * After node_split above, bit at idx should be within the mask. 855 + * Clear that bit. 856 + */ 857 + assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); 858 + assert(nodep->mask & (1 << (idx - nodep->idx))); 859 + nodep->mask &= ~(1 << (idx - nodep->idx)); 860 + assert(s->num_set > 0 || sparsebit_all_set(s)); 861 + s->num_set--; 862 + 863 + node_reduce(s, nodep); 864 + } 865 + 866 + /* Recursively dumps to the FILE stream given by stream the contents 867 + * of the sub-tree of nodes pointed to by nodep. Each line of output 868 + * is prefixed by the number of spaces given by indent. On each 869 + * recursion, the indent amount is increased by 2. This causes nodes 870 + * at each level deeper into the binary search tree to be displayed 871 + * with a greater indent. 872 + */ 873 + static void dump_nodes(FILE *stream, struct node *nodep, 874 + unsigned int indent) 875 + { 876 + char *node_type; 877 + 878 + /* Dump contents of node */ 879 + if (!nodep->parent) 880 + node_type = "root"; 881 + else if (nodep == nodep->parent->left) 882 + node_type = "left"; 883 + else { 884 + assert(nodep == nodep->parent->right); 885 + node_type = "right"; 886 + } 887 + fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep); 888 + fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "", 889 + nodep->parent, nodep->left, nodep->right); 890 + fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n", 891 + indent, "", nodep->idx, nodep->mask, nodep->num_after); 892 + 893 + /* If present, dump contents of left child nodes */ 894 + if (nodep->left) 895 + dump_nodes(stream, nodep->left, indent + 2); 896 + 897 + /* If present, dump contents of right child nodes */ 898 + if (nodep->right) 899 + dump_nodes(stream, nodep->right, indent + 2); 900 + } 901 + 902 + static inline sparsebit_idx_t node_first_set(struct node *nodep, int start) 903 + { 904 + mask_t leading = (mask_t)1 << start; 905 + int n1 = __builtin_ctz(nodep->mask & -leading); 906 + 907 + return nodep->idx + n1; 908 + } 909 + 910 + static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start) 911 + { 912 + mask_t leading = (mask_t)1 << start; 913 + int n1 = __builtin_ctz(~nodep->mask & -leading); 914 + 915 + return nodep->idx + n1; 916 + } 917 + 918 + /* Dumps to the FILE stream specified by stream, the implementation dependent 919 + * internal state of s. Each line of output is prefixed with the number 920 + * of spaces given by indent. The output is completely implementation 921 + * dependent and subject to change. Output from this function should only 922 + * be used for diagnostic purposes. For example, this function can be 923 + * used by test cases after they detect an unexpected condition, as a means 924 + * to capture diagnostic information. 925 + */ 926 + static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s, 927 + unsigned int indent) 928 + { 929 + /* Dump the contents of s */ 930 + fprintf(stream, "%*sroot: %p\n", indent, "", s->root); 931 + fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set); 932 + 933 + if (s->root) 934 + dump_nodes(stream, s->root, indent); 935 + } 936 + 937 + /* Allocates and returns a new sparsebit array. The initial state 938 + * of the newly allocated sparsebit array has all bits cleared. 
939 + */ 940 + struct sparsebit *sparsebit_alloc(void) 941 + { 942 + struct sparsebit *s; 943 + 944 + /* Allocate top level structure. */ 945 + s = calloc(1, sizeof(*s)); 946 + if (!s) { 947 + perror("calloc"); 948 + abort(); 949 + } 950 + 951 + return s; 952 + } 953 + 954 + /* Frees the implementation dependent data for the sparsebit array 955 + * pointed to by s and poisons the pointer to that data. 956 + */ 957 + void sparsebit_free(struct sparsebit **sbitp) 958 + { 959 + struct sparsebit *s = *sbitp; 960 + 961 + if (!s) 962 + return; 963 + 964 + sparsebit_clear_all(s); 965 + free(s); 966 + *sbitp = NULL; 967 + } 968 + 969 + /* Makes a copy of the sparsebit array given by s, to the sparsebit 970 + * array given by d. Note, d must have already been allocated via 971 + * sparsebit_alloc(). It may already have bits set, which 972 + * will be cleared if they differ from those in s. 973 + */ 974 + void sparsebit_copy(struct sparsebit *d, struct sparsebit *s) 975 + { 976 + /* First clear any bits already set in the destination */ 977 + sparsebit_clear_all(d); 978 + 979 + if (s->root) { 980 + d->root = node_copy_subtree(s->root); 981 + d->num_set = s->num_set; 982 + } 983 + } 984 + 985 + /* Returns whether num consecutive bits starting at idx are all set. */ 986 + bool sparsebit_is_set_num(struct sparsebit *s, 987 + sparsebit_idx_t idx, sparsebit_num_t num) 988 + { 989 + sparsebit_idx_t next_cleared; 990 + 991 + assert(num > 0); 992 + assert(idx + num - 1 >= idx); 993 + 994 + /* With num > 0, the first bit must be set. */ 995 + if (!sparsebit_is_set(s, idx)) 996 + return false; 997 + 998 + /* Find the next cleared bit */ 999 + next_cleared = sparsebit_next_clear(s, idx); 1000 + 1001 + /* 1002 + * If no cleared bits beyond idx, then there are at least num 1003 + * set bits. idx + num doesn't wrap. Otherwise check if 1004 + * there are enough set bits between idx and the next cleared bit. 1005 + */ 1006 + return next_cleared == 0 || next_cleared - idx >= num; 1007 + } 1008 + 1009 + /* Returns whether the bit at the index given by idx is cleared. */ 1010 + bool sparsebit_is_clear(struct sparsebit *s, 1011 + sparsebit_idx_t idx) 1012 + { 1013 + return !sparsebit_is_set(s, idx); 1014 + } 1015 + 1016 + /* Returns whether num consecutive bits starting at idx are all cleared. */ 1017 + bool sparsebit_is_clear_num(struct sparsebit *s, 1018 + sparsebit_idx_t idx, sparsebit_num_t num) 1019 + { 1020 + sparsebit_idx_t next_set; 1021 + 1022 + assert(num > 0); 1023 + assert(idx + num - 1 >= idx); 1024 + 1025 + /* With num > 0, the first bit must be cleared. */ 1026 + if (!sparsebit_is_clear(s, idx)) 1027 + return false; 1028 + 1029 + /* Find the next set bit */ 1030 + next_set = sparsebit_next_set(s, idx); 1031 + 1032 + /* 1033 + * If no set bits beyond idx, then there are at least num 1034 + * cleared bits. idx + num doesn't wrap. Otherwise check if 1035 + * there are enough cleared bits between idx and the next set bit. 1036 + */ 1037 + return next_set == 0 || next_set - idx >= num; 1038 + } 1039 + 1040 + /* Returns the total number of bits set. Note: 0 is also returned for 1041 + * the case of all bits set. This is because with all bits set, there 1042 + * is 1 additional bit set beyond what can be represented in the return 1043 + * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0, 1044 + * to determine if the sparsebit array has any bits set.
1045 + */ 1046 + sparsebit_num_t sparsebit_num_set(struct sparsebit *s) 1047 + { 1048 + return s->num_set; 1049 + } 1050 + 1051 + /* Returns whether any bit is set in the sparsebit array. */ 1052 + bool sparsebit_any_set(struct sparsebit *s) 1053 + { 1054 + /* 1055 + * Nodes only describe set bits. If any nodes then there 1056 + * is at least 1 bit set. 1057 + */ 1058 + if (!s->root) 1059 + return false; 1060 + 1061 + /* 1062 + * Every node should have a non-zero mask. For now, just 1063 + * verify that the root node has a non-zero mask, 1064 + * which is a quick check that at least 1 bit is set. 1065 + */ 1066 + assert(s->root->mask != 0); 1067 + assert(s->num_set > 0 || 1068 + (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS && 1069 + s->root->mask == ~(mask_t) 0)); 1070 + 1071 + return true; 1072 + } 1073 + 1074 + /* Returns whether all the bits in the sparsebit array are cleared. */ 1075 + bool sparsebit_all_clear(struct sparsebit *s) 1076 + { 1077 + return !sparsebit_any_set(s); 1078 + } 1079 + 1080 + /* Returns whether any bit in the sparsebit array is cleared. */ 1081 + bool sparsebit_any_clear(struct sparsebit *s) 1082 + { 1083 + return !sparsebit_all_set(s); 1084 + } 1085 + 1086 + /* Returns the index of the first set bit. Abort if no bits are set. 1087 + */ 1088 + sparsebit_idx_t sparsebit_first_set(struct sparsebit *s) 1089 + { 1090 + struct node *nodep; 1091 + 1092 + /* Validate at least 1 bit is set */ 1093 + assert(sparsebit_any_set(s)); 1094 + 1095 + nodep = node_first(s); 1096 + return node_first_set(nodep, 0); 1097 + } 1098 + 1099 + /* Returns the index of the first cleared bit. Abort if 1100 + * no bits are cleared. 1101 + */ 1102 + sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s) 1103 + { 1104 + struct node *nodep1, *nodep2; 1105 + 1106 + /* Validate at least 1 bit is cleared. */ 1107 + assert(sparsebit_any_clear(s)); 1108 + 1109 + /* If no nodes or first node index > 0 then lowest cleared is 0 */ 1110 + nodep1 = node_first(s); 1111 + if (!nodep1 || nodep1->idx > 0) 1112 + return 0; 1113 + 1114 + /* Does the mask in the first node contain any cleared bits? */ 1115 + if (nodep1->mask != ~(mask_t) 0) 1116 + return node_first_clear(nodep1, 0); 1117 + 1118 + /* 1119 + * All mask bits set in first node. If there isn't a second node 1120 + * then the first cleared bit is the first bit after the bits 1121 + * described by the first node. 1122 + */ 1123 + nodep2 = node_next(s, nodep1); 1124 + if (!nodep2) { 1125 + /* 1126 + * No second node. First cleared bit is first bit beyond 1127 + * bits described by first node. 1128 + */ 1129 + assert(nodep1->mask == ~(mask_t) 0); 1130 + assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0); 1131 + return nodep1->idx + MASK_BITS + nodep1->num_after; 1132 + } 1133 + 1134 + /* 1135 + * There is a second node. 1136 + * If it is not adjacent to the first node, then there is a gap 1137 + * of cleared bits between the nodes, and the first cleared bit 1138 + * is the first bit within the gap. 1139 + */ 1140 + if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) 1141 + return nodep1->idx + MASK_BITS + nodep1->num_after; 1142 + 1143 + /* 1144 + * Second node is adjacent to the first node. 1145 + * Because it is adjacent, its mask should be non-zero. If all 1146 + * its mask bits are set, then with it being adjacent, it should 1147 + * have had the mask bits moved into the num_after setting of the 1148 + * previous node.
1149 + */ 1150 + return node_first_clear(nodep2, 0); 1151 + } 1152 + 1153 + /* Returns index of next bit set within s after the index given by prev. 1154 + * Returns 0 if there are no bits after prev that are set. 1155 + */ 1156 + sparsebit_idx_t sparsebit_next_set(struct sparsebit *s, 1157 + sparsebit_idx_t prev) 1158 + { 1159 + sparsebit_idx_t lowest_possible = prev + 1; 1160 + sparsebit_idx_t start; 1161 + struct node *nodep; 1162 + 1163 + /* A bit after the highest index can't be set. */ 1164 + if (lowest_possible == 0) 1165 + return 0; 1166 + 1167 + /* 1168 + * Find the leftmost 'candidate' overlapping or to the right 1169 + * of lowest_possible. 1170 + */ 1171 + struct node *candidate = NULL; 1172 + 1173 + /* True iff lowest_possible is within candidate */ 1174 + bool contains = false; 1175 + 1176 + /* 1177 + * Find node that describes setting of bit at lowest_possible. 1178 + * If such a node doesn't exist, find the node with the lowest 1179 + * starting index that is > lowest_possible. 1180 + */ 1181 + for (nodep = s->root; nodep;) { 1182 + if ((nodep->idx + MASK_BITS + nodep->num_after - 1) 1183 + >= lowest_possible) { 1184 + candidate = nodep; 1185 + if (candidate->idx <= lowest_possible) { 1186 + contains = true; 1187 + break; 1188 + } 1189 + nodep = nodep->left; 1190 + } else { 1191 + nodep = nodep->right; 1192 + } 1193 + } 1194 + if (!candidate) 1195 + return 0; 1196 + 1197 + assert(candidate->mask != 0); 1198 + 1199 + /* Does the candidate node describe the setting of lowest_possible? */ 1200 + if (!contains) { 1201 + /* 1202 + * Candidate doesn't describe setting of bit at lowest_possible. 1203 + * Candidate points to the first node with a starting index 1204 + * > lowest_possible. 1205 + */ 1206 + assert(candidate->idx > lowest_possible); 1207 + 1208 + return node_first_set(candidate, 0); 1209 + } 1210 + 1211 + /* 1212 + * Candidate describes setting of bit at lowest_possible. 1213 + * Note: although the node describes the setting of the bit 1214 + * at lowest_possible, it's possible that its setting and the 1215 + * setting of all later bits described by this node are 0. 1216 + * For now, just handle the cases where this node describes 1217 + * a bit at or after an index of lowest_possible that is set. 1218 + */ 1219 + start = lowest_possible - candidate->idx; 1220 + 1221 + if (start < MASK_BITS && candidate->mask >= (1 << start)) 1222 + return node_first_set(candidate, start); 1223 + 1224 + if (candidate->num_after) { 1225 + sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS; 1226 + 1227 + return lowest_possible < first_num_after_idx 1228 + ? first_num_after_idx : lowest_possible; 1229 + } 1230 + 1231 + /* 1232 + * Although candidate node describes setting of bit at 1233 + * the index of lowest_possible, all bits at that index and 1234 + * later that are described by candidate are cleared. With 1235 + * this, the next bit is the first bit in the next node, if 1236 + * such a node exists. If a next node doesn't exist, then 1237 + * there is no next set bit. 1238 + */ 1239 + candidate = node_next(s, candidate); 1240 + if (!candidate) 1241 + return 0; 1242 + 1243 + return node_first_set(candidate, 0); 1244 + } 1245 + 1246 + /* Returns index of next bit cleared within s after the index given by prev. 1247 + * Returns 0 if there are no bits after prev that are cleared.
1248 + */ 1249 + sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s, 1250 + sparsebit_idx_t prev) 1251 + { 1252 + sparsebit_idx_t lowest_possible = prev + 1; 1253 + sparsebit_idx_t idx; 1254 + struct node *nodep1, *nodep2; 1255 + 1256 + /* A bit after the highest index can't be set. */ 1257 + if (lowest_possible == 0) 1258 + return 0; 1259 + 1260 + /* 1261 + * Does a node describing the setting of lowest_possible exist? 1262 + * If not, the bit at lowest_possible is cleared. 1263 + */ 1264 + nodep1 = node_find(s, lowest_possible); 1265 + if (!nodep1) 1266 + return lowest_possible; 1267 + 1268 + /* Does a mask bit in node 1 describe the next cleared bit. */ 1269 + for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++) 1270 + if (!(nodep1->mask & (1 << idx))) 1271 + return nodep1->idx + idx; 1272 + 1273 + /* 1274 + * Next cleared bit is not described by node 1. If there 1275 + * isn't a next node, then next cleared bit is described 1276 + * by bit after the bits described by the first node. 1277 + */ 1278 + nodep2 = node_next(s, nodep1); 1279 + if (!nodep2) 1280 + return nodep1->idx + MASK_BITS + nodep1->num_after; 1281 + 1282 + /* 1283 + * There is a second node. 1284 + * If it is not adjacent to the first node, then there is a gap 1285 + * of cleared bits between the nodes, and the next cleared bit 1286 + * is the first bit within the gap. 1287 + */ 1288 + if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) 1289 + return nodep1->idx + MASK_BITS + nodep1->num_after; 1290 + 1291 + /* 1292 + * Second node is adjacent to the first node. 1293 + * Because it is adjacent, its mask should be non-zero. If all 1294 + * its mask bits are set, then with it being adjacent, it should 1295 + * have had the mask bits moved into the num_after setting of the 1296 + * previous node. 1297 + */ 1298 + return node_first_clear(nodep2, 0); 1299 + } 1300 + 1301 + /* Starting with the index 1 greater than the index given by start, finds 1302 + * and returns the index of the first sequence of num consecutively set 1303 + * bits. Returns a value of 0 of no such sequence exists. 1304 + */ 1305 + sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s, 1306 + sparsebit_idx_t start, sparsebit_num_t num) 1307 + { 1308 + sparsebit_idx_t idx; 1309 + 1310 + assert(num >= 1); 1311 + 1312 + for (idx = sparsebit_next_set(s, start); 1313 + idx != 0 && idx + num - 1 >= idx; 1314 + idx = sparsebit_next_set(s, idx)) { 1315 + assert(sparsebit_is_set(s, idx)); 1316 + 1317 + /* 1318 + * Does the sequence of bits starting at idx consist of 1319 + * num set bits? 1320 + */ 1321 + if (sparsebit_is_set_num(s, idx, num)) 1322 + return idx; 1323 + 1324 + /* 1325 + * Sequence of set bits at idx isn't large enough. 1326 + * Skip this entire sequence of set bits. 1327 + */ 1328 + idx = sparsebit_next_clear(s, idx); 1329 + if (idx == 0) 1330 + return 0; 1331 + } 1332 + 1333 + return 0; 1334 + } 1335 + 1336 + /* Starting with the index 1 greater than the index given by start, finds 1337 + * and returns the index of the first sequence of num consecutively cleared 1338 + * bits. Returns a value of 0 of no such sequence exists. 
1339 + */ 1340 + sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s, 1341 + sparsebit_idx_t start, sparsebit_num_t num) 1342 + { 1343 + sparsebit_idx_t idx; 1344 + 1345 + assert(num >= 1); 1346 + 1347 + for (idx = sparsebit_next_clear(s, start); 1348 + idx != 0 && idx + num - 1 >= idx; 1349 + idx = sparsebit_next_clear(s, idx)) { 1350 + assert(sparsebit_is_clear(s, idx)); 1351 + 1352 + /* 1353 + * Does the sequence of bits starting at idx consist of 1354 + * num cleared bits? 1355 + */ 1356 + if (sparsebit_is_clear_num(s, idx, num)) 1357 + return idx; 1358 + 1359 + /* 1360 + * Sequence of cleared bits at idx isn't large enough. 1361 + * Skip this entire sequence of cleared bits. 1362 + */ 1363 + idx = sparsebit_next_set(s, idx); 1364 + if (idx == 0) 1365 + return 0; 1366 + } 1367 + 1368 + return 0; 1369 + } 1370 + 1371 + /* Sets the bits * in the inclusive range idx through idx + num - 1. */ 1372 + void sparsebit_set_num(struct sparsebit *s, 1373 + sparsebit_idx_t start, sparsebit_num_t num) 1374 + { 1375 + struct node *nodep, *next; 1376 + unsigned int n1; 1377 + sparsebit_idx_t idx; 1378 + sparsebit_num_t n; 1379 + sparsebit_idx_t middle_start, middle_end; 1380 + 1381 + assert(num > 0); 1382 + assert(start + num - 1 >= start); 1383 + 1384 + /* 1385 + * Leading - bits before first mask boundary. 1386 + * 1387 + * TODO(lhuemill): With some effort it may be possible to 1388 + * replace the following loop with a sequential sequence 1389 + * of statements. High level sequence would be: 1390 + * 1391 + * 1. Use node_split() to force node that describes setting 1392 + * of idx to be within the mask portion of a node. 1393 + * 2. Form mask of bits to be set. 1394 + * 3. Determine number of mask bits already set in the node 1395 + * and store in a local variable named num_already_set. 1396 + * 4. Set the appropriate mask bits within the node. 1397 + * 5. Increment struct sparsebit_pvt num_set member 1398 + * by the number of bits that were actually set. 1399 + * Exclude from the counts bits that were already set. 1400 + * 6. Before returning to the caller, use node_reduce() to 1401 + * handle the multiple corner cases that this method 1402 + * introduces. 1403 + */ 1404 + for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) 1405 + bit_set(s, idx); 1406 + 1407 + /* Middle - bits spanning one or more entire mask */ 1408 + middle_start = idx; 1409 + middle_end = middle_start + (n & -MASK_BITS) - 1; 1410 + if (n >= MASK_BITS) { 1411 + nodep = node_split(s, middle_start); 1412 + 1413 + /* 1414 + * As needed, split just after end of middle bits. 1415 + * No split needed if end of middle bits is at highest 1416 + * supported bit index. 1417 + */ 1418 + if (middle_end + 1 > middle_end) 1419 + (void) node_split(s, middle_end + 1); 1420 + 1421 + /* Delete nodes that only describe bits within the middle. 
*/ 1422 + for (next = node_next(s, nodep); 1423 + next && (next->idx < middle_end); 1424 + next = node_next(s, nodep)) { 1425 + assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); 1426 + node_rm(s, next); 1427 + next = NULL; 1428 + } 1429 + 1430 + /* As needed set each of the mask bits */ 1431 + for (n1 = 0; n1 < MASK_BITS; n1++) { 1432 + if (!(nodep->mask & (1 << n1))) { 1433 + nodep->mask |= 1 << n1; 1434 + s->num_set++; 1435 + } 1436 + } 1437 + 1438 + s->num_set -= nodep->num_after; 1439 + nodep->num_after = middle_end - middle_start + 1 - MASK_BITS; 1440 + s->num_set += nodep->num_after; 1441 + 1442 + node_reduce(s, nodep); 1443 + } 1444 + idx = middle_end + 1; 1445 + n -= middle_end - middle_start + 1; 1446 + 1447 + /* Trailing - bits at and beyond last mask boundary */ 1448 + assert(n < MASK_BITS); 1449 + for (; n > 0; idx++, n--) 1450 + bit_set(s, idx); 1451 + } 1452 + 1453 + /* Clears the bits * in the inclusive range idx through idx + num - 1. */ 1454 + void sparsebit_clear_num(struct sparsebit *s, 1455 + sparsebit_idx_t start, sparsebit_num_t num) 1456 + { 1457 + struct node *nodep, *next; 1458 + unsigned int n1; 1459 + sparsebit_idx_t idx; 1460 + sparsebit_num_t n; 1461 + sparsebit_idx_t middle_start, middle_end; 1462 + 1463 + assert(num > 0); 1464 + assert(start + num - 1 >= start); 1465 + 1466 + /* Leading - bits before first mask boundary */ 1467 + for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) 1468 + bit_clear(s, idx); 1469 + 1470 + /* Middle - bits spanning one or more entire mask */ 1471 + middle_start = idx; 1472 + middle_end = middle_start + (n & -MASK_BITS) - 1; 1473 + if (n >= MASK_BITS) { 1474 + nodep = node_split(s, middle_start); 1475 + 1476 + /* 1477 + * As needed, split just after end of middle bits. 1478 + * No split needed if end of middle bits is at highest 1479 + * supported bit index. 1480 + */ 1481 + if (middle_end + 1 > middle_end) 1482 + (void) node_split(s, middle_end + 1); 1483 + 1484 + /* Delete nodes that only describe bits within the middle. */ 1485 + for (next = node_next(s, nodep); 1486 + next && (next->idx < middle_end); 1487 + next = node_next(s, nodep)) { 1488 + assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); 1489 + node_rm(s, next); 1490 + next = NULL; 1491 + } 1492 + 1493 + /* As needed clear each of the mask bits */ 1494 + for (n1 = 0; n1 < MASK_BITS; n1++) { 1495 + if (nodep->mask & (1 << n1)) { 1496 + nodep->mask &= ~(1 << n1); 1497 + s->num_set--; 1498 + } 1499 + } 1500 + 1501 + /* Clear any bits described by num_after */ 1502 + s->num_set -= nodep->num_after; 1503 + nodep->num_after = 0; 1504 + 1505 + /* 1506 + * Delete the node that describes the beginning of 1507 + * the middle bits and perform any allowed reductions 1508 + * with the nodes prev or next of nodep. 1509 + */ 1510 + node_reduce(s, nodep); 1511 + nodep = NULL; 1512 + } 1513 + idx = middle_end + 1; 1514 + n -= middle_end - middle_start + 1; 1515 + 1516 + /* Trailing - bits at and beyond last mask boundary */ 1517 + assert(n < MASK_BITS); 1518 + for (; n > 0; idx++, n--) 1519 + bit_clear(s, idx); 1520 + } 1521 + 1522 + /* Sets the bit at the index given by idx. */ 1523 + void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx) 1524 + { 1525 + sparsebit_set_num(s, idx, 1); 1526 + } 1527 + 1528 + /* Clears the bit at the index given by idx. 
*/ 1529 + void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx) 1530 + { 1531 + sparsebit_clear_num(s, idx, 1); 1532 + } 1533 + 1534 + /* Sets the bits in the entire addressable range of the sparsebit array. */ 1535 + void sparsebit_set_all(struct sparsebit *s) 1536 + { 1537 + sparsebit_set(s, 0); 1538 + sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0); 1539 + assert(sparsebit_all_set(s)); 1540 + } 1541 + 1542 + /* Clears the bits in the entire addressable range of the sparsebit array. */ 1543 + void sparsebit_clear_all(struct sparsebit *s) 1544 + { 1545 + sparsebit_clear(s, 0); 1546 + sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0); 1547 + assert(!sparsebit_any_set(s)); 1548 + } 1549 + 1550 + static size_t display_range(FILE *stream, sparsebit_idx_t low, 1551 + sparsebit_idx_t high, bool prepend_comma_space) 1552 + { 1553 + char *fmt_str; 1554 + size_t sz; 1555 + 1556 + /* Determine the printf format string */ 1557 + if (low == high) 1558 + fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx"; 1559 + else 1560 + fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx"; 1561 + 1562 + /* 1563 + * When stream is NULL, just determine the size of what would 1564 + * have been printed, else print the range. 1565 + */ 1566 + if (!stream) 1567 + sz = snprintf(NULL, 0, fmt_str, low, high); 1568 + else 1569 + sz = fprintf(stream, fmt_str, low, high); 1570 + 1571 + return sz; 1572 + } 1573 + 1574 + 1575 + /* Dumps to the FILE stream given by stream, the bit settings 1576 + * of s. Each line of output is prefixed with the number of 1577 + * spaces given by indent. The length of each line is implementation 1578 + * dependent and does not depend on the indent amount. The following 1579 + * is an example output of a sparsebit array that has bits: 1580 + * 1581 + * 0x5, 0x8, 0xa:0xe, 0x12 1582 + * 1583 + * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18 1584 + * are set. Note that a ':', instead of a '-' is used to specify a range of 1585 + * contiguous bits. This is done because '-' is used to specify command-line 1586 + * options, and sometimes ranges are specified as command-line arguments. 1587 + */ 1588 + void sparsebit_dump(FILE *stream, struct sparsebit *s, 1589 + unsigned int indent) 1590 + { 1591 + size_t current_line_len = 0; 1592 + size_t sz; 1593 + struct node *nodep; 1594 + 1595 + if (!sparsebit_any_set(s)) 1596 + return; 1597 + 1598 + /* Display initial indent */ 1599 + fprintf(stream, "%*s", indent, ""); 1600 + 1601 + /* For each node */ 1602 + for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) { 1603 + unsigned int n1; 1604 + sparsebit_idx_t low, high; 1605 + 1606 + /* For each group of bits in the mask */ 1607 + for (n1 = 0; n1 < MASK_BITS; n1++) { 1608 + if (nodep->mask & (1 << n1)) { 1609 + low = high = nodep->idx + n1; 1610 + 1611 + for (; n1 < MASK_BITS; n1++) { 1612 + if (nodep->mask & (1 << n1)) 1613 + high = nodep->idx + n1; 1614 + else 1615 + break; 1616 + } 1617 + 1618 + if ((n1 == MASK_BITS) && nodep->num_after) 1619 + high += nodep->num_after; 1620 + 1621 + /* 1622 + * How much room will it take to display 1623 + * this range. 1624 + */ 1625 + sz = display_range(NULL, low, high, 1626 + current_line_len != 0); 1627 + 1628 + /* 1629 + * If there is not enough room, display 1630 + * a newline plus the indent of the next 1631 + * line. 
1632 + */ 1633 + if (current_line_len + sz > DUMP_LINE_MAX) { 1634 + fputs("\n", stream); 1635 + fprintf(stream, "%*s", indent, ""); 1636 + current_line_len = 0; 1637 + } 1638 + 1639 + /* Display the range */ 1640 + sz = display_range(stream, low, high, 1641 + current_line_len != 0); 1642 + current_line_len += sz; 1643 + } 1644 + } 1645 + 1646 + /* 1647 + * If num_after and most significant-bit of mask is not 1648 + * set, then still need to display a range for the bits 1649 + * described by num_after. 1650 + */ 1651 + if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) { 1652 + low = nodep->idx + MASK_BITS; 1653 + high = nodep->idx + MASK_BITS + nodep->num_after - 1; 1654 + 1655 + /* 1656 + * How much room will it take to display 1657 + * this range. 1658 + */ 1659 + sz = display_range(NULL, low, high, 1660 + current_line_len != 0); 1661 + 1662 + /* 1663 + * If there is not enough room, display 1664 + * a newline plus the indent of the next 1665 + * line. 1666 + */ 1667 + if (current_line_len + sz > DUMP_LINE_MAX) { 1668 + fputs("\n", stream); 1669 + fprintf(stream, "%*s", indent, ""); 1670 + current_line_len = 0; 1671 + } 1672 + 1673 + /* Display the range */ 1674 + sz = display_range(stream, low, high, 1675 + current_line_len != 0); 1676 + current_line_len += sz; 1677 + } 1678 + } 1679 + fputs("\n", stream); 1680 + } 1681 + 1682 + /* Validates the internal state of the sparsebit array given by 1683 + * s. On error, diagnostic information is printed to stderr and 1684 + * abort is called. 1685 + */ 1686 + void sparsebit_validate_internal(struct sparsebit *s) 1687 + { 1688 + bool error_detected = false; 1689 + struct node *nodep, *prev = NULL; 1690 + sparsebit_num_t total_bits_set = 0; 1691 + unsigned int n1; 1692 + 1693 + /* For each node */ 1694 + for (nodep = node_first(s); nodep; 1695 + prev = nodep, nodep = node_next(s, nodep)) { 1696 + 1697 + /* 1698 + * Increase total bits set by the number of bits set 1699 + * in this node. 1700 + */ 1701 + for (n1 = 0; n1 < MASK_BITS; n1++) 1702 + if (nodep->mask & (1 << n1)) 1703 + total_bits_set++; 1704 + 1705 + total_bits_set += nodep->num_after; 1706 + 1707 + /* 1708 + * Arbitrary choice as to whether a mask of 0 is allowed 1709 + * or not. For diagnostic purposes it is beneficial to 1710 + * have only one valid means to represent a set of bits. 1711 + * To support this an arbitrary choice has been made 1712 + * to not allow a mask of zero. 1713 + */ 1714 + if (nodep->mask == 0) { 1715 + fprintf(stderr, "Node mask of zero, " 1716 + "nodep: %p nodep->mask: 0x%x", 1717 + nodep, nodep->mask); 1718 + error_detected = true; 1719 + break; 1720 + } 1721 + 1722 + /* 1723 + * Validate num_after is not greater than the max index 1724 + * - the number of mask bits. The num_after member 1725 + * uses 0-based indexing and thus has no value that 1726 + * represents all bits set. This limitation is handled 1727 + * by requiring a non-zero mask. 
With a non-zero mask, 1728 + * MASK_BITS worth of bits are described by the mask, 1729 + * which makes the largest needed num_after equal to: 1730 + * 1731 + * (~(sparsebit_num_t) 0) - MASK_BITS + 1 1732 + */ 1733 + if (nodep->num_after 1734 + > (~(sparsebit_num_t) 0) - MASK_BITS + 1) { 1735 + fprintf(stderr, "num_after too large, " 1736 + "nodep: %p nodep->num_after: 0x%lx", 1737 + nodep, nodep->num_after); 1738 + error_detected = true; 1739 + break; 1740 + } 1741 + 1742 + /* Validate node index is divisible by the mask size */ 1743 + if (nodep->idx % MASK_BITS) { 1744 + fprintf(stderr, "Node index not divisible by " 1745 + "mask size,\n" 1746 + " nodep: %p nodep->idx: 0x%lx " 1747 + "MASK_BITS: %lu\n", 1748 + nodep, nodep->idx, MASK_BITS); 1749 + error_detected = true; 1750 + break; 1751 + } 1752 + 1753 + /* 1754 + * Validate bits described by node don't wrap beyond the 1755 + * highest supported index. 1756 + */ 1757 + if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) { 1758 + fprintf(stderr, "Bits described by node wrap " 1759 + "beyond highest supported index,\n" 1760 + " nodep: %p nodep->idx: 0x%lx\n" 1761 + " MASK_BITS: %lu nodep->num_after: 0x%lx", 1762 + nodep, nodep->idx, MASK_BITS, nodep->num_after); 1763 + error_detected = true; 1764 + break; 1765 + } 1766 + 1767 + /* Check parent pointers. */ 1768 + if (nodep->left) { 1769 + if (nodep->left->parent != nodep) { 1770 + fprintf(stderr, "Left child parent pointer " 1771 + "doesn't point to this node,\n" 1772 + " nodep: %p nodep->left: %p " 1773 + "nodep->left->parent: %p", 1774 + nodep, nodep->left, 1775 + nodep->left->parent); 1776 + error_detected = true; 1777 + break; 1778 + } 1779 + } 1780 + 1781 + if (nodep->right) { 1782 + if (nodep->right->parent != nodep) { 1783 + fprintf(stderr, "Right child parent pointer " 1784 + "doesn't point to this node,\n" 1785 + " nodep: %p nodep->right: %p " 1786 + "nodep->right->parent: %p", 1787 + nodep, nodep->right, 1788 + nodep->right->parent); 1789 + error_detected = true; 1790 + break; 1791 + } 1792 + } 1793 + 1794 + if (!nodep->parent) { 1795 + if (s->root != nodep) { 1796 + fprintf(stderr, "Unexpected root node, " 1797 + "s->root: %p nodep: %p", 1798 + s->root, nodep); 1799 + error_detected = true; 1800 + break; 1801 + } 1802 + } 1803 + 1804 + if (prev) { 1805 + /* 1806 + * Is index of previous node before index of 1807 + * current node? 1808 + */ 1809 + if (prev->idx >= nodep->idx) { 1810 + fprintf(stderr, "Previous node index " 1811 + ">= current node index,\n" 1812 + " prev: %p prev->idx: 0x%lx\n" 1813 + " nodep: %p nodep->idx: 0x%lx", 1814 + prev, prev->idx, nodep, nodep->idx); 1815 + error_detected = true; 1816 + break; 1817 + } 1818 + 1819 + /* 1820 + * Nodes occur in ascending order, based on each 1821 + * nodes starting index. 1822 + */ 1823 + if ((prev->idx + MASK_BITS + prev->num_after - 1) 1824 + >= nodep->idx) { 1825 + fprintf(stderr, "Previous node bit range " 1826 + "overlaps with current node bit range,\n" 1827 + " prev: %p prev->idx: 0x%lx " 1828 + "prev->num_after: 0x%lx\n" 1829 + " nodep: %p nodep->idx: 0x%lx " 1830 + "nodep->num_after: 0x%lx\n" 1831 + " MASK_BITS: %lu", 1832 + prev, prev->idx, prev->num_after, 1833 + nodep, nodep->idx, nodep->num_after, 1834 + MASK_BITS); 1835 + error_detected = true; 1836 + break; 1837 + } 1838 + 1839 + /* 1840 + * When the node has all mask bits set, it shouldn't 1841 + * be adjacent to the last bit described by the 1842 + * previous node.
1843 + */ 1844 + if (nodep->mask == ~(mask_t) 0 && 1845 + prev->idx + MASK_BITS + prev->num_after == nodep->idx) { 1846 + fprintf(stderr, "Current node has mask with " 1847 + "all bits set and is adjacent to the " 1848 + "previous node,\n" 1849 + " prev: %p prev->idx: 0x%lx " 1850 + "prev->num_after: 0x%lx\n" 1851 + " nodep: %p nodep->idx: 0x%lx " 1852 + "nodep->num_after: 0x%lx\n" 1853 + " MASK_BITS: %lu", 1854 + prev, prev->idx, prev->num_after, 1855 + nodep, nodep->idx, nodep->num_after, 1856 + MASK_BITS); 1857 + 1858 + error_detected = true; 1859 + break; 1860 + } 1861 + } 1862 + } 1863 + 1864 + if (!error_detected) { 1865 + /* 1866 + * Is sum of bits set in each node equal to the count 1867 + * of total bits set. 1868 + */ 1869 + if (s->num_set != total_bits_set) { 1870 + fprintf(stderr, "Number of bits set mismatch,\n" 1871 + " s->num_set: 0x%lx total_bits_set: 0x%lx", 1872 + s->num_set, total_bits_set); 1873 + 1874 + error_detected = true; 1875 + } 1876 + } 1877 + 1878 + if (error_detected) { 1879 + fputs(" dump_internal:\n", stderr); 1880 + sparsebit_dump_internal(stderr, s, 4); 1881 + abort(); 1882 + } 1883 + } 1884 + 1885 + 1886 + #ifdef FUZZ 1887 + /* A simple but effective fuzzing driver. Look for bugs with the help 1888 + * of some invariants and a trivial representation of sparsebit. 1889 + * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let 1890 + * afl-fuzz do the magic. :) 1891 + */ 1892 + 1893 + #include <stdlib.h> 1894 + #include <assert.h> 1895 + 1896 + struct range { 1897 + sparsebit_idx_t first, last; 1898 + bool set; 1899 + }; 1900 + 1901 + struct sparsebit *s; 1902 + struct range ranges[1000]; 1903 + int num_ranges; 1904 + 1905 + static bool get_value(sparsebit_idx_t idx) 1906 + { 1907 + int i; 1908 + 1909 + for (i = num_ranges; --i >= 0; ) 1910 + if (ranges[i].first <= idx && idx <= ranges[i].last) 1911 + return ranges[i].set; 1912 + 1913 + return false; 1914 + } 1915 + 1916 + static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last) 1917 + { 1918 + sparsebit_num_t num; 1919 + sparsebit_idx_t next; 1920 + 1921 + if (first < last) { 1922 + num = last - first + 1; 1923 + } else { 1924 + num = first - last + 1; 1925 + first = last; 1926 + last = first + num - 1; 1927 + } 1928 + 1929 + switch (code) { 1930 + case 0: 1931 + sparsebit_set(s, first); 1932 + assert(sparsebit_is_set(s, first)); 1933 + assert(!sparsebit_is_clear(s, first)); 1934 + assert(sparsebit_any_set(s)); 1935 + assert(!sparsebit_all_clear(s)); 1936 + if (get_value(first)) 1937 + return; 1938 + if (num_ranges == 1000) 1939 + exit(0); 1940 + ranges[num_ranges++] = (struct range) 1941 + { .first = first, .last = first, .set = true }; 1942 + break; 1943 + case 1: 1944 + sparsebit_clear(s, first); 1945 + assert(!sparsebit_is_set(s, first)); 1946 + assert(sparsebit_is_clear(s, first)); 1947 + assert(sparsebit_any_clear(s)); 1948 + assert(!sparsebit_all_set(s)); 1949 + if (!get_value(first)) 1950 + return; 1951 + if (num_ranges == 1000) 1952 + exit(0); 1953 + ranges[num_ranges++] = (struct range) 1954 + { .first = first, .last = first, .set = false }; 1955 + break; 1956 + case 2: 1957 + assert(sparsebit_is_set(s, first) == get_value(first)); 1958 + assert(sparsebit_is_clear(s, first) == !get_value(first)); 1959 + break; 1960 + case 3: 1961 + if (sparsebit_any_set(s)) 1962 + assert(get_value(sparsebit_first_set(s))); 1963 + if (sparsebit_any_clear(s)) 1964 + assert(!get_value(sparsebit_first_clear(s))); 1965 + sparsebit_set_all(s); 1966 + assert(!sparsebit_any_clear(s));
1967 + assert(sparsebit_all_set(s)); 1968 + num_ranges = 0; 1969 + ranges[num_ranges++] = (struct range) 1970 + { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true }; 1971 + break; 1972 + case 4: 1973 + if (sparsebit_any_set(s)) 1974 + assert(get_value(sparsebit_first_set(s))); 1975 + if (sparsebit_any_clear(s)) 1976 + assert(!get_value(sparsebit_first_clear(s))); 1977 + sparsebit_clear_all(s); 1978 + assert(!sparsebit_any_set(s)); 1979 + assert(sparsebit_all_clear(s)); 1980 + num_ranges = 0; 1981 + break; 1982 + case 5: 1983 + next = sparsebit_next_set(s, first); 1984 + assert(next == 0 || next > first); 1985 + assert(next == 0 || get_value(next)); 1986 + break; 1987 + case 6: 1988 + next = sparsebit_next_clear(s, first); 1989 + assert(next == 0 || next > first); 1990 + assert(next == 0 || !get_value(next)); 1991 + break; 1992 + case 7: 1993 + next = sparsebit_next_clear(s, first); 1994 + if (sparsebit_is_set_num(s, first, num)) { 1995 + assert(next == 0 || next > last); 1996 + if (first) 1997 + next = sparsebit_next_set(s, first - 1); 1998 + else if (sparsebit_any_set(s)) 1999 + next = sparsebit_first_set(s); 2000 + else 2001 + return; 2002 + assert(next == first); 2003 + } else { 2004 + assert(sparsebit_is_clear(s, first) || next <= last); 2005 + } 2006 + break; 2007 + case 8: 2008 + next = sparsebit_next_set(s, first); 2009 + if (sparsebit_is_clear_num(s, first, num)) { 2010 + assert(next == 0 || next > last); 2011 + if (first) 2012 + next = sparsebit_next_clear(s, first - 1); 2013 + else if (sparsebit_any_clear(s)) 2014 + next = sparsebit_first_clear(s); 2015 + else 2016 + return; 2017 + assert(next == first); 2018 + } else { 2019 + assert(sparsebit_is_set(s, first) || next <= last); 2020 + } 2021 + break; 2022 + case 9: 2023 + sparsebit_set_num(s, first, num); 2024 + assert(sparsebit_is_set_num(s, first, num)); 2025 + assert(!sparsebit_is_clear_num(s, first, num)); 2026 + assert(sparsebit_any_set(s)); 2027 + assert(!sparsebit_all_clear(s)); 2028 + if (num_ranges == 1000) 2029 + exit(0); 2030 + ranges[num_ranges++] = (struct range) 2031 + { .first = first, .last = last, .set = true }; 2032 + break; 2033 + case 10: 2034 + sparsebit_clear_num(s, first, num); 2035 + assert(!sparsebit_is_set_num(s, first, num)); 2036 + assert(sparsebit_is_clear_num(s, first, num)); 2037 + assert(sparsebit_any_clear(s)); 2038 + assert(!sparsebit_all_set(s)); 2039 + if (num_ranges == 1000) 2040 + exit(0); 2041 + ranges[num_ranges++] = (struct range) 2042 + { .first = first, .last = last, .set = false }; 2043 + break; 2044 + case 11: 2045 + sparsebit_validate_internal(s); 2046 + break; 2047 + default: 2048 + break; 2049 + } 2050 + } 2051 + 2052 + unsigned char get8(void) 2053 + { 2054 + int ch; 2055 + 2056 + ch = getchar(); 2057 + if (ch == EOF) 2058 + exit(0); 2059 + return ch; 2060 + } 2061 + 2062 + uint64_t get64(void) 2063 + { 2064 + uint64_t x; 2065 + 2066 + x = get8(); 2067 + x = (x << 8) | get8(); 2068 + x = (x << 8) | get8(); 2069 + x = (x << 8) | get8(); 2070 + x = (x << 8) | get8(); 2071 + x = (x << 8) | get8(); 2072 + x = (x << 8) | get8(); 2073 + return (x << 8) | get8(); 2074 + } 2075 + 2076 + int main(void) 2077 + { 2078 + s = sparsebit_alloc(); 2079 + for (;;) { 2080 + uint8_t op = get8() & 0xf; 2081 + uint64_t first = get64(); 2082 + uint64_t last = get64(); 2083 + 2084 + operate(op, first, last); 2085 + } 2086 + } 2087 + #endif
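To see how the sparsebit API added above hangs together from a caller's point of view, here is a minimal sketch. It is not part of the patch: the include path and the main() scaffolding are illustrative, and it assumes the declarations from the series' sparsebit.h header, whose index type is 64 bits wide per the implementation above.

    #include <stdio.h>
    #include "sparsebit.h"   /* assumed header location within the selftests tree */

    int main(void)
    {
            struct sparsebit *used = sparsebit_alloc();  /* all bits start cleared */

            /* Mark bits 0x10 through 0x1f, plus bit 0x40, as set. */
            sparsebit_set_num(used, 0x10, 16);
            sparsebit_set(used, 0x40);

            /* Point query: 0x15 lies inside the first range. */
            if (sparsebit_is_set(used, 0x15))
                    printf("0x15 is set\n");

            /*
             * First cleared bit after 0xf is 0x20, just past the first
             * range. Note the convention used throughout the API: a
             * return value of 0 means "no such bit", so index 0 needs
             * special care in callers.
             */
            printf("next clear: 0x%lx\n",
                   (unsigned long) sparsebit_next_clear(used, 0xf));

            sparsebit_dump(stdout, used, 0);  /* prints "0x10:0x1f, 0x40" */

            sparsebit_free(&used);  /* frees and NULLs the pointer */
            return 0;
    }

The #ifdef FUZZ driver at the end of the file exercises these same calls against a brute-force list of ranges; as its comment indicates, compiling the file with FUZZ defined produces a standalone binary suitable for driving with afl-fuzz.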
+697
tools/testing/selftests/kvm/lib/x86.c
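The heart of this file is virt_pg_map(), which hand-builds 4-level (48-bit) guest page tables: each level consumes 9 bits of the guest virtual address. A self-contained sketch of that index computation follows; the helper name is illustrative, not from the patch, but the shifts and masks match the index[0]..index[3] assignments in virt_pg_map() below.

    #include <stdint.h>

    /* Decode a 48-bit guest virtual address into its four 9-bit
     * page-table indices, lowest level first.
     */
    static void pt_indices(uint64_t vaddr, uint16_t index[4])
    {
            index[0] = (vaddr >> 12) & 0x1ffu;  /* page table entry */
            index[1] = (vaddr >> 21) & 0x1ffu;  /* page directory entry */
            index[2] = (vaddr >> 30) & 0x1ffu;  /* page directory pointer entry */
            index[3] = (vaddr >> 39) & 0x1ffu;  /* page map level 4 entry */
    }

Each entry holds a 40-bit page-frame number in its address field, so the walk multiplies address by the page size to obtain the physical address of the next-level table, as virt_pg_map() and virt_dump() both do.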
··· 1 + /* 2 + * tools/testing/selftests/kvm/lib/x86.c 3 + * 4 + * Copyright (C) 2018, Google LLC. 5 + * 6 + * This work is licensed under the terms of the GNU GPL, version 2. 7 + */ 8 + 9 + #define _GNU_SOURCE /* for program_invocation_name */ 10 + 11 + #include "test_util.h" 12 + #include "kvm_util.h" 13 + #include "kvm_util_internal.h" 14 + #include "x86.h" 15 + 16 + /* Minimum physical address used for virtual translation tables. */ 17 + #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 18 + 19 + /* Virtual translation table structure declarations */ 20 + struct pageMapL4Entry { 21 + uint64_t present:1; 22 + uint64_t writable:1; 23 + uint64_t user:1; 24 + uint64_t write_through:1; 25 + uint64_t cache_disable:1; 26 + uint64_t accessed:1; 27 + uint64_t ignored_06:1; 28 + uint64_t page_size:1; 29 + uint64_t ignored_11_08:4; 30 + uint64_t address:40; 31 + uint64_t ignored_62_52:11; 32 + uint64_t execute_disable:1; 33 + }; 34 + 35 + struct pageDirectoryPointerEntry { 36 + uint64_t present:1; 37 + uint64_t writable:1; 38 + uint64_t user:1; 39 + uint64_t write_through:1; 40 + uint64_t cache_disable:1; 41 + uint64_t accessed:1; 42 + uint64_t ignored_06:1; 43 + uint64_t page_size:1; 44 + uint64_t ignored_11_08:4; 45 + uint64_t address:40; 46 + uint64_t ignored_62_52:11; 47 + uint64_t execute_disable:1; 48 + }; 49 + 50 + struct pageDirectoryEntry { 51 + uint64_t present:1; 52 + uint64_t writable:1; 53 + uint64_t user:1; 54 + uint64_t write_through:1; 55 + uint64_t cache_disable:1; 56 + uint64_t accessed:1; 57 + uint64_t ignored_06:1; 58 + uint64_t page_size:1; 59 + uint64_t ignored_11_08:4; 60 + uint64_t address:40; 61 + uint64_t ignored_62_52:11; 62 + uint64_t execute_disable:1; 63 + }; 64 + 65 + struct pageTableEntry { 66 + uint64_t present:1; 67 + uint64_t writable:1; 68 + uint64_t user:1; 69 + uint64_t write_through:1; 70 + uint64_t cache_disable:1; 71 + uint64_t accessed:1; 72 + uint64_t dirty:1; 73 + uint64_t reserved_07:1; 74 + uint64_t global:1; 75 + uint64_t ignored_11_09:3; 76 + uint64_t address:40; 77 + uint64_t ignored_62_52:11; 78 + uint64_t execute_disable:1; 79 + }; 80 + 81 + /* Register Dump 82 + * 83 + * Input Args: 84 + * indent - Left margin indent amount 85 + * regs - registers 86 + * 87 + * Output Args: 88 + * stream - Output FILE stream 89 + * 90 + * Return: None 91 + * 92 + * Dumps the state of the registers given by regs, to the FILE stream 93 + * given by stream.
94 + */ 95 + void regs_dump(FILE *stream, struct kvm_regs *regs, 96 + uint8_t indent) 97 + { 98 + fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " 99 + "rcx: 0x%.16llx rdx: 0x%.16llx\n", 100 + indent, "", 101 + regs->rax, regs->rbx, regs->rcx, regs->rdx); 102 + fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " 103 + "rsp: 0x%.16llx rbp: 0x%.16llx\n", 104 + indent, "", 105 + regs->rsi, regs->rdi, regs->rsp, regs->rbp); 106 + fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " 107 + "r10: 0x%.16llx r11: 0x%.16llx\n", 108 + indent, "", 109 + regs->r8, regs->r9, regs->r10, regs->r11); 110 + fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " 111 + "r14: 0x%.16llx r15: 0x%.16llx\n", 112 + indent, "", 113 + regs->r12, regs->r13, regs->r14, regs->r15); 114 + fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", 115 + indent, "", 116 + regs->rip, regs->rflags); 117 + } 118 + 119 + /* Segment Dump 120 + * 121 + * Input Args: 122 + * indent - Left margin indent amount 123 + * segment - KVM segment 124 + * 125 + * Output Args: 126 + * stream - Output FILE stream 127 + * 128 + * Return: None 129 + * 130 + * Dumps the state of the KVM segment given by segment, to the FILE stream 131 + * given by stream. 132 + */ 133 + static void segment_dump(FILE *stream, struct kvm_segment *segment, 134 + uint8_t indent) 135 + { 136 + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " 137 + "selector: 0x%.4x type: 0x%.2x\n", 138 + indent, "", segment->base, segment->limit, 139 + segment->selector, segment->type); 140 + fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " 141 + "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", 142 + indent, "", segment->present, segment->dpl, 143 + segment->db, segment->s, segment->l); 144 + fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " 145 + "unusable: 0x%.2x padding: 0x%.2x\n", 146 + indent, "", segment->g, segment->avl, 147 + segment->unusable, segment->padding); 148 + } 149 + 150 + /* dtable Dump 151 + * 152 + * Input Args: 153 + * indent - Left margin indent amount 154 + * dtable - KVM dtable 155 + * 156 + * Output Args: 157 + * stream - Output FILE stream 158 + * 159 + * Return: None 160 + * 161 + * Dumps the state of the KVM dtable given by dtable, to the FILE stream 162 + * given by stream. 163 + */ 164 + static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, 165 + uint8_t indent) 166 + { 167 + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " 168 + "padding: 0x%.4x 0x%.4x 0x%.4x\n", 169 + indent, "", dtable->base, dtable->limit, 170 + dtable->padding[0], dtable->padding[1], dtable->padding[2]); 171 + } 172 + 173 + /* System Register Dump 174 + * 175 + * Input Args: 176 + * indent - Left margin indent amount 177 + * sregs - System registers 178 + * 179 + * Output Args: 180 + * stream - Output FILE stream 181 + * 182 + * Return: None 183 + * 184 + * Dumps the state of the system registers given by sregs, to the FILE stream 185 + * given by stream.
186 + */ 187 + void sregs_dump(FILE *stream, struct kvm_sregs *sregs, 188 + uint8_t indent) 189 + { 190 + unsigned int i; 191 + 192 + fprintf(stream, "%*scs:\n", indent, ""); 193 + segment_dump(stream, &sregs->cs, indent + 2); 194 + fprintf(stream, "%*sds:\n", indent, ""); 195 + segment_dump(stream, &sregs->ds, indent + 2); 196 + fprintf(stream, "%*ses:\n", indent, ""); 197 + segment_dump(stream, &sregs->es, indent + 2); 198 + fprintf(stream, "%*sfs:\n", indent, ""); 199 + segment_dump(stream, &sregs->fs, indent + 2); 200 + fprintf(stream, "%*sgs:\n", indent, ""); 201 + segment_dump(stream, &sregs->gs, indent + 2); 202 + fprintf(stream, "%*sss:\n", indent, ""); 203 + segment_dump(stream, &sregs->ss, indent + 2); 204 + fprintf(stream, "%*str:\n", indent, ""); 205 + segment_dump(stream, &sregs->tr, indent + 2); 206 + fprintf(stream, "%*sldt:\n", indent, ""); 207 + segment_dump(stream, &sregs->ldt, indent + 2); 208 + 209 + fprintf(stream, "%*sgdt:\n", indent, ""); 210 + dtable_dump(stream, &sregs->gdt, indent + 2); 211 + fprintf(stream, "%*sidt:\n", indent, ""); 212 + dtable_dump(stream, &sregs->idt, indent + 2); 213 + 214 + fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " 215 + "cr3: 0x%.16llx cr4: 0x%.16llx\n", 216 + indent, "", 217 + sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); 218 + fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " 219 + "apic_base: 0x%.16llx\n", 220 + indent, "", 221 + sregs->cr8, sregs->efer, sregs->apic_base); 222 + 223 + fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); 224 + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { 225 + fprintf(stream, "%*s%.16llx\n", indent + 2, "", 226 + sregs->interrupt_bitmap[i]); 227 + } 228 + } 229 + 230 + void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) 231 + { 232 + int rc; 233 + 234 + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " 235 + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); 236 + 237 + /* If needed, create page map l4 table. */ 238 + if (!vm->pgd_created) { 239 + vm_paddr_t paddr = vm_phy_page_alloc(vm, 240 + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); 241 + vm->pgd = paddr; 242 + 243 + /* Set pointer to pgd tables in all the VCPUs that 244 + * have already been created. Future VCPUs will have 245 + * the value set as each one is created. 246 + */ 247 + for (struct vcpu *vcpu = vm->vcpu_head; vcpu; 248 + vcpu = vcpu->next) { 249 + struct kvm_sregs sregs; 250 + 251 + /* Obtain the current system register settings */ 252 + vcpu_sregs_get(vm, vcpu->id, &sregs); 253 + 254 + /* Set and store the pointer to the start of the 255 + * pgd tables. 256 + */ 257 + sregs.cr3 = vm->pgd; 258 + vcpu_sregs_set(vm, vcpu->id, &sregs); 259 + } 260 + 261 + vm->pgd_created = true; 262 + } 263 + } 264 + 265 + /* VM Virtual Page Map 266 + * 267 + * Input Args: 268 + * vm - Virtual Machine 269 + * vaddr - VM Virtual Address 270 + * paddr - VM Physical Address 271 + * pgd_memslot - Memory region slot for new virtual translation tables 272 + * 273 + * Output Args: None 274 + * 275 + * Return: None 276 + * 277 + * Within the VM given by vm, creates a virtual translation for the page 278 + * starting at vaddr to the page starting at paddr. 
/* VM Virtual Page Map
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - VM Virtual Address
 *   paddr - VM Physical Address
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by vm, creates a virtual translation for the page
 * starting at vaddr to the page starting at paddr.  Intermediate
 * translation tables are allocated on demand from pgd_memslot; the
 * top-level PML4 must already exist (see virt_pgd_alloc()).
 */
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
        uint32_t pgd_memslot)
{
        uint16_t index[4];
        struct pageMapL4Entry *pml4e;

        TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

        TEST_ASSERT((vaddr % vm->page_size) == 0,
                "Virtual address not on page boundary,\n"
                "  vaddr: 0x%lx vm->page_size: 0x%x",
                vaddr, vm->page_size);
        TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
                (vaddr >> vm->page_shift)),
                "Invalid virtual address, vaddr: 0x%lx",
                vaddr);
        TEST_ASSERT((paddr % vm->page_size) == 0,
                "Physical address not on page boundary,\n"
                "  paddr: 0x%lx vm->page_size: 0x%x",
                paddr, vm->page_size);
        TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
                "Physical address beyond maximum supported,\n"
                "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
                paddr, vm->max_gfn, vm->page_size);

        /* Table index of vaddr at each translation level, from the page
         * table (index[0]) up to the PML4 (index[3]).
         */
        index[0] = (vaddr >> 12) & 0x1ffu;
        index[1] = (vaddr >> 21) & 0x1ffu;
        index[2] = (vaddr >> 30) & 0x1ffu;
        index[3] = (vaddr >> 39) & 0x1ffu;

        /* Allocate page directory pointer table if not present. */
        pml4e = addr_gpa2hva(vm, vm->pgd);
        if (!pml4e[index[3]].present) {
                pml4e[index[3]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pml4e[index[3]].writable = true;
                pml4e[index[3]].present = true;
        }

        /* Allocate page directory table if not present. */
        struct pageDirectoryPointerEntry *pdpe;
        pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
        if (!pdpe[index[2]].present) {
                pdpe[index[2]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pdpe[index[2]].writable = true;
                pdpe[index[2]].present = true;
        }

        /* Allocate page table if not present. */
        struct pageDirectoryEntry *pde;
        pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
        if (!pde[index[1]].present) {
                pde[index[1]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pde[index[1]].writable = true;
                pde[index[1]].present = true;
        }

        /* Fill in page table entry. */
        struct pageTableEntry *pte;
        pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
        pte[index[0]].address = paddr >> vm->page_shift;
        pte[index[0]].writable = true;
        pte[index[0]].present = true;
}
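A quick self-check of the mapping path: hand-map one page, then confirm the software walk agrees. The addresses and memslot below are arbitrary examples, virt_pg_map() and addr_gva2gpa() are assumed to be visible via the x86 header, and memslot 0 must have free pages for the intermediate tables:

    #include <stdint.h>

    #include "kvm_util.h"
    #include "x86.h"

    static void map_and_check(struct kvm_vm *vm)
    {
            const uint64_t gva = 0x400000;  /* example page-aligned guest vaddr */
            const uint64_t gpa = 0x100000;  /* example page-aligned guest paddr */

            virt_pg_map(vm, gva, gpa, 0 /* pgd_memslot */);
            TEST_ASSERT(addr_gva2gpa(vm, gva) == gpa,
                    "Unexpected translation for gva 0x%lx", gva);
    }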
/* Virtual Translation Tables Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps to the FILE stream given by stream, the contents of all the
 * virtual translation tables for the VM given by vm.
 */
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
        struct pageMapL4Entry *pml4e, *pml4e_start;
        struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
        struct pageDirectoryEntry *pde, *pde_start;
        struct pageTableEntry *pte, *pte_start;

        if (!vm->pgd_created)
                return;

        fprintf(stream, "%*s                                          "
                "                no\n", indent, "");
        fprintf(stream, "%*s      index hvaddr         gpaddr         "
                "addr         w exec dirty\n",
                indent, "");
        pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
                vm->pgd);
        for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
                pml4e = &pml4e_start[n1];
                if (!pml4e->present)
                        continue;
                fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
                        " %u\n",
                        indent, "",
                        pml4e - pml4e_start, pml4e,
                        addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
                        pml4e->writable, pml4e->execute_disable);

                pdpe_start = addr_gpa2hva(vm, pml4e->address
                        * vm->page_size);
                for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
                        pdpe = &pdpe_start[n2];
                        if (!pdpe->present)
                                continue;
                        fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
                                "%u  %u\n",
                                indent, "",
                                pdpe - pdpe_start, pdpe,
                                addr_hva2gpa(vm, pdpe),
                                (uint64_t) pdpe->address, pdpe->writable,
                                pdpe->execute_disable);

                        pde_start = addr_gpa2hva(vm,
                                pdpe->address * vm->page_size);
                        for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
                                pde = &pde_start[n3];
                                if (!pde->present)
                                        continue;
                                fprintf(stream, "%*spde   0x%-3zx %p "
                                        "0x%-12lx 0x%-10lx %u  %u\n",
                                        indent, "", pde - pde_start, pde,
                                        addr_hva2gpa(vm, pde),
                                        (uint64_t) pde->address, pde->writable,
                                        pde->execute_disable);

                                pte_start = addr_gpa2hva(vm,
                                        pde->address * vm->page_size);
                                for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
                                        pte = &pte_start[n4];
                                        if (!pte->present)
                                                continue;
                                        /* Final column is the guest virtual
                                         * page number, i.e. vaddr >> 12.
                                         */
                                        fprintf(stream, "%*spte   0x%-3zx %p "
                                                "0x%-12lx 0x%-10lx %u  %u "
                                                "    %u    0x%-10lx\n",
                                                indent, "",
                                                pte - pte_start, pte,
                                                addr_hva2gpa(vm, pte),
                                                (uint64_t) pte->address,
                                                pte->writable,
                                                pte->execute_disable,
                                                pte->dirty,
                                                ((uint64_t) n1 << 27)
                                                | ((uint64_t) n2 << 18)
                                                | ((uint64_t) n3 << 9)
                                                | ((uint64_t) n4));
                                }
                        }
                }
        }
}
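Note the shifts in that last column are 27/18/9/0 rather than 39/30/21/12: virt_dump() prints the guest virtual page number, not the virtual address. Recovering the address from the printed indices is a single extra shift; a small self-contained helper to illustrate (the name is mine, not part of the library):

    #include <stdint.h>

    /* Rebuild a guest virtual address from the per-level indices printed
     * by virt_dump() (n1 = PML4, n2 = PDPT, n3 = PD, n4 = PT), ignoring
     * sign extension of bit 47.
     */
    static uint64_t indices_to_vaddr(uint16_t n1, uint16_t n2, uint16_t n3,
            uint16_t n4)
    {
            uint64_t vpn = ((uint64_t) n1 << 27) | ((uint64_t) n2 << 18)
                    | ((uint64_t) n3 << 9) | (uint64_t) n4;

            return vpn << 12; /* == n1 << 39 | n2 << 30 | n3 << 21 | n4 << 12 */
    }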

/* Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->unusable = true;
}

/* Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a flat 64-bit kernel
 * code segment with the selector value given by selector.
 */
static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
        struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->selector = selector;
        segp->limit = 0xFFFFFFFFu;
        segp->s = 0x1; /* kTypeCodeData */
        segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
                                          * | kFlagCodeReadable
                                          */
        segp->g = true;
        segp->l = true;
        segp->present = true;
}

/* Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a flat kernel data
 * segment with the selector value given by selector.
 */
static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
        struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->selector = selector;
        segp->limit = 0xFFFFFFFFu;
        segp->s = 0x1; /* kTypeCodeData */
        segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
                                          * | kFlagDataWritable
                                          */
        segp->g = true;
        segp->present = true;
}

/* Address Guest Virtual to Guest Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Walks the virtual translation tables of the VM given by vm to translate
 * the VM virtual address given by gva into a VM physical address.
 * A TEST_ASSERT failure occurs if no mapping exists for the VM virtual
 * address.
 */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
        uint16_t index[4];
        struct pageMapL4Entry *pml4e;
        struct pageDirectoryPointerEntry *pdpe;
        struct pageDirectoryEntry *pde;
        struct pageTableEntry *pte;

        TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

        index[0] = (gva >> 12) & 0x1ffu;
        index[1] = (gva >> 21) & 0x1ffu;
        index[2] = (gva >> 30) & 0x1ffu;
        index[3] = (gva >> 39) & 0x1ffu;

        if (!vm->pgd_created)
                goto unmapped_gva;
        pml4e = addr_gpa2hva(vm, vm->pgd);
        if (!pml4e[index[3]].present)
                goto unmapped_gva;

        pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
        if (!pdpe[index[2]].present)
                goto unmapped_gva;

        pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
        if (!pde[index[1]].present)
                goto unmapped_gva;

        pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
        if (!pte[index[0]].present)
                goto unmapped_gva;

        return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
        TEST_ASSERT(false, "No mapping for vm virtual address, "
                "gva: 0x%lx", gva);
}
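The translation helpers layer on each other: addr_gva2hva() in the generic library presumably composes addr_gva2gpa() with addr_gpa2hva(), which lets host code touch mapped guest memory directly. A sketch of that pattern (the helper name is mine):

    #include <stdint.h>

    #include "kvm_util.h"

    /* Write a 64-bit value into guest memory through the host mapping
     * of the guest virtual address.
     */
    static void poke_guest_u64(struct kvm_vm *vm, vm_vaddr_t gva, uint64_t val)
    {
            uint64_t *hva = addr_gva2hva(vm, gva);

            *hva = val;
    }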

/* Sets the initial mode-specific register state (control registers, EFER
 * and segments) for the vCPU given by vcpuid.
 */
void vcpu_setup(struct kvm_vm *vm, int vcpuid)
{
        struct kvm_sregs sregs;

        /* Set mode-specific system register values. */
        vcpu_sregs_get(vm, vcpuid, &sregs);

        switch (vm->mode) {
        case VM_MODE_FLAT48PG:
                sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
                sregs.cr4 |= X86_CR4_PAE;
                sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

                kvm_seg_set_unusable(&sregs.ldt);
                kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);
                kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);
                kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);
                break;

        default:
                TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
        }
        vcpu_sregs_set(vm, vcpuid, &sregs);

        /* If the virtual translation tables have been set up, point CR3 at
         * them.  It's okay if they haven't been yet: the code that sets up
         * the virtual translation tables goes back through any vCPUs that
         * have already been created and sets their CR3 values.
         */
        if (vm->pgd_created) {
                struct kvm_sregs sregs2;

                vcpu_sregs_get(vm, vcpuid, &sregs2);

                sregs2.cr3 = vm->pgd;
                vcpu_sregs_set(vm, vcpuid, &sregs2);
        }
}

/* Adds a vCPU with reasonable defaults (i.e., a stack)
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - The id of the VCPU to add to the VM.
 *   guest_code - The vCPU's entry point
 */
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
        struct kvm_mp_state mp_state;
        struct kvm_regs regs;
        vm_vaddr_t stack_vaddr;

        stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
                DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

        /* Create VCPU */
        vm_vcpu_add(vm, vcpuid);

        /* Setup guest general purpose registers */
        vcpu_regs_get(vm, vcpuid, &regs);
        regs.rflags = regs.rflags | 0x2; /* rflags bit 1 is always set */
        regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
        regs.rip = (unsigned long) guest_code;
        vcpu_regs_set(vm, vcpuid, &regs);

        /* Setup the MP state */
        mp_state.mp_state = 0; /* KVM_MP_STATE_RUNNABLE */
        vcpu_set_mp_state(vm, vcpuid, &mp_state);
}

/* VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: None
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
        uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        int rc;

        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

        rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
        TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
                rc, errno);
}

/* Create a VM with reasonable defaults
 *
 * Input Args:
 *   vcpuid - The id of the single VCPU to add to the VM.
 *   guest_code - The vCPU's entry point
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 */
struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
{
        struct kvm_vm *vm;

        /* Create VM */
        vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);

        /* Setup IRQ Chip */
        vm_create_irqchip(vm);

        /* Add the first vCPU. */
        vm_vcpu_add_default(vm, vcpuid, guest_code);

        return vm;
}
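vcpu_set_cpuid() takes a caller-built table. Since struct kvm_cpuid2 ends in a flexible array member, allocate it with room for the entries. A minimal single-leaf sketch; a real test would normally start from the host-supported leaves rather than inventing one, and the helper name is mine:

    #include <stdlib.h>

    #include "kvm_util.h"
    #include "x86.h"

    static void set_one_cpuid_leaf(struct kvm_vm *vm, uint32_t vcpuid)
    {
            struct kvm_cpuid2 *cpuid;

            /* Room for the header plus one kvm_cpuid_entry2. */
            cpuid = calloc(1, sizeof(*cpuid) + sizeof(struct kvm_cpuid_entry2));
            TEST_ASSERT(cpuid != NULL, "calloc failed");

            cpuid->nent = 1;
            cpuid->entries[0].function = 0; /* leaf 0: max leaf and vendor id */
            cpuid->entries[0].eax = 1;      /* highest standard leaf */

            vcpu_set_cpuid(vm, vcpuid, cpuid);
            free(cpuid);
    }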
+54
tools/testing/selftests/kvm/set_sregs_test.c
/*
 * KVM_SET_SREGS tests
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * This is a regression test for the bug fixed by the following commit:
 * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
 *
 * That bug allowed a user-mode program that called the KVM_SET_SREGS
 * ioctl to put a VCPU's local APIC into an invalid state.
 */
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "x86.h"

#define VCPU_ID 5

int main(int argc, char *argv[])
{
        struct kvm_sregs sregs;
        struct kvm_vm *vm;
        int rc;

        /* Tell stdout not to buffer its content */
        setbuf(stdout, NULL);

        /* Create VM; no guest code is needed, the vCPU is never run. */
        vm = vm_create_default(VCPU_ID, NULL);

        vcpu_sregs_get(vm, VCPU_ID, &sregs);

        /* Bit 10 enables x2APIC mode without the global enable (bit 11),
         * an invalid combination that KVM must reject.
         */
        sregs.apic_base = 1 << 10;
        rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
        TEST_ASSERT(rc, "Set IA32_APIC_BASE to 0x%llx (invalid) "
                "should have failed", sregs.apic_base);

        /* Bit 11 alone (xAPIC globally enabled) is valid. */
        sregs.apic_base = 1 << 11;
        rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
        TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to 0x%llx (valid)",
                sregs.apic_base);

        kvm_vm_free(vm);

        return 0;
}
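A natural extension, not part of this patch, would be to read the registers back after the successful KVM_SET_SREGS and confirm the valid value stuck. A sketch of a helper that could be called from main() before kvm_vm_free() (the helper name is mine):

    /* Verify that IA32_APIC_BASE reads back as the value last set. */
    static void check_apic_base(struct kvm_vm *vm, uint32_t vcpuid,
            uint64_t expected)
    {
            struct kvm_sregs sregs;

            vcpu_sregs_get(vm, vcpuid, &sregs);
            TEST_ASSERT(sregs.apic_base == expected,
                    "apic_base readback mismatch, got: 0x%llx want: 0x%lx",
                    sregs.apic_base, expected);
    }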