Linux kernel mirror (for testing) — git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"s390:
- optimization for the exitless interrupt support that was merged in 4.16-rc1
- improve the branch prediction blocking for nested KVM
- replace some jump tables with switch statements to improve expoline performance
- fixes for multiple epoch facility

ARM:
- fix the interaction of userspace irqchip VMs with in-kernel irqchip VMs
- make sure we can build 32-bit KVM/ARM with gcc-8

x86:
- fixes for AMD SEV
- fixes for Intel nested VMX, emulated UMIP and a dump_stack() on VM startup
- fixes for async page fault migration
- small optimization to PV TLB flush (new in 4.16-rc1)
- syzkaller fixes

Generic:
- compiler warning fixes
- syzkaller fixes
- more improvements to the kvm_stat tool

Two more small Spectre fixes are going to reach you via Ingo"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (40 commits)
KVM: SVM: Fix SEV LAUNCH_SECRET command
KVM: SVM: install RSM intercept
KVM: SVM: no need to call access_ok() in LAUNCH_MEASURE command
include: psp-sev: Capitalize invalid length enum
crypto: ccp: Fix sparse, use plain integer as NULL pointer
KVM: X86: Avoid traversing all the cpus for pv tlb flush when steal time is disabled
x86/kvm: Make parse_no_xxx __init for kvm
KVM: x86: fix backward migration with async_PF
kvm: fix warning for non-x86 builds
kvm: fix warning for CONFIG_HAVE_KVM_EVENTFD builds
tools/kvm_stat: print 'Total' line for multiple events only
tools/kvm_stat: group child events indented after parent
tools/kvm_stat: separate drilldown and fields filtering
tools/kvm_stat: eliminate extra guest/pid selection dialog
tools/kvm_stat: mark private methods as such
tools/kvm_stat: fix debugfs handling
tools/kvm_stat: print error on invalid regex
tools/kvm_stat: fix crash when filtering out all non-child trace events
tools/kvm_stat: avoid 'is' for equality checks
tools/kvm_stat: use a more pythonic way to iterate over dictionaries
...

+699 -517
+4
Documentation/virtual/kvm/cpuid.txt
··· 58 58 || || before enabling paravirtualized 59 59 || || tlb flush. 60 60 ------------------------------------------------------------------------------ 61 + KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit 62 + || || can be enabled by setting bit 2 63 + || || when writing to msr 0x4b564d02 64 + ------------------------------------------------------------------------------ 61 65 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side 62 66 || || per-cpu warps are expected in 63 67 || || kvmclock.
+2 -1
Documentation/virtual/kvm/msr.txt
··· 170 170 when asynchronous page faults are enabled on the vcpu 0 when 171 171 disabled. Bit 1 is 1 if asynchronous page faults can be injected 172 172 when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults 173 - are delivered to L1 as #PF vmexits. 173 + are delivered to L1 as #PF vmexits. Bit 2 can be set only if 174 + KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID. 174 175 175 176 First 4 byte of 64 byte memory location will be written to by 176 177 the hypervisor at the time of asynchronous page fault (APF)
+5
arch/arm/kvm/hyp/Makefile
··· 7 7 8 8 KVM=../../../../virt/kvm 9 9 10 + CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) 11 + 10 12 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o 11 13 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o 12 14 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o ··· 17 15 obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o 18 16 obj-$(CONFIG_KVM_ARM_HOST) += vfp.o 19 17 obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o 18 + CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE) 19 + 20 20 obj-$(CONFIG_KVM_ARM_HOST) += entry.o 21 21 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o 22 22 obj-$(CONFIG_KVM_ARM_HOST) += switch.o 23 + CFLAGS_switch.o += $(CFLAGS_ARMV7VE) 23 24 obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
+4
arch/arm/kvm/hyp/banked-sr.c
··· 20 20 21 21 #include <asm/kvm_hyp.h> 22 22 23 + /* 24 + * gcc before 4.9 doesn't understand -march=armv7ve, so we have to 25 + * trick the assembler. 26 + */ 23 27 __asm__(".arch_extension virt"); 24 28 25 29 void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
+29 -22
arch/s390/kvm/intercept.c
··· 22 22 #include "trace.h" 23 23 #include "trace-s390.h" 24 24 25 - 26 - static const intercept_handler_t instruction_handlers[256] = { 27 - [0x01] = kvm_s390_handle_01, 28 - [0x82] = kvm_s390_handle_lpsw, 29 - [0x83] = kvm_s390_handle_diag, 30 - [0xaa] = kvm_s390_handle_aa, 31 - [0xae] = kvm_s390_handle_sigp, 32 - [0xb2] = kvm_s390_handle_b2, 33 - [0xb6] = kvm_s390_handle_stctl, 34 - [0xb7] = kvm_s390_handle_lctl, 35 - [0xb9] = kvm_s390_handle_b9, 36 - [0xe3] = kvm_s390_handle_e3, 37 - [0xe5] = kvm_s390_handle_e5, 38 - [0xeb] = kvm_s390_handle_eb, 39 - }; 40 - 41 25 u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) 42 26 { 43 27 struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; ··· 113 129 114 130 static int handle_instruction(struct kvm_vcpu *vcpu) 115 131 { 116 - intercept_handler_t handler; 117 - 118 132 vcpu->stat.exit_instruction++; 119 133 trace_kvm_s390_intercept_instruction(vcpu, 120 134 vcpu->arch.sie_block->ipa, 121 135 vcpu->arch.sie_block->ipb); 122 - handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; 123 - if (handler) 124 - return handler(vcpu); 125 - return -EOPNOTSUPP; 136 + 137 + switch (vcpu->arch.sie_block->ipa >> 8) { 138 + case 0x01: 139 + return kvm_s390_handle_01(vcpu); 140 + case 0x82: 141 + return kvm_s390_handle_lpsw(vcpu); 142 + case 0x83: 143 + return kvm_s390_handle_diag(vcpu); 144 + case 0xaa: 145 + return kvm_s390_handle_aa(vcpu); 146 + case 0xae: 147 + return kvm_s390_handle_sigp(vcpu); 148 + case 0xb2: 149 + return kvm_s390_handle_b2(vcpu); 150 + case 0xb6: 151 + return kvm_s390_handle_stctl(vcpu); 152 + case 0xb7: 153 + return kvm_s390_handle_lctl(vcpu); 154 + case 0xb9: 155 + return kvm_s390_handle_b9(vcpu); 156 + case 0xe3: 157 + return kvm_s390_handle_e3(vcpu); 158 + case 0xe5: 159 + return kvm_s390_handle_e5(vcpu); 160 + case 0xeb: 161 + return kvm_s390_handle_eb(vcpu); 162 + default: 163 + return -EOPNOTSUPP; 164 + } 126 165 } 127 166 128 167 static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
+79 -44
arch/s390/kvm/interrupt.c
··· 169 169 170 170 static int ckc_irq_pending(struct kvm_vcpu *vcpu) 171 171 { 172 - if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm)) 172 + const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); 173 + const u64 ckc = vcpu->arch.sie_block->ckc; 174 + 175 + if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { 176 + if ((s64)ckc >= (s64)now) 177 + return 0; 178 + } else if (ckc >= now) { 173 179 return 0; 180 + } 174 181 return ckc_interrupts_enabled(vcpu); 175 182 } 176 183 ··· 192 185 if (!cpu_timer_interrupts_enabled(vcpu)) 193 186 return 0; 194 187 return kvm_s390_get_cpu_timer(vcpu) >> 63; 195 - } 196 - 197 - static inline int is_ioirq(unsigned long irq_type) 198 - { 199 - return ((irq_type >= IRQ_PEND_IO_ISC_7) && 200 - (irq_type <= IRQ_PEND_IO_ISC_0)); 201 188 } 202 189 203 190 static uint64_t isc_to_isc_bits(int isc) ··· 237 236 return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); 238 237 } 239 238 240 - static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) 239 + static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu) 241 240 { 242 241 return vcpu->kvm->arch.float_int.pending_irqs | 243 - vcpu->arch.local_int.pending_irqs | 242 + vcpu->arch.local_int.pending_irqs; 243 + } 244 + 245 + static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) 246 + { 247 + return pending_irqs_no_gisa(vcpu) | 244 248 kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; 245 249 } 246 250 ··· 343 337 344 338 static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) 345 339 { 346 - if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) 340 + if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) 347 341 return; 348 342 else if (psw_ioint_disabled(vcpu)) 349 343 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); ··· 1017 1011 return rc; 1018 1012 } 1019 1013 1020 - typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); 1021 - 1022 - static const deliver_irq_t deliver_irq_funcs[] = { 1023 - 
[IRQ_PEND_MCHK_EX] = __deliver_machine_check, 1024 - [IRQ_PEND_MCHK_REP] = __deliver_machine_check, 1025 - [IRQ_PEND_PROG] = __deliver_prog, 1026 - [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, 1027 - [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, 1028 - [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, 1029 - [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, 1030 - [IRQ_PEND_RESTART] = __deliver_restart, 1031 - [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, 1032 - [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, 1033 - [IRQ_PEND_EXT_SERVICE] = __deliver_service, 1034 - [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done, 1035 - [IRQ_PEND_VIRTIO] = __deliver_virtio, 1036 - }; 1037 - 1038 1014 /* Check whether an external call is pending (deliverable or not) */ 1039 1015 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) 1040 1016 { ··· 1054 1066 1055 1067 static u64 __calculate_sltime(struct kvm_vcpu *vcpu) 1056 1068 { 1057 - u64 now, cputm, sltime = 0; 1069 + const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); 1070 + const u64 ckc = vcpu->arch.sie_block->ckc; 1071 + u64 cputm, sltime = 0; 1058 1072 1059 1073 if (ckc_interrupts_enabled(vcpu)) { 1060 - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); 1061 - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); 1062 - /* already expired or overflow? 
*/ 1063 - if (!sltime || vcpu->arch.sie_block->ckc <= now) 1074 + if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { 1075 + if ((s64)now < (s64)ckc) 1076 + sltime = tod_to_ns((s64)ckc - (s64)now); 1077 + } else if (now < ckc) { 1078 + sltime = tod_to_ns(ckc - now); 1079 + } 1080 + /* already expired */ 1081 + if (!sltime) 1064 1082 return 0; 1065 1083 if (cpu_timer_interrupts_enabled(vcpu)) { 1066 1084 cputm = kvm_s390_get_cpu_timer(vcpu); ··· 1186 1192 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) 1187 1193 { 1188 1194 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1189 - deliver_irq_t func; 1190 1195 int rc = 0; 1191 1196 unsigned long irq_type; 1192 1197 unsigned long irqs; ··· 1205 1212 while ((irqs = deliverable_irqs(vcpu)) && !rc) { 1206 1213 /* bits are in the reverse order of interrupt priority */ 1207 1214 irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); 1208 - if (is_ioirq(irq_type)) { 1215 + switch (irq_type) { 1216 + case IRQ_PEND_IO_ISC_0: 1217 + case IRQ_PEND_IO_ISC_1: 1218 + case IRQ_PEND_IO_ISC_2: 1219 + case IRQ_PEND_IO_ISC_3: 1220 + case IRQ_PEND_IO_ISC_4: 1221 + case IRQ_PEND_IO_ISC_5: 1222 + case IRQ_PEND_IO_ISC_6: 1223 + case IRQ_PEND_IO_ISC_7: 1209 1224 rc = __deliver_io(vcpu, irq_type); 1210 - } else { 1211 - func = deliver_irq_funcs[irq_type]; 1212 - if (!func) { 1213 - WARN_ON_ONCE(func == NULL); 1214 - clear_bit(irq_type, &li->pending_irqs); 1215 - continue; 1216 - } 1217 - rc = func(vcpu); 1225 + break; 1226 + case IRQ_PEND_MCHK_EX: 1227 + case IRQ_PEND_MCHK_REP: 1228 + rc = __deliver_machine_check(vcpu); 1229 + break; 1230 + case IRQ_PEND_PROG: 1231 + rc = __deliver_prog(vcpu); 1232 + break; 1233 + case IRQ_PEND_EXT_EMERGENCY: 1234 + rc = __deliver_emergency_signal(vcpu); 1235 + break; 1236 + case IRQ_PEND_EXT_EXTERNAL: 1237 + rc = __deliver_external_call(vcpu); 1238 + break; 1239 + case IRQ_PEND_EXT_CLOCK_COMP: 1240 + rc = __deliver_ckc(vcpu); 1241 + break; 1242 + case 
IRQ_PEND_EXT_CPU_TIMER: 1243 + rc = __deliver_cpu_timer(vcpu); 1244 + break; 1245 + case IRQ_PEND_RESTART: 1246 + rc = __deliver_restart(vcpu); 1247 + break; 1248 + case IRQ_PEND_SET_PREFIX: 1249 + rc = __deliver_set_prefix(vcpu); 1250 + break; 1251 + case IRQ_PEND_PFAULT_INIT: 1252 + rc = __deliver_pfault_init(vcpu); 1253 + break; 1254 + case IRQ_PEND_EXT_SERVICE: 1255 + rc = __deliver_service(vcpu); 1256 + break; 1257 + case IRQ_PEND_PFAULT_DONE: 1258 + rc = __deliver_pfault_done(vcpu); 1259 + break; 1260 + case IRQ_PEND_VIRTIO: 1261 + rc = __deliver_virtio(vcpu); 1262 + break; 1263 + default: 1264 + WARN_ONCE(1, "Unknown pending irq type %ld", irq_type); 1265 + clear_bit(irq_type, &li->pending_irqs); 1218 1266 } 1219 1267 } 1220 1268 ··· 1735 1701 kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); 1736 1702 break; 1737 1703 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 1738 - kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); 1704 + if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa)) 1705 + kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); 1739 1706 break; 1740 1707 default: 1741 1708 kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
+45 -34
arch/s390/kvm/kvm-s390.c
··· 179 179 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 180 180 unsigned long end); 181 181 182 + static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) 183 + { 184 + u8 delta_idx = 0; 185 + 186 + /* 187 + * The TOD jumps by delta, we have to compensate this by adding 188 + * -delta to the epoch. 189 + */ 190 + delta = -delta; 191 + 192 + /* sign-extension - we're adding to signed values below */ 193 + if ((s64)delta < 0) 194 + delta_idx = -1; 195 + 196 + scb->epoch += delta; 197 + if (scb->ecd & ECD_MEF) { 198 + scb->epdx += delta_idx; 199 + if (scb->epoch < delta) 200 + scb->epdx += 1; 201 + } 202 + } 203 + 182 204 /* 183 205 * This callback is executed during stop_machine(). All CPUs are therefore 184 206 * temporarily stopped. In order not to change guest behavior, we have to ··· 216 194 unsigned long long *delta = v; 217 195 218 196 list_for_each_entry(kvm, &vm_list, vm_list) { 219 - kvm->arch.epoch -= *delta; 220 197 kvm_for_each_vcpu(i, vcpu, kvm) { 221 - vcpu->arch.sie_block->epoch -= *delta; 198 + kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); 199 + if (i == 0) { 200 + kvm->arch.epoch = vcpu->arch.sie_block->epoch; 201 + kvm->arch.epdx = vcpu->arch.sie_block->epdx; 202 + } 222 203 if (vcpu->arch.cputm_enabled) 223 204 vcpu->arch.cputm_start += *delta; 224 205 if (vcpu->arch.vsie_block) 225 - vcpu->arch.vsie_block->epoch -= *delta; 206 + kvm_clock_sync_scb(vcpu->arch.vsie_block, 207 + *delta); 226 208 } 227 209 } 228 210 return NOTIFY_OK; ··· 928 902 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) 929 903 return -EFAULT; 930 904 931 - if (test_kvm_facility(kvm, 139)) 932 - kvm_s390_set_tod_clock_ext(kvm, &gtod); 933 - else if (gtod.epoch_idx == 0) 934 - kvm_s390_set_tod_clock(kvm, gtod.tod); 935 - else 905 + if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) 936 906 return -EINVAL; 907 + kvm_s390_set_tod_clock(kvm, &gtod); 937 908 938 909 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD 
base: 0x%llx", 939 910 gtod.epoch_idx, gtod.tod); ··· 955 932 956 933 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 957 934 { 958 - u64 gtod; 935 + struct kvm_s390_vm_tod_clock gtod = { 0 }; 959 936 960 - if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) 937 + if (copy_from_user(&gtod.tod, (void __user *)attr->addr, 938 + sizeof(gtod.tod))) 961 939 return -EFAULT; 962 940 963 - kvm_s390_set_tod_clock(kvm, gtod); 964 - VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); 941 + kvm_s390_set_tod_clock(kvm, &gtod); 942 + VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); 965 943 return 0; 966 944 } 967 945 ··· 2413 2389 mutex_lock(&vcpu->kvm->lock); 2414 2390 preempt_disable(); 2415 2391 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 2392 + vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 2416 2393 preempt_enable(); 2417 2394 mutex_unlock(&vcpu->kvm->lock); 2418 2395 if (!kvm_is_ucontrol(vcpu->kvm)) { ··· 3046 3021 return 0; 3047 3022 } 3048 3023 3049 - void kvm_s390_set_tod_clock_ext(struct kvm *kvm, 3050 - const struct kvm_s390_vm_tod_clock *gtod) 3024 + void kvm_s390_set_tod_clock(struct kvm *kvm, 3025 + const struct kvm_s390_vm_tod_clock *gtod) 3051 3026 { 3052 3027 struct kvm_vcpu *vcpu; 3053 3028 struct kvm_s390_tod_clock_ext htod; ··· 3059 3034 get_tod_clock_ext((char *)&htod); 3060 3035 3061 3036 kvm->arch.epoch = gtod->tod - htod.tod; 3062 - kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 3063 - 3064 - if (kvm->arch.epoch > gtod->tod) 3065 - kvm->arch.epdx -= 1; 3037 + kvm->arch.epdx = 0; 3038 + if (test_kvm_facility(kvm, 139)) { 3039 + kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 3040 + if (kvm->arch.epoch > gtod->tod) 3041 + kvm->arch.epdx -= 1; 3042 + } 3066 3043 3067 3044 kvm_s390_vcpu_block_all(kvm); 3068 3045 kvm_for_each_vcpu(i, vcpu, kvm) { ··· 3072 3045 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3073 3046 } 3074 3047 3075 - kvm_s390_vcpu_unblock_all(kvm); 3076 - preempt_enable(); 3077 
- mutex_unlock(&kvm->lock); 3078 - } 3079 - 3080 - void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) 3081 - { 3082 - struct kvm_vcpu *vcpu; 3083 - int i; 3084 - 3085 - mutex_lock(&kvm->lock); 3086 - preempt_disable(); 3087 - kvm->arch.epoch = tod - get_tod_clock(); 3088 - kvm_s390_vcpu_block_all(kvm); 3089 - kvm_for_each_vcpu(i, vcpu, kvm) 3090 - vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3091 3048 kvm_s390_vcpu_unblock_all(kvm); 3092 3049 preempt_enable(); 3093 3050 mutex_unlock(&kvm->lock);
+2 -5
arch/s390/kvm/kvm-s390.h
··· 19 19 #include <asm/processor.h> 20 20 #include <asm/sclp.h> 21 21 22 - typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); 23 - 24 22 /* Transactional Memory Execution related macros */ 25 23 #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) 26 24 #define TDB_FORMAT1 1 ··· 281 283 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); 282 284 283 285 /* implemented in kvm-s390.c */ 284 - void kvm_s390_set_tod_clock_ext(struct kvm *kvm, 285 - const struct kvm_s390_vm_tod_clock *gtod); 286 - void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); 286 + void kvm_s390_set_tod_clock(struct kvm *kvm, 287 + const struct kvm_s390_vm_tod_clock *gtod); 287 288 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); 288 289 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); 289 290 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+96 -96
arch/s390/kvm/priv.c
··· 85 85 /* Handle SCK (SET CLOCK) interception */ 86 86 static int handle_set_clock(struct kvm_vcpu *vcpu) 87 87 { 88 + struct kvm_s390_vm_tod_clock gtod = { 0 }; 88 89 int rc; 89 90 u8 ar; 90 - u64 op2, val; 91 + u64 op2; 91 92 92 93 vcpu->stat.instruction_sck++; 93 94 ··· 98 97 op2 = kvm_s390_get_base_disp_s(vcpu, &ar); 99 98 if (op2 & 7) /* Operand must be on a doubleword boundary */ 100 99 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 101 - rc = read_guest(vcpu, op2, ar, &val, sizeof(val)); 100 + rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod)); 102 101 if (rc) 103 102 return kvm_s390_inject_prog_cond(vcpu, rc); 104 103 105 - VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); 106 - kvm_s390_set_tod_clock(vcpu->kvm, val); 104 + VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); 105 + kvm_s390_set_tod_clock(vcpu->kvm, &gtod); 107 106 108 107 kvm_s390_set_psw_cc(vcpu, 0); 109 108 return 0; ··· 796 795 return rc; 797 796 } 798 797 799 - static const intercept_handler_t b2_handlers[256] = { 800 - [0x02] = handle_stidp, 801 - [0x04] = handle_set_clock, 802 - [0x10] = handle_set_prefix, 803 - [0x11] = handle_store_prefix, 804 - [0x12] = handle_store_cpu_address, 805 - [0x14] = kvm_s390_handle_vsie, 806 - [0x21] = handle_ipte_interlock, 807 - [0x29] = handle_iske, 808 - [0x2a] = handle_rrbe, 809 - [0x2b] = handle_sske, 810 - [0x2c] = handle_test_block, 811 - [0x30] = handle_io_inst, 812 - [0x31] = handle_io_inst, 813 - [0x32] = handle_io_inst, 814 - [0x33] = handle_io_inst, 815 - [0x34] = handle_io_inst, 816 - [0x35] = handle_io_inst, 817 - [0x36] = handle_io_inst, 818 - [0x37] = handle_io_inst, 819 - [0x38] = handle_io_inst, 820 - [0x39] = handle_io_inst, 821 - [0x3a] = handle_io_inst, 822 - [0x3b] = handle_io_inst, 823 - [0x3c] = handle_io_inst, 824 - [0x50] = handle_ipte_interlock, 825 - [0x56] = handle_sthyi, 826 - [0x5f] = handle_io_inst, 827 - [0x74] = handle_io_inst, 828 - [0x76] = handle_io_inst, 829 - 
[0x7d] = handle_stsi, 830 - [0xb1] = handle_stfl, 831 - [0xb2] = handle_lpswe, 832 - }; 833 - 834 798 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) 835 799 { 836 - intercept_handler_t handler; 837 - 838 - /* 839 - * A lot of B2 instructions are priviledged. Here we check for 840 - * the privileged ones, that we can handle in the kernel. 841 - * Anything else goes to userspace. 842 - */ 843 - handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; 844 - if (handler) 845 - return handler(vcpu); 846 - 847 - return -EOPNOTSUPP; 800 + switch (vcpu->arch.sie_block->ipa & 0x00ff) { 801 + case 0x02: 802 + return handle_stidp(vcpu); 803 + case 0x04: 804 + return handle_set_clock(vcpu); 805 + case 0x10: 806 + return handle_set_prefix(vcpu); 807 + case 0x11: 808 + return handle_store_prefix(vcpu); 809 + case 0x12: 810 + return handle_store_cpu_address(vcpu); 811 + case 0x14: 812 + return kvm_s390_handle_vsie(vcpu); 813 + case 0x21: 814 + case 0x50: 815 + return handle_ipte_interlock(vcpu); 816 + case 0x29: 817 + return handle_iske(vcpu); 818 + case 0x2a: 819 + return handle_rrbe(vcpu); 820 + case 0x2b: 821 + return handle_sske(vcpu); 822 + case 0x2c: 823 + return handle_test_block(vcpu); 824 + case 0x30: 825 + case 0x31: 826 + case 0x32: 827 + case 0x33: 828 + case 0x34: 829 + case 0x35: 830 + case 0x36: 831 + case 0x37: 832 + case 0x38: 833 + case 0x39: 834 + case 0x3a: 835 + case 0x3b: 836 + case 0x3c: 837 + case 0x5f: 838 + case 0x74: 839 + case 0x76: 840 + return handle_io_inst(vcpu); 841 + case 0x56: 842 + return handle_sthyi(vcpu); 843 + case 0x7d: 844 + return handle_stsi(vcpu); 845 + case 0xb1: 846 + return handle_stfl(vcpu); 847 + case 0xb2: 848 + return handle_lpswe(vcpu); 849 + default: 850 + return -EOPNOTSUPP; 851 + } 848 852 } 849 853 850 854 static int handle_epsw(struct kvm_vcpu *vcpu) ··· 1111 1105 return 0; 1112 1106 } 1113 1107 1114 - static const intercept_handler_t b9_handlers[256] = { 1115 - [0x8a] = handle_ipte_interlock, 1116 - [0x8d] = handle_epsw, 
1117 - [0x8e] = handle_ipte_interlock, 1118 - [0x8f] = handle_ipte_interlock, 1119 - [0xab] = handle_essa, 1120 - [0xaf] = handle_pfmf, 1121 - }; 1122 - 1123 1108 int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) 1124 1109 { 1125 - intercept_handler_t handler; 1126 - 1127 - /* This is handled just as for the B2 instructions. */ 1128 - handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; 1129 - if (handler) 1130 - return handler(vcpu); 1131 - 1132 - return -EOPNOTSUPP; 1110 + switch (vcpu->arch.sie_block->ipa & 0x00ff) { 1111 + case 0x8a: 1112 + case 0x8e: 1113 + case 0x8f: 1114 + return handle_ipte_interlock(vcpu); 1115 + case 0x8d: 1116 + return handle_epsw(vcpu); 1117 + case 0xab: 1118 + return handle_essa(vcpu); 1119 + case 0xaf: 1120 + return handle_pfmf(vcpu); 1121 + default: 1122 + return -EOPNOTSUPP; 1123 + } 1133 1124 } 1134 1125 1135 1126 int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) ··· 1274 1271 return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; 1275 1272 } 1276 1273 1277 - static const intercept_handler_t eb_handlers[256] = { 1278 - [0x2f] = handle_lctlg, 1279 - [0x25] = handle_stctg, 1280 - [0x60] = handle_ri, 1281 - [0x61] = handle_ri, 1282 - [0x62] = handle_ri, 1283 - }; 1284 - 1285 1274 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) 1286 1275 { 1287 - intercept_handler_t handler; 1288 - 1289 - handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; 1290 - if (handler) 1291 - return handler(vcpu); 1292 - return -EOPNOTSUPP; 1276 + switch (vcpu->arch.sie_block->ipb & 0x000000ff) { 1277 + case 0x25: 1278 + return handle_stctg(vcpu); 1279 + case 0x2f: 1280 + return handle_lctlg(vcpu); 1281 + case 0x60: 1282 + case 0x61: 1283 + case 0x62: 1284 + return handle_ri(vcpu); 1285 + default: 1286 + return -EOPNOTSUPP; 1287 + } 1293 1288 } 1294 1289 1295 1290 static int handle_tprot(struct kvm_vcpu *vcpu) ··· 1347 1346 1348 1347 int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) 1349 1348 { 1350 - /* For e5xx... 
instructions we only handle TPROT */ 1351 - if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) 1349 + switch (vcpu->arch.sie_block->ipa & 0x00ff) { 1350 + case 0x01: 1352 1351 return handle_tprot(vcpu); 1353 - return -EOPNOTSUPP; 1352 + default: 1353 + return -EOPNOTSUPP; 1354 + } 1354 1355 } 1355 1356 1356 1357 static int handle_sckpf(struct kvm_vcpu *vcpu) ··· 1383 1380 return 0; 1384 1381 } 1385 1382 1386 - static const intercept_handler_t x01_handlers[256] = { 1387 - [0x04] = handle_ptff, 1388 - [0x07] = handle_sckpf, 1389 - }; 1390 - 1391 1383 int kvm_s390_handle_01(struct kvm_vcpu *vcpu) 1392 1384 { 1393 - intercept_handler_t handler; 1394 - 1395 - handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; 1396 - if (handler) 1397 - return handler(vcpu); 1398 - return -EOPNOTSUPP; 1385 + switch (vcpu->arch.sie_block->ipa & 0x00ff) { 1386 + case 0x04: 1387 + return handle_ptff(vcpu); 1388 + case 0x07: 1389 + return handle_sckpf(vcpu); 1390 + default: 1391 + return -EOPNOTSUPP; 1392 + } 1399 1393 }
+20
arch/s390/kvm/vsie.c
··· 821 821 { 822 822 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; 823 823 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; 824 + int guest_bp_isolation; 824 825 int rc; 825 826 826 827 handle_last_fault(vcpu, vsie_page); ··· 832 831 s390_handle_mcck(); 833 832 834 833 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 834 + 835 + /* save current guest state of bp isolation override */ 836 + guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST); 837 + 838 + /* 839 + * The guest is running with BPBC, so we have to force it on for our 840 + * nested guest. This is done by enabling BPBC globally, so the BPBC 841 + * control in the SCB (which the nested guest can modify) is simply 842 + * ignored. 843 + */ 844 + if (test_kvm_facility(vcpu->kvm, 82) && 845 + vcpu->arch.sie_block->fpf & FPF_BPBC) 846 + set_thread_flag(TIF_ISOLATE_BP_GUEST); 847 + 835 848 local_irq_disable(); 836 849 guest_enter_irqoff(); 837 850 local_irq_enable(); ··· 855 840 local_irq_disable(); 856 841 guest_exit_irqoff(); 857 842 local_irq_enable(); 843 + 844 + /* restore guest state for bp isolation override */ 845 + if (!guest_bp_isolation) 846 + clear_thread_flag(TIF_ISOLATE_BP_GUEST); 847 + 858 848 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 859 849 860 850 if (rc == -EINTR) {
-3
arch/x86/include/asm/kvm_host.h
··· 1464 1464 #define put_smstate(type, buf, offset, val) \ 1465 1465 *(type *)((buf) + (offset) - 0x7e00) = val 1466 1466 1467 - void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, 1468 - unsigned long start, unsigned long end); 1469 - 1470 1467 #endif /* _ASM_X86_KVM_HOST_H */
+1
arch/x86/include/uapi/asm/kvm_para.h
··· 26 26 #define KVM_FEATURE_PV_EOI 6 27 27 #define KVM_FEATURE_PV_UNHALT 7 28 28 #define KVM_FEATURE_PV_TLB_FLUSH 9 29 + #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 29 30 30 31 /* The last 8 bits are used to indicate how to interpret the flags field 31 32 * in pvclock structure. If no bits are set, all flags are ignored.
+11 -9
arch/x86/kernel/kvm.c
··· 49 49 50 50 static int kvmapf = 1; 51 51 52 - static int parse_no_kvmapf(char *arg) 52 + static int __init parse_no_kvmapf(char *arg) 53 53 { 54 54 kvmapf = 0; 55 55 return 0; ··· 58 58 early_param("no-kvmapf", parse_no_kvmapf); 59 59 60 60 static int steal_acc = 1; 61 - static int parse_no_stealacc(char *arg) 61 + static int __init parse_no_stealacc(char *arg) 62 62 { 63 63 steal_acc = 0; 64 64 return 0; ··· 67 67 early_param("no-steal-acc", parse_no_stealacc); 68 68 69 69 static int kvmclock_vsyscall = 1; 70 - static int parse_no_kvmclock_vsyscall(char *arg) 70 + static int __init parse_no_kvmclock_vsyscall(char *arg) 71 71 { 72 72 kvmclock_vsyscall = 0; 73 73 return 0; ··· 341 341 #endif 342 342 pa |= KVM_ASYNC_PF_ENABLED; 343 343 344 - /* Async page fault support for L1 hypervisor is optional */ 345 - if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, 346 - (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) 347 - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); 344 + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) 345 + pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; 346 + 347 + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); 348 348 __this_cpu_write(apf_reason.enabled, 1); 349 349 printk(KERN_INFO"KVM setup async PF for cpu %d\n", 350 350 smp_processor_id()); ··· 545 545 pv_time_ops.steal_clock = kvm_steal_clock; 546 546 } 547 547 548 - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) 548 + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && 549 + !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) 549 550 pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; 550 551 551 552 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) ··· 634 633 { 635 634 int cpu; 636 635 637 - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { 636 + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && 637 + !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { 638 638 for_each_possible_cpu(cpu) { 639 639 zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), 640 640 GFP_KERNEL, cpu_to_node(cpu));
+2 -1
arch/x86/kvm/cpuid.c
··· 607 607 (1 << KVM_FEATURE_PV_EOI) | 608 608 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | 609 609 (1 << KVM_FEATURE_PV_UNHALT) | 610 - (1 << KVM_FEATURE_PV_TLB_FLUSH); 610 + (1 << KVM_FEATURE_PV_TLB_FLUSH) | 611 + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); 611 612 612 613 if (sched_info_on()) 613 614 entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
-1
arch/x86/kvm/lapic.c
··· 2165 2165 */ 2166 2166 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; 2167 2167 static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ 2168 - kvm_lapic_reset(vcpu, false); 2169 2168 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 2170 2169 2171 2170 return 0;
+1 -1
arch/x86/kvm/mmu.c
··· 3029 3029 return RET_PF_RETRY; 3030 3030 } 3031 3031 3032 - return -EFAULT; 3032 + return RET_PF_EMULATE; 3033 3033 } 3034 3034 3035 3035 static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
+24 -13
arch/x86/kvm/svm.c
··· 300 300 static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); 301 301 module_param(sev, int, 0444); 302 302 303 + static u8 rsm_ins_bytes[] = "\x0f\xaa"; 304 + 303 305 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 304 306 static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); 305 307 static void svm_complete_interrupts(struct vcpu_svm *svm); ··· 1385 1383 set_intercept(svm, INTERCEPT_SKINIT); 1386 1384 set_intercept(svm, INTERCEPT_WBINVD); 1387 1385 set_intercept(svm, INTERCEPT_XSETBV); 1386 + set_intercept(svm, INTERCEPT_RSM); 1388 1387 1389 1388 if (!kvm_mwait_in_guest()) { 1390 1389 set_intercept(svm, INTERCEPT_MONITOR); ··· 3702 3699 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 3703 3700 } 3704 3701 3702 + static int rsm_interception(struct vcpu_svm *svm) 3703 + { 3704 + return x86_emulate_instruction(&svm->vcpu, 0, 0, 3705 + rsm_ins_bytes, 2) == EMULATE_DONE; 3706 + } 3707 + 3705 3708 static int rdpmc_interception(struct vcpu_svm *svm) 3706 3709 { 3707 3710 int err; ··· 4550 4541 [SVM_EXIT_MWAIT] = mwait_interception, 4551 4542 [SVM_EXIT_XSETBV] = xsetbv_interception, 4552 4543 [SVM_EXIT_NPF] = npf_interception, 4553 - [SVM_EXIT_RSM] = emulate_on_interception, 4544 + [SVM_EXIT_RSM] = rsm_interception, 4554 4545 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, 4555 4546 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, 4556 4547 }; ··· 6245 6236 6246 6237 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) 6247 6238 { 6239 + void __user *measure = (void __user *)(uintptr_t)argp->data; 6248 6240 struct kvm_sev_info *sev = &kvm->arch.sev_info; 6249 6241 struct sev_data_launch_measure *data; 6250 6242 struct kvm_sev_launch_measure params; 6243 + void __user *p = NULL; 6251 6244 void *blob = NULL; 6252 6245 int ret; 6253 6246 6254 6247 if (!sev_guest(kvm)) 6255 6248 return -ENOTTY; 6256 6249 6257 - if (copy_from_user(&params, 
(void __user *)(uintptr_t)argp->data, sizeof(params))) 6250 + if (copy_from_user(&params, measure, sizeof(params))) 6258 6251 return -EFAULT; 6259 6252 6260 6253 data = kzalloc(sizeof(*data), GFP_KERNEL); ··· 6267 6256 if (!params.len) 6268 6257 goto cmd; 6269 6258 6270 - if (params.uaddr) { 6259 + p = (void __user *)(uintptr_t)params.uaddr; 6260 + if (p) { 6271 6261 if (params.len > SEV_FW_BLOB_MAX_SIZE) { 6272 6262 ret = -EINVAL; 6273 - goto e_free; 6274 - } 6275 - 6276 - if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) { 6277 - ret = -EFAULT; 6278 6263 goto e_free; 6279 6264 } 6280 6265 ··· 6297 6290 goto e_free_blob; 6298 6291 6299 6292 if (blob) { 6300 - if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) 6293 + if (copy_to_user(p, blob, params.len)) 6301 6294 ret = -EFAULT; 6302 6295 } 6303 6296 6304 6297 done: 6305 6298 params.len = data->len; 6306 - if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) 6299 + if (copy_to_user(measure, &params, sizeof(params))) 6307 6300 ret = -EFAULT; 6308 6301 e_free_blob: 6309 6302 kfree(blob); ··· 6604 6597 struct page **pages; 6605 6598 void *blob, *hdr; 6606 6599 unsigned long n; 6607 - int ret; 6600 + int ret, offset; 6608 6601 6609 6602 if (!sev_guest(kvm)) 6610 6603 return -ENOTTY; ··· 6630 6623 if (!data) 6631 6624 goto e_unpin_memory; 6632 6625 6626 + offset = params.guest_uaddr & (PAGE_SIZE - 1); 6627 + data->guest_address = __sme_page_pa(pages[0]) + offset; 6628 + data->guest_len = params.guest_len; 6629 + 6633 6630 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); 6634 6631 if (IS_ERR(blob)) { 6635 6632 ret = PTR_ERR(blob); ··· 6648 6637 ret = PTR_ERR(hdr); 6649 6638 goto e_free_blob; 6650 6639 } 6651 - data->trans_address = __psp_pa(blob); 6652 - data->trans_len = params.trans_len; 6640 + data->hdr_address = __psp_pa(hdr); 6641 + data->hdr_len = params.hdr_len; 6653 6642 6654 6643 data->handle = sev->handle; 6655 6644 ret = 
sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
+8 -2
arch/x86/kvm/vmx.c
··· 4485 4485 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, 4486 4486 SECONDARY_EXEC_DESC); 4487 4487 hw_cr4 &= ~X86_CR4_UMIP; 4488 - } else 4488 + } else if (!is_guest_mode(vcpu) || 4489 + !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) 4489 4490 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, 4490 4491 SECONDARY_EXEC_DESC); 4491 4492 ··· 11200 11199 if (ret) 11201 11200 return ret; 11202 11201 11203 - if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) 11202 + /* 11203 + * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken 11204 + * by event injection, halt vcpu. 11205 + */ 11206 + if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && 11207 + !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) 11204 11208 return kvm_vcpu_halt(vcpu); 11205 11209 11206 11210 vmx->nested.nested_run_pending = 1;
+3 -4
arch/x86/kvm/x86.c
··· 7975 7975 kvm_vcpu_mtrr_init(vcpu); 7976 7976 vcpu_load(vcpu); 7977 7977 kvm_vcpu_reset(vcpu, false); 7978 + kvm_lapic_reset(vcpu, false); 7978 7979 kvm_mmu_setup(vcpu); 7979 7980 vcpu_put(vcpu); 7980 7981 return 0; ··· 8461 8460 return r; 8462 8461 } 8463 8462 8464 - if (!size) { 8465 - r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); 8466 - WARN_ON(r < 0); 8467 - } 8463 + if (!size) 8464 + vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); 8468 8465 8469 8466 return 0; 8470 8467 }
+4 -4
drivers/crypto/ccp/psp-dev.c
··· 211 211 { 212 212 int ret; 213 213 214 - ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error); 214 + ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); 215 215 if (ret) 216 216 return ret; 217 217 ··· 271 271 return rc; 272 272 } 273 273 274 - return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error); 274 + return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error); 275 275 } 276 276 277 277 static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) ··· 299 299 return rc; 300 300 } 301 301 302 - return __sev_do_cmd_locked(cmd, 0, &argp->error); 302 + return __sev_do_cmd_locked(cmd, NULL, &argp->error); 303 303 } 304 304 305 305 static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) ··· 624 624 625 625 int sev_guest_df_flush(int *error) 626 626 { 627 - return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error); 627 + return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error); 628 628 } 629 629 EXPORT_SYMBOL_GPL(sev_guest_df_flush); 630 630
+5 -1
include/linux/kvm_host.h
··· 1105 1105 { 1106 1106 } 1107 1107 #endif 1108 - void kvm_arch_irq_routing_update(struct kvm *kvm); 1109 1108 1110 1109 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 1111 1110 { ··· 1112 1113 } 1113 1114 1114 1115 #endif /* CONFIG_HAVE_KVM_EVENTFD */ 1116 + 1117 + void kvm_arch_irq_routing_update(struct kvm *kvm); 1115 1118 1116 1119 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) 1117 1120 { ··· 1272 1271 return -ENOIOCTLCMD; 1273 1272 } 1274 1273 #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ 1274 + 1275 + void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, 1276 + unsigned long start, unsigned long end); 1275 1277 1276 1278 #endif
+1 -1
include/uapi/linux/psp-sev.h
··· 42 42 SEV_RET_INVALID_PLATFORM_STATE, 43 43 SEV_RET_INVALID_GUEST_STATE, 44 44 SEV_RET_INAVLID_CONFIG, 45 - SEV_RET_INVALID_len, 45 + SEV_RET_INVALID_LEN, 46 46 SEV_RET_ALREADY_OWNED, 47 47 SEV_RET_INVALID_CERTIFICATE, 48 48 SEV_RET_POLICY_FAILURE,
+286 -219
tools/kvm/kvm_stat/kvm_stat
··· 33 33 import struct 34 34 import re 35 35 import subprocess 36 - from collections import defaultdict 36 + from collections import defaultdict, namedtuple 37 37 38 38 VMX_EXIT_REASONS = { 39 39 'EXCEPTION_NMI': 0, ··· 228 228 } 229 229 230 230 ENCODING = locale.getpreferredencoding(False) 231 + TRACE_FILTER = re.compile(r'^[^\(]*$') 231 232 232 233 233 234 class Arch(object): ··· 261 260 return ArchX86(SVM_EXIT_REASONS) 262 261 return 263 262 263 + def tracepoint_is_child(self, field): 264 + if (TRACE_FILTER.match(field)): 265 + return None 266 + return field.split('(', 1)[0] 267 + 264 268 265 269 class ArchX86(Arch): 266 270 def __init__(self, exit_reasons): 267 271 self.sc_perf_evt_open = 298 268 272 self.ioctl_numbers = IOCTL_NUMBERS 269 273 self.exit_reasons = exit_reasons 274 + 275 + def debugfs_is_child(self, field): 276 + """ Returns name of parent if 'field' is a child, None otherwise """ 277 + return None 270 278 271 279 272 280 class ArchPPC(Arch): ··· 292 282 self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 293 283 self.exit_reasons = {} 294 284 285 + def debugfs_is_child(self, field): 286 + """ Returns name of parent if 'field' is a child, None otherwise """ 287 + return None 288 + 295 289 296 290 class ArchA64(Arch): 297 291 def __init__(self): ··· 303 289 self.ioctl_numbers = IOCTL_NUMBERS 304 290 self.exit_reasons = AARCH64_EXIT_REASONS 305 291 292 + def debugfs_is_child(self, field): 293 + """ Returns name of parent if 'field' is a child, None otherwise """ 294 + return None 295 + 306 296 307 297 class ArchS390(Arch): 308 298 def __init__(self): 309 299 self.sc_perf_evt_open = 331 310 300 self.ioctl_numbers = IOCTL_NUMBERS 311 301 self.exit_reasons = None 302 + 303 + def debugfs_is_child(self, field): 304 + """ Returns name of parent if 'field' is a child, None otherwise """ 305 + if field.startswith('instruction_'): 306 + return 'exit_instruction' 307 + 312 308 313 309 ARCH = Arch.get_arch() 314 310 ··· 354 330 355 331 
PERF_TYPE_TRACEPOINT = 2 356 332 PERF_FORMAT_GROUP = 1 << 3 357 - 358 - PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' 359 - PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' 360 333 361 334 362 335 class Group(object): ··· 397 376 self.syscall = self.libc.syscall 398 377 self.name = name 399 378 self.fd = None 400 - self.setup_event(group, trace_cpu, trace_pid, trace_point, 401 - trace_filter, trace_set) 379 + self._setup_event(group, trace_cpu, trace_pid, trace_point, 380 + trace_filter, trace_set) 402 381 403 382 def __del__(self): 404 383 """Closes the event's file descriptor. ··· 411 390 if self.fd: 412 391 os.close(self.fd) 413 392 414 - def perf_event_open(self, attr, pid, cpu, group_fd, flags): 393 + def _perf_event_open(self, attr, pid, cpu, group_fd, flags): 415 394 """Wrapper for the sys_perf_evt_open() syscall. 416 395 417 396 Used to set up performance events, returns a file descriptor or -1 ··· 430 409 ctypes.c_int(pid), ctypes.c_int(cpu), 431 410 ctypes.c_int(group_fd), ctypes.c_long(flags)) 432 411 433 - def setup_event_attribute(self, trace_set, trace_point): 412 + def _setup_event_attribute(self, trace_set, trace_point): 434 413 """Returns an initialized ctype perf_event_attr struct.""" 435 414 436 415 id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, ··· 440 419 event_attr.config = int(open(id_path).read()) 441 420 return event_attr 442 421 443 - def setup_event(self, group, trace_cpu, trace_pid, trace_point, 444 - trace_filter, trace_set): 422 + def _setup_event(self, group, trace_cpu, trace_pid, trace_point, 423 + trace_filter, trace_set): 445 424 """Sets up the perf event in Linux. 446 425 447 426 Issues the syscall to register the event in the kernel and ··· 449 428 450 429 """ 451 430 452 - event_attr = self.setup_event_attribute(trace_set, trace_point) 431 + event_attr = self._setup_event_attribute(trace_set, trace_point) 453 432 454 433 # First event will be group leader. 
455 434 group_leader = -1 ··· 458 437 if group.events: 459 438 group_leader = group.events[0].fd 460 439 461 - fd = self.perf_event_open(event_attr, trace_pid, 462 - trace_cpu, group_leader, 0) 440 + fd = self._perf_event_open(event_attr, trace_pid, 441 + trace_cpu, group_leader, 0) 463 442 if fd == -1: 464 443 err = ctypes.get_errno() 465 444 raise OSError(err, os.strerror(err), ··· 496 475 497 476 class Provider(object): 498 477 """Encapsulates functionalities used by all providers.""" 478 + def __init__(self, pid): 479 + self.child_events = False 480 + self.pid = pid 481 + 499 482 @staticmethod 500 483 def is_field_wanted(fields_filter, field): 501 484 """Indicate whether field is valid according to fields_filter.""" ··· 525 500 """ 526 501 def __init__(self, pid, fields_filter): 527 502 self.group_leaders = [] 528 - self.filters = self.get_filters() 503 + self.filters = self._get_filters() 529 504 self.update_fields(fields_filter) 530 - self.pid = pid 505 + super(TracepointProvider, self).__init__(pid) 531 506 532 507 @staticmethod 533 - def get_filters(): 508 + def _get_filters(): 534 509 """Returns a dict of trace events, their filter ids and 535 510 the values that can be filtered. 536 511 ··· 546 521 filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) 547 522 return filters 548 523 549 - def get_available_fields(self): 550 - """Returns a list of available event's of format 'event name(filter 524 + def _get_available_fields(self): 525 + """Returns a list of available events of format 'event name(filter 551 526 name)'. 
552 527 553 528 All available events have directories under ··· 574 549 575 550 def update_fields(self, fields_filter): 576 551 """Refresh fields, applying fields_filter""" 577 - self.fields = [field for field in self.get_available_fields() 578 - if self.is_field_wanted(fields_filter, field)] 552 + self.fields = [field for field in self._get_available_fields() 553 + if self.is_field_wanted(fields_filter, field) or 554 + ARCH.tracepoint_is_child(field)] 579 555 580 556 @staticmethod 581 - def get_online_cpus(): 557 + def _get_online_cpus(): 582 558 """Returns a list of cpu id integers.""" 583 559 def parse_int_list(list_string): 584 560 """Returns an int list from a string of comma separated integers and ··· 601 575 cpu_string = cpu_list.readline() 602 576 return parse_int_list(cpu_string) 603 577 604 - def setup_traces(self): 578 + def _setup_traces(self): 605 579 """Creates all event and group objects needed to be able to retrieve 606 580 data.""" 607 - fields = self.get_available_fields() 581 + fields = self._get_available_fields() 608 582 if self._pid > 0: 609 583 # Fetch list of all threads of the monitored pid, as qemu 610 584 # starts a thread for each vcpu. 611 585 path = os.path.join('/proc', str(self._pid), 'task') 612 586 groupids = self.walkdir(path)[1] 613 587 else: 614 - groupids = self.get_online_cpus() 588 + groupids = self._get_online_cpus() 615 589 616 590 # The constant is needed as a buffer for python libs, std 617 591 # streams and other files that the script opens. ··· 689 663 # The garbage collector will get rid of all Event/Group 690 664 # objects and open files after removing the references. 
691 665 self.group_leaders = [] 692 - self.setup_traces() 666 + self._setup_traces() 693 667 self.fields = self._fields 694 668 695 669 def read(self, by_guest=0): ··· 697 671 ret = defaultdict(int) 698 672 for group in self.group_leaders: 699 673 for name, val in group.read().items(): 700 - if name in self._fields: 701 - ret[name] += val 674 + if name not in self._fields: 675 + continue 676 + parent = ARCH.tracepoint_is_child(name) 677 + if parent: 678 + name += ' ' + parent 679 + ret[name] += val 702 680 return ret 703 681 704 682 def reset(self): ··· 720 690 self._baseline = {} 721 691 self.do_read = True 722 692 self.paths = [] 723 - self.pid = pid 693 + super(DebugfsProvider, self).__init__(pid) 724 694 if include_past: 725 - self.restore() 695 + self._restore() 726 696 727 - def get_available_fields(self): 697 + def _get_available_fields(self): 728 698 """"Returns a list of available fields. 729 699 730 700 The fields are all available KVM debugfs files ··· 734 704 735 705 def update_fields(self, fields_filter): 736 706 """Refresh fields, applying fields_filter""" 737 - self._fields = [field for field in self.get_available_fields() 738 - if self.is_field_wanted(fields_filter, field)] 707 + self._fields = [field for field in self._get_available_fields() 708 + if self.is_field_wanted(fields_filter, field) or 709 + ARCH.debugfs_is_child(field)] 739 710 740 711 @property 741 712 def fields(self): ··· 789 758 paths.append(dir) 790 759 for path in paths: 791 760 for field in self._fields: 792 - value = self.read_field(field, path) 761 + value = self._read_field(field, path) 793 762 key = path + field 794 763 if reset == 1: 795 764 self._baseline[key] = value ··· 797 766 self._baseline[key] = 0 798 767 if self._baseline.get(key, -1) == -1: 799 768 self._baseline[key] = value 800 - increment = (results.get(field, 0) + value - 801 - self._baseline.get(key, 0)) 802 - if by_guest: 803 - pid = key.split('-')[0] 804 - if pid in results: 805 - results[pid] += increment 806 
- else: 807 - results[pid] = increment 769 + parent = ARCH.debugfs_is_child(field) 770 + if parent: 771 + field = field + ' ' + parent 772 + else: 773 + if by_guest: 774 + field = key.split('-')[0] # set 'field' to 'pid' 775 + increment = value - self._baseline.get(key, 0) 776 + if field in results: 777 + results[field] += increment 808 778 else: 809 779 results[field] = increment 810 780 811 781 return results 812 782 813 - def read_field(self, field, path): 783 + def _read_field(self, field, path): 814 784 """Returns the value of a single field from a specific VM.""" 815 785 try: 816 786 return int(open(os.path.join(PATH_DEBUGFS_KVM, ··· 826 794 self._baseline = {} 827 795 self.read(1) 828 796 829 - def restore(self): 797 + def _restore(self): 830 798 """Reset field counters""" 831 799 self._baseline = {} 832 800 self.read(2) 801 + 802 + 803 + EventStat = namedtuple('EventStat', ['value', 'delta']) 833 804 834 805 835 806 class Stats(object): ··· 843 808 844 809 """ 845 810 def __init__(self, options): 846 - self.providers = self.get_providers(options) 811 + self.providers = self._get_providers(options) 847 812 self._pid_filter = options.pid 848 813 self._fields_filter = options.fields 849 814 self.values = {} 815 + self._child_events = False 850 816 851 - @staticmethod 852 - def get_providers(options): 817 + def _get_providers(self, options): 853 818 """Returns a list of data providers depending on the passed options.""" 854 819 providers = [] 855 820 ··· 861 826 862 827 return providers 863 828 864 - def update_provider_filters(self): 829 + def _update_provider_filters(self): 865 830 """Propagates fields filters to providers.""" 866 831 # As we reset the counters when updating the fields we can 867 832 # also clear the cache of old values. 
··· 882 847 def fields_filter(self, fields_filter): 883 848 if fields_filter != self._fields_filter: 884 849 self._fields_filter = fields_filter 885 - self.update_provider_filters() 850 + self._update_provider_filters() 886 851 887 852 @property 888 853 def pid_filter(self): ··· 896 861 for provider in self.providers: 897 862 provider.pid = self._pid_filter 898 863 864 + @property 865 + def child_events(self): 866 + return self._child_events 867 + 868 + @child_events.setter 869 + def child_events(self, val): 870 + self._child_events = val 871 + for provider in self.providers: 872 + provider.child_events = val 873 + 899 874 def get(self, by_guest=0): 900 875 """Returns a dict with field -> (value, delta to last value) of all 901 - provider data.""" 876 + provider data. 877 + Key formats: 878 + * plain: 'key' is event name 879 + * child-parent: 'key' is in format '<child> <parent>' 880 + * pid: 'key' is the pid of the guest, and the record contains the 881 + aggregated event data 882 + These formats are generated by the providers, and handled in class TUI. 
883 + """ 902 884 for provider in self.providers: 903 885 new = provider.read(by_guest=by_guest) 904 - for key in new if by_guest else provider.fields: 905 - oldval = self.values.get(key, (0, 0))[0] 886 + for key in new: 887 + oldval = self.values.get(key, EventStat(0, 0)).value 906 888 newval = new.get(key, 0) 907 889 newdelta = newval - oldval 908 - self.values[key] = (newval, newdelta) 890 + self.values[key] = EventStat(newval, newdelta) 909 891 return self.values 910 892 911 893 def toggle_display_guests(self, to_pid): ··· 951 899 self.get(to_pid) 952 900 return 0 953 901 902 + 954 903 DELAY_DEFAULT = 3.0 955 904 MAX_GUEST_NAME_LEN = 48 956 905 MAX_REGEX_LEN = 44 957 - DEFAULT_REGEX = r'^[^\(]*$' 958 906 SORT_DEFAULT = 0 959 907 960 908 ··· 1021 969 1022 970 return res 1023 971 1024 - def print_all_gnames(self, row): 972 + def _print_all_gnames(self, row): 1025 973 """Print a list of all running guests along with their pids.""" 1026 974 self.screen.addstr(row, 2, '%8s %-60s' % 1027 975 ('Pid', 'Guest Name (fuzzy list, might be ' ··· 1084 1032 1085 1033 return name 1086 1034 1087 - def update_drilldown(self): 1088 - """Sets or removes a filter that only allows fields without braces.""" 1089 - if not self.stats.fields_filter: 1090 - self.stats.fields_filter = DEFAULT_REGEX 1091 - 1092 - elif self.stats.fields_filter == DEFAULT_REGEX: 1093 - self.stats.fields_filter = None 1094 - 1095 - def update_pid(self, pid): 1035 + def _update_pid(self, pid): 1096 1036 """Propagates pid selection to stats object.""" 1037 + self.screen.addstr(4, 1, 'Updating pid filter...') 1038 + self.screen.refresh() 1097 1039 self.stats.pid_filter = pid 1098 1040 1099 - def refresh_header(self, pid=None): 1041 + def _refresh_header(self, pid=None): 1100 1042 """Refreshes the header.""" 1101 1043 if pid is None: 1102 1044 pid = self.stats.pid_filter ··· 1105 1059 .format(pid, gname), curses.A_BOLD) 1106 1060 else: 1107 1061 self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) 
1108 - if self.stats.fields_filter and self.stats.fields_filter \ 1109 - != DEFAULT_REGEX: 1062 + if self.stats.fields_filter: 1110 1063 regex = self.stats.fields_filter 1111 1064 if len(regex) > MAX_REGEX_LEN: 1112 1065 regex = regex[:MAX_REGEX_LEN] + '...' ··· 1120 1075 self.screen.addstr(4, 1, 'Collecting data...') 1121 1076 self.screen.refresh() 1122 1077 1123 - def refresh_body(self, sleeptime): 1078 + def _refresh_body(self, sleeptime): 1079 + def is_child_field(field): 1080 + return field.find('(') != -1 1081 + 1082 + def insert_child(sorted_items, child, values, parent): 1083 + num = len(sorted_items) 1084 + for i in range(0, num): 1085 + # only add child if parent is present 1086 + if parent.startswith(sorted_items[i][0]): 1087 + sorted_items.insert(i + 1, (' ' + child, values)) 1088 + 1089 + def get_sorted_events(self, stats): 1090 + """ separate parent and child events """ 1091 + if self._sorting == SORT_DEFAULT: 1092 + def sortkey((_k, v)): 1093 + # sort by (delta value, overall value) 1094 + return (v.delta, v.value) 1095 + else: 1096 + def sortkey((_k, v)): 1097 + # sort by overall value 1098 + return v.value 1099 + 1100 + childs = [] 1101 + sorted_items = [] 1102 + # we can't rule out child events to appear prior to parents even 1103 + # when sorted - separate out all children first, and add in later 1104 + for key, values in sorted(stats.items(), key=sortkey, 1105 + reverse=True): 1106 + if values == (0, 0): 1107 + continue 1108 + if key.find(' ') != -1: 1109 + if not self.stats.child_events: 1110 + continue 1111 + childs.insert(0, (key, values)) 1112 + else: 1113 + sorted_items.append((key, values)) 1114 + if self.stats.child_events: 1115 + for key, values in childs: 1116 + (child, parent) = key.split(' ') 1117 + insert_child(sorted_items, child, values, parent) 1118 + 1119 + return sorted_items 1120 + 1124 1121 row = 3 1125 1122 self.screen.move(row, 0) 1126 1123 self.screen.clrtobot() 1127 1124 stats = self.stats.get(self._display_guests) 1128 - 
1129 - def sortCurAvg(x): 1130 - # sort by current events if available 1131 - if stats[x][1]: 1132 - return (-stats[x][1], -stats[x][0]) 1133 - else: 1134 - return (0, -stats[x][0]) 1135 - 1136 - def sortTotal(x): 1137 - # sort by totals 1138 - return (0, -stats[x][0]) 1139 1125 total = 0. 1140 - for key in stats.keys(): 1141 - if key.find('(') is -1: 1142 - total += stats[key][0] 1143 - if self._sorting == SORT_DEFAULT: 1144 - sortkey = sortCurAvg 1145 - else: 1146 - sortkey = sortTotal 1126 + ctotal = 0. 1127 + for key, values in stats.items(): 1128 + if self._display_guests: 1129 + if self.get_gname_from_pid(key): 1130 + total += values.value 1131 + continue 1132 + if not key.find(' ') != -1: 1133 + total += values.value 1134 + else: 1135 + ctotal += values.value 1136 + if total == 0.: 1137 + # we don't have any fields, or all non-child events are filtered 1138 + total = ctotal 1139 + 1140 + # print events 1147 1141 tavg = 0 1148 - for key in sorted(stats.keys(), key=sortkey): 1149 - if row >= self.screen.getmaxyx()[0] - 1: 1142 + tcur = 0 1143 + for key, values in get_sorted_events(self, stats): 1144 + if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0): 1150 1145 break 1151 - values = stats[key] 1152 - if not values[0] and not values[1]: 1153 - break 1154 - if values[0] is not None: 1155 - cur = int(round(values[1] / sleeptime)) if values[1] else '' 1156 - if self._display_guests: 1157 - key = self.get_gname_from_pid(key) 1158 - self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % 1159 - (key, values[0], values[0] * 100 / total, 1160 - cur)) 1161 - if cur is not '' and key.find('(') is -1: 1162 - tavg += cur 1146 + if self._display_guests: 1147 + key = self.get_gname_from_pid(key) 1148 + if not key: 1149 + continue 1150 + cur = int(round(values.delta / sleeptime)) if values.delta else '' 1151 + if key[0] != ' ': 1152 + if values.delta: 1153 + tcur += values.delta 1154 + ptotal = values.value 1155 + ltotal = total 1156 + else: 1157 + ltotal = ptotal 1158 
+ self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key, 1159 + values.value, 1160 + values.value * 100 / float(ltotal), cur)) 1163 1161 row += 1 1164 1162 if row == 3: 1165 1163 self.screen.addstr(4, 1, 'No matching events reported yet') 1166 - else: 1164 + if row > 4: 1165 + tavg = int(round(tcur / sleeptime)) if tcur > 0 else '' 1167 1166 self.screen.addstr(row, 1, '%-40s %10d %8s' % 1168 - ('Total', total, tavg if tavg else ''), 1169 - curses.A_BOLD) 1167 + ('Total', total, tavg), curses.A_BOLD) 1170 1168 self.screen.refresh() 1171 1169 1172 - def show_msg(self, text): 1170 + def _show_msg(self, text): 1173 1171 """Display message centered text and exit on key press""" 1174 1172 hint = 'Press any key to continue' 1175 1173 curses.cbreak() ··· 1227 1139 curses.A_STANDOUT) 1228 1140 self.screen.getkey() 1229 1141 1230 - def show_help_interactive(self): 1142 + def _show_help_interactive(self): 1231 1143 """Display help with list of interactive commands""" 1232 1144 msg = (' b toggle events by guests (debugfs only, honors' 1233 1145 ' filters)', 1234 1146 ' c clear filter', 1235 1147 ' f filter by regular expression', 1236 - ' g filter by guest name', 1148 + ' g filter by guest name/PID', 1237 1149 ' h display interactive commands reference', 1238 1150 ' o toggle sorting order (Total vs CurAvg/s)', 1239 - ' p filter by PID', 1151 + ' p filter by guest name/PID', 1240 1152 ' q quit', 1241 1153 ' r reset stats', 1242 1154 ' s set update interval', ··· 1253 1165 self.screen.addstr(row, 0, line) 1254 1166 row += 1 1255 1167 self.screen.getkey() 1256 - self.refresh_header() 1168 + self._refresh_header() 1257 1169 1258 - def show_filter_selection(self): 1170 + def _show_filter_selection(self): 1259 1171 """Draws filter selection mask. 1260 1172 1261 1173 Asks for a valid regex and sets the fields filter accordingly. 
1262 1174 1263 1175 """ 1176 + msg = '' 1264 1177 while True: 1265 1178 self.screen.erase() 1266 1179 self.screen.addstr(0, 0, ··· 1270 1181 self.screen.addstr(2, 0, 1271 1182 "Current regex: {0}" 1272 1183 .format(self.stats.fields_filter)) 1184 + self.screen.addstr(5, 0, msg) 1273 1185 self.screen.addstr(3, 0, "New regex: ") 1274 1186 curses.echo() 1275 1187 regex = self.screen.getstr().decode(ENCODING) 1276 1188 curses.noecho() 1277 1189 if len(regex) == 0: 1278 - self.stats.fields_filter = DEFAULT_REGEX 1279 - self.refresh_header() 1190 + self.stats.fields_filter = '' 1191 + self._refresh_header() 1280 1192 return 1281 1193 try: 1282 1194 re.compile(regex) 1283 1195 self.stats.fields_filter = regex 1284 - self.refresh_header() 1196 + self._refresh_header() 1285 1197 return 1286 1198 except re.error: 1199 + msg = '"' + regex + '": Not a valid regular expression' 1287 1200 continue 1288 1201 1289 - def show_vm_selection_by_pid(self): 1290 - """Draws PID selection mask. 1291 - 1292 - Asks for a pid until a valid pid or 0 has been entered. 
1293 - 1294 - """ 1295 - msg = '' 1296 - while True: 1297 - self.screen.erase() 1298 - self.screen.addstr(0, 0, 1299 - 'Show statistics for specific pid.', 1300 - curses.A_BOLD) 1301 - self.screen.addstr(1, 0, 1302 - 'This might limit the shown data to the trace ' 1303 - 'statistics.') 1304 - self.screen.addstr(5, 0, msg) 1305 - self.print_all_gnames(7) 1306 - 1307 - curses.echo() 1308 - self.screen.addstr(3, 0, "Pid [0 or pid]: ") 1309 - pid = self.screen.getstr().decode(ENCODING) 1310 - curses.noecho() 1311 - 1312 - try: 1313 - if len(pid) > 0: 1314 - pid = int(pid) 1315 - if pid != 0 and not os.path.isdir(os.path.join('/proc/', 1316 - str(pid))): 1317 - msg = '"' + str(pid) + '": Not a running process' 1318 - continue 1319 - else: 1320 - pid = 0 1321 - self.refresh_header(pid) 1322 - self.update_pid(pid) 1323 - break 1324 - except ValueError: 1325 - msg = '"' + str(pid) + '": Not a valid pid' 1326 - 1327 - def show_set_update_interval(self): 1202 + def _show_set_update_interval(self): 1328 1203 """Draws update interval selection mask.""" 1329 1204 msg = '' 1330 1205 while True: ··· 1318 1265 1319 1266 except ValueError: 1320 1267 msg = '"' + str(val) + '": Invalid value' 1321 - self.refresh_header() 1268 + self._refresh_header() 1322 1269 1323 - def show_vm_selection_by_guest_name(self): 1270 + def _show_vm_selection_by_guest(self): 1324 1271 """Draws guest selection mask. 1325 1272 1326 - Asks for a guest name until a valid guest name or '' is entered. 1273 + Asks for a guest name or pid until a valid guest name or '' is entered. 
1327 1274 1328 1275 """ 1329 1276 msg = '' 1330 1277 while True: 1331 1278 self.screen.erase() 1332 1279 self.screen.addstr(0, 0, 1333 - 'Show statistics for specific guest.', 1280 + 'Show statistics for specific guest or pid.', 1334 1281 curses.A_BOLD) 1335 1282 self.screen.addstr(1, 0, 1336 1283 'This might limit the shown data to the trace ' 1337 1284 'statistics.') 1338 1285 self.screen.addstr(5, 0, msg) 1339 - self.print_all_gnames(7) 1286 + self._print_all_gnames(7) 1340 1287 curses.echo() 1341 - self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") 1342 - gname = self.screen.getstr().decode(ENCODING) 1288 + curses.curs_set(1) 1289 + self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ") 1290 + guest = self.screen.getstr().decode(ENCODING) 1343 1291 curses.noecho() 1344 1292 1345 - if not gname: 1346 - self.refresh_header(0) 1347 - self.update_pid(0) 1293 + pid = 0 1294 + if not guest or guest == '0': 1348 1295 break 1349 - else: 1350 - pids = [] 1351 - try: 1352 - pids = self.get_pid_from_gname(gname) 1353 - except: 1354 - msg = '"' + gname + '": Internal error while searching, ' \ 1355 - 'use pid filter instead' 1296 + if guest.isdigit(): 1297 + if not os.path.isdir(os.path.join('/proc/', guest)): 1298 + msg = '"' + guest + '": Not a running process' 1356 1299 continue 1357 - if len(pids) == 0: 1358 - msg = '"' + gname + '": Not an active guest' 1359 - continue 1360 - if len(pids) > 1: 1361 - msg = '"' + gname + '": Multiple matches found, use pid ' \ 1362 - 'filter instead' 1363 - continue 1364 - self.refresh_header(pids[0]) 1365 - self.update_pid(pids[0]) 1300 + pid = int(guest) 1366 1301 break 1302 + pids = [] 1303 + try: 1304 + pids = self.get_pid_from_gname(guest) 1305 + except: 1306 + msg = '"' + guest + '": Internal error while searching, ' \ 1307 + 'use pid filter instead' 1308 + continue 1309 + if len(pids) == 0: 1310 + msg = '"' + guest + '": Not an active guest' 1311 + continue 1312 + if len(pids) > 1: 1313 + msg = '"' + guest + '": Multiple 
matches found, use pid ' \ 1314 + 'filter instead' 1315 + continue 1316 + pid = pids[0] 1317 + break 1318 + curses.curs_set(0) 1319 + self._refresh_header(pid) 1320 + self._update_pid(pid) 1367 1321 1368 1322 def show_stats(self): 1369 1323 """Refreshes the screen and processes user input.""" 1370 1324 sleeptime = self._delay_initial 1371 - self.refresh_header() 1325 + self._refresh_header() 1372 1326 start = 0.0 # result based on init value never appears on screen 1373 1327 while True: 1374 - self.refresh_body(time.time() - start) 1328 + self._refresh_body(time.time() - start) 1375 1329 curses.halfdelay(int(sleeptime * 10)) 1376 1330 start = time.time() 1377 1331 sleeptime = self._delay_regular ··· 1387 1327 if char == 'b': 1388 1328 self._display_guests = not self._display_guests 1389 1329 if self.stats.toggle_display_guests(self._display_guests): 1390 - self.show_msg(['Command not available with tracepoints' 1391 - ' enabled', 'Restart with debugfs only ' 1392 - '(see option \'-d\') and try again!']) 1330 + self._show_msg(['Command not available with ' 1331 + 'tracepoints enabled', 'Restart with ' 1332 + 'debugfs only (see option \'-d\') and ' 1333 + 'try again!']) 1393 1334 self._display_guests = not self._display_guests 1394 - self.refresh_header() 1335 + self._refresh_header() 1395 1336 if char == 'c': 1396 - self.stats.fields_filter = DEFAULT_REGEX 1397 - self.refresh_header(0) 1398 - self.update_pid(0) 1337 + self.stats.fields_filter = '' 1338 + self._refresh_header(0) 1339 + self._update_pid(0) 1399 1340 if char == 'f': 1400 1341 curses.curs_set(1) 1401 - self.show_filter_selection() 1342 + self._show_filter_selection() 1402 1343 curses.curs_set(0) 1403 1344 sleeptime = self._delay_initial 1404 - if char == 'g': 1405 - curses.curs_set(1) 1406 - self.show_vm_selection_by_guest_name() 1407 - curses.curs_set(0) 1345 + if char == 'g' or char == 'p': 1346 + self._show_vm_selection_by_guest() 1408 1347 sleeptime = self._delay_initial 1409 1348 if char == 'h': 
1410 - self.show_help_interactive() 1349 + self._show_help_interactive() 1411 1350 if char == 'o': 1412 1351 self._sorting = not self._sorting 1413 - if char == 'p': 1414 - curses.curs_set(1) 1415 - self.show_vm_selection_by_pid() 1416 - curses.curs_set(0) 1417 - sleeptime = self._delay_initial 1418 1352 if char == 'q': 1419 1353 break 1420 1354 if char == 'r': 1421 1355 self.stats.reset() 1422 1356 if char == 's': 1423 1357 curses.curs_set(1) 1424 - self.show_set_update_interval() 1358 + self._show_set_update_interval() 1425 1359 curses.curs_set(0) 1426 1360 sleeptime = self._delay_initial 1427 1361 if char == 'x': 1428 - self.update_drilldown() 1429 - # prevents display of current values on next refresh 1430 - self.stats.get(self._display_guests) 1362 + self.stats.child_events = not self.stats.child_events 1431 1363 except KeyboardInterrupt: 1432 1364 break 1433 1365 except curses.error: ··· 1432 1380 s = stats.get() 1433 1381 time.sleep(1) 1434 1382 s = stats.get() 1435 - for key in sorted(s.keys()): 1436 - values = s[key] 1437 - print('%-42s%10d%10d' % (key, values[0], values[1])) 1383 + for key, values in sorted(s.items()): 1384 + print('%-42s%10d%10d' % (key.split(' ')[0], values.value, 1385 + values.delta)) 1438 1386 except KeyboardInterrupt: 1439 1387 pass 1440 1388 ··· 1444 1392 keys = sorted(stats.get().keys()) 1445 1393 1446 1394 def banner(): 1447 - for k in keys: 1448 - print(k, end=' ') 1395 + for key in keys: 1396 + print(key.split(' ')[0], end=' ') 1449 1397 print() 1450 1398 1451 1399 def statline(): 1452 1400 s = stats.get() 1453 - for k in keys: 1454 - print(' %9d' % s[k][1], end=' ') 1401 + for key in keys: 1402 + print(' %9d' % s[key].delta, end=' ') 1455 1403 print() 1456 1404 line = 0 1457 1405 banner_repeat = 20 ··· 1556 1504 ) 1557 1505 optparser.add_option('-f', '--fields', 1558 1506 action='store', 1559 - default=DEFAULT_REGEX, 1507 + default='', 1560 1508 dest='fields', 1561 1509 help='''fields to display (regex) 1562 1510 "-f help" for 
a list of available events''', ··· 1591 1539 1592 1540 def check_access(options): 1593 1541 """Exits if the current user can't access all needed directories.""" 1594 - if not os.path.exists('/sys/kernel/debug'): 1595 - sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') 1596 - sys.exit(1) 1597 - 1598 - if not os.path.exists(PATH_DEBUGFS_KVM): 1599 - sys.stderr.write("Please make sure, that debugfs is mounted and " 1600 - "readable by the current user:\n" 1601 - "('mount -t debugfs debugfs /sys/kernel/debug')\n" 1602 - "Also ensure, that the kvm modules are loaded.\n") 1603 - sys.exit(1) 1604 - 1605 1542 if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or 1606 1543 not options.debugfs): 1607 1544 sys.stderr.write("Please enable CONFIG_TRACING in your kernel " ··· 1608 1567 return options 1609 1568 1610 1569 1570 + def assign_globals(): 1571 + global PATH_DEBUGFS_KVM 1572 + global PATH_DEBUGFS_TRACING 1573 + 1574 + debugfs = '' 1575 + for line in file('/proc/mounts'): 1576 + if line.split(' ')[0] == 'debugfs': 1577 + debugfs = line.split(' ')[1] 1578 + break 1579 + if debugfs == '': 1580 + sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in " 1581 + "your kernel, mounted and\nreadable by the current " 1582 + "user:\n" 1583 + "('mount -t debugfs debugfs /sys/kernel/debug')\n") 1584 + sys.exit(1) 1585 + 1586 + PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm') 1587 + PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing') 1588 + 1589 + if not os.path.exists(PATH_DEBUGFS_KVM): 1590 + sys.stderr.write("Please make sure that CONFIG_KVM is enabled in " 1591 + "your kernel and that the modules are loaded.\n") 1592 + sys.exit(1) 1593 + 1594 + 1611 1595 def main(): 1596 + assign_globals() 1612 1597 options = get_options() 1613 1598 options = check_access(options) 1614 1599
+2 -2
tools/kvm/kvm_stat/kvm_stat.txt
··· 35 35 36 36 *f*:: filter by regular expression 37 37 38 - *g*:: filter by guest name 38 + *g*:: filter by guest name/PID 39 39 40 40 *h*:: display interactive commands reference 41 41 42 42 *o*:: toggle sorting order (Total vs CurAvg/s) 43 43 44 - *p*:: filter by PID 44 + *p*:: filter by guest name/PID 45 45 46 46 *q*:: quit 47 47
+64 -52
virt/kvm/arm/arch_timer.c
··· 36 36 static unsigned int host_vtimer_irq; 37 37 static u32 host_vtimer_irq_flags; 38 38 39 + static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); 40 + 39 41 static const struct kvm_irq_level default_ptimer_irq = { 40 42 .irq = 30, 41 43 .level = 1, ··· 58 56 return timecounter->cc->read(timecounter->cc); 59 57 } 60 58 59 + static inline bool userspace_irqchip(struct kvm *kvm) 60 + { 61 + return static_branch_unlikely(&userspace_irqchip_in_use) && 62 + unlikely(!irqchip_in_kernel(kvm)); 63 + } 64 + 61 65 static void soft_timer_start(struct hrtimer *hrt, u64 ns) 62 66 { 63 67 hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), ··· 75 67 hrtimer_cancel(hrt); 76 68 if (work) 77 69 cancel_work_sync(work); 78 - } 79 - 80 - static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu) 81 - { 82 - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 83 - 84 - /* 85 - * When using a userspace irqchip with the architected timers, we must 86 - * prevent continuously exiting from the guest, and therefore mask the 87 - * physical interrupt by disabling it on the host interrupt controller 88 - * when the virtual level is high, such that the guest can make 89 - * forward progress. Once we detect the output level being 90 - * de-asserted, we unmask the interrupt again so that we exit from the 91 - * guest when the timer fires. 
92 - */ 93 - if (vtimer->irq.level) 94 - disable_percpu_irq(host_vtimer_irq); 95 - else 96 - enable_percpu_irq(host_vtimer_irq, 0); 97 70 } 98 71 99 72 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) ··· 95 106 if (kvm_timer_should_fire(vtimer)) 96 107 kvm_timer_update_irq(vcpu, true, vtimer); 97 108 98 - if (static_branch_unlikely(&userspace_irqchip_in_use) && 99 - unlikely(!irqchip_in_kernel(vcpu->kvm))) 100 - kvm_vtimer_update_mask_user(vcpu); 109 + if (userspace_irqchip(vcpu->kvm) && 110 + !static_branch_unlikely(&has_gic_active_state)) 111 + disable_percpu_irq(host_vtimer_irq); 101 112 102 113 return IRQ_HANDLED; 103 114 } ··· 279 290 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, 280 291 timer_ctx->irq.level); 281 292 282 - if (!static_branch_unlikely(&userspace_irqchip_in_use) || 283 - likely(irqchip_in_kernel(vcpu->kvm))) { 293 + if (!userspace_irqchip(vcpu->kvm)) { 284 294 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 285 295 timer_ctx->irq.irq, 286 296 timer_ctx->irq.level, ··· 338 350 phys_timer_emulate(vcpu); 339 351 } 340 352 341 - static void __timer_snapshot_state(struct arch_timer_context *timer) 342 - { 343 - timer->cnt_ctl = read_sysreg_el0(cntv_ctl); 344 - timer->cnt_cval = read_sysreg_el0(cntv_cval); 345 - } 346 - 347 353 static void vtimer_save_state(struct kvm_vcpu *vcpu) 348 354 { 349 355 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; ··· 349 367 if (!vtimer->loaded) 350 368 goto out; 351 369 352 - if (timer->enabled) 353 - __timer_snapshot_state(vtimer); 370 + if (timer->enabled) { 371 + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); 372 + vtimer->cnt_cval = read_sysreg_el0(cntv_cval); 373 + } 354 374 355 375 /* Disable the virtual timer */ 356 376 write_sysreg_el0(0, cntv_ctl); ··· 444 460 kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); 445 461 } 446 462 447 - static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu) 463 + static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, 
bool active) 464 + { 465 + int r; 466 + r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active); 467 + WARN_ON(r); 468 + } 469 + 470 + static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu) 448 471 { 449 472 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 450 473 bool phys_active; 451 - int ret; 452 474 453 - phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); 454 - 455 - ret = irq_set_irqchip_state(host_vtimer_irq, 456 - IRQCHIP_STATE_ACTIVE, 457 - phys_active); 458 - WARN_ON(ret); 475 + if (irqchip_in_kernel(vcpu->kvm)) 476 + phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); 477 + else 478 + phys_active = vtimer->irq.level; 479 + set_vtimer_irq_phys_active(vcpu, phys_active); 459 480 } 460 481 461 - static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu) 482 + static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) 462 483 { 463 - kvm_vtimer_update_mask_user(vcpu); 484 + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 485 + 486 + /* 487 + * When using a userspace irqchip with the architected timers and a 488 + * host interrupt controller that doesn't support an active state, we 489 + * must still prevent continuously exiting from the guest, and 490 + * therefore mask the physical interrupt by disabling it on the host 491 + * interrupt controller when the virtual level is high, such that the 492 + * guest can make forward progress. Once we detect the output level 493 + * being de-asserted, we unmask the interrupt again so that we exit 494 + * from the guest when the timer fires. 
495 + */ 496 + if (vtimer->irq.level) 497 + disable_percpu_irq(host_vtimer_irq); 498 + else 499 + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 464 500 } 465 501 466 502 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) ··· 491 487 if (unlikely(!timer->enabled)) 492 488 return; 493 489 494 - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 495 - kvm_timer_vcpu_load_user(vcpu); 490 + if (static_branch_likely(&has_gic_active_state)) 491 + kvm_timer_vcpu_load_gic(vcpu); 496 492 else 497 - kvm_timer_vcpu_load_vgic(vcpu); 493 + kvm_timer_vcpu_load_nogic(vcpu); 498 494 499 495 set_cntvoff(vtimer->cntvoff); 500 496 ··· 559 555 { 560 556 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 561 557 562 - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { 563 - __timer_snapshot_state(vtimer); 564 - if (!kvm_timer_should_fire(vtimer)) { 565 - kvm_timer_update_irq(vcpu, false, vtimer); 566 - kvm_vtimer_update_mask_user(vcpu); 567 - } 558 + if (!kvm_timer_should_fire(vtimer)) { 559 + kvm_timer_update_irq(vcpu, false, vtimer); 560 + if (static_branch_likely(&has_gic_active_state)) 561 + set_vtimer_irq_phys_active(vcpu, false); 562 + else 563 + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 568 564 } 569 565 } 570 566 571 567 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 572 568 { 573 - unmask_vtimer_irq_user(vcpu); 569 + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 570 + 571 + if (unlikely(!timer->enabled)) 572 + return; 573 + 574 + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 575 + unmask_vtimer_irq_user(vcpu); 574 576 } 575 577 576 578 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) ··· 763 753 kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); 764 754 goto out_free_irq; 765 755 } 756 + 757 + static_branch_enable(&has_gic_active_state); 766 758 } 767 759 768 760 kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
+1 -2
virt/kvm/kvm_main.c
··· 969 969 /* Check for overlaps */ 970 970 r = -EEXIST; 971 971 kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { 972 - if ((slot->id >= KVM_USER_MEM_SLOTS) || 973 - (slot->id == id)) 972 + if (slot->id == id) 974 973 continue; 975 974 if (!((base_gfn + npages <= slot->base_gfn) || 976 975 (base_gfn >= slot->base_gfn + slot->npages)))