Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-s390-next-6.19-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

- SCA rework
- VIRT_XFER_TO_GUEST_WORK support
- Operation exception forwarding support
- Cleanups

+271 -271
+17 -2
Documentation/virt/kvm/api.rst
··· 7855 7855 :Architectures: s390 7856 7856 :Parameters: none 7857 7857 7858 - With this capability enabled, all illegal instructions 0x0000 (2 bytes) will 7858 + With this capability enabled, the illegal instruction 0x0000 (2 bytes) will 7859 7859 be intercepted and forwarded to user space. User space can use this 7860 7860 mechanism e.g. to realize 2-byte software breakpoints. The kernel will 7861 7861 not inject an operation exception for these instructions, user space has ··· 8727 8727 When this capability is enabled, KVM resets the VCPU when setting 8728 8728 MP_STATE_INIT_RECEIVED through IOCTL. The original MP_STATE is preserved. 8729 8729 8730 - 7.43 KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 8730 + 7.44 KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 8731 8731 ------------------------------------------- 8732 8732 8733 8733 :Architectures: arm64 ··· 8749 8749 When this capability is enabled, KVM may exit to userspace for SEAs taken to 8750 8750 EL2 resulting from a guest access. See ``KVM_EXIT_ARM_SEA`` for more 8751 8751 information. 8752 + 8753 + 7.46 KVM_CAP_S390_USER_OPEREXEC 8754 + ------------------------------- 8755 + 8756 + :Architectures: s390 8757 + :Parameters: none 8758 + 8759 + When this capability is enabled KVM forwards all operation exceptions 8760 + that it doesn't handle itself to user space. This also includes the 8761 + 0x0000 instructions managed by KVM_CAP_S390_USER_INSTR0. This is 8762 + helpful if user space wants to emulate instructions which are not 8763 + (yet) implemented in hardware. 8764 + 8765 + This capability can be enabled dynamically even if VCPUs were already 8766 + created and are running. 8752 8767 8753 8768 8. Other capabilities. 8754 8769 ======================
+4 -4
arch/s390/include/asm/kvm_host.h
··· 146 146 u64 instruction_diagnose_500; 147 147 u64 instruction_diagnose_other; 148 148 u64 pfault_sync; 149 + u64 signal_exits; 149 150 }; 150 151 151 152 #define PGM_OPERATION 0x01 ··· 632 631 struct mmu_notifier mmu_notifier; 633 632 }; 634 633 635 - struct kvm_arch{ 636 - void *sca; 637 - int use_esca; 638 - rwlock_t sca_lock; 634 + struct kvm_arch { 635 + struct esca_block *sca; 639 636 debug_info_t *dbf; 640 637 struct kvm_s390_float_interrupt float_int; 641 638 struct kvm_device *flic; ··· 649 650 int user_sigp; 650 651 int user_stsi; 651 652 int user_instr0; 653 + int user_operexec; 652 654 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; 653 655 wait_queue_head_t ipte_wq; 654 656 int ipte_lock_count;
+1
arch/s390/include/asm/stacktrace.h
··· 66 66 unsigned long sie_flags; 67 67 unsigned long sie_control_block_phys; 68 68 unsigned long sie_guest_asce; 69 + unsigned long sie_irq; 69 70 }; 70 71 }; 71 72 unsigned long gprs[10];
+1
arch/s390/kernel/asm-offsets.c
··· 64 64 OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); 65 65 OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); 66 66 OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce); 67 + OFFSET(__SF_SIE_IRQ, stack_frame, sie_irq); 67 68 DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); 68 69 BLANK(); 69 70 OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
+2
arch/s390/kernel/entry.S
··· 189 189 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags 190 190 lmg %r0,%r13,0(%r4) # load guest gprs 0-13 191 191 mvi __TI_sie(%r14),1 192 + stosm __SF_SIE_IRQ(%r15),0x03 # enable interrupts 192 193 lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce 193 194 lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer 194 195 oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now ··· 213 212 lg %r14,__LC_CURRENT(%r14) 214 213 mvi __TI_sie(%r14),0 215 214 SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) 215 + stnsm __SF_SIE_IRQ(%r15),0xfc # disable interrupts 216 216 lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area 217 217 stmg %r0,%r13,0(%r14) # save guest gprs 0-13 218 218 xgr %r0,%r0 # clear guest registers to
+1
arch/s390/kvm/Kconfig
··· 29 29 select HAVE_KVM_NO_POLL 30 30 select KVM_VFIO 31 31 select MMU_NOTIFIER 32 + select VIRT_XFER_TO_GUEST_WORK 32 33 help 33 34 Support hosting paravirtualized guest machines using the SIE 34 35 virtualization capability on the mainframe. This should work
+6 -21
arch/s390/kvm/gaccess.c
··· 109 109 110 110 int ipte_lock_held(struct kvm *kvm) 111 111 { 112 - if (sclp.has_siif) { 113 - int rc; 112 + if (sclp.has_siif) 113 + return kvm->arch.sca->ipte_control.kh != 0; 114 114 115 - read_lock(&kvm->arch.sca_lock); 116 - rc = kvm_s390_get_ipte_control(kvm)->kh != 0; 117 - read_unlock(&kvm->arch.sca_lock); 118 - return rc; 119 - } 120 115 return kvm->arch.ipte_lock_count != 0; 121 116 } 122 117 ··· 124 129 if (kvm->arch.ipte_lock_count > 1) 125 130 goto out; 126 131 retry: 127 - read_lock(&kvm->arch.sca_lock); 128 - ic = kvm_s390_get_ipte_control(kvm); 132 + ic = &kvm->arch.sca->ipte_control; 129 133 old = READ_ONCE(*ic); 130 134 do { 131 135 if (old.k) { 132 - read_unlock(&kvm->arch.sca_lock); 133 136 cond_resched(); 134 137 goto retry; 135 138 } 136 139 new = old; 137 140 new.k = 1; 138 141 } while (!try_cmpxchg(&ic->val, &old.val, new.val)); 139 - read_unlock(&kvm->arch.sca_lock); 140 142 out: 141 143 mutex_unlock(&kvm->arch.ipte_mutex); 142 144 } ··· 146 154 kvm->arch.ipte_lock_count--; 147 155 if (kvm->arch.ipte_lock_count) 148 156 goto out; 149 - read_lock(&kvm->arch.sca_lock); 150 - ic = kvm_s390_get_ipte_control(kvm); 157 + ic = &kvm->arch.sca->ipte_control; 151 158 old = READ_ONCE(*ic); 152 159 do { 153 160 new = old; 154 161 new.k = 0; 155 162 } while (!try_cmpxchg(&ic->val, &old.val, new.val)); 156 - read_unlock(&kvm->arch.sca_lock); 157 163 wake_up(&kvm->arch.ipte_wq); 158 164 out: 159 165 mutex_unlock(&kvm->arch.ipte_mutex); ··· 162 172 union ipte_control old, new, *ic; 163 173 164 174 retry: 165 - read_lock(&kvm->arch.sca_lock); 166 - ic = kvm_s390_get_ipte_control(kvm); 175 + ic = &kvm->arch.sca->ipte_control; 167 176 old = READ_ONCE(*ic); 168 177 do { 169 178 if (old.kg) { 170 - read_unlock(&kvm->arch.sca_lock); 171 179 cond_resched(); 172 180 goto retry; 173 181 } ··· 173 185 new.k = 1; 174 186 new.kh++; 175 187 } while (!try_cmpxchg(&ic->val, &old.val, new.val)); 176 - read_unlock(&kvm->arch.sca_lock); 177 188 } 178 189 179 190 static 
void ipte_unlock_siif(struct kvm *kvm) 180 191 { 181 192 union ipte_control old, new, *ic; 182 193 183 - read_lock(&kvm->arch.sca_lock); 184 - ic = kvm_s390_get_ipte_control(kvm); 194 + ic = &kvm->arch.sca->ipte_control; 185 195 old = READ_ONCE(*ic); 186 196 do { 187 197 new = old; ··· 187 201 if (!new.kh) 188 202 new.k = 0; 189 203 } while (!try_cmpxchg(&ic->val, &old.val, new.val)); 190 - read_unlock(&kvm->arch.sca_lock); 191 204 if (!new.kh) 192 205 wake_up(&kvm->arch.ipte_wq); 193 206 }
+3
arch/s390/kvm/intercept.c
··· 471 471 if (vcpu->arch.sie_block->ipa == 0xb256) 472 472 return handle_sthyi(vcpu); 473 473 474 + if (vcpu->kvm->arch.user_operexec) 475 + return -EOPNOTSUPP; 476 + 474 477 if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) 475 478 return -EOPNOTSUPP; 476 479 rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t));
+17 -63
arch/s390/kvm/interrupt.c
··· 45 45 /* handle external calls via sigp interpretation facility */ 46 46 static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) 47 47 { 48 - int c, scn; 48 + struct esca_block *sca = vcpu->kvm->arch.sca; 49 + union esca_sigp_ctrl sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; 49 50 50 51 if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND)) 51 52 return 0; 52 53 53 54 BUG_ON(!kvm_s390_use_sca_entries()); 54 - read_lock(&vcpu->kvm->arch.sca_lock); 55 - if (vcpu->kvm->arch.use_esca) { 56 - struct esca_block *sca = vcpu->kvm->arch.sca; 57 - union esca_sigp_ctrl sigp_ctrl = 58 - sca->cpu[vcpu->vcpu_id].sigp_ctrl; 59 - 60 - c = sigp_ctrl.c; 61 - scn = sigp_ctrl.scn; 62 - } else { 63 - struct bsca_block *sca = vcpu->kvm->arch.sca; 64 - union bsca_sigp_ctrl sigp_ctrl = 65 - sca->cpu[vcpu->vcpu_id].sigp_ctrl; 66 - 67 - c = sigp_ctrl.c; 68 - scn = sigp_ctrl.scn; 69 - } 70 - read_unlock(&vcpu->kvm->arch.sca_lock); 71 55 72 56 if (src_id) 73 - *src_id = scn; 57 + *src_id = sigp_ctrl.scn; 74 58 75 - return c; 59 + return sigp_ctrl.c; 76 60 } 77 61 78 62 static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) 79 63 { 64 + struct esca_block *sca = vcpu->kvm->arch.sca; 65 + union esca_sigp_ctrl *sigp_ctrl = &sca->cpu[vcpu->vcpu_id].sigp_ctrl; 66 + union esca_sigp_ctrl old_val, new_val = {.scn = src_id, .c = 1}; 80 67 int expect, rc; 81 68 82 69 BUG_ON(!kvm_s390_use_sca_entries()); 83 - read_lock(&vcpu->kvm->arch.sca_lock); 84 - if (vcpu->kvm->arch.use_esca) { 85 - struct esca_block *sca = vcpu->kvm->arch.sca; 86 - union esca_sigp_ctrl *sigp_ctrl = 87 - &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); 88 - union esca_sigp_ctrl new_val = {0}, old_val; 89 70 90 - old_val = READ_ONCE(*sigp_ctrl); 91 - new_val.scn = src_id; 92 - new_val.c = 1; 93 - old_val.c = 0; 71 + old_val = READ_ONCE(*sigp_ctrl); 72 + old_val.c = 0; 94 73 95 - expect = old_val.value; 96 - rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); 97 - } else { 98 - struct bsca_block *sca = 
vcpu->kvm->arch.sca; 99 - union bsca_sigp_ctrl *sigp_ctrl = 100 - &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); 101 - union bsca_sigp_ctrl new_val = {0}, old_val; 102 - 103 - old_val = READ_ONCE(*sigp_ctrl); 104 - new_val.scn = src_id; 105 - new_val.c = 1; 106 - old_val.c = 0; 107 - 108 - expect = old_val.value; 109 - rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); 110 - } 111 - read_unlock(&vcpu->kvm->arch.sca_lock); 74 + expect = old_val.value; 75 + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); 112 76 113 77 if (rc != expect) { 114 78 /* another external call is pending */ ··· 84 120 85 121 static void sca_clear_ext_call(struct kvm_vcpu *vcpu) 86 122 { 123 + struct esca_block *sca = vcpu->kvm->arch.sca; 124 + union esca_sigp_ctrl *sigp_ctrl = &sca->cpu[vcpu->vcpu_id].sigp_ctrl; 125 + 87 126 if (!kvm_s390_use_sca_entries()) 88 127 return; 89 128 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND); 90 - read_lock(&vcpu->kvm->arch.sca_lock); 91 - if (vcpu->kvm->arch.use_esca) { 92 - struct esca_block *sca = vcpu->kvm->arch.sca; 93 - union esca_sigp_ctrl *sigp_ctrl = 94 - &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); 95 129 96 - WRITE_ONCE(sigp_ctrl->value, 0); 97 - } else { 98 - struct bsca_block *sca = vcpu->kvm->arch.sca; 99 - union bsca_sigp_ctrl *sigp_ctrl = 100 - &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); 101 - 102 - WRITE_ONCE(sigp_ctrl->value, 0); 103 - } 104 - read_unlock(&vcpu->kvm->arch.sca_lock); 130 + WRITE_ONCE(sigp_ctrl->value, 0); 105 131 } 106 132 107 133 int psw_extint_disabled(struct kvm_vcpu *vcpu) ··· 1178 1224 { 1179 1225 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 1180 1226 1181 - if (!sclp.has_sigpif) 1227 + if (!kvm_s390_use_sca_entries()) 1182 1228 return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); 1183 1229 1184 1230 return sca_ext_call_pending(vcpu, NULL); ··· 1503 1549 if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL) 1504 1550 return -EINVAL; 1505 1551 1506 - if (sclp.has_sigpif && 
!kvm_s390_pv_cpu_get_handle(vcpu)) 1552 + if (kvm_s390_use_sca_entries() && !kvm_s390_pv_cpu_get_handle(vcpu)) 1507 1553 return sca_inject_ext_call(vcpu, src_id); 1508 1554 1509 1555 if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
+62 -167
arch/s390/kvm/kvm-s390.c
··· 14 14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 15 15 16 16 #include <linux/compiler.h> 17 + #include <linux/entry-virt.h> 17 18 #include <linux/export.h> 18 19 #include <linux/err.h> 19 20 #include <linux/fs.h> ··· 186 185 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), 187 186 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500), 188 187 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), 189 - STATS_DESC_COUNTER(VCPU, pfault_sync) 188 + STATS_DESC_COUNTER(VCPU, pfault_sync), 189 + STATS_DESC_COUNTER(VCPU, signal_exits) 190 190 }; 191 191 192 192 const struct kvm_stats_header kvm_vcpu_stats_header = { ··· 274 272 /* forward declarations */ 275 273 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 276 274 unsigned long end); 277 - static int sca_switch_to_extended(struct kvm *kvm); 278 275 279 276 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) 280 277 { ··· 608 607 case KVM_CAP_SET_GUEST_DEBUG: 609 608 case KVM_CAP_S390_DIAG318: 610 609 case KVM_CAP_IRQFD_RESAMPLE: 610 + case KVM_CAP_S390_USER_OPEREXEC: 611 611 r = 1; 612 612 break; 613 613 case KVM_CAP_SET_GUEST_DEBUG2: ··· 634 632 case KVM_CAP_NR_VCPUS: 635 633 case KVM_CAP_MAX_VCPUS: 636 634 case KVM_CAP_MAX_VCPU_ID: 637 - r = KVM_S390_BSCA_CPU_SLOTS; 635 + /* 636 + * Return the same value for KVM_CAP_MAX_VCPUS and 637 + * KVM_CAP_MAX_VCPU_ID to conform with the KVM API. 638 + */ 639 + r = KVM_S390_ESCA_CPU_SLOTS; 638 640 if (!kvm_s390_use_sca_entries()) 639 641 r = KVM_MAX_VCPUS; 640 - else if (sclp.has_esca && sclp.has_64bscao) 641 - r = KVM_S390_ESCA_CPU_SLOTS; 642 642 if (ext == KVM_CAP_NR_VCPUS) 643 643 r = min_t(unsigned int, num_online_cpus(), r); 644 644 break; ··· 923 919 mutex_unlock(&kvm->lock); 924 920 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s", 925 921 r ? 
"(not available)" : "(success)"); 922 + break; 923 + case KVM_CAP_S390_USER_OPEREXEC: 924 + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_OPEREXEC"); 925 + kvm->arch.user_operexec = 1; 926 + icpt_operexc_on_all_vcpus(kvm); 927 + r = 0; 926 928 break; 927 929 default: 928 930 r = -EINVAL; ··· 1941 1931 * Updates the Multiprocessor Topology-Change-Report bit to signal 1942 1932 * the guest with a topology change. 1943 1933 * This is only relevant if the topology facility is present. 1944 - * 1945 - * The SCA version, bsca or esca, doesn't matter as offset is the same. 1946 1934 */ 1947 1935 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) 1948 1936 { 1949 1937 union sca_utility new, old; 1950 - struct bsca_block *sca; 1938 + struct esca_block *sca; 1951 1939 1952 - read_lock(&kvm->arch.sca_lock); 1953 1940 sca = kvm->arch.sca; 1954 1941 old = READ_ONCE(sca->utility); 1955 1942 do { 1956 1943 new = old; 1957 1944 new.mtcr = val; 1958 1945 } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val)); 1959 - read_unlock(&kvm->arch.sca_lock); 1960 1946 } 1961 1947 1962 1948 static int kvm_s390_set_topo_change_indication(struct kvm *kvm, ··· 1973 1967 if (!test_kvm_facility(kvm, 11)) 1974 1968 return -ENXIO; 1975 1969 1976 - read_lock(&kvm->arch.sca_lock); 1977 - topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr; 1978 - read_unlock(&kvm->arch.sca_lock); 1970 + topo = kvm->arch.sca->utility.mtcr; 1979 1971 1980 1972 return put_user(topo, (u8 __user *)attr->addr); 1981 1973 } ··· 2671 2667 if (kvm_s390_pv_is_protected(kvm)) 2672 2668 break; 2673 2669 2674 - /* 2675 - * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2676 - * esca, we need no cleanup in the error cases below 2677 - */ 2678 - r = sca_switch_to_extended(kvm); 2679 - if (r) 2680 - break; 2681 - 2682 2670 mmap_write_lock(kvm->mm); 2683 2671 r = gmap_helper_disable_cow_sharing(); 2684 2672 mmap_write_unlock(kvm->mm); ··· 3313 3317 3314 3318 static void sca_dispose(struct kvm *kvm) 3315 3319 { 3316 - if (kvm->arch.use_esca) 3317 - free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 3318 - else 3319 - free_page((unsigned long)(kvm->arch.sca)); 3320 + free_pages_exact(kvm->arch.sca, sizeof(*kvm->arch.sca)); 3320 3321 kvm->arch.sca = NULL; 3321 3322 } 3322 3323 ··· 3327 3334 3328 3335 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 3329 3336 { 3330 - gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 3331 - int i, rc; 3337 + gfp_t alloc_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO; 3332 3338 char debug_name[16]; 3333 - static unsigned long sca_offset; 3339 + int i, rc; 3334 3340 3335 3341 rc = -EINVAL; 3336 3342 #ifdef CONFIG_KVM_S390_UCONTROL ··· 3350 3358 3351 3359 if (!sclp.has_64bscao) 3352 3360 alloc_flags |= GFP_DMA; 3353 - rwlock_init(&kvm->arch.sca_lock); 3354 - /* start with basic SCA */ 3355 - kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 3361 + mutex_lock(&kvm_lock); 3362 + 3363 + kvm->arch.sca = alloc_pages_exact(sizeof(*kvm->arch.sca), alloc_flags); 3364 + mutex_unlock(&kvm_lock); 3356 3365 if (!kvm->arch.sca) 3357 3366 goto out_err; 3358 - mutex_lock(&kvm_lock); 3359 - sca_offset += 16; 3360 - if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 3361 - sca_offset = 0; 3362 - kvm->arch.sca = (struct bsca_block *) 3363 - ((char *) kvm->arch.sca + sca_offset); 3364 - mutex_unlock(&kvm_lock); 3365 3367 3366 - sprintf(debug_name, "kvm-%u", current->pid); 3368 + snprintf(debug_name, sizeof(debug_name), "kvm-%u", current->pid); 3367 3369 3368 3370 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 3369 3371 if (!kvm->arch.dbf) ··· 
3534 3548 3535 3549 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 3536 3550 { 3551 + struct esca_block *sca = vcpu->kvm->arch.sca; 3552 + 3537 3553 if (!kvm_s390_use_sca_entries()) 3538 3554 return; 3539 - read_lock(&vcpu->kvm->arch.sca_lock); 3540 - if (vcpu->kvm->arch.use_esca) { 3541 - struct esca_block *sca = vcpu->kvm->arch.sca; 3542 3555 3543 - clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 3544 - sca->cpu[vcpu->vcpu_id].sda = 0; 3545 - } else { 3546 - struct bsca_block *sca = vcpu->kvm->arch.sca; 3547 - 3548 - clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 3549 - sca->cpu[vcpu->vcpu_id].sda = 0; 3550 - } 3551 - read_unlock(&vcpu->kvm->arch.sca_lock); 3556 + clear_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn); 3557 + sca->cpu[vcpu->vcpu_id].sda = 0; 3552 3558 } 3553 3559 3554 3560 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 3555 3561 { 3556 - if (!kvm_s390_use_sca_entries()) { 3557 - phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca); 3562 + struct esca_block *sca = vcpu->kvm->arch.sca; 3563 + phys_addr_t sca_phys = virt_to_phys(sca); 3558 3564 3559 - /* we still need the basic sca for the ipte control */ 3560 - vcpu->arch.sie_block->scaoh = sca_phys >> 32; 3561 - vcpu->arch.sie_block->scaol = sca_phys; 3565 + /* we still need the sca header for the ipte control */ 3566 + vcpu->arch.sie_block->scaoh = sca_phys >> 32; 3567 + vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; 3568 + vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3569 + 3570 + if (!kvm_s390_use_sca_entries()) 3562 3571 return; 3563 - } 3564 - read_lock(&vcpu->kvm->arch.sca_lock); 3565 - if (vcpu->kvm->arch.use_esca) { 3566 - struct esca_block *sca = vcpu->kvm->arch.sca; 3567 - phys_addr_t sca_phys = virt_to_phys(sca); 3568 3572 3569 - sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); 3570 - vcpu->arch.sie_block->scaoh = sca_phys >> 32; 3571 - vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; 3572 - vcpu->arch.sie_block->ecb2 |= 
ECB2_ESCA; 3573 - set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 3574 - } else { 3575 - struct bsca_block *sca = vcpu->kvm->arch.sca; 3576 - phys_addr_t sca_phys = virt_to_phys(sca); 3577 - 3578 - sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); 3579 - vcpu->arch.sie_block->scaoh = sca_phys >> 32; 3580 - vcpu->arch.sie_block->scaol = sca_phys; 3581 - set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 3582 - } 3583 - read_unlock(&vcpu->kvm->arch.sca_lock); 3584 - } 3585 - 3586 - /* Basic SCA to Extended SCA data copy routines */ 3587 - static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 3588 - { 3589 - d->sda = s->sda; 3590 - d->sigp_ctrl.c = s->sigp_ctrl.c; 3591 - d->sigp_ctrl.scn = s->sigp_ctrl.scn; 3592 - } 3593 - 3594 - static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 3595 - { 3596 - int i; 3597 - 3598 - d->ipte_control = s->ipte_control; 3599 - d->mcn[0] = s->mcn; 3600 - for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 3601 - sca_copy_entry(&d->cpu[i], &s->cpu[i]); 3602 - } 3603 - 3604 - static int sca_switch_to_extended(struct kvm *kvm) 3605 - { 3606 - struct bsca_block *old_sca = kvm->arch.sca; 3607 - struct esca_block *new_sca; 3608 - struct kvm_vcpu *vcpu; 3609 - unsigned long vcpu_idx; 3610 - u32 scaol, scaoh; 3611 - phys_addr_t new_sca_phys; 3612 - 3613 - if (kvm->arch.use_esca) 3614 - return 0; 3615 - 3616 - new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3617 - if (!new_sca) 3618 - return -ENOMEM; 3619 - 3620 - new_sca_phys = virt_to_phys(new_sca); 3621 - scaoh = new_sca_phys >> 32; 3622 - scaol = new_sca_phys & ESCA_SCAOL_MASK; 3623 - 3624 - kvm_s390_vcpu_block_all(kvm); 3625 - write_lock(&kvm->arch.sca_lock); 3626 - 3627 - sca_copy_b_to_e(new_sca, old_sca); 3628 - 3629 - kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3630 - vcpu->arch.sie_block->scaoh = scaoh; 3631 - vcpu->arch.sie_block->scaol = scaol; 3632 - vcpu->arch.sie_block->ecb2 |= 
ECB2_ESCA; 3633 - } 3634 - kvm->arch.sca = new_sca; 3635 - kvm->arch.use_esca = 1; 3636 - 3637 - write_unlock(&kvm->arch.sca_lock); 3638 - kvm_s390_vcpu_unblock_all(kvm); 3639 - 3640 - free_page((unsigned long)old_sca); 3641 - 3642 - VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)", 3643 - old_sca, kvm->arch.sca); 3644 - return 0; 3573 + set_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn); 3574 + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); 3645 3575 } 3646 3576 3647 3577 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3648 3578 { 3649 - int rc; 3579 + if (!kvm_s390_use_sca_entries()) 3580 + return id < KVM_MAX_VCPUS; 3650 3581 3651 - if (!kvm_s390_use_sca_entries()) { 3652 - if (id < KVM_MAX_VCPUS) 3653 - return true; 3654 - return false; 3655 - } 3656 - if (id < KVM_S390_BSCA_CPU_SLOTS) 3657 - return true; 3658 - if (!sclp.has_esca || !sclp.has_64bscao) 3659 - return false; 3660 - 3661 - rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3662 - 3663 - return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3582 + return id < KVM_S390_ESCA_CPU_SLOTS; 3664 3583 } 3665 3584 3666 3585 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ ··· 3811 3920 vcpu->arch.sie_block->eca |= ECA_IB; 3812 3921 if (sclp.has_siif) 3813 3922 vcpu->arch.sie_block->eca |= ECA_SII; 3814 - if (sclp.has_sigpif) 3923 + if (kvm_s390_use_sca_entries()) 3815 3924 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3816 3925 if (test_kvm_facility(vcpu->kvm, 129)) { 3817 3926 vcpu->arch.sie_block->eca |= ECA_VX; ··· 4258 4367 4259 4368 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 4260 4369 { 4261 - int ret = 0; 4262 - 4263 4370 vcpu_load(vcpu); 4264 4371 4265 4372 vcpu->run->s.regs.fpc = fpu->fpc; ··· 4268 4379 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 4269 4380 4270 4381 vcpu_put(vcpu); 4271 - return ret; 4382 + return 0; 4272 4383 } 4273 4384 4274 4385 int kvm_arch_vcpu_ioctl_get_fpu(struct 
kvm_vcpu *vcpu, struct kvm_fpu *fpu) ··· 4676 4787 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4677 4788 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4678 4789 4679 - if (need_resched()) 4680 - schedule(); 4681 - 4682 4790 if (!kvm_is_ucontrol(vcpu->kvm)) { 4683 4791 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4684 4792 if (rc || guestdbg_exit_pending(vcpu)) ··· 4960 5074 * The guest_state_{enter,exit}_irqoff() functions inform lockdep and 4961 5075 * tracing that entry to the guest will enable host IRQs, and exit from 4962 5076 * the guest will disable host IRQs. 4963 - * 4964 - * We must not use lockdep/tracing/RCU in this critical section, so we 4965 - * use the low-level arch_local_irq_*() helpers to enable/disable IRQs. 4966 5077 */ 4967 - arch_local_irq_enable(); 4968 5078 ret = sie64a(scb, gprs, gasce); 4969 - arch_local_irq_disable(); 4970 5079 4971 5080 guest_state_exit_irqoff(); 4972 5081 ··· 4980 5099 */ 4981 5100 kvm_vcpu_srcu_read_lock(vcpu); 4982 5101 4983 - do { 5102 + while (true) { 4984 5103 rc = vcpu_pre_run(vcpu); 5104 + kvm_vcpu_srcu_read_unlock(vcpu); 4985 5105 if (rc || guestdbg_exit_pending(vcpu)) 4986 5106 break; 4987 5107 4988 - kvm_vcpu_srcu_read_unlock(vcpu); 4989 5108 /* 4990 5109 * As PF_VCPU will be used in fault handler, between 4991 5110 * guest_timing_enter_irqoff and guest_timing_exit_irqoff ··· 4997 5116 sizeof(sie_page->pv_grregs)); 4998 5117 } 4999 5118 5119 + xfer_to_guest_mode_check: 5000 5120 local_irq_disable(); 5121 + xfer_to_guest_mode_prepare(); 5122 + if (xfer_to_guest_mode_work_pending()) { 5123 + local_irq_enable(); 5124 + rc = kvm_xfer_to_guest_mode_handle_work(vcpu); 5125 + if (rc) 5126 + break; 5127 + goto xfer_to_guest_mode_check; 5128 + } 5129 + 5001 5130 guest_timing_enter_irqoff(); 5002 5131 __disable_cpu_timer_accounting(vcpu); 5003 5132 ··· 5037 5146 kvm_vcpu_srcu_read_lock(vcpu); 5038 5147 5039 5148 rc = vcpu_post_run(vcpu, exit_reason); 5040 - } while (!signal_pending(current) && 
!guestdbg_exit_pending(vcpu) && !rc); 5149 + if (rc || guestdbg_exit_pending(vcpu)) { 5150 + kvm_vcpu_srcu_read_unlock(vcpu); 5151 + break; 5152 + } 5153 + } 5041 5154 5042 - kvm_vcpu_srcu_read_unlock(vcpu); 5043 5155 return rc; 5044 5156 } 5045 5157 ··· 5258 5364 5259 5365 if (signal_pending(current) && !rc) { 5260 5366 kvm_run->exit_reason = KVM_EXIT_INTR; 5367 + vcpu->stat.signal_exits++; 5261 5368 rc = -EINTR; 5262 5369 } 5263 5370
+1 -8
arch/s390/kvm/kvm-s390.h
··· 570 570 int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu); 571 571 int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); 572 572 573 - /* support for Basic/Extended SCA handling */ 574 - static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) 575 - { 576 - struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ 577 - 578 - return &sca->ipte_control; 579 - } 580 573 static inline int kvm_s390_use_sca_entries(void) 581 574 { 582 575 /* ··· 577 584 * might use the entries. By not setting the entries and keeping them 578 585 * invalid, hardware will not access them but intercept. 579 586 */ 580 - return sclp.has_sigpif; 587 + return sclp.has_sigpif && sclp.has_esca; 581 588 } 582 589 void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, 583 590 struct mcck_volatile_info *mcck_info);
+14 -6
arch/s390/kvm/vsie.c
··· 782 782 else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu)) 783 783 rc = set_validity_icpt(scb_s, 0x0011U); 784 784 else if ((gpa & PAGE_MASK) != 785 - ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK)) 785 + ((gpa + offsetof(struct bsca_block, cpu[0]) - 1) & PAGE_MASK)) 786 786 rc = set_validity_icpt(scb_s, 0x003bU); 787 787 if (!rc) { 788 788 rc = pin_guest_page(vcpu->kvm, gpa, &hpa); ··· 1180 1180 current->thread.gmap_int_code = 0; 1181 1181 barrier(); 1182 1182 if (!kvm_s390_vcpu_sie_inhibited(vcpu)) { 1183 + xfer_to_guest_mode_check: 1183 1184 local_irq_disable(); 1185 + xfer_to_guest_mode_prepare(); 1186 + if (xfer_to_guest_mode_work_pending()) { 1187 + local_irq_enable(); 1188 + rc = kvm_xfer_to_guest_mode_handle_work(vcpu); 1189 + if (rc) 1190 + goto skip_sie; 1191 + goto xfer_to_guest_mode_check; 1192 + } 1184 1193 guest_timing_enter_irqoff(); 1185 1194 rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce); 1186 1195 guest_timing_exit_irqoff(); 1187 1196 local_irq_enable(); 1188 1197 } 1198 + 1199 + skip_sie: 1189 1200 barrier(); 1190 1201 vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE; 1191 1202 ··· 1356 1345 * but rewind the PSW to re-enter SIE once that's completed 1357 1346 * instead of passing a "no action" intercept to the guest. 
1358 1347 */ 1359 - if (signal_pending(current) || 1360 - kvm_s390_vcpu_has_irq(vcpu, 0) || 1348 + if (kvm_s390_vcpu_has_irq(vcpu, 0) || 1361 1349 kvm_s390_vcpu_sie_inhibited(vcpu)) { 1362 1350 kvm_s390_rewind_psw(vcpu, 4); 1363 1351 break; 1364 1352 } 1365 - cond_resched(); 1366 1353 } 1367 1354 1368 1355 if (rc == -EFAULT) { ··· 1492 1483 if (unlikely(scb_addr & 0x1ffUL)) 1493 1484 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 1494 1485 1495 - if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) || 1496 - kvm_s390_vcpu_sie_inhibited(vcpu)) { 1486 + if (kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) { 1497 1487 kvm_s390_rewind_psw(vcpu, 4); 1498 1488 return 0; 1499 1489 }
+1
include/uapi/linux/kvm.h
··· 973 973 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 974 974 #define KVM_CAP_GUEST_MEMFD_FLAGS 244 975 975 #define KVM_CAP_ARM_SEA_TO_USER 245 976 + #define KVM_CAP_S390_USER_OPEREXEC 246 976 977 977 978 struct kvm_irq_routing_irqchip { 978 979 __u32 irqchip;
+1
tools/testing/selftests/kvm/Makefile.kvm
··· 198 198 TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test 199 199 TEST_GEN_PROGS_s390 += s390/shared_zeropage_test 200 200 TEST_GEN_PROGS_s390 += s390/ucontrol_test 201 + TEST_GEN_PROGS_s390 += s390/user_operexec 201 202 TEST_GEN_PROGS_s390 += rseq_test 202 203 203 204 TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
+140
tools/testing/selftests/kvm/s390/user_operexec.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Test operation exception forwarding. 3 + * 4 + * Copyright IBM Corp. 2025 5 + * 6 + * Authors: 7 + * Janosch Frank <frankja@linux.ibm.com> 8 + */ 9 + #include "kselftest.h" 10 + #include "kvm_util.h" 11 + #include "test_util.h" 12 + #include "sie.h" 13 + 14 + #include <linux/kvm.h> 15 + 16 + static void guest_code_instr0(void) 17 + { 18 + asm(".word 0x0000"); 19 + } 20 + 21 + static void test_user_instr0(void) 22 + { 23 + struct kvm_vcpu *vcpu; 24 + struct kvm_vm *vm; 25 + int rc; 26 + 27 + vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0); 28 + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); 29 + TEST_ASSERT_EQ(0, rc); 30 + 31 + vcpu_run(vcpu); 32 + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); 33 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); 34 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0); 35 + 36 + kvm_vm_free(vm); 37 + } 38 + 39 + static void guest_code_user_operexec(void) 40 + { 41 + asm(".word 0x0807"); 42 + } 43 + 44 + static void test_user_operexec(void) 45 + { 46 + struct kvm_vcpu *vcpu; 47 + struct kvm_vm *vm; 48 + int rc; 49 + 50 + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); 51 + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); 52 + TEST_ASSERT_EQ(0, rc); 53 + 54 + vcpu_run(vcpu); 55 + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); 56 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); 57 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); 58 + 59 + kvm_vm_free(vm); 60 + 61 + /* 62 + * Since user_operexec is the superset it can be used for the 63 + * 0 instruction. 
64 + */ 65 + vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0); 66 + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); 67 + TEST_ASSERT_EQ(0, rc); 68 + 69 + vcpu_run(vcpu); 70 + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); 71 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); 72 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0); 73 + 74 + kvm_vm_free(vm); 75 + } 76 + 77 + /* combine user_instr0 and user_operexec */ 78 + static void test_user_operexec_combined(void) 79 + { 80 + struct kvm_vcpu *vcpu; 81 + struct kvm_vm *vm; 82 + int rc; 83 + 84 + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); 85 + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); 86 + TEST_ASSERT_EQ(0, rc); 87 + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); 88 + TEST_ASSERT_EQ(0, rc); 89 + 90 + vcpu_run(vcpu); 91 + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); 92 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); 93 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); 94 + 95 + kvm_vm_free(vm); 96 + 97 + /* Reverse enablement order */ 98 + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); 99 + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); 100 + TEST_ASSERT_EQ(0, rc); 101 + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); 102 + TEST_ASSERT_EQ(0, rc); 103 + 104 + vcpu_run(vcpu); 105 + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); 106 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); 107 + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); 108 + 109 + kvm_vm_free(vm); 110 + } 111 + 112 + /* 113 + * Run all tests above. 114 + * 115 + * Enablement after VCPU has been added is automatically tested since 116 + * we enable the capability after VCPU creation. 
117 + */ 118 + static struct testdef { 119 + const char *name; 120 + void (*test)(void); 121 + } testlist[] = { 122 + { "instr0", test_user_instr0 }, 123 + { "operexec", test_user_operexec }, 124 + { "operexec_combined", test_user_operexec_combined}, 125 + }; 126 + 127 + int main(int argc, char *argv[]) 128 + { 129 + int idx; 130 + 131 + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0)); 132 + 133 + ksft_print_header(); 134 + ksft_set_plan(ARRAY_SIZE(testlist)); 135 + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { 136 + testlist[idx].test(); 137 + ksft_test_result_pass("%s\n", testlist[idx].name); 138 + } 139 + ksft_finished(); 140 + }