Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-s390-20140130' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

Two new features are added by this patch set:
- The floating interrupt controller (flic) that allows us to inject,
clear and inspect non-vcpu local interrupts. This also gives us an
opportunity to fix deficiencies in our existing interrupt definitions.
- Support for asynchronous page faults via the pfault mechanism. Testing
shows significant guest performance improvements under host swap.

+827 -109
+46
Documentation/virtual/kvm/devices/s390_flic.txt
··· 1 + FLIC (floating interrupt controller) 2 + ==================================== 3 + 4 + FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some 5 + machine check interruptions. All interrupts are stored in a per-vm list of 6 + pending interrupts. FLIC performs operations on this list. 7 + 8 + Only one FLIC instance may be instantiated. 9 + 10 + FLIC provides support to 11 + - add interrupts (KVM_DEV_FLIC_ENQUEUE) 12 + - inspect currently pending interrupts (KVM_DEV_FLIC_GET_ALL_IRQS) 13 + - purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS) 14 + - enable/disable for the guest transparent async page faults 15 + 16 + Groups: 17 + KVM_DEV_FLIC_ENQUEUE 18 + Passes a buffer and length into the kernel which are then injected into 19 + the list of pending interrupts. 20 + attr->addr contains the pointer to the buffer and attr->attr contains 21 + the length of the buffer. 22 + The format of the data structure kvm_s390_irq as it is copied from userspace 23 + is defined in usr/include/linux/kvm.h. 24 + 25 + KVM_DEV_FLIC_GET_ALL_IRQS 26 + Copies all floating interrupts into a buffer provided by userspace. 27 + When the buffer is too small it returns -ENOMEM, which is the indication 28 + for userspace to try again with a bigger buffer. 29 + All interrupts remain pending, i.e. are not deleted from the list of 30 + currently pending interrupts. 31 + attr->addr contains the userspace address of the buffer into which all 32 + interrupt data will be copied. 33 + attr->attr contains the size of the buffer in bytes. 34 + 35 + KVM_DEV_FLIC_CLEAR_IRQS 36 + Simply deletes all elements from the list of currently pending floating 37 + interrupts. No interrupts are injected into the guest. 38 + 39 + KVM_DEV_FLIC_APF_ENABLE 40 + Enables async page faults for the guest. So in case of a major page fault 41 + the host is allowed to handle this async and continues the guest. 
42 + 43 + KVM_DEV_FLIC_APF_DISABLE_WAIT 44 + Disables async page faults for the guest and waits until already pending 45 + async page faults are done. This is necessary to trigger a completion interrupt 46 + for every init interrupt before migrating the interrupt list.
+25 -33
arch/s390/include/asm/kvm_host.h
··· 16 16 #include <linux/hrtimer.h> 17 17 #include <linux/interrupt.h> 18 18 #include <linux/kvm_host.h> 19 + #include <linux/kvm.h> 19 20 #include <asm/debug.h> 20 21 #include <asm/cpu.h> 21 22 ··· 169 168 u32 diagnose_9c; 170 169 }; 171 170 172 - struct kvm_s390_io_info { 173 - __u16 subchannel_id; /* 0x0b8 */ 174 - __u16 subchannel_nr; /* 0x0ba */ 175 - __u32 io_int_parm; /* 0x0bc */ 176 - __u32 io_int_word; /* 0x0c0 */ 177 - }; 178 - 179 - struct kvm_s390_ext_info { 180 - __u32 ext_params; 181 - __u64 ext_params2; 182 - }; 183 - 184 171 #define PGM_OPERATION 0x01 185 172 #define PGM_PRIVILEGED_OP 0x02 186 173 #define PGM_EXECUTE 0x03 ··· 176 187 #define PGM_ADDRESSING 0x05 177 188 #define PGM_SPECIFICATION 0x06 178 189 #define PGM_DATA 0x07 179 - 180 - struct kvm_s390_pgm_info { 181 - __u16 code; 182 - }; 183 - 184 - struct kvm_s390_prefix_info { 185 - __u32 address; 186 - }; 187 - 188 - struct kvm_s390_extcall_info { 189 - __u16 code; 190 - }; 191 - 192 - struct kvm_s390_emerg_info { 193 - __u16 code; 194 - }; 195 - 196 - struct kvm_s390_mchk_info { 197 - __u64 cr14; 198 - __u64 mcic; 199 - }; 200 190 201 191 struct kvm_s390_interrupt_info { 202 192 struct list_head list; ··· 214 246 unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) 215 247 / sizeof(long)]; 216 248 struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS]; 249 + unsigned int irq_count; 217 250 }; 218 251 219 252 ··· 231 262 u64 stidp_data; 232 263 }; 233 264 struct gmap *gmap; 265 + #define KVM_S390_PFAULT_TOKEN_INVALID (-1UL) 266 + unsigned long pfault_token; 267 + unsigned long pfault_select; 268 + unsigned long pfault_compare; 234 269 }; 235 270 236 271 struct kvm_vm_stat { ··· 248 275 struct sca_block *sca; 249 276 debug_info_t *dbf; 250 277 struct kvm_s390_float_interrupt float_int; 278 + struct kvm_device *flic; 251 279 struct gmap *gmap; 252 280 int css_support; 253 281 }; ··· 260 286 { 261 287 return IS_ERR_VALUE(addr); 262 288 } 289 + 290 + #define ASYNC_PF_PER_VCPU 64 291 + 
struct kvm_vcpu; 292 + struct kvm_async_pf; 293 + struct kvm_arch_async_pf { 294 + unsigned long pfault_token; 295 + }; 296 + 297 + bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); 298 + 299 + void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 300 + struct kvm_async_pf *work); 301 + 302 + void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 303 + struct kvm_async_pf *work); 304 + 305 + void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 306 + struct kvm_async_pf *work); 263 307 264 308 extern int sie64a(struct kvm_s390_sie_block *, u64 *); 265 309 extern char sie_exit;
+2
arch/s390/include/asm/pgtable.h
··· 767 767 * @table: pointer to the page directory 768 768 * @asce: address space control element for gmap page table 769 769 * @crst_list: list of all crst tables used in the guest address space 770 + * @pfault_enabled: defines if pfaults are applicable for the guest 770 771 */ 771 772 struct gmap { 772 773 struct list_head list; ··· 776 775 unsigned long asce; 777 776 void *private; 778 777 struct list_head crst_list; 778 + bool pfault_enabled; 779 779 }; 780 780 781 781 /**
+1
arch/s390/include/asm/processor.h
··· 79 79 unsigned long ksp; /* kernel stack pointer */ 80 80 mm_segment_t mm_segment; 81 81 unsigned long gmap_addr; /* address of last gmap fault. */ 82 + unsigned int gmap_pfault; /* signal of a pending guest pfault */ 82 83 struct per_regs per_user; /* User specified PER registers */ 83 84 struct per_event per_event; /* Cause of the last PER trap */ 84 85 unsigned long per_flags; /* Flags to control debug behavior */
+19
arch/s390/include/uapi/asm/kvm.h
··· 16 16 17 17 #define __KVM_S390 18 18 19 + /* Device control API: s390-specific devices */ 20 + #define KVM_DEV_FLIC_GET_ALL_IRQS 1 21 + #define KVM_DEV_FLIC_ENQUEUE 2 22 + #define KVM_DEV_FLIC_CLEAR_IRQS 3 23 + #define KVM_DEV_FLIC_APF_ENABLE 4 24 + #define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 25 + /* 26 + * We can have up to 4*64k pending subchannels + 8 adapter interrupts, 27 + * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. 28 + * There are also sclp and machine checks. This gives us 29 + * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000 30 + * Lets round up to 8192 pages. 31 + */ 32 + #define KVM_S390_MAX_FLOAT_IRQS 266250 33 + #define KVM_S390_FLIC_MAX_BUFFER 0x2000000 34 + 19 35 /* for KVM_GET_REGS and KVM_SET_REGS */ 20 36 struct kvm_regs { 21 37 /* general purpose regs for s390 */ ··· 73 57 #define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2) 74 58 #define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) 75 59 #define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4) 60 + #define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5) 61 + #define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6) 62 + #define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7) 76 63 #endif
+2
arch/s390/kvm/Kconfig
··· 23 23 select ANON_INODES 24 24 select HAVE_KVM_CPU_RELAX_INTERCEPT 25 25 select HAVE_KVM_EVENTFD 26 + select KVM_ASYNC_PF 27 + select KVM_ASYNC_PF_SYNC 26 28 ---help--- 27 29 Support hosting paravirtualized guest machines using the SIE 28 30 virtualization capability on the mainframe. This should work
+1 -1
arch/s390/kvm/Makefile
··· 7 7 # as published by the Free Software Foundation. 8 8 9 9 KVM := ../../../virt/kvm 10 - common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o 10 + common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o 11 11 12 12 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm 13 13
+84
arch/s390/kvm/diag.c
··· 17 17 #include "kvm-s390.h" 18 18 #include "trace.h" 19 19 #include "trace-s390.h" 20 + #include "gaccess.h" 20 21 21 22 static int diag_release_pages(struct kvm_vcpu *vcpu) 22 23 { ··· 45 44 end, vcpu->arch.gmap); 46 45 } 47 46 return 0; 47 + } 48 + 49 + static int __diag_page_ref_service(struct kvm_vcpu *vcpu) 50 + { 51 + struct prs_parm { 52 + u16 code; 53 + u16 subcode; 54 + u16 parm_len; 55 + u16 parm_version; 56 + u64 token_addr; 57 + u64 select_mask; 58 + u64 compare_mask; 59 + u64 zarch; 60 + }; 61 + struct prs_parm parm; 62 + int rc; 63 + u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; 64 + u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); 65 + unsigned long hva_token = KVM_HVA_ERR_BAD; 66 + 67 + if (vcpu->run->s.regs.gprs[rx] & 7) 68 + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 69 + if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm))) 70 + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 71 + if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258) 72 + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 73 + 74 + switch (parm.subcode) { 75 + case 0: /* TOKEN */ 76 + if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) { 77 + /* 78 + * If the pagefault handshake is already activated, 79 + * the token must not be changed. We have to return 80 + * decimal 8 instead, as mandated in SC24-6084. 
81 + */ 82 + vcpu->run->s.regs.gprs[ry] = 8; 83 + return 0; 84 + } 85 + 86 + if ((parm.compare_mask & parm.select_mask) != parm.compare_mask || 87 + parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL) 88 + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 89 + 90 + hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr)); 91 + if (kvm_is_error_hva(hva_token)) 92 + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 93 + 94 + vcpu->arch.pfault_token = parm.token_addr; 95 + vcpu->arch.pfault_select = parm.select_mask; 96 + vcpu->arch.pfault_compare = parm.compare_mask; 97 + vcpu->run->s.regs.gprs[ry] = 0; 98 + rc = 0; 99 + break; 100 + case 1: /* 101 + * CANCEL 102 + * Specification allows to let already pending tokens survive 103 + * the cancel, therefore to reduce code complexity, we assume 104 + * all outstanding tokens are already pending. 105 + */ 106 + if (parm.token_addr || parm.select_mask || 107 + parm.compare_mask || parm.zarch) 108 + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 109 + 110 + vcpu->run->s.regs.gprs[ry] = 0; 111 + /* 112 + * If the pfault handling was not established or is already 113 + * canceled SC24-6084 requests to return decimal 4. 114 + */ 115 + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 116 + vcpu->run->s.regs.gprs[ry] = 4; 117 + else 118 + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 119 + 120 + rc = 0; 121 + break; 122 + default: 123 + rc = -EOPNOTSUPP; 124 + break; 125 + } 126 + 127 + return rc; 48 128 } 49 129 50 130 static int __diag_time_slice_end(struct kvm_vcpu *vcpu) ··· 232 150 return __diag_time_slice_end(vcpu); 233 151 case 0x9c: 234 152 return __diag_time_slice_end_directed(vcpu); 153 + case 0x258: 154 + return __diag_page_ref_service(vcpu); 235 155 case 0x308: 236 156 return __diag_ipl_functions(vcpu); 237 157 case 0x500:
+334 -65
arch/s390/kvm/interrupt.c
··· 31 31 return ((type & 0xfffe0000u) != 0xfffe0000u); 32 32 } 33 33 34 - static int psw_extint_disabled(struct kvm_vcpu *vcpu) 34 + int psw_extint_disabled(struct kvm_vcpu *vcpu) 35 35 { 36 36 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); 37 37 } ··· 78 78 return 1; 79 79 return 0; 80 80 case KVM_S390_INT_SERVICE: 81 - if (psw_extint_disabled(vcpu)) 82 - return 0; 83 - if (vcpu->arch.sie_block->gcr[0] & 0x200ul) 84 - return 1; 85 - return 0; 81 + case KVM_S390_INT_PFAULT_INIT: 82 + case KVM_S390_INT_PFAULT_DONE: 86 83 case KVM_S390_INT_VIRTIO: 87 84 if (psw_extint_disabled(vcpu)) 88 85 return 0; ··· 147 150 case KVM_S390_INT_EXTERNAL_CALL: 148 151 case KVM_S390_INT_EMERGENCY: 149 152 case KVM_S390_INT_SERVICE: 153 + case KVM_S390_INT_PFAULT_INIT: 154 + case KVM_S390_INT_PFAULT_DONE: 150 155 case KVM_S390_INT_VIRTIO: 151 156 if (psw_extint_disabled(vcpu)) 152 157 __set_cpuflag(vcpu, CPUSTAT_EXT_INT); ··· 221 222 __LC_EXT_NEW_PSW, sizeof(psw_t)); 222 223 rc |= put_guest(vcpu, inti->ext.ext_params, 223 224 (u32 __user *)__LC_EXT_PARAMS); 225 + break; 226 + case KVM_S390_INT_PFAULT_INIT: 227 + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 228 + inti->ext.ext_params2); 229 + rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE); 230 + rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR); 231 + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 232 + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 233 + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 234 + __LC_EXT_NEW_PSW, sizeof(psw_t)); 235 + rc |= put_guest(vcpu, inti->ext.ext_params2, 236 + (u64 __user *) __LC_EXT_PARAMS2); 237 + break; 238 + case KVM_S390_INT_PFAULT_DONE: 239 + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 240 + inti->ext.ext_params2); 241 + rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE); 242 + rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR); 243 + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, 244 + 
&vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 245 + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, 246 + __LC_EXT_NEW_PSW, sizeof(psw_t)); 247 + rc |= put_guest(vcpu, inti->ext.ext_params2, 248 + (u64 __user *) __LC_EXT_PARAMS2); 224 249 break; 225 250 case KVM_S390_INT_VIRTIO: 226 251 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", ··· 380 357 return 1; 381 358 } 382 359 383 - static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) 360 + int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) 384 361 { 385 362 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 386 363 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; ··· 551 528 list_for_each_entry_safe(inti, n, &fi->list, list) { 552 529 if (__interrupt_is_deliverable(vcpu, inti)) { 553 530 list_del(&inti->list); 531 + fi->irq_count--; 554 532 deliver = 1; 555 533 break; 556 534 } ··· 607 583 if ((inti->type == KVM_S390_MCHK) && 608 584 __interrupt_is_deliverable(vcpu, inti)) { 609 585 list_del(&inti->list); 586 + fi->irq_count--; 610 587 deliver = 1; 611 588 break; 612 589 } ··· 675 650 inti = iter; 676 651 break; 677 652 } 678 - if (inti) 653 + if (inti) { 679 654 list_del_init(&inti->list); 655 + fi->irq_count--; 656 + } 680 657 if (list_empty(&fi->list)) 681 658 atomic_set(&fi->active, 0); 682 659 spin_unlock(&fi->lock); ··· 686 659 return inti; 687 660 } 688 661 689 - int kvm_s390_inject_vm(struct kvm *kvm, 690 - struct kvm_s390_interrupt *s390int) 662 + static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) 691 663 { 692 664 struct kvm_s390_local_interrupt *li; 693 665 struct kvm_s390_float_interrupt *fi; 694 - struct kvm_s390_interrupt_info *inti, *iter; 666 + struct kvm_s390_interrupt_info *iter; 695 667 int sigcpu; 696 - 697 - inti = kzalloc(sizeof(*inti), GFP_KERNEL); 698 - if (!inti) 699 - return -ENOMEM; 700 - 701 - switch (s390int->type) { 702 - case KVM_S390_INT_VIRTIO: 703 - VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", 
704 - s390int->parm, s390int->parm64); 705 - inti->type = s390int->type; 706 - inti->ext.ext_params = s390int->parm; 707 - inti->ext.ext_params2 = s390int->parm64; 708 - break; 709 - case KVM_S390_INT_SERVICE: 710 - VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); 711 - inti->type = s390int->type; 712 - inti->ext.ext_params = s390int->parm; 713 - break; 714 - case KVM_S390_PROGRAM_INT: 715 - case KVM_S390_SIGP_STOP: 716 - case KVM_S390_INT_EXTERNAL_CALL: 717 - case KVM_S390_INT_EMERGENCY: 718 - kfree(inti); 719 - return -EINVAL; 720 - case KVM_S390_MCHK: 721 - VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", 722 - s390int->parm64); 723 - inti->type = s390int->type; 724 - inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ 725 - inti->mchk.mcic = s390int->parm64; 726 - break; 727 - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 728 - if (s390int->type & IOINT_AI_MASK) 729 - VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); 730 - else 731 - VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", 732 - s390int->type & IOINT_CSSID_MASK, 733 - s390int->type & IOINT_SSID_MASK, 734 - s390int->type & IOINT_SCHID_MASK); 735 - inti->type = s390int->type; 736 - inti->io.subchannel_id = s390int->parm >> 16; 737 - inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; 738 - inti->io.io_int_parm = s390int->parm64 >> 32; 739 - inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull; 740 - break; 741 - default: 742 - kfree(inti); 743 - return -EINVAL; 744 - } 745 - trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, 746 - 2); 668 + int rc = 0; 747 669 748 670 mutex_lock(&kvm->lock); 749 671 fi = &kvm->arch.float_int; 750 672 spin_lock(&fi->lock); 751 - if (!is_ioint(inti->type)) 673 + if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { 674 + rc = -EINVAL; 675 + goto unlock_fi; 676 + } 677 + fi->irq_count++; 678 + if (!is_ioint(inti->type)) { 752 679 list_add_tail(&inti->list, &fi->list); 753 - else { 680 + } else { 754 681 u64 isc_bits = 
int_word_to_isc_bits(inti->io.io_int_word); 755 682 756 683 /* Keep I/O interrupts sorted in isc order. */ ··· 733 752 if (waitqueue_active(li->wq)) 734 753 wake_up_interruptible(li->wq); 735 754 spin_unlock_bh(&li->lock); 755 + unlock_fi: 736 756 spin_unlock(&fi->lock); 737 757 mutex_unlock(&kvm->lock); 738 - return 0; 758 + return rc; 759 + } 760 + 761 + int kvm_s390_inject_vm(struct kvm *kvm, 762 + struct kvm_s390_interrupt *s390int) 763 + { 764 + struct kvm_s390_interrupt_info *inti; 765 + 766 + inti = kzalloc(sizeof(*inti), GFP_KERNEL); 767 + if (!inti) 768 + return -ENOMEM; 769 + 770 + inti->type = s390int->type; 771 + switch (inti->type) { 772 + case KVM_S390_INT_VIRTIO: 773 + VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", 774 + s390int->parm, s390int->parm64); 775 + inti->ext.ext_params = s390int->parm; 776 + inti->ext.ext_params2 = s390int->parm64; 777 + break; 778 + case KVM_S390_INT_SERVICE: 779 + VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); 780 + inti->ext.ext_params = s390int->parm; 781 + break; 782 + case KVM_S390_INT_PFAULT_DONE: 783 + inti->type = s390int->type; 784 + inti->ext.ext_params2 = s390int->parm64; 785 + break; 786 + case KVM_S390_MCHK: 787 + VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", 788 + s390int->parm64); 789 + inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ 790 + inti->mchk.mcic = s390int->parm64; 791 + break; 792 + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 793 + if (inti->type & IOINT_AI_MASK) 794 + VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); 795 + else 796 + VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", 797 + s390int->type & IOINT_CSSID_MASK, 798 + s390int->type & IOINT_SSID_MASK, 799 + s390int->type & IOINT_SCHID_MASK); 800 + inti->io.subchannel_id = s390int->parm >> 16; 801 + inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; 802 + inti->io.io_int_parm = s390int->parm64 >> 32; 803 + inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull; 804 + break; 805 + 
default: 806 + kfree(inti); 807 + return -EINVAL; 808 + } 809 + trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, 810 + 2); 811 + 812 + return __inject_vm(kvm, inti); 739 813 } 740 814 741 815 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, ··· 850 814 inti->type = s390int->type; 851 815 inti->mchk.mcic = s390int->parm64; 852 816 break; 817 + case KVM_S390_INT_PFAULT_INIT: 818 + inti->type = s390int->type; 819 + inti->ext.ext_params2 = s390int->parm64; 820 + break; 853 821 case KVM_S390_INT_VIRTIO: 854 822 case KVM_S390_INT_SERVICE: 855 823 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: ··· 881 841 mutex_unlock(&vcpu->kvm->lock); 882 842 return 0; 883 843 } 844 + 845 + static void clear_floating_interrupts(struct kvm *kvm) 846 + { 847 + struct kvm_s390_float_interrupt *fi; 848 + struct kvm_s390_interrupt_info *n, *inti = NULL; 849 + 850 + mutex_lock(&kvm->lock); 851 + fi = &kvm->arch.float_int; 852 + spin_lock(&fi->lock); 853 + list_for_each_entry_safe(inti, n, &fi->list, list) { 854 + list_del(&inti->list); 855 + kfree(inti); 856 + } 857 + fi->irq_count = 0; 858 + atomic_set(&fi->active, 0); 859 + spin_unlock(&fi->lock); 860 + mutex_unlock(&kvm->lock); 861 + } 862 + 863 + static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, 864 + u8 *addr) 865 + { 866 + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; 867 + struct kvm_s390_irq irq = {0}; 868 + 869 + irq.type = inti->type; 870 + switch (inti->type) { 871 + case KVM_S390_INT_PFAULT_INIT: 872 + case KVM_S390_INT_PFAULT_DONE: 873 + case KVM_S390_INT_VIRTIO: 874 + case KVM_S390_INT_SERVICE: 875 + irq.u.ext = inti->ext; 876 + break; 877 + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 878 + irq.u.io = inti->io; 879 + break; 880 + case KVM_S390_MCHK: 881 + irq.u.mchk = inti->mchk; 882 + break; 883 + default: 884 + return -EINVAL; 885 + } 886 + 887 + if (copy_to_user(uptr, &irq, sizeof(irq))) 888 + return -EFAULT; 889 + 890 + return 0; 891 + } 892 + 893 + 
static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) 894 + { 895 + struct kvm_s390_interrupt_info *inti; 896 + struct kvm_s390_float_interrupt *fi; 897 + int ret = 0; 898 + int n = 0; 899 + 900 + mutex_lock(&kvm->lock); 901 + fi = &kvm->arch.float_int; 902 + spin_lock(&fi->lock); 903 + 904 + list_for_each_entry(inti, &fi->list, list) { 905 + if (len < sizeof(struct kvm_s390_irq)) { 906 + /* signal userspace to try again */ 907 + ret = -ENOMEM; 908 + break; 909 + } 910 + ret = copy_irq_to_user(inti, buf); 911 + if (ret) 912 + break; 913 + buf += sizeof(struct kvm_s390_irq); 914 + len -= sizeof(struct kvm_s390_irq); 915 + n++; 916 + } 917 + 918 + spin_unlock(&fi->lock); 919 + mutex_unlock(&kvm->lock); 920 + 921 + return ret < 0 ? ret : n; 922 + } 923 + 924 + static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 925 + { 926 + int r; 927 + 928 + switch (attr->group) { 929 + case KVM_DEV_FLIC_GET_ALL_IRQS: 930 + r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, 931 + attr->attr); 932 + break; 933 + default: 934 + r = -EINVAL; 935 + } 936 + 937 + return r; 938 + } 939 + 940 + static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti, 941 + u64 addr) 942 + { 943 + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; 944 + void *target = NULL; 945 + void __user *source; 946 + u64 size; 947 + 948 + if (get_user(inti->type, (u64 __user *)addr)) 949 + return -EFAULT; 950 + 951 + switch (inti->type) { 952 + case KVM_S390_INT_PFAULT_INIT: 953 + case KVM_S390_INT_PFAULT_DONE: 954 + case KVM_S390_INT_VIRTIO: 955 + case KVM_S390_INT_SERVICE: 956 + target = (void *) &inti->ext; 957 + source = &uptr->u.ext; 958 + size = sizeof(inti->ext); 959 + break; 960 + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 961 + target = (void *) &inti->io; 962 + source = &uptr->u.io; 963 + size = sizeof(inti->io); 964 + break; 965 + case KVM_S390_MCHK: 966 + target = (void *) &inti->mchk; 967 + source = &uptr->u.mchk; 
968 + size = sizeof(inti->mchk); 969 + break; 970 + default: 971 + return -EINVAL; 972 + } 973 + 974 + if (copy_from_user(target, source, size)) 975 + return -EFAULT; 976 + 977 + return 0; 978 + } 979 + 980 + static int enqueue_floating_irq(struct kvm_device *dev, 981 + struct kvm_device_attr *attr) 982 + { 983 + struct kvm_s390_interrupt_info *inti = NULL; 984 + int r = 0; 985 + int len = attr->attr; 986 + 987 + if (len % sizeof(struct kvm_s390_irq) != 0) 988 + return -EINVAL; 989 + else if (len > KVM_S390_FLIC_MAX_BUFFER) 990 + return -EINVAL; 991 + 992 + while (len >= sizeof(struct kvm_s390_irq)) { 993 + inti = kzalloc(sizeof(*inti), GFP_KERNEL); 994 + if (!inti) 995 + return -ENOMEM; 996 + 997 + r = copy_irq_from_user(inti, attr->addr); 998 + if (r) { 999 + kfree(inti); 1000 + return r; 1001 + } 1002 + r = __inject_vm(dev->kvm, inti); 1003 + if (r) { 1004 + kfree(inti); 1005 + return r; 1006 + } 1007 + len -= sizeof(struct kvm_s390_irq); 1008 + attr->addr += sizeof(struct kvm_s390_irq); 1009 + } 1010 + 1011 + return r; 1012 + } 1013 + 1014 + static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1015 + { 1016 + int r = 0; 1017 + unsigned int i; 1018 + struct kvm_vcpu *vcpu; 1019 + 1020 + switch (attr->group) { 1021 + case KVM_DEV_FLIC_ENQUEUE: 1022 + r = enqueue_floating_irq(dev, attr); 1023 + break; 1024 + case KVM_DEV_FLIC_CLEAR_IRQS: 1025 + r = 0; 1026 + clear_floating_interrupts(dev->kvm); 1027 + break; 1028 + case KVM_DEV_FLIC_APF_ENABLE: 1029 + dev->kvm->arch.gmap->pfault_enabled = 1; 1030 + break; 1031 + case KVM_DEV_FLIC_APF_DISABLE_WAIT: 1032 + dev->kvm->arch.gmap->pfault_enabled = 0; 1033 + /* 1034 + * Make sure no async faults are in transition when 1035 + * clearing the queues. So we don't need to worry 1036 + * about late coming workers. 
1037 + */ 1038 + synchronize_srcu(&dev->kvm->srcu); 1039 + kvm_for_each_vcpu(i, vcpu, dev->kvm) 1040 + kvm_clear_async_pf_completion_queue(vcpu); 1041 + break; 1042 + default: 1043 + r = -EINVAL; 1044 + } 1045 + 1046 + return r; 1047 + } 1048 + 1049 + static int flic_create(struct kvm_device *dev, u32 type) 1050 + { 1051 + if (!dev) 1052 + return -EINVAL; 1053 + if (dev->kvm->arch.flic) 1054 + return -EINVAL; 1055 + dev->kvm->arch.flic = dev; 1056 + return 0; 1057 + } 1058 + 1059 + static void flic_destroy(struct kvm_device *dev) 1060 + { 1061 + dev->kvm->arch.flic = NULL; 1062 + kfree(dev); 1063 + } 1064 + 1065 + /* s390 floating irq controller (flic) */ 1066 + struct kvm_device_ops kvm_flic_ops = { 1067 + .name = "kvm-flic", 1068 + .get_attr = flic_get_attr, 1069 + .set_attr = flic_set_attr, 1070 + .create = flic_create, 1071 + .destroy = flic_destroy, 1072 + };
+133 -2
arch/s390/kvm/kvm-s390.c
··· 152 152 #ifdef CONFIG_KVM_S390_UCONTROL 153 153 case KVM_CAP_S390_UCONTROL: 154 154 #endif 155 + case KVM_CAP_ASYNC_PF: 155 156 case KVM_CAP_SYNC_REGS: 156 157 case KVM_CAP_ONE_REG: 157 158 case KVM_CAP_ENABLE_CAP: 158 159 case KVM_CAP_S390_CSS_SUPPORT: 159 160 case KVM_CAP_IOEVENTFD: 161 + case KVM_CAP_DEVICE_CTRL: 160 162 r = 1; 161 163 break; 162 164 case KVM_CAP_NR_VCPUS: ··· 256 254 if (!kvm->arch.gmap) 257 255 goto out_nogmap; 258 256 kvm->arch.gmap->private = kvm; 257 + kvm->arch.gmap->pfault_enabled = 0; 259 258 } 260 259 261 260 kvm->arch.css_support = 0; ··· 274 271 { 275 272 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 276 273 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 274 + kvm_clear_async_pf_completion_queue(vcpu); 277 275 if (!kvm_is_ucontrol(vcpu->kvm)) { 278 276 clear_bit(63 - vcpu->vcpu_id, 279 277 (unsigned long *) &vcpu->kvm->arch.sca->mcn); ··· 324 320 /* Section: vcpu related */ 325 321 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 326 322 { 323 + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 324 + kvm_clear_async_pf_completion_queue(vcpu); 327 325 if (kvm_is_ucontrol(vcpu->kvm)) { 328 326 vcpu->arch.gmap = gmap_alloc(current->mm); 329 327 if (!vcpu->arch.gmap) ··· 386 380 vcpu->arch.guest_fpregs.fpc = 0; 387 381 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); 388 382 vcpu->arch.sie_block->gbea = 1; 383 + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 384 + kvm_clear_async_pf_completion_queue(vcpu); 389 385 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 390 386 } 391 387 ··· 561 553 r = put_user(vcpu->arch.sie_block->ckc, 562 554 (u64 __user *)reg->addr); 563 555 break; 556 + case KVM_REG_S390_PFTOKEN: 557 + r = put_user(vcpu->arch.pfault_token, 558 + (u64 __user *)reg->addr); 559 + break; 560 + case KVM_REG_S390_PFCOMPARE: 561 + r = put_user(vcpu->arch.pfault_compare, 562 + (u64 __user *)reg->addr); 563 + break; 564 + case KVM_REG_S390_PFSELECT: 565 + r = put_user(vcpu->arch.pfault_select, 
566 + (u64 __user *)reg->addr); 567 + break; 564 568 default: 565 569 break; 566 570 } ··· 600 580 break; 601 581 case KVM_REG_S390_CLOCK_COMP: 602 582 r = get_user(vcpu->arch.sie_block->ckc, 583 + (u64 __user *)reg->addr); 584 + break; 585 + case KVM_REG_S390_PFTOKEN: 586 + r = get_user(vcpu->arch.pfault_token, 587 + (u64 __user *)reg->addr); 588 + break; 589 + case KVM_REG_S390_PFCOMPARE: 590 + r = get_user(vcpu->arch.pfault_compare, 591 + (u64 __user *)reg->addr); 592 + break; 593 + case KVM_REG_S390_PFSELECT: 594 + r = get_user(vcpu->arch.pfault_select, 603 595 (u64 __user *)reg->addr); 604 596 break; 605 597 default: ··· 732 700 return 0; 733 701 } 734 702 703 + static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu) 704 + { 705 + long rc; 706 + hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap); 707 + struct mm_struct *mm = current->mm; 708 + down_read(&mm->mmap_sem); 709 + rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL); 710 + up_read(&mm->mmap_sem); 711 + return rc; 712 + } 713 + 714 + static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 715 + unsigned long token) 716 + { 717 + struct kvm_s390_interrupt inti; 718 + inti.parm64 = token; 719 + 720 + if (start_token) { 721 + inti.type = KVM_S390_INT_PFAULT_INIT; 722 + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); 723 + } else { 724 + inti.type = KVM_S390_INT_PFAULT_DONE; 725 + WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 726 + } 727 + } 728 + 729 + void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 730 + struct kvm_async_pf *work) 731 + { 732 + trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 733 + __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 734 + } 735 + 736 + void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 737 + struct kvm_async_pf *work) 738 + { 739 + trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 740 + __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 741 + } 742 + 
743 + void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 744 + struct kvm_async_pf *work) 745 + { 746 + /* s390 will always inject the page directly */ 747 + } 748 + 749 + bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 750 + { 751 + /* 752 + * s390 will always inject the page directly, 753 + * but we still want check_async_completion to cleanup 754 + */ 755 + return true; 756 + } 757 + 758 + static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 759 + { 760 + hva_t hva; 761 + struct kvm_arch_async_pf arch; 762 + int rc; 763 + 764 + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 765 + return 0; 766 + if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 767 + vcpu->arch.pfault_compare) 768 + return 0; 769 + if (psw_extint_disabled(vcpu)) 770 + return 0; 771 + if (kvm_cpu_has_interrupt(vcpu)) 772 + return 0; 773 + if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) 774 + return 0; 775 + if (!vcpu->arch.gmap->pfault_enabled) 776 + return 0; 777 + 778 + hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap); 779 + if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8)) 780 + return 0; 781 + 782 + rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 783 + return rc; 784 + } 785 + 735 786 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 736 787 { 737 788 int rc, cpuflags; 789 + 790 + /* 791 + * On s390 notifications for arriving pages will be delivered directly 792 + * to the guest but the house keeping for completed pfaults is 793 + * handled outside the worker. 
794 + */ 795 + kvm_check_async_pf_completion(vcpu); 738 796 739 797 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); 740 798 ··· 851 729 852 730 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 853 731 { 854 - int rc; 732 + int rc = -1; 855 733 856 734 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 857 735 vcpu->arch.sie_block->icptcode); ··· 865 743 current->thread.gmap_addr; 866 744 vcpu->run->s390_ucontrol.pgm_code = 0x10; 867 745 rc = -EREMOTE; 868 - } else { 746 + 747 + } else if (current->thread.gmap_pfault) { 748 + trace_kvm_s390_major_guest_pfault(vcpu); 749 + current->thread.gmap_pfault = 0; 750 + if (kvm_arch_setup_async_pf(vcpu) || 751 + (kvm_arch_fault_in_sync(vcpu) >= 0)) 752 + rc = 0; 753 + } 754 + 755 + if (rc == -1) { 869 756 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 870 757 trace_kvm_s390_sie_fault(vcpu); 871 758 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+4
arch/s390/kvm/kvm-s390.h
··· 159 159 /* implemented in diag.c */ 160 160 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); 161 161 162 + /* implemented in interrupt.c */ 163 + int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 164 + int psw_extint_disabled(struct kvm_vcpu *vcpu); 165 + 162 166 #endif
+7
arch/s390/kvm/sigp.c
··· 224 224 static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) 225 225 { 226 226 int rc; 227 + unsigned int i; 228 + struct kvm_vcpu *v; 227 229 228 230 switch (parameter & 0xff) { 229 231 case 0: ··· 233 231 break; 234 232 case 1: 235 233 case 2: 234 + kvm_for_each_vcpu(i, v, vcpu->kvm) { 235 + v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 236 + kvm_clear_async_pf_completion_queue(v); 237 + } 238 + 236 239 rc = SIGP_CC_ORDER_CODE_ACCEPTED; 237 240 break; 238 241 default:
+46
arch/s390/kvm/trace.h
··· 30 30 TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \ 31 31 __entry->pswmask, __entry->pswaddr, p_args) 32 32 33 + TRACE_EVENT(kvm_s390_major_guest_pfault, 34 + TP_PROTO(VCPU_PROTO_COMMON), 35 + TP_ARGS(VCPU_ARGS_COMMON), 36 + 37 + TP_STRUCT__entry( 38 + VCPU_FIELD_COMMON 39 + ), 40 + 41 + TP_fast_assign( 42 + VCPU_ASSIGN_COMMON 43 + ), 44 + VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault") 45 + ); 46 + 47 + TRACE_EVENT(kvm_s390_pfault_init, 48 + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), 49 + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), 50 + 51 + TP_STRUCT__entry( 52 + VCPU_FIELD_COMMON 53 + __field(long, pfault_token) 54 + ), 55 + 56 + TP_fast_assign( 57 + VCPU_ASSIGN_COMMON 58 + __entry->pfault_token = pfault_token; 59 + ), 60 + VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token) 61 + ); 62 + 63 + TRACE_EVENT(kvm_s390_pfault_done, 64 + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), 65 + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), 66 + 67 + TP_STRUCT__entry( 68 + VCPU_FIELD_COMMON 69 + __field(long, pfault_token) 70 + ), 71 + 72 + TP_fast_assign( 73 + VCPU_ASSIGN_COMMON 74 + __entry->pfault_token = pfault_token; 75 + ), 76 + VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token) 77 + ); 78 + 33 79 /* 34 80 * Tracepoints for SIE entry and exit. 35 81 */
+22 -4
arch/s390/mm/fault.c
··· 50 50 #define VM_FAULT_BADMAP 0x020000 51 51 #define VM_FAULT_BADACCESS 0x040000 52 52 #define VM_FAULT_SIGNAL 0x080000 53 + #define VM_FAULT_PFAULT 0x100000 53 54 54 55 static unsigned long store_indication __read_mostly; 55 56 ··· 228 227 return; 229 228 } 230 229 case VM_FAULT_BADCONTEXT: 230 + case VM_FAULT_PFAULT: 231 231 do_no_context(regs); 232 232 break; 233 233 case VM_FAULT_SIGNAL: ··· 266 264 */ 267 265 static inline int do_exception(struct pt_regs *regs, int access) 268 266 { 267 + #ifdef CONFIG_PGSTE 268 + struct gmap *gmap; 269 + #endif 269 270 struct task_struct *tsk; 270 271 struct mm_struct *mm; 271 272 struct vm_area_struct *vma; ··· 309 304 down_read(&mm->mmap_sem); 310 305 311 306 #ifdef CONFIG_PGSTE 312 - if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { 313 - address = __gmap_fault(address, 314 - (struct gmap *) S390_lowcore.gmap); 307 + gmap = (struct gmap *) 308 + ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); 309 + if (gmap) { 310 + address = __gmap_fault(address, gmap); 315 311 if (address == -EFAULT) { 316 312 fault = VM_FAULT_BADMAP; 317 313 goto out_up; ··· 321 315 fault = VM_FAULT_OOM; 322 316 goto out_up; 323 317 } 318 + if (gmap->pfault_enabled) 319 + flags |= FAULT_FLAG_RETRY_NOWAIT; 324 320 } 325 321 #endif 326 322 ··· 379 371 regs, address); 380 372 } 381 373 if (fault & VM_FAULT_RETRY) { 374 + #ifdef CONFIG_PGSTE 375 + if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { 376 + /* FAULT_FLAG_RETRY_NOWAIT has been set, 377 + * mmap_sem has not been released */ 378 + current->thread.gmap_pfault = 1; 379 + fault = VM_FAULT_PFAULT; 380 + goto out_up; 381 + } 382 + #endif 382 383 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 383 384 * of starvation. */ 384 - flags &= ~FAULT_FLAG_ALLOW_RETRY; 385 + flags &= ~(FAULT_FLAG_ALLOW_RETRY | 386 + FAULT_FLAG_RETRY_NOWAIT); 385 387 flags |= FAULT_FLAG_TRIED; 386 388 down_read(&mm->mmap_sem); 387 389 goto retry;
+1 -1
arch/x86/kvm/mmu.c
··· 3328 3328 arch.direct_map = vcpu->arch.mmu.direct_map; 3329 3329 arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); 3330 3330 3331 - return kvm_setup_async_pf(vcpu, gva, gfn, &arch); 3331 + return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch); 3332 3332 } 3333 3333 3334 3334 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+2 -1
include/linux/kvm_host.h
··· 192 192 193 193 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); 194 194 void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); 195 - int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, 195 + int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, 196 196 struct kvm_arch_async_pf *arch); 197 197 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); 198 198 #endif ··· 1064 1064 extern struct kvm_device_ops kvm_xics_ops; 1065 1065 extern struct kvm_device_ops kvm_vfio_ops; 1066 1066 extern struct kvm_device_ops kvm_arm_vgic_v2_ops; 1067 + extern struct kvm_device_ops kvm_flic_ops; 1067 1068 1068 1069 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1069 1070
+66
include/uapi/linux/kvm.h
··· 413 413 #define KVM_S390_PROGRAM_INT 0xfffe0001u 414 414 #define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u 415 415 #define KVM_S390_RESTART 0xfffe0003u 416 + #define KVM_S390_INT_PFAULT_INIT 0xfffe0004u 417 + #define KVM_S390_INT_PFAULT_DONE 0xfffe0005u 416 418 #define KVM_S390_MCHK 0xfffe1000u 417 419 #define KVM_S390_INT_VIRTIO 0xffff2603u 418 420 #define KVM_S390_INT_SERVICE 0xffff2401u ··· 434 432 __u32 type; 435 433 __u32 parm; 436 434 __u64 parm64; 435 + }; 436 + 437 + struct kvm_s390_io_info { 438 + __u16 subchannel_id; 439 + __u16 subchannel_nr; 440 + __u32 io_int_parm; 441 + __u32 io_int_word; 442 + }; 443 + 444 + struct kvm_s390_ext_info { 445 + __u32 ext_params; 446 + __u32 pad; 447 + __u64 ext_params2; 448 + }; 449 + 450 + struct kvm_s390_pgm_info { 451 + __u64 trans_exc_code; 452 + __u64 mon_code; 453 + __u64 per_address; 454 + __u32 data_exc_code; 455 + __u16 code; 456 + __u16 mon_class_nr; 457 + __u8 per_code; 458 + __u8 per_atmid; 459 + __u8 exc_access_id; 460 + __u8 per_access_id; 461 + __u8 op_access_id; 462 + __u8 pad[3]; 463 + }; 464 + 465 + struct kvm_s390_prefix_info { 466 + __u32 address; 467 + }; 468 + 469 + struct kvm_s390_extcall_info { 470 + __u16 code; 471 + }; 472 + 473 + struct kvm_s390_emerg_info { 474 + __u16 code; 475 + }; 476 + 477 + struct kvm_s390_mchk_info { 478 + __u64 cr14; 479 + __u64 mcic; 480 + __u64 failing_storage_address; 481 + __u32 ext_damage_code; 482 + __u32 pad; 483 + __u8 fixed_logout[16]; 484 + }; 485 + 486 + struct kvm_s390_irq { 487 + __u64 type; 488 + union { 489 + struct kvm_s390_io_info io; 490 + struct kvm_s390_ext_info ext; 491 + struct kvm_s390_pgm_info pgm; 492 + struct kvm_s390_emerg_info emerg; 493 + struct kvm_s390_extcall_info extcall; 494 + struct kvm_s390_prefix_info prefix; 495 + struct kvm_s390_mchk_info mchk; 496 + char reserved[64]; 497 + } u; 437 498 }; 438 499 439 500 /* for KVM_SET_GUEST_DEBUG */ ··· 920 855 #define KVM_DEV_VFIO_GROUP_ADD 1 921 856 #define KVM_DEV_VFIO_GROUP_DEL 2 922 857 
#define KVM_DEV_TYPE_ARM_VGIC_V2 5 858 + #define KVM_DEV_TYPE_FLIC 6 923 859 924 860 /* 925 861 * ioctls for VM fds
+4
virt/kvm/Kconfig
··· 22 22 config KVM_ASYNC_PF 23 23 bool 24 24 25 + # Toggle to switch between direct notification and batch job 26 + config KVM_ASYNC_PF_SYNC 27 + bool 28 + 25 29 config HAVE_KVM_MSI 26 30 bool 27 31
+23 -2
virt/kvm/async_pf.c
··· 28 28 #include "async_pf.h" 29 29 #include <trace/events/kvm.h> 30 30 31 + static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu, 32 + struct kvm_async_pf *work) 33 + { 34 + #ifdef CONFIG_KVM_ASYNC_PF_SYNC 35 + kvm_arch_async_page_present(vcpu, work); 36 + #endif 37 + } 38 + static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu, 39 + struct kvm_async_pf *work) 40 + { 41 + #ifndef CONFIG_KVM_ASYNC_PF_SYNC 42 + kvm_arch_async_page_present(vcpu, work); 43 + #endif 44 + } 45 + 31 46 static struct kmem_cache *async_pf_cache; 32 47 33 48 int kvm_async_pf_init(void) ··· 84 69 down_read(&mm->mmap_sem); 85 70 get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); 86 71 up_read(&mm->mmap_sem); 72 + kvm_async_page_present_sync(vcpu, apf); 87 73 unuse_mm(mm); 88 74 89 75 spin_lock(&vcpu->async_pf.lock); ··· 113 97 list_entry(vcpu->async_pf.queue.next, 114 98 typeof(*work), queue); 115 99 list_del(&work->queue); 100 + 101 + #ifdef CONFIG_KVM_ASYNC_PF_SYNC 102 + flush_work(&work->work); 103 + #else 116 104 if (cancel_work_sync(&work->work)) { 117 105 mmdrop(work->mm); 118 106 kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ 119 107 kmem_cache_free(async_pf_cache, work); 120 108 } 109 + #endif 121 110 } 122 111 123 112 spin_lock(&vcpu->async_pf.lock); ··· 159 138 } 160 139 } 161 140 162 - int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, 141 + int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, 163 142 struct kvm_arch_async_pf *arch) 164 143 { 165 144 struct kvm_async_pf *work; ··· 180 159 work->wakeup_all = false; 181 160 work->vcpu = vcpu; 182 161 work->gva = gva; 183 - work->addr = gfn_to_hva(vcpu->kvm, gfn); 162 + work->addr = hva; 184 163 work->arch = *arch; 185 164 work->mm = current->mm; 186 165 atomic_inc(&work->mm->mm_count);
+5
virt/kvm/kvm_main.c
··· 2284 2284 ops = &kvm_arm_vgic_v2_ops; 2285 2285 break; 2286 2286 #endif 2287 + #ifdef CONFIG_S390 2288 + case KVM_DEV_TYPE_FLIC: 2289 + ops = &kvm_flic_ops; 2290 + break; 2291 + #endif 2287 2292 default: 2288 2293 return -ENODEV; 2289 2294 }