···219219single-threaded guest vcpus, it should make all vcpu ids be a multiple220220of the number of vcpus per vcore.221221222222-On powerpc using book3s_hv mode, the vcpus are mapped onto virtual223223-threads in one or more virtual CPU cores. (This is because the224224-hardware requires all the hardware threads in a CPU core to be in the225225-same partition.) The KVM_CAP_PPC_SMT capability indicates the number226226-of vcpus per virtual core (vcore). The vcore id is obtained by227227-dividing the vcpu id by the number of vcpus per vcore. The vcpus in a228228-given vcore will always be in the same physical core as each other229229-(though that might be a different physical core from time to time).230230-Userspace can control the threading (SMT) mode of the guest by its231231-allocation of vcpu ids. For example, if userspace wants232232-single-threaded guest vcpus, it should make all vcpu ids be a multiple233233-of the number of vcpus per vcore.234234-235222For virtual cpus that have been created with S390 user controlled virtual236223machines, the resulting vcpu fd can be memory mapped at page offset237224KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual···861874be identical. This allows large pages in the guest to be backed by large862875pages in the host.863876864864-The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which instructs865865-kvm to keep track of writes to memory within the slot. See KVM_GET_DIRTY_LOG866866-ioctl. The KVM_CAP_READONLY_MEM capability indicates the availability of the867867-KVM_MEM_READONLY flag. When this flag is set for a memory region, KVM only868868-allows read accesses. Writes will be posted to userspace as KVM_EXIT_MMIO869869-exits.877877+The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and878878+KVM_MEM_READONLY. The former can be set to instruct KVM to keep track of879879+writes to memory within the slot. See KVM_GET_DIRTY_LOG ioctl to know how to880880+use it. 
The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,881881+to make a new slot read-only. In this case, writes to this memory will be882882+posted to userspace as KVM_EXIT_MMIO exits.870883871884When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of872885the memory region are automatically reflected into the guest. For example, an
+6
arch/ia64/kvm/lapic.h
···2727#define kvm_apic_present(x) (true)2828#define kvm_lapic_enabled(x) (true)29293030+static inline bool kvm_apic_vid_enabled(void)3131+{3232+ /* IA64 has no APICv support, so always report it as disabled. */3333+ return false;3434+}3535+3036#endif
+8
arch/s390/kvm/kvm-s390.c
···770770 } else771771 prefix = 0;772772773773+ /*774774+ * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy775775+ * copying in vcpu load/put. Lets update our copies before we save776776+ * it into the save area777777+ */778778+ save_fp_regs(&vcpu->arch.guest_fpregs);779779+ save_access_regs(vcpu->run->s.regs.acrs);780780+773781 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),774782 vcpu->arch.guest_fpregs.fprs, 128, prefix))775783 return -EFAULT;
···448448449449static bool spte_is_locklessly_modifiable(u64 spte)450450{451451- return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));451451+ return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==452452+ (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);452453}453454454455static bool spte_has_volatile_bits(u64 spte)···14611460 percpu_counter_add(&kvm_total_used_mmu_pages, nr);14621461}1463146214641464-/*14651465- * Remove the sp from shadow page cache, after call it,14661466- * we can not find this sp from the cache, and the shadow14671467- * page table is still valid.14681468- * It should be under the protection of mmu lock.14691469- */14701470-static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp)14631463+static void kvm_mmu_free_page(struct kvm_mmu_page *sp)14711464{14721465 ASSERT(is_empty_shadow_page(sp->spt));14731466 hlist_del(&sp->hash_link);14741474- if (!sp->role.direct)14751475- free_page((unsigned long)sp->gfns);14761476-}14771477-14781478-/*14791479- * Free the shadow page table and the sp, we can do it14801480- * out of the protection of mmu lock.14811481- */14821482-static void kvm_mmu_free_page(struct kvm_mmu_page *sp)14831483-{14841467 list_del(&sp->link);14851468 free_page((unsigned long)sp->spt);14691469+ if (!sp->role.direct)14701470+ free_page((unsigned long)sp->gfns);14861471 kmem_cache_free(mmu_page_header_cache, sp);14871472}14881473···21122125 do {21132126 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);21142127 WARN_ON(!sp->role.invalid || sp->root_count);21152115- kvm_mmu_isolate_page(sp);21162128 kvm_mmu_free_page(sp);21172129 } while (!list_empty(invalid_list));21182130}···23132327 if (s->role.level != PT_PAGE_TABLE_LEVEL)23142328 return 1;2315232923162316- if (!need_unsync && !s->unsync) {23302330+ if (!s->unsync)23172331 need_unsync = true;23182318- }23192332 }23202333 if (need_unsync)23212334 kvm_unsync_pages(vcpu, gfn);···36723687 else36733688 r = paging32_init_context(vcpu, context);3674368936903690+ 
vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);36753691 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);36763692 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);36773693 vcpu->arch.mmu.base_role.smep_andnot_wp···38393853 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */38403854 *gpa &= ~(gpa_t)7;38413855 *bytes = 8;38423842- r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));38563856+ r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);38433857 if (r)38443858 gentry = 0;38453859 new = (const u8 *)&gentry;···39934007 !((sp->role.word ^ vcpu->arch.mmu.base_role.word)39944008 & mask.word) && rmap_can_add(vcpu))39954009 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);39963996- if (!remote_flush && need_remote_flush(entry, *spte))40104010+ if (need_remote_flush(entry, *spte))39974011 remote_flush = true;39984012 ++spte;39994013 }
···43164316 * It's the caller's job to ensure that the target task struct43174317 * can't go away on us before we can do any checks.43184318 *43194319- * Returns true if we indeed boosted the target task.43194319+ * Returns:43204320+ * true (>0) if we indeed boosted the target task.43214321+ * false (0) if we failed to boost the target.43224322+ * -ESRCH if there's no task to yield to.43204323 */43214324bool __sched yield_to(struct task_struct *p, bool preempt)43224325{···4333433043344331again:43354332 p_rq = task_rq(p);43334333+ /*43344334+ * If we're the only runnable task on the rq and target rq also43354335+ * has only one task, there's absolutely no point in yielding.43364336+ */43374337+ if (rq->nr_running == 1 && p_rq->nr_running == 1) {43384338+ yielded = -ESRCH;43394339+ goto out_irq;43404340+ }43414341+43364342 double_rq_lock(rq, p_rq);43374343 while (task_rq(p) != p_rq) {43384344 double_rq_unlock(rq, p_rq);···43494337 }4350433843514339 if (!curr->sched_class->yield_to_task)43524352- goto out;43404340+ goto out_unlock;4353434143544342 if (curr->sched_class != p->sched_class)43554355- goto out;43434343+ goto out_unlock;4356434443574345 if (task_running(p_rq, p) || p->state)43584358- goto out;43464346+ goto out_unlock;4359434743604348 yielded = curr->sched_class->yield_to_task(rq, p, preempt);43614349 if (yielded) {···43684356 resched_task(p_rq->curr);43694357 }4370435843714371-out:43594359+out_unlock:43724360 double_rq_unlock(rq, p_rq);43614361+out_irq:43734362 local_irq_restore(flags);4374436343754375- if (yielded)43644364+ if (yielded > 0)43764365 schedule();4377436643784367 return yielded;
···217217 make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);218218}219219220220+void kvm_make_update_eoibitmap_request(struct kvm *kvm)221221+{222222+ make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP);223223+}224224+220225int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)221226{222227 struct page *page;···719714}720715721716/*717717+ * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:718718+ * - create a new memory slot719719+ * - delete an existing memory slot720720+ * - modify an existing memory slot721721+ * -- move it in the guest physical memory space722722+ * -- just change its flags723723+ *724724+ * Since flags can be changed by some of these operations, the following725725+ * differentiation is the best we can do for __kvm_set_memory_region():726726+ */727727+enum kvm_mr_change {728728+ KVM_MR_CREATE,729729+ KVM_MR_DELETE,730730+ KVM_MR_MOVE,731731+ KVM_MR_FLAGS_ONLY,732732+};733733+734734+/*722735 * Allocate some memory and give it an address in the guest physical address723736 * space.724737 *···754731 struct kvm_memory_slot *slot;755732 struct kvm_memory_slot old, new;756733 struct kvm_memslots *slots = NULL, *old_memslots;734734+ enum kvm_mr_change change;757735758736 r = check_memory_region_flags(mem);759737 if (r)···796772 new.npages = npages;797773 new.flags = mem->flags;798774799799- /*800800- * Disallow changing a memory slot's size or changing anything about801801- * zero sized slots that doesn't involve making them non-zero.802802- */803775 r = -EINVAL;804804- if (npages && old.npages && npages != old.npages)805805- goto out;806806- if (!npages && !old.npages)776776+ if (npages) {777777+ if (!old.npages)778778+ change = KVM_MR_CREATE;779779+ else { /* Modify an existing slot. 
*/780780+ if ((mem->userspace_addr != old.userspace_addr) ||781781+ (npages != old.npages) ||782782+ ((new.flags ^ old.flags) & KVM_MEM_READONLY))783783+ goto out;784784+785785+ if (base_gfn != old.base_gfn)786786+ change = KVM_MR_MOVE;787787+ else if (new.flags != old.flags)788788+ change = KVM_MR_FLAGS_ONLY;789789+ else { /* Nothing to change. */790790+ r = 0;791791+ goto out;792792+ }793793+ }794794+ } else if (old.npages) {795795+ change = KVM_MR_DELETE;796796+ } else /* Modify a non-existent slot: disallowed. */807797 goto out;808798809809- if ((npages && !old.npages) || (base_gfn != old.base_gfn)) {799799+ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {810800 /* Check for overlaps */811801 r = -EEXIST;812802 kvm_for_each_memslot(slot, kvm->memslots) {···838800 new.dirty_bitmap = NULL;839801840802 r = -ENOMEM;841841-842842- /*843843- * Allocate if a slot is being created. If modifying a slot,844844- * the userspace_addr cannot change.845845- */846846- if (!old.npages) {803803+ if (change == KVM_MR_CREATE) {847804 new.user_alloc = user_alloc;848805 new.userspace_addr = mem->userspace_addr;849806850807 if (kvm_arch_create_memslot(&new, npages))851808 goto out_free;852852- } else if (npages && mem->userspace_addr != old.userspace_addr) {853853- r = -EINVAL;854854- goto out_free;855809 }856810857811 /* Allocate page dirty bitmap if needed */···852822 goto out_free;853823 }854824855855- if (!npages || base_gfn != old.base_gfn) {825825+ if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {856826 r = -ENOMEM;857827 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),858828 GFP_KERNEL);···893863 goto out_free;894864 }895865896896- /* map new memory slot into the iommu */897897- if (npages) {866866+ /*867867+ * IOMMU mapping: New slots need to be mapped. Old slots need to be868868+ * un-mapped and re-mapped if their base changes. Since base change869869+ * unmapping is handled above with slot deletion, mapping alone is870870+ * needed here. 
Anything else the iommu might care about for existing871871+ * slots (size changes, userspace addr changes and read-only flag872872+ * changes) is disallowed above, so any other attribute changes getting873873+ * here can be skipped.874874+ */875875+ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {898876 r = kvm_iommu_map_pages(kvm, &new);899877 if (r)900878 goto out_slots;901879 }902880903881 /* actual memory is freed via old in kvm_free_physmem_slot below */904904- if (!npages) {882882+ if (change == KVM_MR_DELETE) {905883 new.dirty_bitmap = NULL;906884 memset(&new.arch, 0, sizeof(new.arch));907885 }···17071669{17081670 struct pid *pid;17091671 struct task_struct *task = NULL;16721672+ bool ret = false;1710167317111674 rcu_read_lock();17121675 pid = rcu_dereference(target->pid);···17151676 task = get_pid_task(target->pid, PIDTYPE_PID);17161677 rcu_read_unlock();17171678 if (!task)17181718- return false;16791679+ return ret;17191680 if (task->flags & PF_VCPU) {17201681 put_task_struct(task);17211721- return false;16821682+ return ret;17221683 }17231723- if (yield_to(task, 1)) {17241724- put_task_struct(task);17251725- return true;17261726- }16841684+ ret = yield_to(task, 1);17271685 put_task_struct(task);17281728- return false;16861686+16871687+ return ret;17291688}17301689EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);17311690···17641727 return eligible;17651728}17661729#endif17301730+17671731void kvm_vcpu_on_spin(struct kvm_vcpu *me)17681732{17691733 struct kvm *kvm = me->kvm;17701734 struct kvm_vcpu *vcpu;17711735 int last_boosted_vcpu = me->kvm->last_boosted_vcpu;17721736 int yielded = 0;17371737+ int try = 3;17731738 int pass;17741739 int i;17751740···17831744 * VCPU is holding the lock that we need and will release it.17841745 * We approximate round-robin by starting at the last boosted VCPU.17851746 */17861786- for (pass = 0; pass < 2 && !yielded; pass++) {17471747+ for (pass = 0; pass < 2 && !yielded && try; pass++) {17871748 kvm_for_each_vcpu(i, vcpu, 
kvm) {17881749 if (!pass && i <= last_boosted_vcpu) {17891750 i = last_boosted_vcpu;···17961757 continue;17971758 if (!kvm_vcpu_eligible_for_directed_yield(vcpu))17981759 continue;17991799- if (kvm_vcpu_yield_to(vcpu)) {17601760+17611761+ yielded = kvm_vcpu_yield_to(vcpu);17621762+ if (yielded > 0) {18001763 kvm->last_boosted_vcpu = i;18011801- yielded = 1;18021764 break;17651765+ } else if (yielded < 0) {17661766+ try--;17671767+ if (!try)17681768+ break;18031769 }18041770 }18051771 }