···480348034.126 KVM_X86_SET_MSR_FILTER48044804----------------------------4805480548064806-:Capability: KVM_X86_SET_MSR_FILTER48064806+:Capability: KVM_CAP_X86_MSR_FILTER48074807:Architectures: x8648084808:Type: vm ioctl48094809:Parameters: struct kvm_msr_filter···67156715instead get bounced to user space through the KVM_EXIT_X86_RDMSR and67166716KVM_EXIT_X86_WRMSR exit notifications.6717671767186718-8.27 KVM_X86_SET_MSR_FILTER67186718+8.27 KVM_CAP_X86_MSR_FILTER67196719---------------------------6720672067216721:Architectures: x86
+12-3
arch/x86/include/asm/kvm_host.h
···113113#define VALID_PAGE(x) ((x) != INVALID_PAGE)114114115115#define UNMAPPED_GVA (~(gpa_t)0)116116+#define INVALID_GPA (~(gpa_t)0)116117117118/* KVM Hugepage definitions for x86 */118119#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G···200199201200#define KVM_NR_DB_REGS 4202201202202+#define DR6_BUS_LOCK (1 << 11)203203#define DR6_BD (1 << 13)204204#define DR6_BS (1 << 14)205205#define DR6_BT (1 << 15)···214212 * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.215213 */216214#define DR6_ACTIVE_LOW 0xffff0ff0217217-#define DR6_VOLATILE 0x0001e00f215215+#define DR6_VOLATILE 0x0001e80f218216#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)219217220218#define DR7_BP_EN_MASK 0x000000ff···409407 u32 pkru_mask;410408411409 u64 *pae_root;412412- u64 *lm_root;410410+ u64 *pml4_root;413411414412 /*415413 * check zero bits on shadow page table entries, these···14191417 bool direct_map;14201418};1421141914201420+extern u32 __read_mostly kvm_nr_uret_msrs;14221421extern u64 __read_mostly host_efer;14231422extern bool __read_mostly allow_smaller_maxphyaddr;14241423extern struct kvm_x86_ops kvm_x86_ops;···17781775 unsigned long ipi_bitmap_high, u32 min,17791776 unsigned long icr, int op_64_bit);1780177717811781-void kvm_define_user_return_msr(unsigned index, u32 msr);17781778+int kvm_add_user_return_msr(u32 msr);17791779+int kvm_find_user_return_msr(u32 msr);17821780int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);17811781+17821782+static inline bool kvm_is_supported_user_return_msr(u32 msr)17831783+{17841784+ return kvm_find_user_return_msr(msr) >= 0;17851785+}1783178617841787u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);17851788u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
···2626#include <linux/kprobes.h>2727#include <linux/nmi.h>2828#include <linux/swait.h>2929+#include <linux/syscore_ops.h>2930#include <asm/timer.h>3031#include <asm/cpu.h>3132#include <asm/traps.h>···3837#include <asm/tlb.h>3938#include <asm/cpuidle_haltpoll.h>4039#include <asm/ptrace.h>4040+#include <asm/reboot.h>4141#include <asm/svm.h>42424343DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);···347345348346 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);349347 __this_cpu_write(apf_reason.enabled, 1);350350- pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());348348+ pr_info("setup async PF for cpu %d\n", smp_processor_id());351349 }352350353351 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {···373371 wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);374372 __this_cpu_write(apf_reason.enabled, 0);375373376376- pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());374374+ pr_info("disable async PF for cpu %d\n", smp_processor_id());377375}378376379379-static void kvm_pv_guest_cpu_reboot(void *unused)377377+static void kvm_disable_steal_time(void)380378{381381- /*382382- * We disable PV EOI before we load a new kernel by kexec,383383- * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.384384- * New kernel can re-enable when it boots.385385- */386386- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))387387- wrmsrl(MSR_KVM_PV_EOI_EN, 0);388388- kvm_pv_disable_apf();389389- kvm_disable_steal_time();390390-}379379+ if (!has_steal_clock)380380+ return;391381392392-static int kvm_pv_reboot_notify(struct notifier_block *nb,393393- unsigned long code, void *unused)394394-{395395- if (code == SYS_RESTART)396396- on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);397397- return NOTIFY_DONE;382382+ wrmsr(MSR_KVM_STEAL_TIME, 0, 0);398383}399399-400400-static struct notifier_block kvm_pv_reboot_nb = {401401- .notifier_call = kvm_pv_reboot_notify,402402-};403384404385static u64 kvm_steal_clock(int cpu)405386{···399414 } while ((version & 1) || (version != src->version));400415401416 return steal;402402-}403403-404404-void kvm_disable_steal_time(void)405405-{406406- if (!has_steal_clock)407407- return;408408-409409- wrmsr(MSR_KVM_STEAL_TIME, 0, 0);410417}411418412419static inline void __set_percpu_decrypted(void *ptr, unsigned long size)···426449 __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));427450 __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi));428451 }452452+}453453+454454+static void kvm_guest_cpu_offline(bool shutdown)455455+{456456+ kvm_disable_steal_time();457457+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))458458+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);459459+ kvm_pv_disable_apf();460460+ if (!shutdown)461461+ apf_task_wake_all();462462+ kvmclock_disable();463463+}464464+465465+static int kvm_cpu_online(unsigned int cpu)466466+{467467+ unsigned long flags;468468+469469+ local_irq_save(flags);470470+ kvm_guest_cpu_init();471471+ local_irq_restore(flags);472472+ return 0;429473}430474431475#ifdef CONFIG_SMP···633635 kvm_spinlock_init();634636}635637636636-static void kvm_guest_cpu_offline(void)637637-{638638- kvm_disable_steal_time();639639- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))640640- wrmsrl(MSR_KVM_PV_EOI_EN, 0);641641- kvm_pv_disable_apf();642642- apf_task_wake_all();643643-}644644-645645-static int kvm_cpu_online(unsigned int cpu)646646-{647647- local_irq_disable();648648- kvm_guest_cpu_init();649649- local_irq_enable();650650- return 0;651651-}652652-653638static int kvm_cpu_down_prepare(unsigned int cpu)654639{655655- local_irq_disable();656656- kvm_guest_cpu_offline();657657- local_irq_enable();640640+ unsigned long flags;641641+642642+ local_irq_save(flags);643643+ kvm_guest_cpu_offline(false);644644+ local_irq_restore(flags);658645 return 0;659646}660647648648+#endif649649+650650+static int kvm_suspend(void)651651+{652652+ kvm_guest_cpu_offline(false);653653+654654+ return 0;655655+}656656+657657+static void kvm_resume(void)658658+{659659+ kvm_cpu_online(raw_smp_processor_id());660660+}661661+662662+static struct syscore_ops kvm_syscore_ops = {663663+ .suspend = kvm_suspend,664664+ .resume = kvm_resume,665665+};666666+667667+static void kvm_pv_guest_cpu_reboot(void *unused)668668+{669669+ kvm_guest_cpu_offline(true);670670+}671671+672672+static int kvm_pv_reboot_notify(struct notifier_block *nb,673673+ unsigned long code, void *unused)674674+{675675+ if (code == SYS_RESTART)676676+ on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);677677+ return NOTIFY_DONE;678678+}679679+680680+static struct notifier_block kvm_pv_reboot_nb = {681681+ .notifier_call = kvm_pv_reboot_notify,682682+};683683+684684+/*685685+ * After a PV feature is registered, the host will keep writing to the686686+ * registered memory location. If the guest happens to shutdown, this memory687687+ * won't be valid. In cases like kexec, in which you install a new kernel, this688688+ * means a random memory location will be kept being written.689689+ */690690+#ifdef CONFIG_KEXEC_CORE691691+static void kvm_crash_shutdown(struct pt_regs *regs)692692+{693693+ kvm_guest_cpu_offline(true);694694+ native_machine_crash_shutdown(regs);695695+}661696#endif662697663698static void __init kvm_guest_init(void)···734703 sev_map_percpu_data();735704 kvm_guest_cpu_init();736705#endif706706+707707+#ifdef CONFIG_KEXEC_CORE708708+ machine_ops.crash_shutdown = kvm_crash_shutdown;709709+#endif710710+711711+ register_syscore_ops(&kvm_syscore_ops);737712738713 /*739714 * Hard lockup detection is enabled by default. Disable it, as guests
+1-25
arch/x86/kernel/kvmclock.c
···2020#include <asm/hypervisor.h>2121#include <asm/mem_encrypt.h>2222#include <asm/x86_init.h>2323-#include <asm/reboot.h>2423#include <asm/kvmclock.h>25242625static int kvmclock __initdata = 1;···202203}203204#endif204205205205-/*206206- * After the clock is registered, the host will keep writing to the207207- * registered memory location. If the guest happens to shutdown, this memory208208- * won't be valid. In cases like kexec, in which you install a new kernel, this209209- * means a random memory location will be kept being written. So before any210210- * kind of shutdown from our side, we unregister the clock by writing anything211211- * that does not have the 'enable' bit set in the msr212212- */213213-#ifdef CONFIG_KEXEC_CORE214214-static void kvm_crash_shutdown(struct pt_regs *regs)206206+void kvmclock_disable(void)215207{216208 native_write_msr(msr_kvm_system_time, 0, 0);217217- kvm_disable_steal_time();218218- native_machine_crash_shutdown(regs);219219-}220220-#endif221221-222222-static void kvm_shutdown(void)223223-{224224- native_write_msr(msr_kvm_system_time, 0, 0);225225- kvm_disable_steal_time();226226- native_machine_shutdown();227209}228210229211static void __init kvmclock_init_mem(void)···331351#endif332352 x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;333353 x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;334334- machine_ops.shutdown = kvm_shutdown;335335-#ifdef CONFIG_KEXEC_CORE336336- machine_ops.crash_shutdown = kvm_crash_shutdown;337337-#endif338354 kvm_get_preset_lpj();339355340356 /*
+18-2
arch/x86/kvm/cpuid.c
···458458 F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |459459 F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |460460 F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |461461- F(SGX_LC)461461+ F(SGX_LC) | F(BUS_LOCK_DETECT)462462 );463463 /* Set LA57 based on hardware capability. */464464 if (cpuid_ecx(7) & F(LA57))···567567 F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |568568 F(PMM) | F(PMM_EN)569569 );570570+571571+ /*572572+ * Hide RDTSCP and RDPID if either feature is reported as supported but573573+ * probing MSR_TSC_AUX failed. This is purely a sanity check and574574+ * should never happen, but the guest will likely crash if RDTSCP or575575+ * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in576576+ * the past. For example, the sanity check may fire if this instance of577577+ * KVM is running as L1 on top of an older, broken KVM.578578+ */579579+ if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||580580+ kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&581581+ !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {582582+ kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);583583+ kvm_cpu_cap_clear(X86_FEATURE_RDPID);584584+ }570585}571586EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);572587···652637 case 7:653638 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;654639 entry->eax = 0;655655- entry->ecx = F(RDPID);640640+ if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))641641+ entry->ecx = F(RDPID);656642 ++array->nent;657643 default:658644 break;
+1-1
arch/x86/kvm/emulate.c
···45024502 * from the register case of group9.45034503 */45044504static const struct gprefix pfx_0f_c7_7 = {45054505- N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),45054505+ N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),45064506};4507450745084508
···19131913 if (!apic->lapic_timer.hv_timer_in_use)19141914 goto out;19151915 WARN_ON(rcuwait_active(&vcpu->wait));19161916- cancel_hv_timer(apic);19171916 apic_timer_expired(apic, false);19171917+ cancel_hv_timer(apic);1918191819191919 if (apic_lvtt_period(apic) && apic->lapic_timer.period) {19201920 advance_periodic_target_expiration(apic);
+10-10
arch/x86/kvm/mmu/mmu.c
···33103310 if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {33113311 pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;3312331233133313- if (WARN_ON_ONCE(!mmu->lm_root)) {33133313+ if (WARN_ON_ONCE(!mmu->pml4_root)) {33143314 r = -EIO;33153315 goto out_unlock;33163316 }3317331733183318- mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;33183318+ mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;33193319 }3320332033213321 for (i = 0; i < 4; ++i) {···33353335 }3336333633373337 if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)33383338- mmu->root_hpa = __pa(mmu->lm_root);33383338+ mmu->root_hpa = __pa(mmu->pml4_root);33393339 else33403340 mmu->root_hpa = __pa(mmu->pae_root);33413341···33503350static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)33513351{33523352 struct kvm_mmu *mmu = vcpu->arch.mmu;33533353- u64 *lm_root, *pae_root;33533353+ u64 *pml4_root, *pae_root;3354335433553355 /*33563356 * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP···33693369 if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))33703370 return -EIO;3371337133723372- if (mmu->pae_root && mmu->lm_root)33723372+ if (mmu->pae_root && mmu->pml4_root)33733373 return 0;3374337433753375 /*33763376 * The special roots should always be allocated in concert. Yell and33773377 * bail if KVM ends up in a state where only one of the roots is valid.33783378 */33793379- if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root))33793379+ if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))33803380 return -EIO;3381338133823382 /*···33873387 if (!pae_root)33883388 return -ENOMEM;3389338933903390- lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);33913391- if (!lm_root) {33903390+ pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);33913391+ if (!pml4_root) {33923392 free_page((unsigned long)pae_root);33933393 return -ENOMEM;33943394 }3395339533963396 mmu->pae_root = pae_root;33973397- mmu->lm_root = lm_root;33973397+ mmu->pml4_root = pml4_root;3398339833993399 return 0;34003400}···52615261 if (!tdp_enabled && mmu->pae_root)52625262 set_memory_encrypted((unsigned long)mmu->pae_root, 1);52635263 free_page((unsigned long)mmu->pae_root);52645264- free_page((unsigned long)mmu->lm_root);52645264+ free_page((unsigned long)mmu->pml4_root);52655265}5266526652675267static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+16-1
arch/x86/kvm/mmu/tdp_mmu.c
···388388}389389390390/**391391- * handle_changed_spte - handle bookkeeping associated with an SPTE change391391+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change392392 * @kvm: kvm instance393393 * @as_id: the address space of the paging structure the SPTE was a part of394394 * @gfn: the base GFN that was mapped by the SPTE···443443 return;444444445445 trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);446446+447447+ if (is_large_pte(old_spte) != is_large_pte(new_spte)) {448448+ if (is_large_pte(old_spte))449449+ atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);450450+ else451451+ atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);452452+ }446453447454 /*448455 * The only times a SPTE should be changed from a non-present to···10161009 }1017101010181011 if (!is_shadow_present_pte(iter.old_spte)) {10121012+ /*10131013+ * If SPTE has been forzen by another thread, just10141014+ * give up and retry, avoiding unnecessary page table10151015+ * allocation and free.10161016+ */10171017+ if (is_removed_spte(iter.old_spte))10181018+ break;10191019+10191020 sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);10201021 child_pt = sp->spt;10211022
+19-4
arch/x86/kvm/svm/nested.c
···764764 nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);765765766766 svm_switch_vmcb(svm, &svm->vmcb01);767767- WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);768767769768 /*770769 * On vmexit the GIF is set to false and···871872 __free_page(virt_to_page(svm->nested.vmcb02.ptr));872873 svm->nested.vmcb02.ptr = NULL;873874875875+ /*876876+ * When last_vmcb12_gpa matches the current vmcb12 gpa,877877+ * some vmcb12 fields are not loaded if they are marked clean878878+ * in the vmcb12, since in this case they are up to date already.879879+ *880880+ * When the vmcb02 is freed, this optimization becomes invalid.881881+ */882882+ svm->nested.last_vmcb12_gpa = INVALID_GPA;883883+874884 svm->nested.initialized = false;875885}876886···892884893885 if (is_guest_mode(vcpu)) {894886 svm->nested.nested_run_pending = 0;887887+ svm->nested.vmcb12_gpa = INVALID_GPA;888888+895889 leave_guest_mode(vcpu);896890897897- svm_switch_vmcb(svm, &svm->nested.vmcb02);891891+ svm_switch_vmcb(svm, &svm->vmcb01);898892899893 nested_svm_uninit_mmu_context(vcpu);900894 vmcb_mark_all_dirty(svm->vmcb);···13081298 * L2 registers if needed are moved from the current VMCB to VMCB02.13091299 */1310130013011301+ if (is_guest_mode(vcpu))13021302+ svm_leave_nested(svm);13031303+ else13041304+ svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;13051305+13061306+ svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));13071307+13111308 svm->nested.nested_run_pending =13121309 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);1313131013141311 svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;13151315- if (svm->current_vmcb == &svm->vmcb01)13161316- svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;1317131213181313 svm->vmcb01.ptr->save.es = save->es;13191314 svm->vmcb01.ptr->save.cs = save->cs;
+14-18
arch/x86/kvm/svm/sev.c
···763763}764764765765static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,766766- unsigned long __user dst_uaddr,766766+ void __user *dst_uaddr,767767 unsigned long dst_paddr,768768 int size, int *err)769769{···787787788788 if (tpage) {789789 offset = paddr & 15;790790- if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,791791- page_address(tpage) + offset, size))790790+ if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))792791 ret = -EFAULT;793792 }794793···799800}800801801802static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,802802- unsigned long __user vaddr,803803+ void __user *vaddr,803804 unsigned long dst_paddr,804804- unsigned long __user dst_vaddr,805805+ void __user *dst_vaddr,805806 int size, int *error)806807{807808 struct page *src_tpage = NULL;···809810 int ret, len = size;810811811812 /* If source buffer is not aligned then use an intermediate buffer */812812- if (!IS_ALIGNED(vaddr, 16)) {813813+ if (!IS_ALIGNED((unsigned long)vaddr, 16)) {813814 src_tpage = alloc_page(GFP_KERNEL);814815 if (!src_tpage)815816 return -ENOMEM;816817817817- if (copy_from_user(page_address(src_tpage),818818- (void __user *)(uintptr_t)vaddr, size)) {818818+ if (copy_from_user(page_address(src_tpage), vaddr, size)) {819819 __free_page(src_tpage);820820 return -EFAULT;821821 }···828830 * - copy the source buffer in an intermediate buffer829831 * - use the intermediate buffer as source buffer830832 */831831- if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {833833+ if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {832834 int dst_offset;833835834836 dst_tpage = alloc_page(GFP_KERNEL);···853855 page_address(src_tpage), size);854856 else {855857 if (copy_from_user(page_address(dst_tpage) + dst_offset,856856- (void __user *)(uintptr_t)vaddr, size)) {858858+ vaddr, size)) {857859 ret = -EFAULT;858860 goto e_free;859861 }···933935 if (dec)934936 ret = __sev_dbg_decrypt_user(kvm,935937 __sme_page_pa(src_p[0]) + s_off,936936- dst_vaddr,938938+ (void __user *)dst_vaddr,937939 __sme_page_pa(dst_p[0]) + d_off,938940 len, &argp->error);939941 else940942 ret = __sev_dbg_encrypt_user(kvm,941943 __sme_page_pa(src_p[0]) + s_off,942942- vaddr,944944+ (void __user *)vaddr,943945 __sme_page_pa(dst_p[0]) + d_off,944944- dst_vaddr,946946+ (void __user *)dst_vaddr,945947 len, &argp->error);946948947949 sev_unpin_memory(kvm, src_p, n);···17621764e_source_unlock:17631765 mutex_unlock(&source_kvm->lock);17641766e_source_put:17651765- fput(source_kvm_file);17671767+ if (source_kvm_file)17681768+ fput(source_kvm_file);17661769 return ret;17671770}17681771···21972198 return -EINVAL;21982199}2199220022002200-static void pre_sev_es_run(struct vcpu_svm *svm)22012201+void sev_es_unmap_ghcb(struct vcpu_svm *svm)22012202{22022203 if (!svm->ghcb)22032204 return;···22322233{22332234 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);22342235 int asid = sev_get_asid(svm->vcpu.kvm);22352235-22362236- /* Perform any SEV-ES pre-run actions */22372237- pre_sev_es_run(svm);2238223622392237 /* Assign the asid allocated with this SEV guest */22402238 svm->asid = asid;
+28-34
arch/x86/kvm/svm/svm.c
···212212 * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to213213 * defer the restoration of TSC_AUX until the CPU returns to userspace.214214 */215215-#define TSC_AUX_URET_SLOT 0215215+static int tsc_aux_uret_slot __read_mostly = -1;216216217217static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};218218···444444445445 if (sev_active()) {446446 pr_info("KVM is unsupported when running as an SEV guest\n");447447+ return 0;448448+ }449449+450450+ if (pgtable_l5_enabled()) {451451+ pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");447452 return 0;448453 }449454···964959 kvm_tsc_scaling_ratio_frac_bits = 32;965960 }966961967967- if (boot_cpu_has(X86_FEATURE_RDTSCP))968968- kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);962962+ tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);969963970964 /* Check for pause filtering support */971965 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {···11041100 return svm->vmcb->control.tsc_offset;11051101}1106110211071107-static void svm_check_invpcid(struct vcpu_svm *svm)11031103+/* Evaluate instruction intercepts that depend on guest CPUID features. */11041104+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,11051105+ struct vcpu_svm *svm)11081106{11091107 /*11101108 * Intercept INVPCID if shadow paging is enabled to sync/free shadow···11181112 svm_set_intercept(svm, INTERCEPT_INVPCID);11191113 else11201114 svm_clr_intercept(svm, INTERCEPT_INVPCID);11151115+ }11161116+11171117+ if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {11181118+ if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))11191119+ svm_clr_intercept(svm, INTERCEPT_RDTSCP);11201120+ else11211121+ svm_set_intercept(svm, INTERCEPT_RDTSCP);11211122 }11221123}11231124···12481235 svm->current_vmcb->asid_generation = 0;12491236 svm->asid = 0;1250123712511251- svm->nested.vmcb12_gpa = 0;12521252- svm->nested.last_vmcb12_gpa = 0;12381238+ svm->nested.vmcb12_gpa = INVALID_GPA;12391239+ svm->nested.last_vmcb12_gpa = INVALID_GPA;12531240 vcpu->arch.hflags = 0;1254124112551242 if (!kvm_pause_in_guest(vcpu->kvm)) {···12611248 svm_clr_intercept(svm, INTERCEPT_PAUSE);12621249 }1263125012641264- svm_check_invpcid(svm);12511251+ svm_recalc_instruction_intercepts(vcpu, svm);1265125212661253 /*12671254 * If the host supports V_SPEC_CTRL then disable the interception···14371424 struct vcpu_svm *svm = to_svm(vcpu);14381425 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);1439142614271427+ if (sev_es_guest(vcpu->kvm))14281428+ sev_es_unmap_ghcb(svm);14291429+14401430 if (svm->guest_state_loaded)14411431 return;14421432···14611445 }14621446 }1463144714641464- if (static_cpu_has(X86_FEATURE_RDTSCP))14651465- kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);14481448+ if (likely(tsc_aux_uret_slot >= 0))14491449+ kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);1466145014671451 svm->guest_state_loaded = true;14681452}···26712655 msr_info->data |= (u64)svm->sysenter_esp_hi << 32;26722656 break;26732657 case MSR_TSC_AUX:26742674- if (!boot_cpu_has(X86_FEATURE_RDTSCP))26752675- return 1;26762676- if (!msr_info->host_initiated &&26772677- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))26782678- return 1;26792658 msr_info->data = svm->tsc_aux;26802659 break;26812660 /*···28872876 svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;28882877 break;28892878 case MSR_TSC_AUX:28902890- if (!boot_cpu_has(X86_FEATURE_RDTSCP))28912891- return 1;28922892-28932893- if (!msr->host_initiated &&28942894- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))28952895- return 1;28962896-28972897- /*28982898- * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has28992899- * incomplete and conflicting architectural behavior. Current29002900- * AMD CPUs completely ignore bits 63:32, i.e. they aren't29012901- * reserved and always read as zeros. Emulate AMD CPU behavior29022902- * to avoid explosions if the vCPU is migrated from an AMD host29032903- * to an Intel host.29042904- */29052905- data = (u32)data;29062906-29072879 /*29082880 * TSC_AUX is usually changed only during boot and never read29092881 * directly. Intercept TSC_AUX instead of exposing it to the29102882 * guest via direct_access_msrs, and switch it via user return.29112883 */29122884 preempt_disable();29132913- r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);28852885+ r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);29142886 preempt_enable();29152887 if (r)29162888 return 1;···30783084 [SVM_EXIT_STGI] = stgi_interception,30793085 [SVM_EXIT_CLGI] = clgi_interception,30803086 [SVM_EXIT_SKINIT] = skinit_interception,30873087+ [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op,30813088 [SVM_EXIT_WBINVD] = kvm_emulate_wbinvd,30823089 [SVM_EXIT_MONITOR] = kvm_emulate_monitor,30833090 [SVM_EXIT_MWAIT] = kvm_emulate_mwait,···39673972 svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&39683973 guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);3969397439703970- /* Check again if INVPCID interception if required */39713971- svm_check_invpcid(svm);39753975+ svm_recalc_instruction_intercepts(vcpu, svm);3972397639733977 /* For sev guests, the memory encryption bit is not reserved in CR3. */39743978 if (sev_guest(vcpu->kvm)) {
···398398{399399 u64 debugctl = 0;400400401401+ if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))402402+ debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;403403+401404 if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)402405 debugctl |= DEBUGCTLMSR_LBR_MASK;403406
+19-10
arch/x86/kvm/vmx/nested.c
···30983098 nested_vmx_handle_enlightened_vmptrld(vcpu, false);3099309931003100 if (evmptrld_status == EVMPTRLD_VMFAIL ||31013101- evmptrld_status == EVMPTRLD_ERROR) {31023102- pr_debug_ratelimited("%s: enlightened vmptrld failed\n",31033103- __func__);31043104- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;31053105- vcpu->run->internal.suberror =31063106- KVM_INTERNAL_ERROR_EMULATION;31073107- vcpu->run->internal.ndata = 0;31013101+ evmptrld_status == EVMPTRLD_ERROR)31083102 return false;31093109- }31103103 }3111310431123105 return true;···3187319431883195static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)31893196{31903190- if (!nested_get_evmcs_page(vcpu))31973197+ if (!nested_get_evmcs_page(vcpu)) {31983198+ pr_debug_ratelimited("%s: enlightened vmptrld failed\n",31993199+ __func__);32003200+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;32013201+ vcpu->run->internal.suberror =32023202+ KVM_INTERNAL_ERROR_EMULATION;32033203+ vcpu->run->internal.ndata = 0;32043204+31913205 return false;32063206+ }3192320731933208 if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))31943209 return false;···44364435 /* Similarly, triple faults in L2 should never escape. */44374436 WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));4438443744394439- kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);44384438+ if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {44394439+ /*44404440+ * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map44414441+ * Enlightened VMCS after migration and we still need to44424442+ * do that when something is forcing L2->L1 exit prior to44434443+ * the first L2 run.44444444+ */44454445+ (void)nested_get_evmcs_page(vcpu);44464446+ }4440444744414448 /* Service the TLB flush request for L2 before switching to L1. */44424449 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+110-110
arch/x86/kvm/vmx/vmx.c
···455455456456static unsigned long host_idt_base;457457458458-/*459459- * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm460460- * will emulate SYSCALL in legacy mode if the vendor string in guest461461- * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To462462- * support this emulation, IA32_STAR must always be included in463463- * vmx_uret_msrs_list[], even in i386 builds.464464- */465465-static const u32 vmx_uret_msrs_list[] = {466466-#ifdef CONFIG_X86_64467467- MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,468468-#endif469469- MSR_EFER, MSR_TSC_AUX, MSR_STAR,470470- MSR_IA32_TSX_CTRL,471471-};472472-473458#if IS_ENABLED(CONFIG_HYPERV)474459static bool __read_mostly enlightened_vmcs = true;475460module_param(enlightened_vmcs, bool, 0444);···682697 return r;683698}684699685685-static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)686686-{687687- int i;688688-689689- for (i = 0; i < vmx->nr_uret_msrs; ++i)690690- if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)691691- return i;692692- return -1;693693-}694694-695700struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)696701{697702 int i;698703699699- i = __vmx_find_uret_msr(vmx, msr);704704+ i = kvm_find_user_return_msr(msr);700705 if (i >= 0)701706 return &vmx->guest_uret_msrs[i];702707 return NULL;···695720static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,696721 struct vmx_uret_msr *msr, u64 data)697722{723723+ unsigned int slot = msr - vmx->guest_uret_msrs;698724 int ret = 0;699725700726 u64 old_msr_data = msr->data;701727 msr->data = data;702702- if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {728728+ if (msr->load_into_hardware) {703729 preempt_disable();704704- ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);730730+ ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);705731 preempt_enable();706732 if (ret)707733 msr->data = old_msr_data;···10541078 return false;10551079 }1056108010571057- i = __vmx_find_uret_msr(vmx, MSR_EFER);10811081+ i = kvm_find_user_return_msr(MSR_EFER);10581082 if (i < 0)10591083 return false;10601084···12161240 */12171241 if (!vmx->guest_uret_msrs_loaded) {12181242 vmx->guest_uret_msrs_loaded = true;12191219- for (i = 0; i < vmx->nr_active_uret_msrs; ++i)12201220- kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,12431243+ for (i = 0; i < kvm_nr_uret_msrs; ++i) {12441244+ if (!vmx->guest_uret_msrs[i].load_into_hardware)12451245+ continue;12461246+12471247+ kvm_set_user_return_msr(i,12211248 vmx->guest_uret_msrs[i].data,12221249 vmx->guest_uret_msrs[i].mask);12231223-12501250+ }12241251 }1225125212261253 if (vmx->nested.need_vmcs12_to_shadow_sync)···17301751 vmx_clear_hlt(vcpu);17311752}1732175317331733-static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)17541754+static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,17551755+ bool load_into_hardware)17341756{17351735- struct vmx_uret_msr tmp;17361736- int from, to;17571757+ struct vmx_uret_msr *uret_msr;1737175817381738- from = __vmx_find_uret_msr(vmx, msr);17391739- if (from < 0)17591759+ uret_msr = vmx_find_uret_msr(vmx, msr);17601760+ if (!uret_msr)17401761 return;17411741- to = vmx->nr_active_uret_msrs++;1742176217431743- tmp = vmx->guest_uret_msrs[to];17441744- vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];17451745- vmx->guest_uret_msrs[from] = tmp;17631763+ uret_msr->load_into_hardware = load_into_hardware;17461764}1747176517481766/*···17491773 */17501774static void setup_msrs(struct vcpu_vmx *vmx)17511775{17521752- vmx->guest_uret_msrs_loaded = false;17531753- vmx->nr_active_uret_msrs = 0;17541776#ifdef CONFIG_X86_6417771777+ bool load_syscall_msrs;17781778+17551779 /*17561780 * The SYSCALL MSRs are only needed on long mode guests, and only17571781 * when EFER.SCE is set.17581782 */17591759- if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {17601760- vmx_setup_uret_msr(vmx, MSR_STAR);17611761- vmx_setup_uret_msr(vmx, MSR_LSTAR);17621762- vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);17631763- }17831783+ load_syscall_msrs = is_long_mode(&vmx->vcpu) &&17841784+ (vmx->vcpu.arch.efer & EFER_SCE);17851785+17861786+ vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);17871787+ vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);17881788+ vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);17641789#endif17651765- if (update_transition_efer(vmx))17661766- vmx_setup_uret_msr(vmx, MSR_EFER);17901790+ vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));1767179117681768- if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))17691769- vmx_setup_uret_msr(vmx, MSR_TSC_AUX);17921792+ vmx_setup_uret_msr(vmx, MSR_TSC_AUX,17931793+ guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||17941794+ guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));1770179517711771- vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);17961796+ /*17971797+ * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new17981798+ * kernel and old userspace. If those guests run on a tsx=off host, do17991799+ * allow guests to use TSX_CTRL, but don't change the value in hardware18001800+ * so that TSX remains always disabled.18011801+ */18021802+ vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));1772180317731804 if (cpu_has_vmx_msr_bitmap())17741805 vmx_update_msr_bitmap(&vmx->vcpu);18061806+18071807+ /*18081808+ * The set of MSRs to load may have changed, reload MSRs before the18091809+ * next VM-Enter.18101810+ */18111811+ vmx->guest_uret_msrs_loaded = false;17751812}1776181317771814static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)···19821993 else19831994 msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];19841995 break;19851985- case MSR_TSC_AUX:19861986- if (!msr_info->host_initiated &&19871987- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))19881988- return 1;19891989- goto find_uret_msr;19901996 case MSR_IA32_DEBUGCTLMSR:19911997 msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);19921998 break;···2014203020152031 if (!intel_pmu_lbr_is_enabled(vcpu))20162032 debugctl &= ~DEBUGCTLMSR_LBR_MASK;20332033+20342034+ if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))20352035+ debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;2017203620182037 return debugctl;20192038}···23002313 else23012314 vmx->pt_desc.guest.addr_a[index / 2] = data;23022315 break;23032303- case MSR_TSC_AUX:23042304- if (!msr_info->host_initiated &&23052305- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))23062306- return 1;23072307- /* Check reserved bit, higher 32 bits should be zero */23082308- if ((data >> 32) != 0)23092309- return 1;23102310- goto find_uret_msr;23112316 case MSR_IA32_PERF_CAPABILITIES:23122317 if (data && !vcpu_to_pmu(vcpu)->version)23132318 return 1;···43484369 xsaves_enabled, false);43494370 }4350437143514351- vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);43724372+ /*43734373+ * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either43744374+ * feature is exposed to the guest. This creates a virtualization hole43754375+ * if both are supported in hardware but only one is exposed to the43764376+ * guest, but letting the guest execute RDTSCP or RDPID when either one43774377+ * is advertised is preferable to emulating the advertised instruction43784378+ * in KVM on #UD, and obviously better than incorrectly injecting #UD.43794379+ */43804380+ if (cpu_has_vmx_rdtscp()) {43814381+ bool rdpid_or_rdtscp_enabled =43824382+ guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||43834383+ guest_cpuid_has(vcpu, X86_FEATURE_RDPID);43844384+43854385+ vmx_adjust_secondary_exec_control(vmx, &exec_control,43864386+ SECONDARY_EXEC_ENABLE_RDTSCP,43874387+ rdpid_or_rdtscp_enabled, false);43884388+ }43524389 vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);4353439043544391 vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);···6850685568516856static int vmx_create_vcpu(struct kvm_vcpu *vcpu)68526857{68586858+ struct vmx_uret_msr *tsx_ctrl;68536859 struct vcpu_vmx *vmx;68546860 int i, cpu, err;68556861···68736877 goto free_vpid;68746878 }6875687968766876- BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);68776877-68786878- for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {68796879- u32 index = vmx_uret_msrs_list[i];68806880- u32 data_low, data_high;68816881- int j = vmx->nr_uret_msrs;68826882-68836883- if (rdmsr_safe(index, &data_low, &data_high) < 0)68846884- continue;68856885- if (wrmsr_safe(index, data_low, data_high) < 0)68866886- continue;68876887-68886888- vmx->guest_uret_msrs[j].slot = i;68896889- vmx->guest_uret_msrs[j].data = 0;68906890- switch (index) {68916891- case MSR_IA32_TSX_CTRL:68926892- /*68936893- * TSX_CTRL_CPUID_CLEAR is handled in the CPUID68946894- * interception. Keep the host value unchanged to avoid68956895- * changing CPUID bits under the host kernel's feet.68966896- *68976897- * hle=0, rtm=0, tsx_ctrl=1 can be found with some68986898- * combinations of new kernel and old userspace. If68996899- * those guests run on a tsx=off host, do allow guests69006900- * to use TSX_CTRL, but do not change the value on the69016901- * host so that TSX remains always disabled.69026902- */69036903- if (boot_cpu_has(X86_FEATURE_RTM))69046904- vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;69056905- else69066906- vmx->guest_uret_msrs[j].mask = 0;69076907- break;69086908- default:69096909- vmx->guest_uret_msrs[j].mask = -1ull;69106910- break;69116911- }69126912- ++vmx->nr_uret_msrs;68806880+ for (i = 0; i < kvm_nr_uret_msrs; ++i) {68816881+ vmx->guest_uret_msrs[i].data = 0;68826882+ vmx->guest_uret_msrs[i].mask = -1ull;68836883+ }68846884+ if (boot_cpu_has(X86_FEATURE_RTM)) {68856885+ /*68866886+ * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.68876887+ * Keep the host value unchanged to avoid changing CPUID bits68886888+ * under the host kernel's feet.68896889+ */68906890+ tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);68916891+ if (tsx_ctrl)68926892+ vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;69136893 }6914689469156895 err = alloc_loaded_vmcs(&vmx->vmcs01);···73167344 if (!cpu_has_vmx_xsaves())73177345 kvm_cpu_cap_clear(X86_FEATURE_XSAVES);7318734673197319- /* CPUID 0x80000001 */73207320- if (!cpu_has_vmx_rdtscp())73477347+ /* CPUID 0x80000001 and 0x7 (RDPID) */73487348+ if (!cpu_has_vmx_rdtscp()) {73217349 kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);73507350+ kvm_cpu_cap_clear(X86_FEATURE_RDPID);73517351+ }7322735273237353 if (cpu_has_vmx_waitpkg())73247354 kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);···73767402 /*73777403 * RDPID causes #UD if disabled through secondary execution controls.73787404 * Because it is marked as EmulateOnUD, we need to intercept it here.74057405+ * Note, RDPID is hidden behind ENABLE_RDTSCP.73797406 */73807380- case x86_intercept_rdtscp:74077407+ case x86_intercept_rdpid:73817408 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {73827409 exception->vector = UD_VECTOR;73837410 exception->error_code_valid = false;···77447769 .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,77457770};7746777177727772+static __init void vmx_setup_user_return_msrs(void)77737773+{77747774+77757775+ /*77767776+ * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm77777777+ * will emulate SYSCALL in legacy mode if the vendor string in guest77787778+ * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To77797779+ * support this emulation, MSR_STAR is included in the list for i386,77807780+ * but is never loaded into hardware. MSR_CSTAR is also never loaded77817781+ * into hardware and is here purely for emulation purposes.77827782+ */77837783+ const u32 vmx_uret_msrs_list[] = {77847784+ #ifdef CONFIG_X86_6477857785+ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,77867786+ #endif77877787+ MSR_EFER, MSR_TSC_AUX, MSR_STAR,77887788+ MSR_IA32_TSX_CTRL,77897789+ };77907790+ int i;77917791+77927792+ BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);77937793+77947794+ for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)77957795+ kvm_add_user_return_msr(vmx_uret_msrs_list[i]);77967796+}77977797+77477798static __init int hardware_setup(void)77487799{77497800 unsigned long host_bndcfgs;77507801 struct desc_ptr dt;77517751- int r, i, ept_lpage_level;78027802+ int r, ept_lpage_level;7752780377537804 store_idt(&dt);77547805 host_idt_base = dt.address;7755780677567756- for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)77577757- kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);78077807+ vmx_setup_user_return_msrs();7758780877597809 if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)77607810 return -EIO;
+10-2
arch/x86/kvm/vmx/vmx.h
···3636};37373838struct vmx_uret_msr {3939- unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */3939+ bool load_into_hardware;4040 u64 data;4141 u64 mask;4242};···245245 u32 idt_vectoring_info;246246 ulong rflags;247247248248+ /*249249+ * User return MSRs are always emulated when enabled in the guest, but250250+ * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside251251+ * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to252252+ * be loaded into hardware if those conditions aren't met.253253+ * nr_active_uret_msrs tracks the number of MSRs that need to be loaded254254+ * into hardware when running the guest. guest_uret_msrs[] is resorted255255+ * whenever the number of "active" uret MSRs is modified.256256+ */248257 struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];249249- int nr_uret_msrs;250258 int nr_active_uret_msrs;251259 bool guest_uret_msrs_loaded;252260#ifdef CONFIG_X86_64
+112-41
arch/x86/kvm/x86.c
···184184 */185185#define KVM_MAX_NR_USER_RETURN_MSRS 16186186187187-struct kvm_user_return_msrs_global {188188- int nr;189189- u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];190190-};191191-192187struct kvm_user_return_msrs {193188 struct user_return_notifier urn;194189 bool registered;···193198 } values[KVM_MAX_NR_USER_RETURN_MSRS];194199};195200196196-static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;201201+u32 __read_mostly kvm_nr_uret_msrs;202202+EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);203203+static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];197204static struct kvm_user_return_msrs __percpu *user_return_msrs;198205199206#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \···327330 user_return_notifier_unregister(urn);328331 }329332 local_irq_restore(flags);330330- for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {333333+ for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {331334 values = &msrs->values[slot];332335 if (values->host != values->curr) {333333- wrmsrl(user_return_msrs_global.msrs[slot], values->host);336336+ wrmsrl(kvm_uret_msrs_list[slot], values->host);334337 values->curr = values->host;335338 }336339 }337340}338341339339-void kvm_define_user_return_msr(unsigned slot, u32 msr)342342+static int kvm_probe_user_return_msr(u32 msr)340343{341341- BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);342342- user_return_msrs_global.msrs[slot] = msr;343343- if (slot >= user_return_msrs_global.nr)344344- user_return_msrs_global.nr = slot + 1;344344+ u64 val;345345+ int ret;346346+347347+ preempt_disable();348348+ ret = rdmsrl_safe(msr, &val);349349+ if (ret)350350+ goto out;351351+ ret = wrmsrl_safe(msr, val);352352+out:353353+ preempt_enable();354354+ return ret;345355}346346-EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);356356+357357+int kvm_add_user_return_msr(u32 msr)358358+{359359+ BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);360360+361361+ if (kvm_probe_user_return_msr(msr))362362+ return -1;363363+364364+ kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;365365+ return kvm_nr_uret_msrs++;366366+}367367+EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);368368+369369+int kvm_find_user_return_msr(u32 msr)370370+{371371+ int i;372372+373373+ for (i = 0; i < kvm_nr_uret_msrs; ++i) {374374+ if (kvm_uret_msrs_list[i] == msr)375375+ return i;376376+ }377377+ return -1;378378+}379379+EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);347380348381static void kvm_user_return_msr_cpu_online(void)349382{···382355 u64 value;383356 int i;384357385385- for (i = 0; i < user_return_msrs_global.nr; ++i) {386386- rdmsrl_safe(user_return_msrs_global.msrs[i], &value);358358+ for (i = 0; i < kvm_nr_uret_msrs; ++i) {359359+ rdmsrl_safe(kvm_uret_msrs_list[i], &value);387360 msrs->values[i].host = value;388361 msrs->values[i].curr = value;389362 }···398371 value = (value & mask) | (msrs->values[slot].host & ~mask);399372 if (value == msrs->values[slot].curr)400373 return 0;401401- err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);374374+ err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);402375 if (err)403376 return 1;404377···1176114911771150 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))11781151 fixed |= DR6_RTM;11521152+11531153+ if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))11541154+ fixed |= DR6_BUS_LOCK;11791155 return fixed;11801156}11811157···16451615 * invokes 64-bit SYSENTER.16461616 */16471617 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));16181618+ break;16191619+ case MSR_TSC_AUX:16201620+ if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))16211621+ return 1;16221622+16231623+ if (!host_initiated &&16241624+ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&16251625+ !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))16261626+ return 1;16271627+16281628+ /*16291629+ * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has16301630+ * incomplete and conflicting architectural behavior. Current16311631+ * AMD CPUs completely ignore bits 63:32, i.e. they aren't16321632+ * reserved and always read as zeros. Enforce Intel's reserved16331633+ * bits check if and only if the guest CPU is Intel, and clear16341634+ * the bits in all other cases. This ensures cross-vendor16351635+ * migration will provide consistent behavior for the guest.16361636+ */16371637+ if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)16381638+ return 1;16391639+16401640+ data = (u32)data;16411641+ break;16481642 }1649164316501644 msr.data = data;···1704165017051651 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))17061652 return KVM_MSR_RET_FILTERED;16531653+16541654+ switch (index) {16551655+ case MSR_TSC_AUX:16561656+ if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))16571657+ return 1;16581658+16591659+ if (!host_initiated &&16601660+ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&16611661+ !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))16621662+ return 1;16631663+ break;16641664+ }1707166517081666 msr.index = index;17091667 msr.host_initiated = host_initiated;···55345468static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,55355469 struct kvm_msr_filter_range *user_range)55365470{55375537- struct msr_bitmap_range range;55385471 unsigned long *bitmap = NULL;55395472 size_t bitmap_size;55405540- int r;5541547355425474 if (!user_range->nmsrs)55435475 return 0;54765476+54775477+ if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))54785478+ return -EINVAL;54795479+54805480+ if (!user_range->flags)54815481+ return -EINVAL;5544548255455483 bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);55465484 if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)···55545484 if (IS_ERR(bitmap))55555485 return PTR_ERR(bitmap);5556548655575557- range = (struct msr_bitmap_range) {54875487+ msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {55585488 .flags = user_range->flags,55595489 .base = user_range->base,55605490 .nmsrs = user_range->nmsrs,55615491 .bitmap = bitmap,55625492 };5563549355645564- if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {55655565- r = -EINVAL;55665566- goto err;55675567- }55685568-55695569- if (!range.flags) {55705570- r = -EINVAL;55715571- goto err;55725572- }55735573-55745574- /* Everything ok, add this range identifier. */55755575- msr_filter->ranges[msr_filter->count] = range;55765494 msr_filter->count++;55775577-55785495 return 0;55795579-err:55805580- kfree(bitmap);55815581- return r;55825496}5583549755845498static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)···59915937 continue;59925938 break;59935939 case MSR_TSC_AUX:59945994- if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))59405940+ if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&59415941+ !kvm_cpu_cap_has(X86_FEATURE_RDPID))59955942 continue;59965943 break;59975944 case MSR_IA32_UMWAIT_CONTROL:···80958040static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);8096804180978042/*80438043+ * Indirection to move queue_work() out of the tk_core.seq write held80448044+ * region to prevent possible deadlocks against time accessors which80458045+ * are invoked with work related locks held.80468046+ */80478047+static void pvclock_irq_work_fn(struct irq_work *w)80488048+{80498049+ queue_work(system_long_wq, &pvclock_gtod_work);80508050+}80518051+80528052+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);80538053+80548054+/*80988055 * Notification about pvclock gtod data update.80998056 */81008057static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,···8117805081188051 update_pvclock_gtod(tk);8119805281208120- /* disable master clock if host does not trust, or does not81218121- * use, TSC based clocksource.80538053+ /*80548054+ * Disable master clock if host does not trust, or does not use,80558055+ * TSC based clocksource. Delegate queue_work() to irq_work as80568056+ * this is invoked with tk_core.seq write held.81228057 */81238058 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&81248059 atomic_read(&kvm_guest_has_master_clock) != 0)81258125- queue_work(system_long_wq, &pvclock_gtod_work);81268126-80608060+ irq_work_queue(&pvclock_irq_work);81278061 return 0;81288062}81298063···81868118 printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");81878119 goto out_free_x86_emulator_cache;81888120 }81218121+ kvm_nr_uret_msrs = 0;8189812281908123 r = kvm_mmu_module_init();81918124 if (r)···82378168 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);82388169#ifdef CONFIG_X86_6482398170 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);81718171+ irq_work_sync(&pvclock_irq_work);81728172+ cancel_work_sync(&pvclock_gtod_work);82408173#endif82418174 kvm_x86_ops.hardware_enable = NULL;82428175 kvm_mmu_module_exit();
···13401340 stripe = bbio->stripes;13411341 for (i = 0; i < bbio->num_stripes; i++, stripe++) {13421342 u64 bytes;13431343+ struct btrfs_device *device = stripe->dev;1343134413441344- if (!stripe->dev->bdev) {13451345+ if (!device->bdev) {13451346 ASSERT(btrfs_test_opt(fs_info, DEGRADED));13461347 continue;13471348 }13491349+13501350+ if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))13511351+ continue;1348135213491353 ret = do_discard_extent(stripe, &bytes);13501354 if (!ret) {
+25-10
fs/btrfs/file.c
···20672067 return ret;20682068}2069206920702070+static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)20712071+{20722072+ struct btrfs_inode *inode = BTRFS_I(ctx->inode);20732073+ struct btrfs_fs_info *fs_info = inode->root->fs_info;20742074+20752075+ if (btrfs_inode_in_log(inode, fs_info->generation) &&20762076+ list_empty(&ctx->ordered_extents))20772077+ return true;20782078+20792079+ /*20802080+ * If we are doing a fast fsync we can not bail out if the inode's20812081+ * last_trans is <= then the last committed transaction, because we only20822082+ * update the last_trans of the inode during ordered extent completion,20832083+ * and for a fast fsync we don't wait for that, we only wait for the20842084+ * writeback to complete.20852085+ */20862086+ if (inode->last_trans <= fs_info->last_trans_committed &&20872087+ (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||20882088+ list_empty(&ctx->ordered_extents)))20892089+ return true;20902090+20912091+ return false;20922092+}20932093+20702094/*20712095 * fsync call for both files and directories. This logs the inode into20722096 * the tree log instead of forcing full commits whenever possible.···2209218522102186 atomic_inc(&root->log_batch);2211218722122212- /*22132213- * If we are doing a fast fsync we can not bail out if the inode's22142214- * last_trans is <= then the last committed transaction, because we only22152215- * update the last_trans of the inode during ordered extent completion,22162216- * and for a fast fsync we don't wait for that, we only wait for the22172217- * writeback to complete.22182218- */22192188 smp_mb();22202220- if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||22212221- (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed &&22222222- (full_sync || list_empty(&ctx.ordered_extents)))) {21892189+ if (skip_inode_logging(&ctx)) {22232190 /*22242191 * We've had everything committed since the last time we were22252192 * modified so clear this flag in case it was set for whatever
···259259 if (!fa->flags_valid) {260260 /* 1 item for the inode */261261 trans = btrfs_start_transaction(root, 1);262262+ if (IS_ERR(trans))263263+ return PTR_ERR(trans);262264 goto update_flags;263265 }264266···909907 */910908 btrfs_drew_read_lock(&root->snapshot_lock);911909912912- ret = btrfs_start_delalloc_snapshot(root);910910+ ret = btrfs_start_delalloc_snapshot(root, false);913911 if (ret)914912 goto out;915913
+1-1
fs/btrfs/ordered-data.c
···984984985985 if (pre)986986 ret = clone_ordered_extent(ordered, 0, pre);987987- if (post)987987+ if (ret == 0 && post)988988 ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes,989989 post);990990
+10-6
fs/btrfs/qgroup.c
···35453545 struct btrfs_trans_handle *trans;35463546 int ret;3547354735483548- /* Can't hold an open transaction or we run the risk of deadlocking */35493549- ASSERT(current->journal_info == NULL ||35503550- current->journal_info == BTRFS_SEND_TRANS_STUB);35513551- if (WARN_ON(current->journal_info &&35523552- current->journal_info != BTRFS_SEND_TRANS_STUB))35483548+ /*35493549+ * Can't hold an open transaction or we run the risk of deadlocking,35503550+ * and can't either be under the context of a send operation (where35513551+ * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that35523552+ * would result in a crash when starting a transaction and does not35533553+ * make sense either (send is a read-only operation).35543554+ */35553555+ ASSERT(current->journal_info == NULL);35563556+ if (WARN_ON(current->journal_info))35533557 return 0;3554355835553559 /*···35663562 return 0;35673563 }3568356435693569- ret = btrfs_start_delalloc_snapshot(root);35653565+ ret = btrfs_start_delalloc_snapshot(root, true);35703566 if (ret < 0)35713567 goto out;35723568 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
+2-2
fs/btrfs/send.c
···71707170 int i;7171717171727172 if (root) {71737173- ret = btrfs_start_delalloc_snapshot(root);71737173+ ret = btrfs_start_delalloc_snapshot(root, false);71747174 if (ret)71757175 return ret;71767176 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);···7178717871797179 for (i = 0; i < sctx->clone_roots_cnt; i++) {71807180 root = sctx->clone_roots[i].root;71817181- ret = btrfs_start_delalloc_snapshot(root);71817181+ ret = btrfs_start_delalloc_snapshot(root, false);71827182 if (ret)71837183 return ret;71847184 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
+2-1
fs/btrfs/tree-log.c
···60616061 * (since logging them is pointless, a link count of 0 means they60626062 * will never be accessible).60636063 */60646064- if (btrfs_inode_in_log(inode, trans->transid) ||60646064+ if ((btrfs_inode_in_log(inode, trans->transid) &&60656065+ list_empty(&ctx->ordered_extents)) ||60656066 inode->vfs_inode.i_nlink == 0) {60666067 ret = BTRFS_NO_LOG_SYNC;60676068 goto end_no_trans;
···863863__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)864864#define __NR_mount_setattr 442865865__SYSCALL(__NR_mount_setattr, sys_mount_setattr)866866+#define __NR_quotactl_path 443867867+__SYSCALL(__NR_quotactl_path, sys_quotactl_path)868868+869869+#define __NR_landlock_create_ruleset 444870870+__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)871871+#define __NR_landlock_add_rule 445872872+__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)873873+#define __NR_landlock_restrict_self 446874874+__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)866875867876#undef __NR_syscalls868868-#define __NR_syscalls 443877877+#define __NR_syscalls 447869878870879/*871880 * 32 bit systems traditionally used different
+121-4
tools/include/uapi/drm/drm.h
···625625 __u64 size;626626};627627628628+/**629629+ * DRM_CAP_DUMB_BUFFER630630+ *631631+ * If set to 1, the driver supports creating dumb buffers via the632632+ * &DRM_IOCTL_MODE_CREATE_DUMB ioctl.633633+ */628634#define DRM_CAP_DUMB_BUFFER 0x1635635+/**636636+ * DRM_CAP_VBLANK_HIGH_CRTC637637+ *638638+ * If set to 1, the kernel supports specifying a CRTC index in the high bits of639639+ * &drm_wait_vblank_request.type.640640+ *641641+ * Starting kernel version 2.6.39, this capability is always set to 1.642642+ */629643#define DRM_CAP_VBLANK_HIGH_CRTC 0x2644644+/**645645+ * DRM_CAP_DUMB_PREFERRED_DEPTH646646+ *647647+ * The preferred bit depth for dumb buffers.648648+ *649649+ * The bit depth is the number of bits used to indicate the color of a single650650+ * pixel excluding any padding. This is different from the number of bits per651651+ * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per652652+ * pixel.653653+ *654654+ * Note that this preference only applies to dumb buffers, it's irrelevant for655655+ * other types of buffers.656656+ */630657#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3658658+/**659659+ * DRM_CAP_DUMB_PREFER_SHADOW660660+ *661661+ * If set to 1, the driver prefers userspace to render to a shadow buffer662662+ * instead of directly rendering to a dumb buffer. For best speed, userspace663663+ * should do streaming ordered memory copies into the dumb buffer and never664664+ * read from it.665665+ *666666+ * Note that this preference only applies to dumb buffers, it's irrelevant for667667+ * other types of buffers.668668+ */631669#define DRM_CAP_DUMB_PREFER_SHADOW 0x4670670+/**671671+ * DRM_CAP_PRIME672672+ *673673+ * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT674674+ * and &DRM_PRIME_CAP_EXPORT.675675+ *676676+ * PRIME buffers are exposed as dma-buf file descriptors. See677677+ * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".678678+ */632679#define DRM_CAP_PRIME 0x5680680+/**681681+ * DRM_PRIME_CAP_IMPORT682682+ *683683+ * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME684684+ * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.685685+ */633686#define DRM_PRIME_CAP_IMPORT 0x1687687+/**688688+ * DRM_PRIME_CAP_EXPORT689689+ *690690+ * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME691691+ * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.692692+ */634693#define DRM_PRIME_CAP_EXPORT 0x2694694+/**695695+ * DRM_CAP_TIMESTAMP_MONOTONIC696696+ *697697+ * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in698698+ * struct drm_event_vblank. If set to 1, the kernel will report timestamps with699699+ * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these700700+ * clocks.701701+ *702702+ * Starting from kernel version 2.6.39, the default value for this capability703703+ * is 1. Starting kernel version 4.15, this capability is always set to 1.704704+ */635705#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6706706+/**707707+ * DRM_CAP_ASYNC_PAGE_FLIP708708+ *709709+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.710710+ */636711#define DRM_CAP_ASYNC_PAGE_FLIP 0x7637637-/*638638- * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight639639- * combination for the hardware cursor. The intention is that a hardware640640- * agnostic userspace can query a cursor plane size to use.712712+/**713713+ * DRM_CAP_CURSOR_WIDTH714714+ *715715+ * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid716716+ * width x height combination for the hardware cursor. The intention is that a717717+ * hardware agnostic userspace can query a cursor plane size to use.641718 *642719 * Note that the cross-driver contract is to merely return a valid size;643720 * drivers are free to attach another meaning on top, eg. i915 returns the644721 * maximum plane size.645722 */646723#define DRM_CAP_CURSOR_WIDTH 0x8724724+/**725725+ * DRM_CAP_CURSOR_HEIGHT726726+ *727727+ * See &DRM_CAP_CURSOR_WIDTH.728728+ */647729#define DRM_CAP_CURSOR_HEIGHT 0x9730730+/**731731+ * DRM_CAP_ADDFB2_MODIFIERS732732+ *733733+ * If set to 1, the driver supports supplying modifiers in the734734+ * &DRM_IOCTL_MODE_ADDFB2 ioctl.735735+ */648736#define DRM_CAP_ADDFB2_MODIFIERS 0x10737737+/**738738+ * DRM_CAP_PAGE_FLIP_TARGET739739+ *740740+ * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and741741+ * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in742742+ * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP743743+ * ioctl.744744+ */649745#define DRM_CAP_PAGE_FLIP_TARGET 0x11746746+/**747747+ * DRM_CAP_CRTC_IN_VBLANK_EVENT748748+ *749749+ * If set to 1, the kernel supports reporting the CRTC ID in750750+ * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and751751+ * &DRM_EVENT_FLIP_COMPLETE events.752752+ *753753+ * Starting kernel version 4.12, this capability is always set to 1.754754+ */650755#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12756756+/**757757+ * DRM_CAP_SYNCOBJ758758+ *759759+ * If set to 1, the driver supports sync objects. See760760+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".761761+ */651762#define DRM_CAP_SYNCOBJ 0x13763763+/**764764+ * DRM_CAP_SYNCOBJ_TIMELINE765765+ *766766+ * If set to 1, the driver supports timeline operations on sync objects. See767767+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".768768+ */652769#define DRM_CAP_SYNCOBJ_TIMELINE 0x14653770654771/* DRM_IOCTL_GET_CAP ioctl argument type */
+1
tools/include/uapi/drm/i915_drm.h
···943943 __u64 offset;944944};945945946946+/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */946947struct drm_i915_gem_execbuffer {947948 /**948949 * List of buffers to be validated with their relocations to be
···127127 PERF_COUNT_SW_EMULATION_FAULTS = 8,128128 PERF_COUNT_SW_DUMMY = 9,129129 PERF_COUNT_SW_BPF_OUTPUT = 10,130130+ PERF_COUNT_SW_CGROUP_SWITCHES = 11,130131131132 PERF_COUNT_SW_MAX, /* non-ABI */132133};···327326#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */328327#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */329328#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */329329+#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */330330331331/*332332 * Hardware event_id to monitor via a performance monitoring event:···406404 cgroup : 1, /* include cgroup events */407405 text_poke : 1, /* include text poke events */408406 build_id : 1, /* use build id in mmap2 events */409409- __reserved_1 : 29;407407+ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */408408+ remove_on_exec : 1, /* event is removed from task on exec */409409+ sigtrap : 1, /* send synchronous SIGTRAP on event */410410+ __reserved_1 : 26;410411411412 union {412413 __u32 wakeup_events; /* wakeup every n events */···461456 __u16 __reserved_2;462457 __u32 aux_sample_size;463458 __u32 __reserved_3;459459+460460+ /*461461+ * User provided data if sigtrap=1, passed back to user via462462+ * siginfo_t::si_perf, e.g. to permit user to identify the event.463463+ */464464+ __u64 sig_data;464465};465466466467/*···11821171/**11831172 * PERF_RECORD_AUX::flags bits11841173 */11851185-#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */11861186-#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */11871187-#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */11881188-#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */11741174+#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */11751175+#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */11761176+#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */11771177+#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */11781178+#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */11791179+11801180+/* CoreSight PMU AUX buffer formats */11811181+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */11821182+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */1189118311901184#define PERF_FLAG_FD_NO_GROUP (1UL << 0)11911185#define PERF_FLAG_FD_OUTPUT (1UL << 1)
···111111--tracepoints::112112 retrieve statistics from tracepoints113113114114-*z*::114114+-z::115115--skip-zero-records::116116 omit records with all zeros in logging mode117117
···522522440 common process_madvise sys_process_madvise523523441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2524524442 common mount_setattr sys_mount_setattr525525+443 common quotactl_path sys_quotactl_path526526+444 common landlock_create_ruleset sys_landlock_create_ruleset527527+445 common landlock_add_rule sys_landlock_add_rule528528+446 common landlock_restrict_self sys_landlock_restrict_self
+4
tools/perf/arch/s390/entry/syscalls/syscall.tbl
···445445440 common process_madvise sys_process_madvise sys_process_madvise446446441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2447447442 common mount_setattr sys_mount_setattr sys_mount_setattr448448+443 common quotactl_path sys_quotactl_path sys_quotactl_path449449+444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset450450+445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule451451+446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
+4
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
···364364440 common process_madvise sys_process_madvise365365441 common epoll_pwait2 sys_epoll_pwait2366366442 common mount_setattr sys_mount_setattr367367+443 common quotactl_path sys_quotactl_path368368+444 common landlock_create_ruleset sys_landlock_create_ruleset369369+445 common landlock_add_rule sys_landlock_add_rule370370+446 common landlock_restrict_self sys_landlock_restrict_self367371368372#369373# Due to a historical design error, certain syscalls are numbered differently
···157157static int record_opts__config_freq(struct record_opts *opts)158158{159159 bool user_freq = opts->user_freq != UINT_MAX;160160+ bool user_interval = opts->user_interval != ULLONG_MAX;160161 unsigned int max_rate;161162162162- if (opts->user_interval != ULLONG_MAX)163163+ if (user_interval && user_freq) {164164+ pr_err("cannot set frequency and period at the same time\n");165165+ return -1;166166+ }167167+168168+ if (user_interval)163169 opts->default_interval = opts->user_interval;164170 if (user_freq)165171 opts->freq = opts->user_freq;