Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"RISC-V:
- Fix missing PAGE_PFN_MASK

- Fix SRCU deadlock caused by kvm_riscv_check_vcpu_requests()

x86:
- Fix for nested virtualization when TSC scaling is active

- Estimate the size of the fastop and setcc subroutines conservatively,
avoiding disastrous underestimation when return thunks are enabled

- Avoid possible use of uninitialized fields of 'struct
kvm_lapic_irq'

Generic:
- Flag the boolean values exposed through the statistics file
descriptors as boolean, so that userspace can identify them

- Clarify the units of histogram statistics in the documentation"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: emulate: do not adjust size of fastop and setcc subroutines
KVM: x86: Fully initialize 'struct kvm_lapic_irq' in kvm_pv_kick_cpu_op()
Documentation: kvm: clarify histogram units
kvm: stats: tell userspace which values are boolean
x86/kvm: fix FASTOP_SIZE when return thunks are enabled
KVM: nVMX: Always enable TSC scaling for L2 when it was enabled for L1
RISC-V: KVM: Fix SRCU deadlock caused by kvm_riscv_check_vcpu_requests()
riscv: Fix missing PAGE_PFN_MASK

+54 -33
+13 -4
Documentation/virt/kvm/api.rst
··· 5657 5657 #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) 5658 5658 #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) 5659 5659 #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) 5660 + #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) 5660 5661 #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES 5661 5662 5662 5663 #define KVM_STATS_BASE_SHIFT 8 ··· 5703 5702 by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``) 5704 5703 is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last 5705 5704 bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity 5706 - value.) The bucket value indicates how many samples fell in the bucket's range. 5705 + value.) 5707 5706 * ``KVM_STATS_TYPE_LOG_HIST`` 5708 5707 The statistic is reported as a logarithmic histogram. The number of 5709 5708 buckets is specified by the ``size`` field. The range of the first bucket is 5710 5709 [0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF). 5711 5710 Otherwise, The Nth bucket (1 < N < ``size``) covers 5712 - [pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell 5713 - in the bucket's range. 5711 + [pow(2, N-2), pow(2, N-1)). 5714 5712 5715 5713 Bits 4-7 of ``flags`` encode the unit: 5716 5714 ··· 5724 5724 It indicates that the statistics data is used to measure time or latency. 5725 5725 * ``KVM_STATS_UNIT_CYCLES`` 5726 5726 It indicates that the statistics data is used to measure CPU clock cycles. 5727 + * ``KVM_STATS_UNIT_BOOLEAN`` 5728 + It indicates that the statistic will always be either 0 or 1. Boolean 5729 + statistics of "peak" type will never go back from 1 to 0. Boolean 5730 + statistics can be linear histograms (with two buckets) but not logarithmic 5731 + histograms. 
5732 + 5733 + Note that, in the case of histograms, the unit applies to the bucket 5734 + ranges, while the bucket value indicates how many samples fell in the 5735 + bucket's range. 5727 5736 5728 5737 Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the 5729 5738 unit: ··· 5755 5746 5756 5747 The ``bucket_size`` field is used as a parameter for histogram statistics data. 5757 5748 It is only used by linear histogram statistics data, specifying the size of a 5758 - bucket. 5749 + bucket in the unit expressed by bits 4-11 of ``flags`` together with ``exponent``. 5759 5750 5760 5751 The ``name`` field is the name string of the statistics data. The name string 5761 5752 starts at the end of ``struct kvm_stats_desc``. The maximum length including
+6 -6
arch/riscv/include/asm/pgtable-64.h
··· 175 175 176 176 static inline unsigned long _pud_pfn(pud_t pud) 177 177 { 178 - return pud_val(pud) >> _PAGE_PFN_SHIFT; 178 + return __page_val_to_pfn(pud_val(pud)); 179 179 } 180 180 181 181 static inline pmd_t *pud_pgtable(pud_t pud) ··· 278 278 279 279 static inline unsigned long _p4d_pfn(p4d_t p4d) 280 280 { 281 - return p4d_val(p4d) >> _PAGE_PFN_SHIFT; 281 + return __page_val_to_pfn(p4d_val(p4d)); 282 282 } 283 283 284 284 static inline pud_t *p4d_pgtable(p4d_t p4d) 285 285 { 286 286 if (pgtable_l4_enabled) 287 - return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); 287 + return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d))); 288 288 289 289 return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); 290 290 } ··· 292 292 293 293 static inline struct page *p4d_page(p4d_t p4d) 294 294 { 295 - return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT); 295 + return pfn_to_page(__page_val_to_pfn(p4d_val(p4d))); 296 296 } 297 297 298 298 #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) ··· 347 347 static inline p4d_t *pgd_pgtable(pgd_t pgd) 348 348 { 349 349 if (pgtable_l5_enabled) 350 - return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); 350 + return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd))); 351 351 352 352 return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); 353 353 } ··· 355 355 356 356 static inline struct page *pgd_page(pgd_t pgd) 357 357 { 358 - return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT); 358 + return pfn_to_page(__page_val_to_pfn(pgd_val(pgd))); 359 359 } 360 360 #define pgd_page(pgd) pgd_page(pgd) 361 361
+3 -3
arch/riscv/include/asm/pgtable.h
··· 261 261 262 262 static inline unsigned long _pgd_pfn(pgd_t pgd) 263 263 { 264 - return pgd_val(pgd) >> _PAGE_PFN_SHIFT; 264 + return __page_val_to_pfn(pgd_val(pgd)); 265 265 } 266 266 267 267 static inline struct page *pmd_page(pmd_t pmd) ··· 590 590 return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE)); 591 591 } 592 592 593 - #define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) 593 + #define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT) 594 594 595 595 static inline unsigned long pmd_pfn(pmd_t pmd) 596 596 { 597 597 return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT); 598 598 } 599 599 600 - #define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) 600 + #define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT) 601 601 602 602 static inline unsigned long pud_pfn(pud_t pud) 603 603 {
+1 -1
arch/riscv/kvm/mmu.c
··· 54 54 55 55 static inline unsigned long gstage_pte_page_vaddr(pte_t pte) 56 56 { 57 - return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT); 57 + return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); 58 58 } 59 59 60 60 static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
+2
arch/riscv/kvm/vcpu.c
··· 781 781 782 782 if (kvm_request_pending(vcpu)) { 783 783 if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { 784 + kvm_vcpu_srcu_read_unlock(vcpu); 784 785 rcuwait_wait_event(wait, 785 786 (!vcpu->arch.power_off) && (!vcpu->arch.pause), 786 787 TASK_INTERRUPTIBLE); 788 + kvm_vcpu_srcu_read_lock(vcpu); 787 789 788 790 if (vcpu->arch.power_off || vcpu->arch.pause) { 789 791 /*
+7 -8
arch/x86/kvm/emulate.c
··· 189 189 #define X8(x...) X4(x), X4(x) 190 190 #define X16(x...) X8(x), X8(x) 191 191 192 - #define NR_FASTOP (ilog2(sizeof(ulong)) + 1) 193 - #define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) 194 - 195 192 struct opcode { 196 193 u64 flags; 197 194 u8 intercept; ··· 303 306 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for 304 307 * different operand sizes can be reached by calculation, rather than a jump 305 308 * table (which would be bigger than the code). 309 + * 310 + * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR 311 + * and 1 for the straight line speculation INT3, leaves 7 bytes for the 312 + * body of the function. Currently none is larger than 4. 306 313 */ 307 314 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); 315 + 316 + #define FASTOP_SIZE 16 308 317 309 318 #define __FOP_FUNC(name) \ 310 319 ".align " __stringify(FASTOP_SIZE) " \n\t" \ ··· 445 442 * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] 446 443 * INT3 [1 byte; CONFIG_SLS] 447 444 */ 448 - #define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ 449 - IS_ENABLED(CONFIG_SLS)) 450 - #define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) 451 - #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) 452 - static_assert(SETCC_LENGTH <= SETCC_ALIGN); 445 + #define SETCC_ALIGN 16 453 446 454 447 #define FOP_SETCC(op) \ 455 448 ".align " __stringify(SETCC_ALIGN) " \n\t" \
-1
arch/x86/kvm/vmx/nested.c
··· 2278 2278 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 2279 2279 SECONDARY_EXEC_APIC_REGISTER_VIRT | 2280 2280 SECONDARY_EXEC_ENABLE_VMFUNC | 2281 - SECONDARY_EXEC_TSC_SCALING | 2282 2281 SECONDARY_EXEC_DESC); 2283 2282 2284 2283 if (nested_cpu_has(vmcs12,
+11 -9
arch/x86/kvm/x86.c
··· 298 298 STATS_DESC_COUNTER(VCPU, directed_yield_successful), 299 299 STATS_DESC_COUNTER(VCPU, preemption_reported), 300 300 STATS_DESC_COUNTER(VCPU, preemption_other), 301 - STATS_DESC_ICOUNTER(VCPU, guest_mode) 301 + STATS_DESC_IBOOLEAN(VCPU, guest_mode) 302 302 }; 303 303 304 304 const struct kvm_stats_header kvm_vcpu_stats_header = { ··· 9143 9143 */ 9144 9144 static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) 9145 9145 { 9146 - struct kvm_lapic_irq lapic_irq; 9146 + /* 9147 + * All other fields are unused for APIC_DM_REMRD, but may be consumed by 9148 + * common code, e.g. for tracing. Defer initialization to the compiler. 9149 + */ 9150 + struct kvm_lapic_irq lapic_irq = { 9151 + .delivery_mode = APIC_DM_REMRD, 9152 + .dest_mode = APIC_DEST_PHYSICAL, 9153 + .shorthand = APIC_DEST_NOSHORT, 9154 + .dest_id = apicid, 9155 + }; 9147 9156 9148 - lapic_irq.shorthand = APIC_DEST_NOSHORT; 9149 - lapic_irq.dest_mode = APIC_DEST_PHYSICAL; 9150 - lapic_irq.level = 0; 9151 - lapic_irq.dest_id = apicid; 9152 - lapic_irq.msi_redir_hint = false; 9153 - 9154 - lapic_irq.delivery_mode = APIC_DM_REMRD; 9155 9157 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); 9156 9158 } 9157 9159
+10 -1
include/linux/kvm_host.h
··· 1822 1822 STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \ 1823 1823 KVM_STATS_BASE_POW10, 0) 1824 1824 1825 + /* Instantaneous boolean value, read only */ 1826 + #define STATS_DESC_IBOOLEAN(SCOPE, name) \ 1827 + STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ 1828 + KVM_STATS_BASE_POW10, 0) 1829 + /* Peak (sticky) boolean value, read/write */ 1830 + #define STATS_DESC_PBOOLEAN(SCOPE, name) \ 1831 + STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ 1832 + KVM_STATS_BASE_POW10, 0) 1833 + 1825 1834 /* Cumulative time in nanosecond */ 1826 1835 #define STATS_DESC_TIME_NSEC(SCOPE, name) \ 1827 1836 STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ ··· 1862 1853 HALT_POLL_HIST_COUNT), \ 1863 1854 STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ 1864 1855 HALT_POLL_HIST_COUNT), \ 1865 - STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking) 1856 + STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) 1866 1857 1867 1858 extern struct dentry *kvm_debugfs_dir; 1868 1859
+1
include/uapi/linux/kvm.h
··· 2083 2083 #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) 2084 2084 #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) 2085 2085 #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) 2086 + #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) 2086 2087 #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES 2087 2088 2088 2089 #define KVM_STATS_BASE_SHIFT 8