Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/hyper-v: Use hypercall for remote TLB flush

The Hyper-V host can suggest that we use a hypercall for doing remote TLB
flushes; this is supposed to be faster than IPIs.

Implementation details: to do HvFlushVirtualAddress{Space,List} hypercalls
we need to put the input somewhere in memory, and we don't really want to
allocate memory on each call, so we pre-allocate per-cpu memory areas
on boot.

pv_ops patching is happening very early so we need to separate
hyperv_setup_mmu_ops() and hyper_alloc_mmu().

It is possible and easy to implement local TLB flushing too, and there is
even a hint for that. However, I don't see room for optimization on the
host side, as both the hypercall and a native TLB flush will result in a
vmexit. The hint is also not set on modern Hyper-V versions.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jork Loeser <Jork.Loeser@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Simon Xiao <sixiao@microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: devel@linuxdriverproject.org
Link: http://lkml.kernel.org/r/20170802160921.21791-8-vkuznets@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Vitaly Kuznetsov and committed by
Ingo Molnar
2ffd9e33 7415aea6

+153 -1
+1 -1
arch/x86/hyperv/Makefile
··· 1 - obj-y := hv_init.o 1 + obj-y := hv_init.o mmu.o
+2
arch/x86/hyperv/hv_init.c
··· 140 140 hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); 141 141 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 142 142 143 + hyper_alloc_mmu(); 144 + 143 145 /* 144 146 * Register Hyper-V specific clocksource. 145 147 */
+138
arch/x86/hyperv/mmu.c
··· 1 + #define pr_fmt(fmt) "Hyper-V: " fmt 2 + 3 + #include <linux/hyperv.h> 4 + #include <linux/log2.h> 5 + #include <linux/slab.h> 6 + #include <linux/types.h> 7 + 8 + #include <asm/fpu/api.h> 9 + #include <asm/mshyperv.h> 10 + #include <asm/msr.h> 11 + #include <asm/tlbflush.h> 12 + 13 + /* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ 14 + struct hv_flush_pcpu { 15 + u64 address_space; 16 + u64 flags; 17 + u64 processor_mask; 18 + u64 gva_list[]; 19 + }; 20 + 21 + /* Each gva in gva_list encodes up to 4096 pages to flush */ 22 + #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) 23 + 24 + static struct hv_flush_pcpu __percpu *pcpu_flush; 25 + 26 + /* 27 + * Fills in gva_list starting from offset. Returns the number of items added. 28 + */ 29 + static inline int fill_gva_list(u64 gva_list[], int offset, 30 + unsigned long start, unsigned long end) 31 + { 32 + int gva_n = offset; 33 + unsigned long cur = start, diff; 34 + 35 + do { 36 + diff = end > cur ? end - cur : 0; 37 + 38 + gva_list[gva_n] = cur & PAGE_MASK; 39 + /* 40 + * Lower 12 bits encode the number of additional 41 + * pages to flush (in addition to the 'cur' page). 
42 + */ 43 + if (diff >= HV_TLB_FLUSH_UNIT) 44 + gva_list[gva_n] |= ~PAGE_MASK; 45 + else if (diff) 46 + gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT; 47 + 48 + cur += HV_TLB_FLUSH_UNIT; 49 + gva_n++; 50 + 51 + } while (cur < end); 52 + 53 + return gva_n - offset; 54 + } 55 + 56 + static void hyperv_flush_tlb_others(const struct cpumask *cpus, 57 + const struct flush_tlb_info *info) 58 + { 59 + int cpu, vcpu, gva_n, max_gvas; 60 + struct hv_flush_pcpu *flush; 61 + u64 status = U64_MAX; 62 + unsigned long flags; 63 + 64 + if (!pcpu_flush || !hv_hypercall_pg) 65 + goto do_native; 66 + 67 + if (cpumask_empty(cpus)) 68 + return; 69 + 70 + local_irq_save(flags); 71 + 72 + flush = this_cpu_ptr(pcpu_flush); 73 + 74 + if (info->mm) { 75 + flush->address_space = virt_to_phys(info->mm->pgd); 76 + flush->flags = 0; 77 + } else { 78 + flush->address_space = 0; 79 + flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; 80 + } 81 + 82 + flush->processor_mask = 0; 83 + if (cpumask_equal(cpus, cpu_present_mask)) { 84 + flush->flags |= HV_FLUSH_ALL_PROCESSORS; 85 + } else { 86 + for_each_cpu(cpu, cpus) { 87 + vcpu = hv_cpu_number_to_vp_number(cpu); 88 + if (vcpu >= 64) 89 + goto do_native; 90 + 91 + __set_bit(vcpu, (unsigned long *) 92 + &flush->processor_mask); 93 + } 94 + } 95 + 96 + /* 97 + * We can flush not more than max_gvas with one hypercall. Flush the 98 + * whole address space if we were asked to do more. 
99 + */ 100 + max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); 101 + 102 + if (info->end == TLB_FLUSH_ALL) { 103 + flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; 104 + status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, 105 + flush, NULL); 106 + } else if (info->end && 107 + ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { 108 + status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, 109 + flush, NULL); 110 + } else { 111 + gva_n = fill_gva_list(flush->gva_list, 0, 112 + info->start, info->end); 113 + status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, 114 + gva_n, 0, flush, NULL); 115 + } 116 + 117 + local_irq_restore(flags); 118 + 119 + if (!(status & HV_HYPERCALL_RESULT_MASK)) 120 + return; 121 + do_native: 122 + native_flush_tlb_others(cpus, info); 123 + } 124 + 125 + void hyperv_setup_mmu_ops(void) 126 + { 127 + if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED) { 128 + pr_info("Using hypercall for remote TLB flush\n"); 129 + pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others; 130 + setup_clear_cpu_cap(X86_FEATURE_PCID); 131 + } 132 + } 133 + 134 + void hyper_alloc_mmu(void) 135 + { 136 + if (ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED) 137 + pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); 138 + }
+3
arch/x86/include/asm/mshyperv.h
··· 307 307 } 308 308 309 309 void hyperv_init(void); 310 + void hyperv_setup_mmu_ops(void); 311 + void hyper_alloc_mmu(void); 310 312 void hyperv_report_panic(struct pt_regs *regs); 311 313 bool hv_is_hypercall_page_setup(void); 312 314 void hyperv_cleanup(void); ··· 316 314 static inline void hyperv_init(void) {} 317 315 static inline bool hv_is_hypercall_page_setup(void) { return false; } 318 316 static inline void hyperv_cleanup(void) {} 317 + static inline void hyperv_setup_mmu_ops(void) {} 319 318 #endif /* CONFIG_HYPERV */ 320 319 321 320 #ifdef CONFIG_HYPERV_TSCPAGE
+7
arch/x86/include/uapi/asm/hyperv.h
··· 242 242 (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) 243 243 244 244 /* Declare the various hypercall operations. */ 245 + #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 246 + #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 245 247 #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 246 248 #define HVCALL_POST_MESSAGE 0x005c 247 249 #define HVCALL_SIGNAL_EVENT 0x005d ··· 260 258 #define HV_PROCESSOR_POWER_STATE_C1 1 261 259 #define HV_PROCESSOR_POWER_STATE_C2 2 262 260 #define HV_PROCESSOR_POWER_STATE_C3 3 261 + 262 + #define HV_FLUSH_ALL_PROCESSORS BIT(0) 263 + #define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) 264 + #define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) 265 + #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) 263 266 264 267 /* hypercall status code */ 265 268 #define HV_STATUS_SUCCESS 0
+1
arch/x86/kernel/cpu/mshyperv.c
··· 249 249 * Setup the hook to get control post apic initialization. 250 250 */ 251 251 x86_platform.apic_post_init = hyperv_init; 252 + hyperv_setup_mmu_ops(); 252 253 #endif 253 254 } 254 255
+1
drivers/hv/Kconfig
··· 3 3 config HYPERV 4 4 tristate "Microsoft Hyper-V client drivers" 5 5 depends on X86 && ACPI && PCI && X86_LOCAL_APIC && HYPERVISOR_GUEST 6 + select PARAVIRT 6 7 help 7 8 Select this option to run Linux as a Hyper-V client operating 8 9 system.