include/linux/kvm_host.h at v5.1 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / kvm_host.h
at v5.1 39 kB view raw
   1#ifndef __KVM_HOST_H
   2#define __KVM_HOST_H
   3
   4/*
   5 * This work is licensed under the terms of the GNU GPL, version 2.  See
   6 * the COPYING file in the top-level directory.
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/hardirq.h>
  11#include <linux/list.h>
  12#include <linux/mutex.h>
  13#include <linux/spinlock.h>
  14#include <linux/signal.h>
  15#include <linux/sched.h>
  16#include <linux/bug.h>
  17#include <linux/mm.h>
  18#include <linux/mmu_notifier.h>
  19#include <linux/preempt.h>
  20#include <linux/msi.h>
  21#include <linux/slab.h>
  22#include <linux/vmalloc.h>
  23#include <linux/rcupdate.h>
  24#include <linux/ratelimit.h>
  25#include <linux/err.h>
  26#include <linux/irqflags.h>
  27#include <linux/context_tracking.h>
  28#include <linux/irqbypass.h>
  29#include <linux/swait.h>
  30#include <linux/refcount.h>
  31#include <linux/nospec.h>
  32#include <asm/signal.h>
  33
  34#include <linux/kvm.h>
  35#include <linux/kvm_para.h>
  36
  37#include <linux/kvm_types.h>
  38
  39#include <asm/kvm_host.h>
  40
  41#ifndef KVM_MAX_VCPU_ID
  42#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
  43#endif
  44
  45/*
  46 * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
  47 * in kvm, other bits are visible for userspace which are defined in
  48 * include/linux/kvm.h.
  49 */
  50#define KVM_MEMSLOT_INVALID	(1UL << 16)
  51
  52/*
  53 * Bit 63 of the memslot generation number is an "update in-progress flag",
  54 * e.g. is temporarily set for the duration of install_new_memslots().
  55 * This flag effectively creates a unique generation number that is used to
  56 * mark cached memslot data, e.g. MMIO accesses, as potentially being stale,
  57 * i.e. may (or may not) have come from the previous memslots generation.
  58 *
  59 * This is necessary because the actual memslots update is not atomic with
  60 * respect to the generation number update.  Updating the generation number
  61 * first would allow a vCPU to cache a spte from the old memslots using the
  62 * new generation number, and updating the generation number after switching
  63 * to the new memslots would allow cache hits using the old generation number
  64 * to reference the defunct memslots.
  65 *
  66 * This mechanism is used to prevent getting hits in KVM's caches while a
  67 * memslot update is in-progress, and to prevent cache hits *after* updating
  68 * the actual generation number against accesses that were inserted into the
  69 * cache *before* the memslots were updated.
  70 */
  71#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS	BIT_ULL(63)
  72
  73/* Two fragments for cross MMIO pages. */
  74#define KVM_MAX_MMIO_FRAGMENTS	2
  75
  76#ifndef KVM_ADDRESS_SPACE_NUM
  77#define KVM_ADDRESS_SPACE_NUM	1
  78#endif
  79
  80/*
  81 * For a normal pfn, the highest 12 bits should be zero,
  82 * so bits 62..52 can be used to flag an error pfn and
  83 * bit 63 to flag the noslot pfn.
  84 */
  85#define KVM_PFN_ERR_MASK	(0x7ffULL << 52)
  86#define KVM_PFN_ERR_NOSLOT_MASK	(0xfffULL << 52)
  87#define KVM_PFN_NOSLOT		(0x1ULL << 63)
  88
  89#define KVM_PFN_ERR_FAULT	(KVM_PFN_ERR_MASK)
  90#define KVM_PFN_ERR_HWPOISON	(KVM_PFN_ERR_MASK + 1)
  91#define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)
  92
  93/*
  94 * error pfns indicate that the gfn is in slot but faild to
  95 * translate it to pfn on host.
  96 */
  97static inline bool is_error_pfn(kvm_pfn_t pfn)
  98{
  99	return !!(pfn & KVM_PFN_ERR_MASK);
 100}
 101
 102/*
 103 * error_noslot pfns indicate that the gfn can not be
 104 * translated to pfn - it is not in slot or failed to
 105 * translate it to pfn.
 106 */
 107static inline bool is_error_noslot_pfn(kvm_pfn_t pfn)
 108{
 109	return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
 110}
 111
 112/* noslot pfn indicates that the gfn is not in slot. */
 113static inline bool is_noslot_pfn(kvm_pfn_t pfn)
 114{
 115	return pfn == KVM_PFN_NOSLOT;
 116}
 117
 118/*
 119 * Architectures whose KVM_HVA_ERR_BAD is not PAGE_OFFSET (e.g. s390)
 120 * provide their own defines and their own kvm_is_error_hva().
 121 */
 122#ifndef KVM_HVA_ERR_BAD
 123
 124#define KVM_HVA_ERR_BAD		(PAGE_OFFSET)
 125#define KVM_HVA_ERR_RO_BAD	(PAGE_OFFSET + PAGE_SIZE)
 126
 127static inline bool kvm_is_error_hva(unsigned long addr)
 128{
 129	return addr >= PAGE_OFFSET;
 130}
 131
 132#endif
 133
 134#define KVM_ERR_PTR_BAD_PAGE	(ERR_PTR(-ENOENT))
 135
 136static inline bool is_error_page(struct page *page)
 137{
 138	return IS_ERR(page);
 139}
 140
 141#define KVM_REQUEST_MASK           GENMASK(7,0)
 142#define KVM_REQUEST_NO_WAKEUP      BIT(8)
 143#define KVM_REQUEST_WAIT           BIT(9)
 144/*
 145 * Architecture-independent vcpu->requests bit members
 146 * Bits 4-7 are reserved for more arch-independent bits.
 147 */
 148#define KVM_REQ_TLB_FLUSH         (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 149#define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 150#define KVM_REQ_PENDING_TIMER     2
 151#define KVM_REQ_UNHALT            3
 152#define KVM_REQUEST_ARCH_BASE     8
 153
/*
 * Build an architecture-specific request value: @nr is moved past the
 * arch-independent request numbers by adding KVM_REQUEST_ARCH_BASE and
 * is then ORed with @flags.  The BUILD_BUG_ON() rejects, at compile time,
 * any @nr that does not fit in the bits of vcpu->requests that remain
 * above KVM_REQUEST_ARCH_BASE.
 */
 154#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
 155	BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
 156	(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
 157})
/* Same as KVM_ARCH_REQ_FLAGS() with no flag bits set. */
 158#define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
 159
 160#define KVM_USERSPACE_IRQ_SOURCE_ID		0
 161#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
 162
 163extern struct kmem_cache *kvm_vcpu_cache;
 164
 165extern spinlock_t kvm_lock;
 166extern struct list_head vm_list;
 167
/*
 * Element type of kvm_io_bus::range[].  The fields mirror the
 * (addr, len, dev) arguments taken by kvm_io_bus_register_dev().
 */
 168struct kvm_io_range {
 169	gpa_t addr;			/* guest physical address of the range */
 170	int len;			/* presumably the range length in bytes - TODO confirm */
 171	struct kvm_io_device *dev;	/* device associated with this range */
 172};
 173
 174#define NR_IOBUS_DEVS 1000
 175
/*
 * The set of I/O ranges registered on one bus.  struct kvm keeps one
 * pointer to such a set per enum kvm_bus value (kvm->buses[]).
 */
 176struct kvm_io_bus {
 177	int dev_count;		/* presumably the number of valid range[] entries - TODO confirm */
 178	int ioeventfd_count;	/* NOTE(review): looks like a count of ioeventfd-backed entries - confirm */
 179	struct kvm_io_range range[];	/* flexible array member; storage comes from the allocation */
 180};
 181
 182enum kvm_bus {
 183	KVM_MMIO_BUS,
 184	KVM_PIO_BUS,
 185	KVM_VIRTIO_CCW_NOTIFY_BUS,
 186	KVM_FAST_MMIO_BUS,
 187	KVM_NR_BUSES
 188};
 189
 190int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 191		     int len, const void *val);
 192int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
 193			    gpa_t addr, int len, const void *val, long cookie);
 194int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 195		    int len, void *val);
 196int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 197			    int len, struct kvm_io_device *dev);
 198void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 199			       struct kvm_io_device *dev);
 200struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 201					 gpa_t addr);
 202
 203#ifdef CONFIG_KVM_ASYNC_PF
 204struct kvm_async_pf {
 205	struct work_struct work;
 206	struct list_head link;
 207	struct list_head queue;
 208	struct kvm_vcpu *vcpu;
 209	struct mm_struct *mm;
 210	gva_t gva;
 211	unsigned long addr;
 212	struct kvm_arch_async_pf arch;
 213	bool   wakeup_all;
 214};
 215
 216void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
 217void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
 218int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 219		       struct kvm_arch_async_pf *arch);
 220int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 221#endif
 222
 223enum {
 224	OUTSIDE_GUEST_MODE,
 225	IN_GUEST_MODE,
 226	EXITING_GUEST_MODE,
 227	READING_SHADOW_PAGE_TABLES,
 228};
 229
 230/*
 231 * Sometimes a large or cross-page mmio needs to be broken up into separate
 232 * exits for userspace servicing.
 233 */
 234struct kvm_mmio_fragment {
 235	gpa_t gpa;
 236	void *data;
 237	unsigned len;
 238};
 239
/*
 * Architecture-independent state of one virtual CPU.  Architecture
 * specific state lives in the embedded 'arch' member.
 */
 240struct kvm_vcpu {
 241	struct kvm *kvm;	/* VM this vCPU belongs to (see kvm_vcpu_get_idx()) */
 242#ifdef CONFIG_PREEMPT_NOTIFIERS
 243	struct preempt_notifier preempt_notifier;
 244#endif
 245	int cpu;
 246	int vcpu_id;		/* id matched by kvm_get_vcpu_by_id() */
 247	int srcu_idx;
 248	int mode;		/* OUTSIDE_GUEST_MODE, IN_GUEST_MODE, ...; updated with cmpxchg() */
 249	u64 requests;		/* request bits, see the KVM_REQ_* definitions */
 250	unsigned long guest_debug;
 251
 252	int pre_pcpu;
 253	struct list_head blocked_vcpu_list;
 254
 255	struct mutex mutex;
 256	struct kvm_run *run;
 257
 258	int guest_xcr0_loaded;
 259	struct swait_queue_head wq;	/* returned by kvm_arch_vcpu_wq() unless __KVM_HAVE_ARCH_WQP */
 260	struct pid __rcu *pid;
 261	int sigset_active;
 262	sigset_t sigset;
 263	struct kvm_vcpu_stat stat;
 264	unsigned int halt_poll_ns;
 265	bool valid_wakeup;
 266
 267#ifdef CONFIG_HAS_IOMEM
 268	int mmio_needed;
 269	int mmio_read_completed;
 270	int mmio_is_write;
 271	int mmio_cur_fragment;	/* presumably an index into mmio_fragments[] - TODO confirm */
 272	int mmio_nr_fragments;
 273	struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
 274#endif
 275
 276#ifdef CONFIG_KVM_ASYNC_PF
 277	struct {
 278		u32 queued;
 279		struct list_head queue;
 280		struct list_head done;
 281		spinlock_t lock;
 282	} async_pf;
 283#endif
 284
 285#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 286	/*
 287	 * Cpu relax intercept or pause loop exit optimization
 288	 * in_spin_loop: set when a vcpu does a pause loop exit
 289	 *  or cpu relax intercepted.
 290	 * dy_eligible: indicates whether vcpu is eligible for directed yield.
 291	 */
 292	struct {
 293		bool in_spin_loop;
 294		bool dy_eligible;
 295	} spin_loop;
 296#endif
 297	bool preempted;
 298	struct kvm_vcpu_arch arch;	/* architecture-specific vCPU state */
 299	struct dentry *debugfs_dentry;
 300};
 301
 302static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 303{
 304	/*
 305	 * The memory barrier ensures a previous write to vcpu->requests cannot
 306	 * be reordered with the read of vcpu->mode.  It pairs with the general
 307	 * memory barrier following the write of vcpu->mode in VCPU RUN.
 308	 */
 309	smp_mb__before_atomic();
 310	return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
 311}
 312
 313/*
 314 * Some of the bitops functions do not support too long bitmaps.
 315 * This number must be determined not to exceed such limits.
 316 */
 317#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
 318
/*
 * One guest memory slot.  search_memslots() treats a slot as covering
 * the gfn range [base_gfn, base_gfn + npages).
 */
 319struct kvm_memory_slot {
 320	gfn_t base_gfn;			/* first gfn covered by the slot */
 321	unsigned long npages;		/* size in pages; 0 terminates kvm_for_each_memslot() */
 322	unsigned long *dirty_bitmap;	/* kvm_second_dirty_bitmap() expects a second bitmap right behind it */
 323	struct kvm_arch_memory_slot arch;
 324	unsigned long userspace_addr;	/* presumably the hva backing base_gfn - TODO confirm */
 325	u32 flags;			/* NOTE(review): bits 16-31 look KVM-internal (KVM_MEMSLOT_INVALID) - confirm */
 326	short id;			/* slot id; id_to_memslot() warns when it differs from the lookup id */
 327};
 328
 329static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
 330{
 331	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 332}
 333
 334static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
 335{
 336	unsigned long len = kvm_dirty_bitmap_bytes(memslot);
 337
 338	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
 339}
 340
 341struct kvm_s390_adapter_int {
 342	u64 ind_addr;
 343	u64 summary_addr;
 344	u64 ind_offset;
 345	u32 summary_offset;
 346	u32 adapter_id;
 347};
 348
 349struct kvm_hv_sint {
 350	u32 vcpu;
 351	u32 sint;
 352};
 353
 354struct kvm_kernel_irq_routing_entry {
 355	u32 gsi;
 356	u32 type;
 357	int (*set)(struct kvm_kernel_irq_routing_entry *e,
 358		   struct kvm *kvm, int irq_source_id, int level,
 359		   bool line_status);
 360	union {
 361		struct {
 362			unsigned irqchip;
 363			unsigned pin;
 364		} irqchip;
 365		struct {
 366			u32 address_lo;
 367			u32 address_hi;
 368			u32 data;
 369			u32 flags;
 370			u32 devid;
 371		} msi;
 372		struct kvm_s390_adapter_int adapter;
 373		struct kvm_hv_sint hv_sint;
 374	};
 375	struct hlist_node link;
 376};
 377
 378#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 379struct kvm_irq_routing_table {
 380	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
 381	u32 nr_rt_entries;
 382	/*
 383	 * Array indexed by gsi. Each entry contains list of irq chips
 384	 * the gsi is connected to.
 385	 */
 386	struct hlist_head map[0];
 387};
 388#endif
 389
 390#ifndef KVM_PRIVATE_MEM_SLOTS
 391#define KVM_PRIVATE_MEM_SLOTS 0
 392#endif
 393
 394#ifndef KVM_MEM_SLOTS_NUM
 395#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 396#endif
 397
 398#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
 399static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
 400{
 401	return 0;
 402}
 403#endif
 404
 405/*
 406 * Note:
 407 * memslots are not sorted by id anymore, please use id_to_memslot()
 408 * to get the memslot by its id.
 409 */
 410struct kvm_memslots {
 411	u64 generation;		/* bit 63 is KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS during an update */
 412	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
 413	/* The mapping table from slot id to the index in memslots[]. */
 414	short id_to_index[KVM_MEM_SLOTS_NUM];
 415	atomic_t lru_slot;	/* memslots[] index that search_memslots() tries first */
 416	int used_slots;		/* initial upper bound ('end') of the search in search_memslots() */
 417};
 418
/*
 * Architecture-independent state of one virtual machine.  Architecture
 * specific state lives in the embedded 'arch' member.
 */
 419struct kvm {
 420	spinlock_t mmu_lock;
 421	struct mutex slots_lock;	/* holding it satisfies the checks in __kvm_memslots() and kvm_get_bus() */
 422	struct mm_struct *mm; /* userspace tied to this vm */
 423	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];	/* one per address space; read via __kvm_memslots() */
 424	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];	/* read via kvm_get_vcpu() */
 425
 426	/*
 427	 * created_vcpus is protected by kvm->lock, and is incremented
 428	 * at the beginning of KVM_CREATE_VCPU.  online_vcpus is only
 429	 * incremented after storing the kvm_vcpu pointer in vcpus,
 430	 * and is accessed atomically.
 431	 */
 432	atomic_t online_vcpus;
 433	int created_vcpus;
 434	int last_boosted_vcpu;
 435	struct list_head vm_list;	/* presumably the link into the global vm_list - TODO confirm */
 436	struct mutex lock;
 437	struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];	/* one per enum kvm_bus; read via kvm_get_bus() */
 438#ifdef CONFIG_HAVE_KVM_EVENTFD
 439	struct {
 440		spinlock_t        lock;
 441		struct list_head  items;
 442		struct list_head  resampler_list;
 443		struct mutex      resampler_lock;
 444	} irqfds;
 445	struct list_head ioeventfds;
 446#endif
 447	struct kvm_vm_stat stat;
 448	struct kvm_arch arch;	/* architecture-specific VM state */
 449	refcount_t users_count;	/* a zero count also satisfies the SRCU dereference checks */
 450#ifdef CONFIG_KVM_MMIO
 451	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 452	spinlock_t ring_lock;
 453	struct list_head coalesced_zones;
 454#endif
 455
 456	struct mutex irq_lock;
 457#ifdef CONFIG_HAVE_KVM_IRQCHIP
 458	/*
 459	 * Update side is protected by irq_lock.
 460	 */
 461	struct kvm_irq_routing_table __rcu *irq_routing;
 462#endif
 463#ifdef CONFIG_HAVE_KVM_IRQFD
 464	struct hlist_head irq_ack_notifier_list;
 465#endif
 466
 467#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 468	struct mmu_notifier mmu_notifier;
 469	unsigned long mmu_notifier_seq;
 470	long mmu_notifier_count;
 471#endif
 472	long tlbs_dirty;
 473	struct list_head devices;
 474	bool manual_dirty_log_protect;
 475	struct dentry *debugfs_dentry;
 476	struct kvm_stat_data **debugfs_stat_data;
 477	struct srcu_struct srcu;	/* SRCU domain used when dereferencing memslots[] and buses[] */
 478	struct srcu_struct irq_srcu;
 479	pid_t userspace_pid;
 480};
 481
/*
 * Logging helpers: each forwards to the matching pr_*() function and
 * prefixes the message with "kvm [<id>]: ", where <id> is taken from the
 * current task.
 */
 482#define kvm_err(fmt, ...) \
 483	pr_err("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
 484#define kvm_info(fmt, ...) \
 485	pr_info("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
 486#define kvm_debug(fmt, ...) \
 487	pr_debug("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
 488#define kvm_debug_ratelimited(fmt, ...) \
 489	pr_debug_ratelimited("kvm [%i]: " fmt, task_pid_nr(current), \
 490			     ## __VA_ARGS__)
/*
 * NOTE(review): unlike the helpers above, this one prints
 * task_tgid_nr(current) rather than task_pid_nr(current) - confirm that
 * the difference is intended.
 */
 491#define kvm_pr_unimpl(fmt, ...) \
 492	pr_err_ratelimited("kvm [%i]: " fmt, \
 493			   task_tgid_nr(current), ## __VA_ARGS__)
 494
 495/* The guest did something we don't support. */
 496#define vcpu_unimpl(vcpu, fmt, ...)					\
 497	kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt,			\
 498			(vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__)
 499
 500#define vcpu_debug(vcpu, fmt, ...)					\
 501	kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 502#define vcpu_debug_ratelimited(vcpu, fmt, ...)				\
 503	kvm_debug_ratelimited("vcpu%i " fmt, (vcpu)->vcpu_id,           \
 504			      ## __VA_ARGS__)
 505#define vcpu_err(vcpu, fmt, ...)					\
 506	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 507
 508static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
 509{
 510	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
 511				      lockdep_is_held(&kvm->slots_lock) ||
 512				      !refcount_read(&kvm->users_count));
 513}
 514
 515static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 516{
 517	int num_vcpus = atomic_read(&kvm->online_vcpus);
 518	i = array_index_nospec(i, num_vcpus);
 519
 520	/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu.  */
 521	smp_rmb();
 522	return kvm->vcpus[i];
 523}
 524
 525#define kvm_for_each_vcpu(idx, vcpup, kvm) \
 526	for (idx = 0; \
 527	     idx < atomic_read(&kvm->online_vcpus) && \
 528	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
 529	     idx++)
 530
 531static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 532{
 533	struct kvm_vcpu *vcpu = NULL;
 534	int i;
 535
 536	if (id < 0)
 537		return NULL;
 538	if (id < KVM_MAX_VCPUS)
 539		vcpu = kvm_get_vcpu(kvm, id);
 540	if (vcpu && vcpu->vcpu_id == id)
 541		return vcpu;
 542	kvm_for_each_vcpu(i, vcpu, kvm)
 543		if (vcpu->vcpu_id == id)
 544			return vcpu;
 545	return NULL;
 546}
 547
 548static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
 549{
 550	struct kvm_vcpu *tmp;
 551	int idx;
 552
 553	kvm_for_each_vcpu(idx, tmp, vcpu->kvm)
 554		if (tmp == vcpu)
 555			return idx;
 556	BUG();
 557}
 558
 559#define kvm_for_each_memslot(memslot, slots)	\
 560	for (memslot = &slots->memslots[0];	\
 561	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
 562		memslot++)
 563
 564int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 565void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 566
 567void vcpu_load(struct kvm_vcpu *vcpu);
 568void vcpu_put(struct kvm_vcpu *vcpu);
 569
 570#ifdef __KVM_HAVE_IOAPIC
 571void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
 572void kvm_arch_post_irq_routing_update(struct kvm *kvm);
 573#else
 574static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
 575{
 576}
 577static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
 578{
 579}
 580#endif
 581
 582#ifdef CONFIG_HAVE_KVM_IRQFD
 583int kvm_irqfd_init(void);
 584void kvm_irqfd_exit(void);
 585#else
 586static inline int kvm_irqfd_init(void)
 587{
 588	return 0;
 589}
 590
 591static inline void kvm_irqfd_exit(void)
 592{
 593}
 594#endif
 595int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 596		  struct module *module);
 597void kvm_exit(void);
 598
 599void kvm_get_kvm(struct kvm *kvm);
 600void kvm_put_kvm(struct kvm *kvm);
 601
 602static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 603{
 604	as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
 605	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
 606			lockdep_is_held(&kvm->slots_lock) ||
 607			!refcount_read(&kvm->users_count));
 608}
 609
 610static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 611{
 612	return __kvm_memslots(kvm, 0);
 613}
 614
 615static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
 616{
 617	int as_id = kvm_arch_vcpu_memslots_id(vcpu);
 618
 619	return __kvm_memslots(vcpu->kvm, as_id);
 620}
 621
 622static inline struct kvm_memory_slot *
 623id_to_memslot(struct kvm_memslots *slots, int id)
 624{
 625	int index = slots->id_to_index[id];
 626	struct kvm_memory_slot *slot;
 627
 628	slot = &slots->memslots[index];
 629
 630	WARN_ON(slot->id != id);
 631	return slot;
 632}
 633
 634/*
 635 * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
 636 * - create a new memory slot
 637 * - delete an existing memory slot
 638 * - modify an existing memory slot
 639 *   -- move it in the guest physical memory space
 640 *   -- just change its flags
 641 *
 642 * Since flags can be changed by some of these operations, the following
 643 * differentiation is the best we can do for __kvm_set_memory_region():
 644 */
 645enum kvm_mr_change {
 646	KVM_MR_CREATE,
 647	KVM_MR_DELETE,
 648	KVM_MR_MOVE,
 649	KVM_MR_FLAGS_ONLY,
 650};
 651
 652int kvm_set_memory_region(struct kvm *kvm,
 653			  const struct kvm_userspace_memory_region *mem);
 654int __kvm_set_memory_region(struct kvm *kvm,
 655			    const struct kvm_userspace_memory_region *mem);
 656void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 657			   struct kvm_memory_slot *dont);
 658int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 659			    unsigned long npages);
 660void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
 661int kvm_arch_prepare_memory_region(struct kvm *kvm,
 662				struct kvm_memory_slot *memslot,
 663				const struct kvm_userspace_memory_region *mem,
 664				enum kvm_mr_change change);
 665void kvm_arch_commit_memory_region(struct kvm *kvm,
 666				const struct kvm_userspace_memory_region *mem,
 667				const struct kvm_memory_slot *old,
 668				const struct kvm_memory_slot *new,
 669				enum kvm_mr_change change);
 670bool kvm_largepages_enabled(void);
 671void kvm_disable_largepages(void);
 672/* flush all memory translations */
 673void kvm_arch_flush_shadow_all(struct kvm *kvm);
 674/* flush memory translations pointing to 'slot' */
 675void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 676				   struct kvm_memory_slot *slot);
 677
 678int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
 679			    struct page **pages, int nr_pages);
 680
 681struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 682unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 683unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 684unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 685unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
 686				      bool *writable);
 687void kvm_release_page_clean(struct page *page);
 688void kvm_release_page_dirty(struct page *page);
 689void kvm_set_page_accessed(struct page *page);
 690
 691kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
 692kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 693kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 694		      bool *writable);
 695kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 696kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
 697kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
 698			       bool atomic, bool *async, bool write_fault,
 699			       bool *writable);
 700
 701void kvm_release_pfn_clean(kvm_pfn_t pfn);
 702void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 703void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 704void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 705void kvm_get_pfn(kvm_pfn_t pfn);
 706
 707int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 708			int len);
 709int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 710			  unsigned long len);
 711int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
 712int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 713			   void *data, unsigned long len);
 714int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
 715			 int offset, int len);
 716int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
 717		    unsigned long len);
 718int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 719			   void *data, unsigned long len);
 720int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 721				  void *data, unsigned int offset,
 722				  unsigned long len);
 723int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 724			      gpa_t gpa, unsigned long len);
 725int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 726int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 727struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 728bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 729unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
 730void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 731
 732struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
 733struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
 734kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
 735kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 736struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
 737unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 738unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
 739int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
 740			     int len);
 741int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
 742			       unsigned long len);
 743int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
 744			unsigned long len);
 745int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data,
 746			      int offset, int len);
 747int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
 748			 unsigned long len);
 749void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 750
 751void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 752void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
 753
 754void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 755void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 756void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 757bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
 758void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 759int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 760void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
 761
 762void kvm_flush_remote_tlbs(struct kvm *kvm);
 763void kvm_reload_remote_mmus(struct kvm *kvm);
 764
 765bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
 766				 unsigned long *vcpu_bitmap, cpumask_var_t tmp);
 767bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 768
 769long kvm_arch_dev_ioctl(struct file *filp,
 770			unsigned int ioctl, unsigned long arg);
 771long kvm_arch_vcpu_ioctl(struct file *filp,
 772			 unsigned int ioctl, unsigned long arg);
 773vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
 774
 775int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 776
 777int kvm_get_dirty_log(struct kvm *kvm,
 778			struct kvm_dirty_log *log, int *is_dirty);
 779
 780int kvm_get_dirty_log_protect(struct kvm *kvm,
 781			      struct kvm_dirty_log *log, bool *flush);
 782int kvm_clear_dirty_log_protect(struct kvm *kvm,
 783				struct kvm_clear_dirty_log *log, bool *flush);
 784
 785void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 786					struct kvm_memory_slot *slot,
 787					gfn_t gfn_offset,
 788					unsigned long mask);
 789
 790int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 791				struct kvm_dirty_log *log);
 792int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
 793				  struct kvm_clear_dirty_log *log);
 794
 795int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 796			bool line_status);
 797int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 798			    struct kvm_enable_cap *cap);
 799long kvm_arch_vm_ioctl(struct file *filp,
 800		       unsigned int ioctl, unsigned long arg);
 801
 802int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
 803int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
 804
 805int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 806				    struct kvm_translation *tr);
 807
 808int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 809int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 810int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 811				  struct kvm_sregs *sregs);
 812int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 813				  struct kvm_sregs *sregs);
 814int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 815				    struct kvm_mp_state *mp_state);
 816int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 817				    struct kvm_mp_state *mp_state);
 818int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 819					struct kvm_guest_debug *dbg);
 820int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 821
 822int kvm_arch_init(void *opaque);
 823void kvm_arch_exit(void);
 824
 825int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
 826void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 827
 828void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
 829
 830void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
 831void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 832void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
 833struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
 834int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 835void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 836void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 837
 838bool kvm_arch_has_vcpu_debugfs(void);
 839int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu);
 840
 841int kvm_arch_hardware_enable(void);
 842void kvm_arch_hardware_disable(void);
 843int kvm_arch_hardware_setup(void);
 844void kvm_arch_hardware_unsetup(void);
 845void kvm_arch_check_processor_compat(void *rtn);
 846int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 847bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
 848int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 849
 850#ifndef __KVM_HAVE_ARCH_VM_ALLOC
 851/*
 852 * All architectures that want to use vzalloc currently also
 853 * need their own kvm_arch_alloc_vm implementation.
 854 */
 855static inline struct kvm *kvm_arch_alloc_vm(void)
 856{
 857	return kzalloc(sizeof(struct kvm), GFP_KERNEL);
 858}
 859
 860static inline void kvm_arch_free_vm(struct kvm *kvm)
 861{
 862	kfree(kvm);
 863}
 864#endif
 865
 866#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
 867static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 868{
 869	return -ENOTSUPP;
 870}
 871#endif
 872
 873#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
 874void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
 875void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
 876bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
 877#else
 878static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
 879{
 880}
 881
 882static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
 883{
 884}
 885
 886static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 887{
 888	return false;
 889}
 890#endif
 891#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE
 892void kvm_arch_start_assignment(struct kvm *kvm);
 893void kvm_arch_end_assignment(struct kvm *kvm);
 894bool kvm_arch_has_assigned_device(struct kvm *kvm);
 895#else
 896static inline void kvm_arch_start_assignment(struct kvm *kvm)
 897{
 898}
 899
 900static inline void kvm_arch_end_assignment(struct kvm *kvm)
 901{
 902}
 903
 904static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
 905{
 906	return false;
 907}
 908#endif
 909
 910static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 911{
 912#ifdef __KVM_HAVE_ARCH_WQP
 913	return vcpu->arch.wqp;
 914#else
 915	return &vcpu->wq;
 916#endif
 917}
 918
 919#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
 920/*
 921 * returns true if the virtual interrupt controller is initialized and
 922 * ready to accept virtual IRQ. On some architectures the virtual interrupt
 923 * controller is dynamically instantiated and this is not always true.
 924 */
 925bool kvm_arch_intc_initialized(struct kvm *kvm);
 926#else
 927static inline bool kvm_arch_intc_initialized(struct kvm *kvm)
 928{
 929	return true;
 930}
 931#endif
 932
 933int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 934void kvm_arch_destroy_vm(struct kvm *kvm);
 935void kvm_arch_sync_events(struct kvm *kvm);
 936
 937int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 938void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 939
 940bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
 941
/*
 * IRQ acknowledgement notifier, registered and unregistered through
 * kvm_register_irq_ack_notifier() / kvm_unregister_irq_ack_notifier().
 */
struct kvm_irq_ack_notifier {
	/* hlist linkage -- presumably on a per-VM notifier list; confirm */
	struct hlist_node link;
	/* presumably the GSI whose acknowledgement is of interest; confirm */
	unsigned gsi;
	/* callback; it is handed a pointer to this notifier */
	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
};
 947
 948int kvm_irq_map_gsi(struct kvm *kvm,
 949		    struct kvm_kernel_irq_routing_entry *entries, int gsi);
 950int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
 951
 952int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 953		bool line_status);
 954int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
 955		int irq_source_id, int level, bool line_status);
 956int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 957			       struct kvm *kvm, int irq_source_id,
 958			       int level, bool line_status);
 959bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
 960void kvm_notify_acked_gsi(struct kvm *kvm, int gsi);
 961void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 962void kvm_register_irq_ack_notifier(struct kvm *kvm,
 963				   struct kvm_irq_ack_notifier *kian);
 964void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 965				   struct kvm_irq_ack_notifier *kian);
 966int kvm_request_irq_source_id(struct kvm *kvm);
 967void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 968
 969/*
 970 * search_memslots() and __gfn_to_memslot() are here because they are
 971 * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
 972 * gfn_to_memslot() itself isn't here as an inline because that would
 973 * bloat other code too much.
 974 */
 975static inline struct kvm_memory_slot *
 976search_memslots(struct kvm_memslots *slots, gfn_t gfn)
 977{
 978	int start = 0, end = slots->used_slots;
 979	int slot = atomic_read(&slots->lru_slot);
 980	struct kvm_memory_slot *memslots = slots->memslots;
 981
 982	if (gfn >= memslots[slot].base_gfn &&
 983	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
 984		return &memslots[slot];
 985
 986	while (start < end) {
 987		slot = start + (end - start) / 2;
 988
 989		if (gfn >= memslots[slot].base_gfn)
 990			end = slot;
 991		else
 992			start = slot + 1;
 993	}
 994
 995	if (gfn >= memslots[start].base_gfn &&
 996	    gfn < memslots[start].base_gfn + memslots[start].npages) {
 997		atomic_set(&slots->lru_slot, start);
 998		return &memslots[start];
 999	}
1000
1001	return NULL;
1002}
1003
/* Thin wrapper: forwards @slots and @gfn unchanged to search_memslots(). */
static inline struct kvm_memory_slot *
__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
{
	return search_memslots(slots, gfn);
}
1009
1010static inline unsigned long
1011__gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
1012{
1013	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
1014}
1015
1016static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
1017{
1018	return gfn_to_memslot(kvm, gfn)->id;
1019}
1020
1021static inline gfn_t
1022hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
1023{
1024	gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;
1025
1026	return slot->base_gfn + gfn_offset;
1027}
1028
1029static inline gpa_t gfn_to_gpa(gfn_t gfn)
1030{
1031	return (gpa_t)gfn << PAGE_SHIFT;
1032}
1033
1034static inline gfn_t gpa_to_gfn(gpa_t gpa)
1035{
1036	return (gfn_t)(gpa >> PAGE_SHIFT);
1037}
1038
1039static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
1040{
1041	return (hpa_t)pfn << PAGE_SHIFT;
1042}
1043
1044static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu,
1045						gpa_t gpa)
1046{
1047	return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa));
1048}
1049
1050static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
1051{
1052	unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
1053
1054	return kvm_is_error_hva(hva);
1055}
1056
/*
 * Kind tag stored in struct kvm_stats_debugfs_item.
 * NOTE(review): presumably distinguishes per-VM from per-vCPU statistics;
 * confirm against the debugfs code.
 */
enum kvm_stat_kind {
	KVM_STAT_VM,
	KVM_STAT_VCPU,
};
1061
/*
 * Pairs a statistic offset with a VM.
 * NOTE(review): presumably @offset is a byte offset into a stats structure;
 * confirm against the users of this struct.
 */
struct kvm_stat_data {
	int offset;
	struct kvm *kvm;
};
1066
/*
 * One entry of the debugfs_entries[] table declared below: a name, an
 * offset and a kind tag.
 * NOTE(review): presumably @offset locates the counter inside the stats
 * structure selected by @kind; confirm against the debugfs code.
 */
struct kvm_stats_debugfs_item {
	const char *name;
	int offset;
	enum kvm_stat_kind kind;
};
1072extern struct kvm_stats_debugfs_item debugfs_entries[];
1073extern struct dentry *kvm_debugfs_dir;
1074
1075#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
1076static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
1077{
1078	if (unlikely(kvm->mmu_notifier_count))
1079		return 1;
1080	/*
1081	 * Ensure the read of mmu_notifier_count happens before the read
1082	 * of mmu_notifier_seq.  This interacts with the smp_wmb() in
1083	 * mmu_notifier_invalidate_range_end to make sure that the caller
1084	 * either sees the old (non-zero) value of mmu_notifier_count or
1085	 * the new (incremented) value of mmu_notifier_seq.
1086	 * PowerPC Book3s HV KVM calls this under a per-page lock
1087	 * rather than under kvm->mmu_lock, for scalability, so
1088	 * can't rely on kvm->mmu_lock to keep things ordered.
1089	 */
1090	smp_rmb();
1091	if (kvm->mmu_notifier_seq != mmu_seq)
1092		return 1;
1093	return 0;
1094}
1095#endif
1096
1097#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
1098
1099#define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */
1100
1101bool kvm_arch_can_set_irq_routing(struct kvm *kvm);
1102int kvm_set_irq_routing(struct kvm *kvm,
1103			const struct kvm_irq_routing_entry *entries,
1104			unsigned nr,
1105			unsigned flags);
1106int kvm_set_routing_entry(struct kvm *kvm,
1107			  struct kvm_kernel_irq_routing_entry *e,
1108			  const struct kvm_irq_routing_entry *ue);
1109void kvm_free_irq_routing(struct kvm *kvm);
1110
1111#else
1112
/* No-op fallback compiled when CONFIG_HAVE_KVM_IRQ_ROUTING is not set. */
static inline void kvm_free_irq_routing(struct kvm *kvm) {}
1114
1115#endif
1116
1117int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
1118
1119#ifdef CONFIG_HAVE_KVM_EVENTFD
1120
1121void kvm_eventfd_init(struct kvm *kvm);
1122int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
1123
1124#ifdef CONFIG_HAVE_KVM_IRQFD
1125int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
1126void kvm_irqfd_release(struct kvm *kvm);
1127void kvm_irq_routing_update(struct kvm *);
1128#else
/*
 * Fallback compiled when CONFIG_HAVE_KVM_IRQFD is not set: always fails
 * with -EINVAL.
 */
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	return -EINVAL;
}

/* No-op fallback compiled when CONFIG_HAVE_KVM_IRQFD is not set. */
static inline void kvm_irqfd_release(struct kvm *kvm) {}
1135#endif
1136
1137#else
1138
/*
 * Fallbacks compiled when CONFIG_HAVE_KVM_EVENTFD is not set.
 * Initialisation and release do nothing.
 */
static inline void kvm_eventfd_init(struct kvm *kvm) {}

/* Fallback: always fails with -EINVAL. */
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	return -EINVAL;
}

static inline void kvm_irqfd_release(struct kvm *kvm) {}

#ifdef CONFIG_HAVE_KVM_IRQCHIP
/* No-op; only provided when CONFIG_HAVE_KVM_IRQCHIP is set. */
static inline void kvm_irq_routing_update(struct kvm *kvm)
{
}
#endif

/* Fallback: always fails with -ENOSYS. */
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	return -ENOSYS;
}
1158
1159#endif /* CONFIG_HAVE_KVM_EVENTFD */
1160
1161void kvm_arch_irq_routing_update(struct kvm *kvm);
1162
1163static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
1164{
1165	/*
1166	 * Ensure the rest of the request is published to kvm_check_request's
1167	 * caller.  Paired with the smp_mb__after_atomic in kvm_check_request.
1168	 */
1169	smp_wmb();
1170	set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
1171}
1172
1173static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
1174{
1175	return READ_ONCE(vcpu->requests);
1176}
1177
1178static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu)
1179{
1180	return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
1181}
1182
1183static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu)
1184{
1185	clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
1186}
1187
1188static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
1189{
1190	if (kvm_test_request(req, vcpu)) {
1191		kvm_clear_request(req, vcpu);
1192
1193		/*
1194		 * Ensure the rest of the request is visible to kvm_check_request's
1195		 * caller.  Paired with the smp_wmb in kvm_make_request.
1196		 */
1197		smp_mb__after_atomic();
1198		return true;
1199	} else {
1200		return false;
1201	}
1202}
1203
1204extern bool kvm_rebooting;
1205
1206extern unsigned int halt_poll_ns;
1207extern unsigned int halt_poll_ns_grow;
1208extern unsigned int halt_poll_ns_grow_start;
1209extern unsigned int halt_poll_ns_shrink;
1210
/* An instance of a KVM device, driven through a struct kvm_device_ops. */
struct kvm_device {
	/* operations table for this device */
	struct kvm_device_ops *ops;
	/* presumably the VM this device belongs to; confirm */
	struct kvm *kvm;
	/* opaque pointer; presumably owned by the ops implementation */
	void *private;
	/* list linkage -- presumably on a per-VM device list; confirm */
	struct list_head vm_node;
};
1217
/*
 * Operations table for a KVM device type, registered through
 * kvm_register_device_ops().
 *
 * create, destroy, and name are mandatory
 */
struct kvm_device_ops {
	const char *name;

	/*
	 * create is called holding kvm->lock and any operations not suitable
	 * to do while holding the lock should be deferred to init (see
	 * below).
	 */
	int (*create)(struct kvm_device *dev, u32 type);

	/*
	 * init is called after create if create is successful and is called
	 * outside of holding kvm->lock.
	 */
	void (*init)(struct kvm_device *dev);

	/*
	 * Destroy is responsible for freeing dev.
	 *
	 * Destroy may be called before or after destructors are called
	 * on emulated I/O regions, depending on whether a reference is
	 * held by a vcpu or other kvm component that gets destroyed
	 * after the emulated I/O.
	 */
	void (*destroy)(struct kvm_device *dev);

	/*
	 * Attribute accessors and a device ioctl hook.  None of these is
	 * listed as mandatory above.
	 */
	int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
		      unsigned long arg);
};
1251
1252void kvm_device_get(struct kvm_device *dev);
1253void kvm_device_put(struct kvm_device *dev);
1254struct kvm_device *kvm_device_from_filp(struct file *filp);
1255int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
1256void kvm_unregister_device_ops(u32 type);
1257
1258extern struct kvm_device_ops kvm_mpic_ops;
1259extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
1260extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
1261
1262#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
1263
/* Store @val in vcpu->spin_loop.in_spin_loop. */
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
	vcpu->spin_loop.in_spin_loop = val;
}
/* Store @val in vcpu->spin_loop.dy_eligible. */
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
	vcpu->spin_loop.dy_eligible = val;
}
1272
1273#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
1274
/*
 * No-op fallbacks compiled when CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT is
 * not set; @val is ignored.
 */
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
}

static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}
1282#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
1283
1284#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
1285bool kvm_arch_has_irq_bypass(void);
1286int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
1287			   struct irq_bypass_producer *);
1288void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
1289			   struct irq_bypass_producer *);
1290void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
1291void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
1292int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
1293				  uint32_t guest_irq, bool set);
1294#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
1295
1296#ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS
/*
 * If we wake up during the poll time, was it a successful poll?
 * Returns the vcpu->valid_wakeup flag.
 */
static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu)
{
	return vcpu->valid_wakeup;
}
1302
1303#else
/*
 * Fallback compiled when CONFIG_HAVE_KVM_INVALID_WAKEUPS is not set:
 * every wakeup is reported as valid.
 */
static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu)
{
	return true;
}
1308#endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */
1309
1310#ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL
1311long kvm_arch_vcpu_async_ioctl(struct file *filp,
1312			       unsigned int ioctl, unsigned long arg);
1313#else
/*
 * Fallback compiled when CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL is not set:
 * ignores its arguments and always returns -ENOIOCTLCMD.
 */
static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
					     unsigned int ioctl,
					     unsigned long arg)
{
	return -ENOIOCTLCMD;
}
1320#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
1321
1322int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
1323		unsigned long start, unsigned long end, bool blockable);
1324
1325#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
1326int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
1327#else
/*
 * Fallback compiled when CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE is not set:
 * always returns 0.
 */
static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
	return 0;
}
1332#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
1333
1334#endif