at master 11 kB view raw
1/* SPDX-License-Identifier: GPL-2.0-only */ 2/* 3 * Copyright (c) 2023, Microsoft Corporation. 4 */ 5 6#ifndef _MSHV_ROOT_H_ 7#define _MSHV_ROOT_H_ 8 9#include <linux/spinlock.h> 10#include <linux/mutex.h> 11#include <linux/semaphore.h> 12#include <linux/sched.h> 13#include <linux/srcu.h> 14#include <linux/wait.h> 15#include <linux/hashtable.h> 16#include <linux/dev_printk.h> 17#include <linux/build_bug.h> 18#include <linux/mmu_notifier.h> 19#include <uapi/linux/mshv.h> 20 21/* 22 * Hypervisor must be between these version numbers (inclusive) 23 * to guarantee compatibility 24 */ 25#define MSHV_HV_MIN_VERSION (27744) 26#define MSHV_HV_MAX_VERSION (27751) 27 28static_assert(HV_HYP_PAGE_SIZE == MSHV_HV_PAGE_SIZE); 29 30#define MSHV_MAX_VPS 256 31 32#define MSHV_PARTITIONS_HASH_BITS 9 33 34#define MSHV_PIN_PAGES_BATCH_SIZE (0x10000000ULL / HV_HYP_PAGE_SIZE) 35 36struct mshv_vp { 37 u32 vp_index; 38 struct mshv_partition *vp_partition; 39 struct mutex vp_mutex; 40 struct hv_vp_register_page *vp_register_page; 41 struct hv_message *vp_intercept_msg_page; 42 void *vp_ghcb_page; 43 struct hv_stats_page *vp_stats_pages[2]; 44 struct { 45 atomic64_t vp_signaled_count; 46 struct { 47 u64 intercept_suspend: 1; 48 u64 root_sched_blocked: 1; /* root scheduler only */ 49 u64 root_sched_dispatched: 1; /* root scheduler only */ 50 u64 reserved: 61; 51 } flags; 52 unsigned int kicked_by_hv; 53 wait_queue_head_t vp_suspend_queue; 54 } run; 55}; 56 57#define vp_fmt(fmt) "p%lluvp%u: " fmt 58#define vp_devprintk(level, v, fmt, ...) \ 59do { \ 60 const struct mshv_vp *__vp = (v); \ 61 const struct mshv_partition *__pt = __vp->vp_partition; \ 62 dev_##level(__pt->pt_module_dev, vp_fmt(fmt), __pt->pt_id, \ 63 __vp->vp_index, ##__VA_ARGS__); \ 64} while (0) 65#define vp_emerg(v, fmt, ...) vp_devprintk(emerg, v, fmt, ##__VA_ARGS__) 66#define vp_crit(v, fmt, ...) vp_devprintk(crit, v, fmt, ##__VA_ARGS__) 67#define vp_alert(v, fmt, ...) vp_devprintk(alert, v, fmt, ##__VA_ARGS__) 68#define vp_err(v, fmt, ...) vp_devprintk(err, v, fmt, ##__VA_ARGS__) 69#define vp_warn(v, fmt, ...) vp_devprintk(warn, v, fmt, ##__VA_ARGS__) 70#define vp_notice(v, fmt, ...) vp_devprintk(notice, v, fmt, ##__VA_ARGS__) 71#define vp_info(v, fmt, ...) vp_devprintk(info, v, fmt, ##__VA_ARGS__) 72#define vp_dbg(v, fmt, ...) vp_devprintk(dbg, v, fmt, ##__VA_ARGS__) 73 74enum mshv_region_type { 75 MSHV_REGION_TYPE_MEM_PINNED, 76 MSHV_REGION_TYPE_MEM_MOVABLE, 77 MSHV_REGION_TYPE_MMIO 78}; 79 80struct mshv_mem_region { 81 struct hlist_node hnode; 82 struct kref refcount; 83 u64 nr_pages; 84 u64 start_gfn; 85 u64 start_uaddr; 86 u32 hv_map_flags; 87 struct mshv_partition *partition; 88 enum mshv_region_type type; 89 struct mmu_interval_notifier mni; 90 struct mutex mutex; /* protects region pages remapping */ 91 struct page *pages[]; 92}; 93 94struct mshv_irq_ack_notifier { 95 struct hlist_node link; 96 unsigned int irq_ack_gsi; 97 void (*irq_acked)(struct mshv_irq_ack_notifier *mian); 98}; 99 100struct mshv_partition { 101 struct device *pt_module_dev; 102 103 struct hlist_node pt_hnode; 104 u64 pt_id; 105 refcount_t pt_ref_count; 106 struct mutex pt_mutex; 107 108 spinlock_t pt_mem_regions_lock; 109 struct hlist_head pt_mem_regions; // not ordered 110 111 u32 pt_vp_count; 112 struct mshv_vp *pt_vp_array[MSHV_MAX_VPS]; 113 114 struct mutex pt_irq_lock; 115 struct srcu_struct pt_irq_srcu; 116 struct hlist_head irq_ack_notifier_list; 117 118 struct hlist_head pt_devices; 119 120 /* 121 * MSHV does not support more than one async hypercall in flight 122 * for a single partition. Thus, it is okay to define per partition 123 * async hypercall status. 124 */ 125 struct completion async_hypercall; 126 u64 async_hypercall_status; 127 128 spinlock_t pt_irqfds_lock; 129 struct hlist_head pt_irqfds_list; 130 struct mutex irqfds_resampler_lock; 131 struct hlist_head irqfds_resampler_list; 132 133 struct hlist_head ioeventfds_list; 134 135 struct mshv_girq_routing_table __rcu *pt_girq_tbl; 136 u64 isolation_type; 137 bool import_completed; 138 bool pt_initialized; 139}; 140 141#define pt_fmt(fmt) "p%llu: " fmt 142#define pt_devprintk(level, p, fmt, ...) \ 143do { \ 144 const struct mshv_partition *__pt = (p); \ 145 dev_##level(__pt->pt_module_dev, pt_fmt(fmt), __pt->pt_id, \ 146 ##__VA_ARGS__); \ 147} while (0) 148#define pt_emerg(p, fmt, ...) pt_devprintk(emerg, p, fmt, ##__VA_ARGS__) 149#define pt_crit(p, fmt, ...) pt_devprintk(crit, p, fmt, ##__VA_ARGS__) 150#define pt_alert(p, fmt, ...) pt_devprintk(alert, p, fmt, ##__VA_ARGS__) 151#define pt_err(p, fmt, ...) pt_devprintk(err, p, fmt, ##__VA_ARGS__) 152#define pt_warn(p, fmt, ...) pt_devprintk(warn, p, fmt, ##__VA_ARGS__) 153#define pt_notice(p, fmt, ...) pt_devprintk(notice, p, fmt, ##__VA_ARGS__) 154#define pt_info(p, fmt, ...) pt_devprintk(info, p, fmt, ##__VA_ARGS__) 155#define pt_dbg(p, fmt, ...) pt_devprintk(dbg, p, fmt, ##__VA_ARGS__) 156 157struct mshv_lapic_irq { 158 u32 lapic_vector; 159 u64 lapic_apic_id; 160 union hv_interrupt_control lapic_control; 161}; 162 163#define MSHV_MAX_GUEST_IRQS 4096 164 165/* representation of one guest irq entry, either msi or legacy */ 166struct mshv_guest_irq_ent { 167 u32 girq_entry_valid; /* vfio looks at this */ 168 u32 guest_irq_num; /* a unique number for each irq */ 169 u32 girq_addr_lo; /* guest irq msi address info */ 170 u32 girq_addr_hi; 171 u32 girq_irq_data; /* idt vector in some cases */ 172}; 173 174struct mshv_girq_routing_table { 175 u32 num_rt_entries; 176 struct mshv_guest_irq_ent mshv_girq_info_tbl[]; 177}; 178 179struct hv_synic_pages { 180 struct hv_message_page *hyp_synic_message_page; 181 struct hv_synic_event_flags_page *synic_event_flags_page; 182 struct hv_synic_event_ring_page *synic_event_ring_page; 183}; 184 185struct mshv_root { 186 struct hv_synic_pages __percpu *synic_pages; 187 spinlock_t pt_ht_lock; 188 DECLARE_HASHTABLE(pt_htable, MSHV_PARTITIONS_HASH_BITS); 189 struct hv_partition_property_vmm_capabilities vmm_caps; 190}; 191 192/* 193 * Callback for doorbell events. 194 * NOTE: This is called in interrupt context. Callback 195 * should defer slow and sleeping logic to later. 196 */ 197typedef void (*doorbell_cb_t) (int doorbell_id, void *); 198 199/* 200 * port table information 201 */ 202struct port_table_info { 203 struct rcu_head portbl_rcu; 204 enum hv_port_type hv_port_type; 205 union { 206 struct { 207 u64 reserved[2]; 208 } hv_port_message; 209 struct { 210 u64 reserved[2]; 211 } hv_port_event; 212 struct { 213 u64 reserved[2]; 214 } hv_port_monitor; 215 struct { 216 doorbell_cb_t doorbell_cb; 217 void *data; 218 } hv_port_doorbell; 219 }; 220}; 221 222int mshv_update_routing_table(struct mshv_partition *partition, 223 const struct mshv_user_irq_entry *entries, 224 unsigned int numents); 225void mshv_free_routing_table(struct mshv_partition *partition); 226 227struct mshv_guest_irq_ent mshv_ret_girq_entry(struct mshv_partition *partition, 228 u32 irq_num); 229 230void mshv_copy_girq_info(struct mshv_guest_irq_ent *src_irq, 231 struct mshv_lapic_irq *dest_irq); 232 233void mshv_irqfd_routing_update(struct mshv_partition *partition); 234 235void mshv_port_table_fini(void); 236int mshv_portid_alloc(struct port_table_info *info); 237int mshv_portid_lookup(int port_id, struct port_table_info *info); 238void mshv_portid_free(int port_id); 239 240int mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb, 241 void *data, u64 gpa, u64 val, u64 flags); 242void mshv_unregister_doorbell(u64 partition_id, int doorbell_portid); 243 244void mshv_isr(void); 245int mshv_synic_init(unsigned int cpu); 246int mshv_synic_cleanup(unsigned int cpu); 247 248static inline bool mshv_partition_encrypted(struct mshv_partition *partition) 249{ 250 return partition->isolation_type == HV_PARTITION_ISOLATION_TYPE_SNP; 251} 252 253struct mshv_partition *mshv_partition_get(struct mshv_partition *partition); 254void mshv_partition_put(struct mshv_partition *partition); 255struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU); 256 257/* hypercalls */ 258 259int hv_call_withdraw_memory(u64 count, int node, u64 partition_id); 260int hv_call_create_partition(u64 flags, 261 struct hv_partition_creation_properties creation_properties, 262 union hv_partition_isolation_properties isolation_properties, 263 u64 *partition_id); 264int hv_call_initialize_partition(u64 partition_id); 265int hv_call_finalize_partition(u64 partition_id); 266int hv_call_delete_partition(u64 partition_id); 267int hv_call_map_mmio_pages(u64 partition_id, u64 gfn, u64 mmio_spa, u64 numpgs); 268int hv_call_map_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count, 269 u32 flags, struct page **pages); 270int hv_call_unmap_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count, 271 u32 flags); 272int hv_call_delete_vp(u64 partition_id, u32 vp_index); 273int hv_call_assert_virtual_interrupt(u64 partition_id, u32 vector, 274 u64 dest_addr, 275 union hv_interrupt_control control); 276int hv_call_clear_virtual_interrupt(u64 partition_id); 277int hv_call_get_gpa_access_states(u64 partition_id, u32 count, u64 gpa_base_pfn, 278 union hv_gpa_page_access_state_flags state_flags, 279 int *written_total, 280 union hv_gpa_page_access_state *states); 281int hv_call_get_vp_state(u32 vp_index, u64 partition_id, 282 struct hv_vp_state_data state_data, 283 /* Choose between pages and ret_output */ 284 u64 page_count, struct page **pages, 285 union hv_output_get_vp_state *ret_output); 286int hv_call_set_vp_state(u32 vp_index, u64 partition_id, 287 /* Choose between pages and bytes */ 288 struct hv_vp_state_data state_data, u64 page_count, 289 struct page **pages, u32 num_bytes, u8 *bytes); 290int hv_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type, 291 union hv_input_vtl input_vtl, 292 struct page **state_page); 293int hv_unmap_vp_state_page(u64 partition_id, u32 vp_index, u32 type, 294 struct page *state_page, 295 union hv_input_vtl input_vtl); 296int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id, 297 u64 connection_partition_id, struct hv_port_info *port_info, 298 u8 port_vtl, u8 min_connection_vtl, int node); 299int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id); 300int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id, 301 u64 connection_partition_id, 302 union hv_connection_id connection_id, 303 struct hv_connection_info *connection_info, 304 u8 connection_vtl, int node); 305int hv_call_disconnect_port(u64 connection_partition_id, 306 union hv_connection_id connection_id); 307int hv_call_notify_port_ring_empty(u32 sint_index); 308int hv_map_stats_page(enum hv_stats_object_type type, 309 const union hv_stats_object_identity *identity, 310 void **addr); 311int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr, 312 const union hv_stats_object_identity *identity); 313int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages, 314 u64 page_struct_count, u32 host_access, 315 u32 flags, u8 acquire); 316int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg, 317 void *property_value, size_t property_value_sz); 318 319extern struct mshv_root mshv_root; 320extern enum hv_scheduler_type hv_scheduler_type; 321extern u8 * __percpu *hv_synic_eventring_tail; 322 323struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages, 324 u64 uaddr, u32 flags); 325int mshv_region_share(struct mshv_mem_region *region); 326int mshv_region_unshare(struct mshv_mem_region *region); 327int mshv_region_map(struct mshv_mem_region *region); 328void mshv_region_invalidate(struct mshv_mem_region *region); 329int mshv_region_pin(struct mshv_mem_region *region); 330void mshv_region_put(struct mshv_mem_region *region); 331int mshv_region_get(struct mshv_mem_region *region); 332bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn); 333void mshv_region_movable_fini(struct mshv_mem_region *region); 334bool mshv_region_movable_init(struct mshv_mem_region *region); 335 336#endif /* _MSHV_ROOT_H_ */