Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.7-rc4 721 lines 18 kB view raw
1/* 2 * 3 * Copyright (c) 2011, Microsoft Corporation. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 * 14 * You should have received a copy of the GNU General Public License along with 15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 16 * Place - Suite 330, Boston, MA 02111-1307 USA. 17 * 18 * Authors: 19 * Haiyang Zhang <haiyangz@microsoft.com> 20 * Hank Janssen <hjanssen@microsoft.com> 21 * K. Y. Srinivasan <kys@microsoft.com> 22 * 23 */ 24 25#ifndef _HYPERV_VMBUS_H 26#define _HYPERV_VMBUS_H 27 28#include <linux/list.h> 29#include <asm/sync_bitops.h> 30#include <linux/atomic.h> 31#include <linux/hyperv.h> 32 33/* 34 * Timeout for services such as KVP and fcopy. 35 */ 36#define HV_UTIL_TIMEOUT 30 37 38/* 39 * Timeout for guest-host handshake for services. 40 */ 41#define HV_UTIL_NEGO_TIMEOUT 60 42 43/* 44 * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent 45 * is set by CPUID(HVCPUID_VERSION_FEATURES). 46 */ 47enum hv_cpuid_function { 48 HVCPUID_VERSION_FEATURES = 0x00000001, 49 HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, 50 HVCPUID_INTERFACE = 0x40000001, 51 52 /* 53 * The remaining functions depend on the value of 54 * HVCPUID_INTERFACE 55 */ 56 HVCPUID_VERSION = 0x40000002, 57 HVCPUID_FEATURES = 0x40000003, 58 HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, 59 HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, 60}; 61 62#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 63 64#define HV_X64_MSR_CRASH_P0 0x40000100 65#define HV_X64_MSR_CRASH_P1 0x40000101 66#define HV_X64_MSR_CRASH_P2 0x40000102 67#define HV_X64_MSR_CRASH_P3 0x40000103 68#define HV_X64_MSR_CRASH_P4 0x40000104 69#define HV_X64_MSR_CRASH_CTL 0x40000105 70 71#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) 72 73/* Define version of the synthetic interrupt controller. */ 74#define HV_SYNIC_VERSION (1) 75 76#define HV_ANY_VP (0xFFFFFFFF) 77 78/* Define synthetic interrupt controller flag constants. */ 79#define HV_EVENT_FLAGS_COUNT (256 * 8) 80#define HV_EVENT_FLAGS_BYTE_COUNT (256) 81#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32)) 82 83/* Define invalid partition identifier. */ 84#define HV_PARTITION_ID_INVALID ((u64)0x0) 85 86/* Define port type. */ 87enum hv_port_type { 88 HVPORT_MSG = 1, 89 HVPORT_EVENT = 2, 90 HVPORT_MONITOR = 3 91}; 92 93/* Define port information structure. */ 94struct hv_port_info { 95 enum hv_port_type port_type; 96 u32 padding; 97 union { 98 struct { 99 u32 target_sint; 100 u32 target_vp; 101 u64 rsvdz; 102 } message_port_info; 103 struct { 104 u32 target_sint; 105 u32 target_vp; 106 u16 base_flag_number; 107 u16 flag_count; 108 u32 rsvdz; 109 } event_port_info; 110 struct { 111 u64 monitor_address; 112 u64 rsvdz; 113 } monitor_port_info; 114 }; 115}; 116 117struct hv_connection_info { 118 enum hv_port_type port_type; 119 u32 padding; 120 union { 121 struct { 122 u64 rsvdz; 123 } message_connection_info; 124 struct { 125 u64 rsvdz; 126 } event_connection_info; 127 struct { 128 u64 monitor_address; 129 } monitor_connection_info; 130 }; 131}; 132 133/* 134 * Timer configuration register. 135 */ 136union hv_timer_config { 137 u64 as_uint64; 138 struct { 139 u64 enable:1; 140 u64 periodic:1; 141 u64 lazy:1; 142 u64 auto_enable:1; 143 u64 reserved_z0:12; 144 u64 sintx:4; 145 u64 reserved_z1:44; 146 }; 147}; 148 149/* Define the number of message buffers associated with each port. */ 150#define HV_PORT_MESSAGE_BUFFER_COUNT (16) 151 152/* Define the synthetic interrupt controller event flags format. */ 153union hv_synic_event_flags { 154 u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT]; 155 u32 flags32[HV_EVENT_FLAGS_DWORD_COUNT]; 156}; 157 158/* Define the synthetic interrupt flags page layout. */ 159struct hv_synic_event_flags_page { 160 union hv_synic_event_flags sintevent_flags[HV_SYNIC_SINT_COUNT]; 161}; 162 163/* Define SynIC control register. */ 164union hv_synic_scontrol { 165 u64 as_uint64; 166 struct { 167 u64 enable:1; 168 u64 reserved:63; 169 }; 170}; 171 172/* Define synthetic interrupt source. */ 173union hv_synic_sint { 174 u64 as_uint64; 175 struct { 176 u64 vector:8; 177 u64 reserved1:8; 178 u64 masked:1; 179 u64 auto_eoi:1; 180 u64 reserved2:46; 181 }; 182}; 183 184/* Define the format of the SIMP register */ 185union hv_synic_simp { 186 u64 as_uint64; 187 struct { 188 u64 simp_enabled:1; 189 u64 preserved:11; 190 u64 base_simp_gpa:52; 191 }; 192}; 193 194/* Define the format of the SIEFP register */ 195union hv_synic_siefp { 196 u64 as_uint64; 197 struct { 198 u64 siefp_enabled:1; 199 u64 preserved:11; 200 u64 base_siefp_gpa:52; 201 }; 202}; 203 204/* Definitions for the monitored notification facility */ 205union hv_monitor_trigger_group { 206 u64 as_uint64; 207 struct { 208 u32 pending; 209 u32 armed; 210 }; 211}; 212 213struct hv_monitor_parameter { 214 union hv_connection_id connectionid; 215 u16 flagnumber; 216 u16 rsvdz; 217}; 218 219union hv_monitor_trigger_state { 220 u32 asu32; 221 222 struct { 223 u32 group_enable:4; 224 u32 rsvdz:28; 225 }; 226}; 227 228/* struct hv_monitor_page Layout */ 229/* ------------------------------------------------------ */ 230/* | 0 | TriggerState (4 bytes) | Rsvd1 (4 bytes) | */ 231/* | 8 | TriggerGroup[0] | */ 232/* | 10 | TriggerGroup[1] | */ 233/* | 18 | TriggerGroup[2] | */ 234/* | 20 | TriggerGroup[3] | */ 235/* | 28 | Rsvd2[0] | */ 236/* | 30 | Rsvd2[1] | */ 237/* | 38 | Rsvd2[2] | */ 238/* | 40 | NextCheckTime[0][0] | NextCheckTime[0][1] | */ 239/* | ... | */ 240/* | 240 | Latency[0][0..3] | */ 241/* | 340 | Rsvz3[0] | */ 242/* | 440 | Parameter[0][0] | */ 243/* | 448 | Parameter[0][1] | */ 244/* | ... | */ 245/* | 840 | Rsvd4[0] | */ 246/* ------------------------------------------------------ */ 247struct hv_monitor_page { 248 union hv_monitor_trigger_state trigger_state; 249 u32 rsvdz1; 250 251 union hv_monitor_trigger_group trigger_group[4]; 252 u64 rsvdz2[3]; 253 254 s32 next_checktime[4][32]; 255 256 u16 latency[4][32]; 257 u64 rsvdz3[32]; 258 259 struct hv_monitor_parameter parameter[4][32]; 260 261 u8 rsvdz4[1984]; 262}; 263 264/* Definition of the hv_post_message hypercall input structure. */ 265struct hv_input_post_message { 266 union hv_connection_id connectionid; 267 u32 reserved; 268 u32 message_type; 269 u32 payload_size; 270 u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; 271}; 272 273/* 274 * Versioning definitions used for guests reporting themselves to the 275 * hypervisor, and visa versa. 276 */ 277 278/* Version info reported by guest OS's */ 279enum hv_guest_os_vendor { 280 HVGUESTOS_VENDOR_MICROSOFT = 0x0001 281}; 282 283enum hv_guest_os_microsoft_ids { 284 HVGUESTOS_MICROSOFT_UNDEFINED = 0x00, 285 HVGUESTOS_MICROSOFT_MSDOS = 0x01, 286 HVGUESTOS_MICROSOFT_WINDOWS3X = 0x02, 287 HVGUESTOS_MICROSOFT_WINDOWS9X = 0x03, 288 HVGUESTOS_MICROSOFT_WINDOWSNT = 0x04, 289 HVGUESTOS_MICROSOFT_WINDOWSCE = 0x05 290}; 291 292/* 293 * Declare the MSR used to identify the guest OS. 294 */ 295#define HV_X64_MSR_GUEST_OS_ID 0x40000000 296 297union hv_x64_msr_guest_os_id_contents { 298 u64 as_uint64; 299 struct { 300 u64 build_number:16; 301 u64 service_version:8; /* Service Pack, etc. */ 302 u64 minor_version:8; 303 u64 major_version:8; 304 u64 os_id:8; /* enum hv_guest_os_microsoft_ids (if Vendor=MS) */ 305 u64 vendor_id:16; /* enum hv_guest_os_vendor */ 306 }; 307}; 308 309/* 310 * Declare the MSR used to setup pages used to communicate with the hypervisor. 311 */ 312#define HV_X64_MSR_HYPERCALL 0x40000001 313 314union hv_x64_msr_hypercall_contents { 315 u64 as_uint64; 316 struct { 317 u64 enable:1; 318 u64 reserved:11; 319 u64 guest_physical_address:52; 320 }; 321}; 322 323 324enum { 325 VMBUS_MESSAGE_CONNECTION_ID = 1, 326 VMBUS_MESSAGE_PORT_ID = 1, 327 VMBUS_EVENT_CONNECTION_ID = 2, 328 VMBUS_EVENT_PORT_ID = 2, 329 VMBUS_MONITOR_CONNECTION_ID = 3, 330 VMBUS_MONITOR_PORT_ID = 3, 331 VMBUS_MESSAGE_SINT = 2, 332}; 333 334/* #defines */ 335 336#define HV_PRESENT_BIT 0x80000000 337 338/* 339 * The guest OS needs to register the guest ID with the hypervisor. 340 * The guest ID is a 64 bit entity and the structure of this ID is 341 * specified in the Hyper-V specification: 342 * 343 * http://msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx 344 * 345 * While the current guideline does not specify how Linux guest ID(s) 346 * need to be generated, our plan is to publish the guidelines for 347 * Linux and other guest operating systems that currently are hosted 348 * on Hyper-V. The implementation here conforms to this yet 349 * unpublished guidelines. 350 * 351 * 352 * Bit(s) 353 * 63 - Indicates if the OS is Open Source or not; 1 is Open Source 354 * 62:56 - Os Type; Linux is 0x100 355 * 55:48 - Distro specific identification 356 * 47:16 - Linux kernel version number 357 * 15:0 - Distro specific identification 358 * 359 * 360 */ 361 362#define HV_LINUX_VENDOR_ID 0x8100 363 364/* 365 * Generate the guest ID based on the guideline described above. 366 */ 367 368static inline __u64 generate_guest_id(__u8 d_info1, __u32 kernel_version, 369 __u16 d_info2) 370{ 371 __u64 guest_id = 0; 372 373 guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); 374 guest_id |= (((__u64)(d_info1)) << 48); 375 guest_id |= (((__u64)(kernel_version)) << 16); 376 guest_id |= ((__u64)(d_info2)); 377 378 return guest_id; 379} 380 381 382#define HV_CPU_POWER_MANAGEMENT (1 << 0) 383#define HV_RECOMMENDATIONS_MAX 4 384 385#define HV_X64_MAX 5 386#define HV_CAPS_MAX 8 387 388 389#define HV_HYPERCALL_PARAM_ALIGN sizeof(u64) 390 391 392/* Service definitions */ 393 394#define HV_SERVICE_PARENT_PORT (0) 395#define HV_SERVICE_PARENT_CONNECTION (0) 396 397#define HV_SERVICE_CONNECT_RESPONSE_SUCCESS (0) 398#define HV_SERVICE_CONNECT_RESPONSE_INVALID_PARAMETER (1) 399#define HV_SERVICE_CONNECT_RESPONSE_UNKNOWN_SERVICE (2) 400#define HV_SERVICE_CONNECT_RESPONSE_CONNECTION_REJECTED (3) 401 402#define HV_SERVICE_CONNECT_REQUEST_MESSAGE_ID (1) 403#define HV_SERVICE_CONNECT_RESPONSE_MESSAGE_ID (2) 404#define HV_SERVICE_DISCONNECT_REQUEST_MESSAGE_ID (3) 405#define HV_SERVICE_DISCONNECT_RESPONSE_MESSAGE_ID (4) 406#define HV_SERVICE_MAX_MESSAGE_ID (4) 407 408#define HV_SERVICE_PROTOCOL_VERSION (0x0010) 409#define HV_CONNECT_PAYLOAD_BYTE_COUNT 64 410 411/* #define VMBUS_REVISION_NUMBER 6 */ 412 413/* Our local vmbus's port and connection id. Anything >0 is fine */ 414/* #define VMBUS_PORT_ID 11 */ 415 416/* 628180B8-308D-4c5e-B7DB-1BEB62E62EF4 */ 417static const uuid_le VMBUS_SERVICE_ID = { 418 .b = { 419 0xb8, 0x80, 0x81, 0x62, 0x8d, 0x30, 0x5e, 0x4c, 420 0xb7, 0xdb, 0x1b, 0xeb, 0x62, 0xe6, 0x2e, 0xf4 421 }, 422}; 423 424 425 426struct hv_context { 427 /* We only support running on top of Hyper-V 428 * So at this point this really can only contain the Hyper-V ID 429 */ 430 u64 guestid; 431 432 void *hypercall_page; 433 void *tsc_page; 434 435 bool synic_initialized; 436 437 void *synic_message_page[NR_CPUS]; 438 void *synic_event_page[NR_CPUS]; 439 /* 440 * Hypervisor's notion of virtual processor ID is different from 441 * Linux' notion of CPU ID. This information can only be retrieved 442 * in the context of the calling CPU. Setup a map for easy access 443 * to this information: 444 * 445 * vp_index[a] is the Hyper-V's processor ID corresponding to 446 * Linux cpuid 'a'. 447 */ 448 u32 vp_index[NR_CPUS]; 449 /* 450 * Starting with win8, we can take channel interrupts on any CPU; 451 * we will manage the tasklet that handles events messages on a per CPU 452 * basis. 453 */ 454 struct tasklet_struct *event_dpc[NR_CPUS]; 455 struct tasklet_struct *msg_dpc[NR_CPUS]; 456 /* 457 * To optimize the mapping of relid to channel, maintain 458 * per-cpu list of the channels based on their CPU affinity. 459 */ 460 struct list_head percpu_list[NR_CPUS]; 461 /* 462 * buffer to post messages to the host. 463 */ 464 void *post_msg_page[NR_CPUS]; 465 /* 466 * Support PV clockevent device. 467 */ 468 struct clock_event_device *clk_evt[NR_CPUS]; 469 /* 470 * To manage allocations in a NUMA node. 471 * Array indexed by numa node ID. 472 */ 473 struct cpumask *hv_numa_map; 474}; 475 476extern struct hv_context hv_context; 477 478struct ms_hyperv_tsc_page { 479 volatile u32 tsc_sequence; 480 u32 reserved1; 481 volatile u64 tsc_scale; 482 volatile s64 tsc_offset; 483 u64 reserved2[509]; 484}; 485 486struct hv_ring_buffer_debug_info { 487 u32 current_interrupt_mask; 488 u32 current_read_index; 489 u32 current_write_index; 490 u32 bytes_avail_toread; 491 u32 bytes_avail_towrite; 492}; 493 494/* Hv Interface */ 495 496extern int hv_init(void); 497 498extern void hv_cleanup(void); 499 500extern int hv_post_message(union hv_connection_id connection_id, 501 enum hv_message_type message_type, 502 void *payload, size_t payload_size); 503 504extern int hv_synic_alloc(void); 505 506extern void hv_synic_free(void); 507 508extern void hv_synic_init(void *irqarg); 509 510extern void hv_synic_cleanup(void *arg); 511 512extern void hv_synic_clockevents_cleanup(void); 513 514/* 515 * Host version information. 516 */ 517extern unsigned int host_info_eax; 518extern unsigned int host_info_ebx; 519extern unsigned int host_info_ecx; 520extern unsigned int host_info_edx; 521 522/* Interface */ 523 524 525int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer, 526 u32 buflen); 527 528void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); 529 530int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, 531 struct kvec *kv_list, 532 u32 kv_count, bool *signal, bool lock); 533 534int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, 535 void *buffer, u32 buflen, u32 *buffer_actual_len, 536 u64 *requestid, bool *signal, bool raw); 537 538void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, 539 struct hv_ring_buffer_debug_info *debug_info); 540 541void hv_begin_read(struct hv_ring_buffer_info *rbi); 542 543u32 hv_end_read(struct hv_ring_buffer_info *rbi); 544 545/* 546 * Maximum channels is determined by the size of the interrupt page 547 * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt 548 * and the other is receive endpoint interrupt 549 */ 550#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */ 551 552/* The value here must be in multiple of 32 */ 553/* TODO: Need to make this configurable */ 554#define MAX_NUM_CHANNELS_SUPPORTED 256 555 556 557enum vmbus_connect_state { 558 DISCONNECTED, 559 CONNECTING, 560 CONNECTED, 561 DISCONNECTING 562}; 563 564#define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT 565 566struct vmbus_connection { 567 enum vmbus_connect_state conn_state; 568 569 atomic_t next_gpadl_handle; 570 571 struct completion unload_event; 572 /* 573 * Represents channel interrupts. Each bit position represents a 574 * channel. When a channel sends an interrupt via VMBUS, it finds its 575 * bit in the sendInterruptPage, set it and calls Hv to generate a port 576 * event. The other end receives the port event and parse the 577 * recvInterruptPage to see which bit is set 578 */ 579 void *int_page; 580 void *send_int_page; 581 void *recv_int_page; 582 583 /* 584 * 2 pages - 1st page for parent->child notification and 2nd 585 * is child->parent notification 586 */ 587 struct hv_monitor_page *monitor_pages[2]; 588 struct list_head chn_msg_list; 589 spinlock_t channelmsg_lock; 590 591 /* List of channels */ 592 struct list_head chn_list; 593 struct mutex channel_mutex; 594 595 struct workqueue_struct *work_queue; 596}; 597 598 599struct vmbus_msginfo { 600 /* Bookkeeping stuff */ 601 struct list_head msglist_entry; 602 603 /* The message itself */ 604 unsigned char msg[0]; 605}; 606 607 608extern struct vmbus_connection vmbus_connection; 609 610enum vmbus_message_handler_type { 611 /* The related handler can sleep. */ 612 VMHT_BLOCKING = 0, 613 614 /* The related handler must NOT sleep. */ 615 VMHT_NON_BLOCKING = 1, 616}; 617 618struct vmbus_channel_message_table_entry { 619 enum vmbus_channel_message_type message_type; 620 enum vmbus_message_handler_type handler_type; 621 void (*message_handler)(struct vmbus_channel_message_header *msg); 622}; 623 624extern struct vmbus_channel_message_table_entry 625 channel_message_table[CHANNELMSG_COUNT]; 626 627/* Free the message slot and signal end-of-message if required */ 628static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) 629{ 630 /* 631 * On crash we're reading some other CPU's message page and we need 632 * to be careful: this other CPU may already had cleared the header 633 * and the host may already had delivered some other message there. 634 * In case we blindly write msg->header.message_type we're going 635 * to lose it. We can still lose a message of the same type but 636 * we count on the fact that there can only be one 637 * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages 638 * on crash. 639 */ 640 if (cmpxchg(&msg->header.message_type, old_msg_type, 641 HVMSG_NONE) != old_msg_type) 642 return; 643 644 /* 645 * Make sure the write to MessageType (ie set to 646 * HVMSG_NONE) happens before we read the 647 * MessagePending and EOMing. Otherwise, the EOMing 648 * will not deliver any more messages since there is 649 * no empty slot 650 */ 651 mb(); 652 653 if (msg->header.message_flags.msg_pending) { 654 /* 655 * This will cause message queue rescan to 656 * possibly deliver another msg from the 657 * hypervisor 658 */ 659 wrmsrl(HV_X64_MSR_EOM, 0); 660 } 661} 662 663/* General vmbus interface */ 664 665struct hv_device *vmbus_device_create(const uuid_le *type, 666 const uuid_le *instance, 667 struct vmbus_channel *channel); 668 669int vmbus_device_register(struct hv_device *child_device_obj); 670void vmbus_device_unregister(struct hv_device *device_obj); 671 672/* static void */ 673/* VmbusChildDeviceDestroy( */ 674/* struct hv_device *); */ 675 676struct vmbus_channel *relid2channel(u32 relid); 677 678void vmbus_free_channels(void); 679 680/* Connection interface */ 681 682int vmbus_connect(void); 683void vmbus_disconnect(void); 684 685int vmbus_post_msg(void *buffer, size_t buflen); 686 687void vmbus_on_event(unsigned long data); 688void vmbus_on_msg_dpc(unsigned long data); 689 690int hv_kvp_init(struct hv_util_service *); 691void hv_kvp_deinit(void); 692void hv_kvp_onchannelcallback(void *); 693 694int hv_vss_init(struct hv_util_service *); 695void hv_vss_deinit(void); 696void hv_vss_onchannelcallback(void *); 697 698int hv_fcopy_init(struct hv_util_service *); 699void hv_fcopy_deinit(void); 700void hv_fcopy_onchannelcallback(void *); 701void vmbus_initiate_unload(bool crash); 702 703static inline void hv_poll_channel(struct vmbus_channel *channel, 704 void (*cb)(void *)) 705{ 706 if (!channel) 707 return; 708 709 smp_call_function_single(channel->target_cpu, cb, channel, true); 710} 711 712enum hvutil_device_state { 713 HVUTIL_DEVICE_INIT = 0, /* driver is loaded, waiting for userspace */ 714 HVUTIL_READY, /* userspace is registered */ 715 HVUTIL_HOSTMSG_RECEIVED, /* message from the host was received */ 716 HVUTIL_USERSPACE_REQ, /* request to userspace was sent */ 717 HVUTIL_USERSPACE_RECV, /* reply from userspace was received */ 718 HVUTIL_DEVICE_DYING, /* driver unload is in progress */ 719}; 720 721#endif /* _HYPERV_VMBUS_H */