Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

at v2.6.27-rc7 · 1637 lines · 44 kB

#ifndef __ASM_PARAVIRT_H
#define __ASM_PARAVIRT_H
/* Various instructions on x86 need to be replaced for
 * para-virtualization: those hooks are defined here. */

#ifdef CONFIG_PARAVIRT
#include <asm/page.h>
#include <asm/asm.h>

/* Bitmask of what can be clobbered: usually at least eax. */
#define CLBR_NONE	0
#define CLBR_EAX	(1 << 0)
#define CLBR_ECX	(1 << 1)
#define CLBR_EDX	(1 << 2)

#ifdef CONFIG_X86_64
#define CLBR_RSI	(1 << 3)
#define CLBR_RDI	(1 << 4)
#define CLBR_R8		(1 << 5)
#define CLBR_R9		(1 << 6)
#define CLBR_R10	(1 << 7)
#define CLBR_R11	(1 << 8)
#define CLBR_ANY	((1 << 9) - 1)
#include <asm/desc_defs.h>
#else
/* CLBR_ANY should match all the registers the platform has; for i386,
 * the three registers above are all of them. */
#define CLBR_ANY	((1 << 3) - 1)
#endif /* X86_64 */

#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>
#include <asm/kmap_types.h>
#include <asm/desc_defs.h>

struct page;
struct thread_struct;
struct desc_ptr;
struct tss_struct;
struct mm_struct;
struct desc_struct;

/* general info */
struct pv_info {
	unsigned int kernel_rpl;
	int shared_kernel_pmd;
	int paravirt_enabled;
	const char *name;
};

struct pv_init_ops {
	/*
	 * Patch may replace one of the defined code sequences with
	 * arbitrary code, subject to the same register constraints.
	 * This generally means the code is not free to clobber any
	 * registers other than EAX.  The patch function should return
	 * the number of bytes of code generated, as we nop-pad the
	 * rest in generic code.
	 */
	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
			  unsigned long addr, unsigned len);

	/* Basic arch-specific setup */
	void (*arch_setup)(void);
	char *(*memory_setup)(void);
	void (*post_allocator_init)(void);

	/* Print a banner to identify the environment */
	void (*banner)(void);
};


struct pv_lazy_ops {
	/* Set deferred update mode, used for batching operations. */
	void (*enter)(void);
	void (*leave)(void);
};

struct pv_time_ops {
	void (*time_init)(void);

	/* Get and set time of day */
	unsigned long (*get_wallclock)(void);
	int (*set_wallclock)(unsigned long);

	unsigned long long (*sched_clock)(void);
	unsigned long (*get_tsc_khz)(void);
};

struct pv_cpu_ops {
	/* hooks for various privileged instructions */
	unsigned long (*get_debugreg)(int regno);
	void (*set_debugreg)(int regno, unsigned long value);

	void (*clts)(void);

	unsigned long (*read_cr0)(void);
	void (*write_cr0)(unsigned long);

	unsigned long (*read_cr4_safe)(void);
	unsigned long (*read_cr4)(void);
	void (*write_cr4)(unsigned long);

#ifdef CONFIG_X86_64
	unsigned long (*read_cr8)(void);
	void (*write_cr8)(unsigned long);
#endif

	/* Segment descriptor handling */
	void (*load_tr_desc)(void);
	void (*load_gdt)(const struct desc_ptr *);
	void (*load_idt)(const struct desc_ptr *);
	void (*store_gdt)(struct desc_ptr *);
	void (*store_idt)(struct desc_ptr *);
	void (*set_ldt)(const void *desc, unsigned entries);
	unsigned long (*store_tr)(void);
	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
#ifdef CONFIG_X86_64
	void (*load_gs_index)(unsigned int idx);
#endif
	void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
				const void *desc);
	void (*write_gdt_entry)(struct desc_struct *,
				int entrynum, const void *desc, int size);
	void (*write_idt_entry)(gate_desc *,
				int entrynum, const gate_desc *gate);
	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);

	void (*set_iopl_mask)(unsigned mask);

	void (*wbinvd)(void);
	void (*io_delay)(void);

	/* cpuid emulation, mostly so that caps bits can be disabled */
	void (*cpuid)(unsigned int *eax, unsigned int *ebx,
		      unsigned int *ecx, unsigned int *edx);

	/* MSR, PMC and TSC operations.
	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
	u64 (*read_msr)(unsigned int msr, int *err);
	int (*write_msr)(unsigned int msr, unsigned low, unsigned high);

	u64 (*read_tsc)(void);
	u64 (*read_pmc)(int counter);
	unsigned long long (*read_tscp)(unsigned int *aux);

	/*
	 * Atomically enable interrupts and return to userspace.  This
	 * is only ever used to return to 32-bit processes; in a
	 * 64-bit kernel, it's used for 32-on-64 compat processes, but
	 * never native 64-bit processes.  (Jump, not call.)
	 */
	void (*irq_enable_sysexit)(void);

	/*
	 * Switch to usermode gs and return to 64-bit usermode using
	 * sysret.  Only used in 64-bit kernels to return to 64-bit
	 * processes.  Usermode register state, including %rsp, must
	 * already be restored.
	 */
	void (*usergs_sysret64)(void);

	/*
	 * Switch to usermode gs and return to 32-bit usermode using
	 * sysret.  Used to return to 32-on-64 compat processes.
	 * Other usermode register state, including %esp, must already
	 * be restored.
	 */
	void (*usergs_sysret32)(void);

	/* Normal iret.  Jump to this with the standard iret stack
	   frame set up. */
	void (*iret)(void);

	void (*swapgs)(void);

	struct pv_lazy_ops lazy_mode;
};

struct pv_irq_ops {
	void (*init_IRQ)(void);

	/*
	 * Get/set interrupt state.  save_fl and restore_fl are only
	 * expected to use X86_EFLAGS_IF; all other bits
	 * returned from save_fl are undefined, and may be ignored by
	 * restore_fl.
	 */
	unsigned long (*save_fl)(void);
	void (*restore_fl)(unsigned long);
	void (*irq_disable)(void);
	void (*irq_enable)(void);
	void (*safe_halt)(void);
	void (*halt)(void);

#ifdef CONFIG_X86_64
	void (*adjust_exception_frame)(void);
#endif
};

struct pv_apic_ops {
#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * Direct APIC operations, principally for VMI.  Ideally
	 * these shouldn't be in this interface.
	 */
	void (*apic_write)(unsigned long reg, u32 v);
	u32 (*apic_read)(unsigned long reg);
	void (*setup_boot_clock)(void);
	void (*setup_secondary_clock)(void);

	void (*startup_ipi_hook)(int phys_apicid,
				 unsigned long start_eip,
				 unsigned long start_esp);
#endif
};

struct pv_mmu_ops {
	/*
	 * Called before/after init_mm pagetable setup. setup_start
	 * may reset %cr3, and may pre-install parts of the pagetable;
	 * pagetable setup is expected to preserve any existing
	 * mapping.
	 */
	void (*pagetable_setup_start)(pgd_t *pgd_base);
	void (*pagetable_setup_done)(pgd_t *pgd_base);

	unsigned long (*read_cr2)(void);
	void (*write_cr2)(unsigned long);

	unsigned long (*read_cr3)(void);
	void (*write_cr3)(unsigned long);

	/*
	 * Hooks for intercepting the creation/use/destruction of an
	 * mm_struct.
	 */
	void (*activate_mm)(struct mm_struct *prev,
			    struct mm_struct *next);
	void (*dup_mmap)(struct mm_struct *oldmm,
			 struct mm_struct *mm);
	void (*exit_mmap)(struct mm_struct *mm);


	/* TLB operations */
	void (*flush_tlb_user)(void);
	void (*flush_tlb_kernel)(void);
	void (*flush_tlb_single)(unsigned long addr);
	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
				 unsigned long va);

	/* Hooks for allocating and freeing a pagetable top-level */
	int  (*pgd_alloc)(struct mm_struct *mm);
	void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);

	/*
	 * Hooks for allocating/releasing pagetable pages when they're
	 * attached to a pagetable
	 */
	void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
	void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
	void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
	void (*alloc_pud)(struct mm_struct *mm, u32 pfn);
	void (*release_pte)(u32 pfn);
	void (*release_pmd)(u32 pfn);
	void (*release_pud)(u32 pfn);

	/* Pagetable manipulation functions */
	void (*set_pte)(pte_t *ptep, pte_t pteval);
	void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pteval);
	void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
	void (*pte_update)(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep);
	void (*pte_update_defer)(struct mm_struct *mm,
				 unsigned long addr, pte_t *ptep);

	pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
					pte_t *ptep);
	void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
					pte_t *ptep, pte_t pte);

	pteval_t (*pte_val)(pte_t);
	pteval_t (*pte_flags)(pte_t);
	pte_t (*make_pte)(pteval_t pte);

	pgdval_t (*pgd_val)(pgd_t);
	pgd_t (*make_pgd)(pgdval_t pgd);

#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
	void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
	void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
	void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
			  pte_t *ptep);
	void (*pmd_clear)(pmd_t *pmdp);

#endif	/* CONFIG_X86_PAE */

	void (*set_pud)(pud_t *pudp, pud_t pudval);

	pmdval_t (*pmd_val)(pmd_t);
	pmd_t (*make_pmd)(pmdval_t pmd);

#if PAGETABLE_LEVELS == 4
	pudval_t (*pud_val)(pud_t);
	pud_t (*make_pud)(pudval_t pud);

	void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval);
#endif	/* PAGETABLE_LEVELS == 4 */
#endif	/* PAGETABLE_LEVELS >= 3 */

#ifdef CONFIG_HIGHPTE
	void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
#endif

	struct pv_lazy_ops lazy_mode;

	/* dom0 ops */

	/* Sometimes the physical address is a pfn, and sometimes it's
	   an mfn.  We can tell which is which from the index. */
	void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
			   unsigned long phys, pgprot_t flags);
};

struct raw_spinlock;
struct pv_lock_ops {
	int (*spin_is_locked)(struct raw_spinlock *lock);
	int (*spin_is_contended)(struct raw_spinlock *lock);
	void (*spin_lock)(struct raw_spinlock *lock);
	int (*spin_trylock)(struct raw_spinlock *lock);
	void (*spin_unlock)(struct raw_spinlock *lock);
};

/* This contains all the paravirt structures: we get a convenient
 * number for each function from its offset into the structure,
 * which we use to indicate what to patch. */
struct paravirt_patch_template {
	struct pv_init_ops pv_init_ops;
	struct pv_time_ops pv_time_ops;
	struct pv_cpu_ops pv_cpu_ops;
	struct pv_irq_ops pv_irq_ops;
	struct pv_apic_ops pv_apic_ops;
	struct pv_mmu_ops pv_mmu_ops;
	struct pv_lock_ops pv_lock_ops;
};

extern struct pv_info pv_info;
extern struct pv_init_ops pv_init_ops;
extern struct pv_time_ops pv_time_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;
extern struct pv_lock_ops pv_lock_ops;

#define PARAVIRT_PATCH(x)					\
	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))

#define paravirt_type(op)				\
	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
	[paravirt_opptr] "m" (op)
#define paravirt_clobber(clobber)		\
	[paravirt_clobber] "i" (clobber)
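
/*
 * For illustration (editor's note, not part of the original header):
 * PARAVIRT_PATCH just turns a member designator into a word index into
 * the template.  For example, PARAVIRT_PATCH(pv_irq_ops.irq_disable)
 * evaluates to offsetof(struct paravirt_patch_template,
 * pv_irq_ops.irq_disable) divided by the pointer size, so the same
 * small integer identifies the hook both at the call site and in the
 * patching code, and can be converted back into a structure offset.
 */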

/*
 * Generate some code, and mark it as patchable by the
 * apply_paravirt() alternate instruction patcher.
 */
#define _paravirt_alt(insn_string, type, clobber)	\
	"771:\n\t" insn_string "\n" "772:\n"		\
	".pushsection .parainstructions,\"a\"\n"	\
	_ASM_ALIGN "\n"					\
	_ASM_PTR " 771b\n"				\
	" .byte " type "\n"				\
	" .byte 772b-771b\n"				\
	" .short " clobber "\n"				\
	".popsection\n"

/* Generate patchable code, with the default asm parameters. */
#define paravirt_alt(insn_string)					\
	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")

/* Simple instruction patching code. */
#define DEF_NATIVE(ops, name, code)					\
	extern const char start_##ops##_##name[], end_##ops##_##name[];	\
	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
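
/*
 * Illustrative usage (drawn from the native backend, not part of this
 * header): arch/x86/kernel/paravirt_patch_32.c defines the raw
 * instruction sequences to patch in for the native case, e.g.
 *
 *	DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
 *	DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 *
 * Each use emits start_pv_irq_ops_irq_disable/end_pv_irq_ops_irq_disable
 * style symbols around the literal instruction, which native_patch()
 * hands to paravirt_patch_insns() to copy over the indirect call site.
 */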

unsigned paravirt_patch_nop(void);
unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
			     const void *target, u16 tgt_clobbers,
			     unsigned long addr, u16 site_clobbers,
			     unsigned len);
unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
			    unsigned long addr, unsigned len);
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
				unsigned long addr, unsigned len);

unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
			      const char *start, const char *end);

unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
		      unsigned long addr, unsigned len);

int paravirt_disable_iospace(void);

/*
 * This generates an indirect call based on the operation type number.
 * The type number, computed in PARAVIRT_PATCH, is derived from the
 * offset into the paravirt_patch_template structure, and can therefore be
 * freely converted back into a structure offset.
 */
#define PARAVIRT_CALL	"call *%[paravirt_opptr];"
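
/*
 * For illustration (editor's sketch, not in the original): combined
 * with paravirt_type(pv_cpu_ops.read_cr0), the %[paravirt_opptr]
 * operand binds to the pv_cpu_ops.read_cr0 slot, so gcc emits
 * something like
 *
 *	call *pv_cpu_ops+<offset of read_cr0>
 *
 * and the surrounding paravirt_alt() labels record the site in
 * .parainstructions so it can later be patched in place.
 */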

/*
 * These macros are intended to wrap calls through one of the paravirt
 * ops structs, so that they can be later identified and patched at
 * runtime.
 *
 * Normally, a call to a pv_op function is a simple indirect call:
 * (pv_op_struct.operations)(args...).
 *
 * Unfortunately, this is a relatively slow operation for modern CPUs,
 * because it cannot necessarily determine what the destination
 * address is.  In this case, the address is a runtime constant, so at
 * the very least we can patch the call to be a simple direct call, or
 * ideally, patch an inline implementation into the callsite.  (Direct
 * calls are essentially free, because the call and return addresses
 * are completely predictable.)
 *
 * For i386, these macros rely on the standard gcc "regparm(3)" calling
 * convention, in which the first three arguments are placed in %eax,
 * %edx, %ecx (in that order), and the remaining arguments are placed
 * on the stack.  All caller-save registers (eax,edx,ecx) are expected
 * to be modified (either clobbered or used for return values).
 * x86_64, on the other hand, already specifies a register-based calling
 * convention, returning in %rax, with parameters passed in %rdi, %rsi,
 * %rdx, and %rcx.  Note that for this reason, x86_64 does not need any
 * special handling for dealing with 4 arguments, unlike i386.
 * However, x86_64 also has to clobber all caller-saved registers, which
 * unfortunately are quite a few more (%r8-%r11).
 *
 * The call instruction itself is marked by placing its start address
 * and size into the .parainstructions section, so that
 * apply_paravirt() in arch/x86/kernel/alternative.c can do the
 * appropriate patching under the control of the backend pv_init_ops
 * implementation.
 *
 * Unfortunately there's no way to get gcc to generate the args setup
 * for the call, and then allow the call itself to be generated by an
 * inline asm.  Because of this, we must do the complete arg setup and
 * return value handling from within these macros.  This is fairly
 * cumbersome.
 *
 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
 * It could be extended to more arguments, but there would be little
 * to be gained from that.  For each number of arguments, there are
 * the two VCALL and CALL variants for void and non-void functions.
 *
 * When there is a return value, the invoker of the macro must specify
 * the return type.  The macro then uses sizeof() on that type to
 * determine whether it's a 32- or 64-bit value, and places the return
 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
 * 64-bit).  On x86_64 machines, the value is simply returned in %rax
 * regardless of its size.
 *
 * On i386, 64-bit arguments are passed as a pair of adjacent 32-bit
 * arguments, in low,high order.
 *
 * Small structures are passed and returned in registers.  The macro
 * calling convention can't directly deal with this, so the wrapper
 * functions must do this.
 *
 * These PVOP_* macros are only defined within this header.  This
 * means that all uses must be wrapped in inline functions.  This also
 * makes sure the incoming and outgoing types are always correct.
 */
#ifdef CONFIG_X86_32
#define PVOP_VCALL_ARGS			unsigned long __eax, __edx, __ecx
#define PVOP_CALL_ARGS			PVOP_VCALL_ARGS
#define PVOP_VCALL_CLOBBERS		"=a" (__eax), "=d" (__edx),	\
					"=c" (__ecx)
#define PVOP_CALL_CLOBBERS		PVOP_VCALL_CLOBBERS
#define EXTRA_CLOBBERS
#define VEXTRA_CLOBBERS
#else
#define PVOP_VCALL_ARGS		unsigned long __edi, __esi, __edx, __ecx
#define PVOP_CALL_ARGS		PVOP_VCALL_ARGS, __eax
#define PVOP_VCALL_CLOBBERS	"=D" (__edi),				\
				"=S" (__esi), "=d" (__edx),		\
				"=c" (__ecx)

#define PVOP_CALL_CLOBBERS	PVOP_VCALL_CLOBBERS, "=a" (__eax)

#define EXTRA_CLOBBERS	 , "r8", "r9", "r10", "r11"
#define VEXTRA_CLOBBERS	 , "rax", "r8", "r9", "r10", "r11"
#endif

#ifdef CONFIG_PARAVIRT_DEBUG
#define PVOP_TEST_NULL(op)	BUG_ON(op == NULL)
#else
#define PVOP_TEST_NULL(op)	((void)op)
#endif

#define __PVOP_CALL(rettype, op, pre, post, ...)			\
	({								\
		rettype __ret;						\
		PVOP_CALL_ARGS;						\
		PVOP_TEST_NULL(op);					\
		/* This is 32-bit specific, but is okay in 64-bit */	\
		/* since this condition will never hold */		\
		if (sizeof(rettype) > sizeof(unsigned long)) {		\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : PVOP_CALL_CLOBBERS		\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc" EXTRA_CLOBBERS);	\
			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
		} else {						\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : PVOP_CALL_CLOBBERS		\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc" EXTRA_CLOBBERS);	\
			__ret = (rettype)__eax;				\
		}							\
		__ret;							\
	})

#define __PVOP_VCALL(op, pre, post, ...)				\
	({								\
		PVOP_VCALL_ARGS;					\
		PVOP_TEST_NULL(op);					\
		asm volatile(pre					\
			     paravirt_alt(PARAVIRT_CALL)		\
			     post					\
			     : PVOP_VCALL_CLOBBERS			\
			     : paravirt_type(op),			\
			       paravirt_clobber(CLBR_ANY),		\
			       ##__VA_ARGS__				\
			     : "memory", "cc" VEXTRA_CLOBBERS);		\
	})

#define PVOP_CALL0(rettype, op)						\
	__PVOP_CALL(rettype, op, "", "")
#define PVOP_VCALL0(op)							\
	__PVOP_VCALL(op, "", "")

#define PVOP_CALL1(rettype, op, arg1)					\
	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
#define PVOP_VCALL1(op, arg1)						\
	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))

#define PVOP_CALL2(rettype, op, arg1, arg2)				\
	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
		    "1" ((unsigned long)(arg2)))
#define PVOP_VCALL2(op, arg1, arg2)					\
	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
		     "1" ((unsigned long)(arg2)))

#define PVOP_CALL3(rettype, op, arg1, arg2, arg3)			\
	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
		    "1" ((unsigned long)(arg2)), "2" ((unsigned long)(arg3)))
#define PVOP_VCALL3(op, arg1, arg2, arg3)				\
	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
		     "1" ((unsigned long)(arg2)), "2" ((unsigned long)(arg3)))

/* The 4-argument case is the only place where i386 and x86_64 differ:
 * x86_64 can make it much simpler. */
#ifdef CONFIG_X86_32
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
	__PVOP_CALL(rettype, op,					\
		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
	__PVOP_VCALL(op,						\
		     "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		     "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		     "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#else
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
		    "1" ((unsigned long)(arg2)), "2" ((unsigned long)(arg3)), \
		    "3" ((unsigned long)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
		     "1" ((unsigned long)(arg2)), "2" ((unsigned long)(arg3)), \
		     "3" ((unsigned long)(arg4)))
#endif
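
/*
 * Worked example (editor's sketch, not part of the original header):
 * a wrapper such as
 *
 *	static inline unsigned long read_cr0(void)
 *	{
 *		return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
 *	}
 *
 * expands, on i386, to roughly
 *
 *	asm volatile("call *%[paravirt_opptr];"
 *		     : "=a" (__eax), "=d" (__edx), "=c" (__ecx)
 *		     : [paravirt_typenum] "i" (PARAVIRT_PATCH(pv_cpu_ops.read_cr0)),
 *		       [paravirt_opptr] "m" (pv_cpu_ops.read_cr0),
 *		       [paravirt_clobber] "i" (CLBR_ANY)
 *		     : "memory", "cc");
 *
 * wrapped in the paravirt_alt() section magic, with the result taken
 * from __eax.
 */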

static inline int paravirt_enabled(void)
{
	return pv_info.paravirt_enabled;
}

static inline void load_sp0(struct tss_struct *tss,
			    struct thread_struct *thread)
{
	PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
}

#define ARCH_SETUP			pv_init_ops.arch_setup();
static inline unsigned long get_wallclock(void)
{
	return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
}

static inline int set_wallclock(unsigned long nowtime)
{
	return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
}

static inline void (*choose_time_init(void))(void)
{
	return pv_time_ops.time_init;
}

/* The paravirtualized CPUID instruction. */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
			   unsigned int *ecx, unsigned int *edx)
{
	PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
}

/*
 * These special macros can be used to get or set a debugging register
 */
static inline unsigned long paravirt_get_debugreg(int reg)
{
	return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg)
{
	PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}

static inline void clts(void)
{
	PVOP_VCALL0(pv_cpu_ops.clts);
}

static inline unsigned long read_cr0(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
}

static inline void write_cr0(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
}

static inline unsigned long read_cr2(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
}

static inline void write_cr2(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}

static inline unsigned long read_cr3(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}

static inline void write_cr3(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

static inline unsigned long read_cr4(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
static inline unsigned long read_cr4_safe(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

static inline void write_cr4(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}

#ifdef CONFIG_X86_64
static inline unsigned long read_cr8(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
}

static inline void write_cr8(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
}
#endif

static inline void raw_safe_halt(void)
{
	PVOP_VCALL0(pv_irq_ops.safe_halt);
}

static inline void halt(void)
{
	PVOP_VCALL0(pv_irq_ops.halt);
}

static inline void wbinvd(void)
{
	PVOP_VCALL0(pv_cpu_ops.wbinvd);
}

#define get_kernel_rpl()  (pv_info.kernel_rpl)

static inline u64 paravirt_read_msr(unsigned msr, int *err)
{
	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}
static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
}

/* These should all do BUG_ON(_err), but our headers are too tangled. */
#define rdmsr(msr, val1, val2)			\
do {						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	val1 = (u32)_l;				\
	val2 = _l >> 32;			\
} while (0)

#define wrmsr(msr, val1, val2)			\
do {						\
	paravirt_write_msr(msr, val1, val2);	\
} while (0)

#define rdmsrl(msr, val)			\
do {						\
	int _err;				\
	val = paravirt_read_msr(msr, &_err);	\
} while (0)

#define wrmsrl(msr, val)	wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
#define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)

/* rdmsr with exception handling */
#define rdmsr_safe(msr, a, b)			\
({						\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	(*a) = (u32)_l;				\
	(*b) = _l >> 32;			\
	_err;					\
})

static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
{
	int err;

	*p = paravirt_read_msr(msr, &err);
	return err;
}
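
/*
 * Usage example (illustrative, not part of the original header):
 *
 *	u32 lo, hi;
 *	rdmsr(MSR_IA32_APICBASE, lo, hi);
 *	wrmsr(MSR_IA32_APICBASE, lo, hi);
 *
 * Under CONFIG_PARAVIRT these expand to patchable calls through
 * pv_cpu_ops.read_msr/write_msr rather than raw rdmsr/wrmsr
 * instructions, so a hypervisor backend can intercept them.
 */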

static inline u64 paravirt_read_tsc(void)
{
	return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
}

#define rdtscl(low)				\
do {						\
	u64 _l = paravirt_read_tsc();		\
	low = (int)_l;				\
} while (0)

#define rdtscll(val) (val = paravirt_read_tsc())

static inline unsigned long long paravirt_sched_clock(void)
{
	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
#define calibrate_tsc() (pv_time_ops.get_tsc_khz())

static inline unsigned long long paravirt_read_pmc(int counter)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
}

#define rdpmc(counter, low, high)		\
do {						\
	u64 _l = paravirt_read_pmc(counter);	\
	low = (u32)_l;				\
	high = _l >> 32;			\
} while (0)

static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
}

#define rdtscp(low, high, aux)				\
do {							\
	unsigned int __aux;				\
	unsigned long __val = paravirt_rdtscp(&__aux);	\
	(low) = (u32)__val;				\
	(high) = (u32)(__val >> 32);			\
	(aux) = __aux;					\
} while (0)

#define rdtscpll(val, aux)				\
do {							\
	unsigned long __aux;				\
	val = paravirt_rdtscp(&__aux);			\
	(aux) = __aux;					\
} while (0)

static inline void load_TR_desc(void)
{
	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
}
static inline void load_gdt(const struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
}
static inline void load_idt(const struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
}
static inline void set_ldt(const void *addr, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
static inline void store_gdt(struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
}
static inline void store_idt(struct desc_ptr *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
}
static inline unsigned long paravirt_store_tr(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
}
#define store_tr(tr)	((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
	PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
}

#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int gs)
{
	PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
}
#endif

static inline void write_ldt_entry(struct desc_struct *dt, int entry,
				   const void *desc)
{
	PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
}

static inline void write_gdt_entry(struct desc_struct *dt, int entry,
				   void *desc, int type)
{
	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
}

static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
	PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
}
static inline void set_iopl_mask(unsigned mask)
{
	PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
}

/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
	pv_cpu_ops.io_delay();
#ifdef REALLY_SLOW_IO
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
#endif
}

#ifdef CONFIG_X86_LOCAL_APIC
/*
 * Basic functions accessing APICs.
 */
static inline void apic_write(unsigned long reg, u32 v)
{
	PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
}

static inline u32 apic_read(unsigned long reg)
{
	return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
}

static inline void setup_boot_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
}

static inline void setup_secondary_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
}
#endif

static inline void paravirt_post_allocator_init(void)
{
	if (pv_init_ops.post_allocator_init)
		(*pv_init_ops.post_allocator_init)();
}

static inline void paravirt_pagetable_setup_start(pgd_t *base)
{
	(*pv_mmu_ops.pagetable_setup_start)(base);
}

static inline void paravirt_pagetable_setup_done(pgd_t *base)
{
	(*pv_mmu_ops.pagetable_setup_done)(base);
}

#ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
				    unsigned long start_esp)
{
	PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
		    phys_apicid, start_eip, start_esp);
}
#endif

static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
	PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
}

static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
	PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
}

static inline void __flush_tlb(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
}
static inline void __flush_tlb_global(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
static inline void __flush_tlb_single(unsigned long addr)
{
	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}

static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
				    unsigned long va)
{
	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
}

static inline int paravirt_pgd_alloc(struct mm_struct *mm)
{
	return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
}

static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
}

static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
}
static inline void paravirt_release_pte(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
}

static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
}

static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn,
					    unsigned start, unsigned count)
{
	PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
}
static inline void paravirt_release_pmd(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
}

static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
}
static inline void paravirt_release_pud(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
}

#ifdef CONFIG_HIGHPTE
static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
{
	unsigned long ret;
	ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
	return (void *)ret;
}
#endif

static inline void pte_update(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}

static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
}

static inline pte_t __pte(pteval_t val)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALL2(pteval_t,
				 pv_mmu_ops.make_pte,
				 val, (u64)val >> 32);
	else
		ret = PVOP_CALL1(pteval_t,
				 pv_mmu_ops.make_pte,
				 val);

	return (pte_t) { .pte = ret };
}
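
/*
 * Editor's note (illustrative, not in the original): the
 * sizeof(pteval_t) > sizeof(long) test above distinguishes 32-bit PAE,
 * where page table entries are 64-bit but longs are 32-bit, from all
 * other configurations.  In the PAE case the 64-bit value has to be
 * split into two 32-bit arguments (low word first), matching the i386
 * convention described earlier; the sizeof() comparison is evaluated
 * at build time, so only one branch survives compilation.
 */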

static inline pteval_t pte_val(pte_t pte)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
				 pte.pte, (u64)pte.pte >> 32);
	else
		ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
				 pte.pte);

	return ret;
}

static inline pteval_t pte_flags(pte_t pte)
{
	pteval_t ret;

	if (sizeof(pteval_t) > sizeof(long))
		ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
				 pte.pte, (u64)pte.pte >> 32);
	else
		ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
				 pte.pte);

#ifdef CONFIG_PARAVIRT_DEBUG
	BUG_ON(ret & PTE_PFN_MASK);
#endif
	return ret;
}

static inline pgd_t __pgd(pgdval_t val)
{
	pgdval_t ret;

	if (sizeof(pgdval_t) > sizeof(long))
		ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
				 val, (u64)val >> 32);
	else
		ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
				 val);

	return (pgd_t) { ret };
}

static inline pgdval_t pgd_val(pgd_t pgd)
{
	pgdval_t ret;

	if (sizeof(pgdval_t) > sizeof(long))
		ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
				 pgd.pgd, (u64)pgd.pgd >> 32);
	else
		ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
				 pgd.pgd);

	return ret;
}

#define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
					   pte_t *ptep)
{
	pteval_t ret;

	ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
			 mm, addr, ptep);

	return (pte_t) { .pte = ret };
}

static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
	else
		PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
			    mm, addr, ptep, pte.pte);
}

static inline void set_pte(pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
			    pte.pte, (u64)pte.pte >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
			    pte.pte);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte)
{
	if (sizeof(pteval_t) > sizeof(long))
		/* 5 arg words */
		pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
	else
		PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	pmdval_t val = native_pmd_val(pmd);

	if (sizeof(pmdval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
}

#if PAGETABLE_LEVELS >= 3
static inline pmd_t __pmd(pmdval_t val)
{
	pmdval_t ret;

	if (sizeof(pmdval_t) > sizeof(long))
		ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
				 val, (u64)val >> 32);
	else
		ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
				 val);

	return (pmd_t) { ret };
}

static inline pmdval_t pmd_val(pmd_t pmd)
{
	pmdval_t ret;

	if (sizeof(pmdval_t) > sizeof(long))
		ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
				 pmd.pmd, (u64)pmd.pmd >> 32);
	else
		ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
				 pmd.pmd);

	return ret;
}

static inline void set_pud(pud_t *pudp, pud_t pud)
{
	pudval_t val = native_pud_val(pud);

	if (sizeof(pudval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
			    val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
			    val);
}
#if PAGETABLE_LEVELS == 4
static inline pud_t __pud(pudval_t val)
{
	pudval_t ret;

	if (sizeof(pudval_t) > sizeof(long))
		ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
				 val, (u64)val >> 32);
	else
		ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
				 val);

	return (pud_t) { ret };
}

static inline pudval_t pud_val(pud_t pud)
{
	pudval_t ret;

	if (sizeof(pudval_t) > sizeof(long))
		ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
				 pud.pud, (u64)pud.pud >> 32);
	else
		ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
				 pud.pud);

	return ret;
}

static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	pgdval_t val = native_pgd_val(pgd);

	if (sizeof(pgdval_t) > sizeof(long))
		PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
			    val, (u64)val >> 32);
	else
		PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
			    val);
}

static inline void pgd_clear(pgd_t *pgdp)
{
	set_pgd(pgdp, __pgd(0));
}

static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));
}

#endif	/* PAGETABLE_LEVELS == 4 */

#endif	/* PAGETABLE_LEVELS >= 3 */

#ifdef CONFIG_X86_PAE
/* Special-case pte-setting operations for PAE, which can't update a
   64-bit pte atomically */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
		    pte.pte, pte.pte >> 32);
}

static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte)
{
	/* 5 arg words */
	pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
}

static inline void pmd_clear(pmd_t *pmdp)
{
	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
}
#else  /* !CONFIG_X86_PAE */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
	set_pte(ptep, pte);
}

static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte)
{
	set_pte(ptep, pte);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	set_pte_at(mm, addr, ptep, __pte(0));
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}
#endif	/* CONFIG_X86_PAE */

/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
	PARAVIRT_LAZY_NONE,
	PARAVIRT_LAZY_MMU,
	PARAVIRT_LAZY_CPU,
};

enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
void paravirt_enter_lazy_cpu(void);
void paravirt_leave_lazy_cpu(void);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
void paravirt_leave_lazy(enum paravirt_lazy_mode mode);

#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
static inline void arch_enter_lazy_cpu_mode(void)
{
	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_cpu_mode(void)
{
	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
}

static inline void arch_flush_lazy_cpu_mode(void)
{
	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
		arch_leave_lazy_cpu_mode();
		arch_enter_lazy_cpu_mode();
	}
}


#define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}

static inline void arch_flush_lazy_mmu_mode(void)
{
	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
		arch_leave_lazy_mmu_mode();
		arch_enter_lazy_mmu_mode();
	}
}

static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
				unsigned long phys, pgprot_t flags)
{
	pv_mmu_ops.set_fixmap(idx, phys, flags);
}

void _paravirt_nop(void);
#define paravirt_nop	((void *)_paravirt_nop)

void paravirt_use_bytelocks(void);

#ifdef CONFIG_SMP

static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
}

static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
}

static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
}

static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
{
	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
}

static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
{
	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
}

#endif

/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
	u8 *instr;		/* original instructions */
	u8 instrtype;		/* type of this instruction */
	u8 len;			/* length of original instruction */
	u16 clobbers;		/* what registers you may clobber */
};

extern struct paravirt_patch_site __parainstructions[],
	__parainstructions_end[];
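
/*
 * Sketch (editor's illustration, not part of this header) of how the
 * patcher consumes this table; the real loop lives in apply_paravirt()
 * in arch/x86/kernel/alternative.c and differs in detail:
 *
 *	struct paravirt_patch_site *p;
 *	char insnbuf[MAX_PATCH_LEN];	// buffer constant assumed
 *
 *	for (p = __parainstructions; p < __parainstructions_end; p++) {
 *		unsigned used;
 *
 *		used = pv_init_ops.patch(p->instrtype, p->clobbers,
 *					 insnbuf, (unsigned long)p->instr,
 *					 p->len);
 *		// nop-pad whatever the backend did not fill, then copy
 *		// insnbuf back over the original call site
 *	}
 */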

#ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
#define PV_FLAGS_ARG "0"
#define PV_EXTRA_CLOBBERS
#define PV_VEXTRA_CLOBBERS
#else
/* Saving all registers would be too much: we save only %rdi, the
 * argument register, and mark all other caller-saved registers as
 * clobbered. */
#define PV_SAVE_REGS "pushq %%rdi;"
#define PV_RESTORE_REGS "popq %%rdi;"
#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
#define PV_FLAGS_ARG "D"
#endif

static inline unsigned long __raw_local_save_flags(void)
{
	unsigned long f;

	asm volatile(paravirt_alt(PV_SAVE_REGS
				  PARAVIRT_CALL
				  PV_RESTORE_REGS)
		     : "=a"(f)
		     : paravirt_type(pv_irq_ops.save_fl),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "cc" PV_VEXTRA_CLOBBERS);
	return f;
}

static inline void raw_local_irq_restore(unsigned long f)
{
	asm volatile(paravirt_alt(PV_SAVE_REGS
				  PARAVIRT_CALL
				  PV_RESTORE_REGS)
		     : "=a"(f)
		     : PV_FLAGS_ARG(f),
		       paravirt_type(pv_irq_ops.restore_fl),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "cc" PV_EXTRA_CLOBBERS);
}

static inline void raw_local_irq_disable(void)
{
	asm volatile(paravirt_alt(PV_SAVE_REGS
				  PARAVIRT_CALL
				  PV_RESTORE_REGS)
		     :
		     : paravirt_type(pv_irq_ops.irq_disable),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
}

static inline void raw_local_irq_enable(void)
{
	asm volatile(paravirt_alt(PV_SAVE_REGS
				  PARAVIRT_CALL
				  PV_RESTORE_REGS)
		     :
		     : paravirt_type(pv_irq_ops.irq_enable),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
}

static inline unsigned long __raw_local_irq_save(void)
{
	unsigned long f;

	f = __raw_local_save_flags();
	raw_local_irq_disable();
	return f;
}

/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#undef __PVOP_VCALL
#undef PVOP_VCALL0
#undef PVOP_CALL0
#undef PVOP_VCALL1
#undef PVOP_CALL1
#undef PVOP_VCALL2
#undef PVOP_CALL2
#undef PVOP_VCALL3
#undef PVOP_CALL3
#undef PVOP_VCALL4
#undef PVOP_CALL4

#else  /* __ASSEMBLY__ */

#define _PVSITE(ptype, clobbers, ops, word, algn)	\
771:;							\
	ops;						\
772:;							\
	.pushsection .parainstructions,"a";		\
	 .align	algn;					\
	 word 771b;					\
	 .byte ptype;					\
	 .byte 772b-771b;				\
	 .short clobbers;				\
	.popsection


#ifdef CONFIG_X86_64
#define PV_SAVE_REGS				\
	push %rax;				\
	push %rcx;				\
	push %rdx;				\
	push %rsi;				\
	push %rdi;				\
	push %r8;				\
	push %r9;				\
	push %r10;				\
	push %r11
#define PV_RESTORE_REGS				\
	pop %r11;				\
	pop %r10;				\
	pop %r9;				\
	pop %r8;				\
	pop %rdi;				\
	pop %rsi;				\
	pop %rdx;				\
	pop %rcx;				\
	pop %rax
#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
#define PARA_INDIRECT(addr)	*addr(%rip)
#else
#define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
#define PARA_INDIRECT(addr)	*%cs:addr
#endif

#define INTERRUPT_RETURN						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))

#define DISABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers,	\
		  PV_SAVE_REGS;						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
		  PV_RESTORE_REGS;)

#define ENABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
		  PV_SAVE_REGS;						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
		  PV_RESTORE_REGS;)
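
/*
 * Usage example (illustrative, not part of this header): the assembly
 * entry code uses these instead of raw cli/sti, e.g. in
 * arch/x86/kernel/entry_32.S:
 *
 *	DISABLE_INTERRUPTS(CLBR_ANY)
 *
 * On native hardware the patcher replaces the whole save/call/restore
 * sequence with a single cli; under a hypervisor backend the indirect
 * call survives (or is patched to a direct call into the backend's
 * irq_disable implementation).
 */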

#define USERGS_SYSRET32							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))

#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX				\
	push %ecx; push %edx;				\
	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
	pop %edx; pop %ecx

#define ENABLE_INTERRUPTS_SYSEXIT					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))


#else	/* !CONFIG_X86_32 */

/*
 * If swapgs is used while the userspace stack is still current,
 * there's no way to call a pvop.  The PV replacement *must* be
 * inlined, or the swapgs instruction must be trapped and emulated.
 */
#define SWAPGS_UNSAFE_STACK						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
		  swapgs)

#define SWAPGS								\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
		  PV_SAVE_REGS;						\
		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);		\
		  PV_RESTORE_REGS					\
		 )

#define GET_CR2_INTO_RCX				\
	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);	\
	movq %rax, %rcx;				\
	xorq %rax, %rax;

#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
		  CLBR_NONE,						\
		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))

#define USERGS_SYSRET64							\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))

#define ENABLE_INTERRUPTS_SYSEXIT32					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
		  CLBR_NONE,						\
		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#endif	/* CONFIG_X86_32 */

#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
#endif /* __ASM_PARAVIRT_H */