Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v2.6.31 485 lines 14 kB view raw
1#ifndef _ASM_X86_SYSTEM_H 2#define _ASM_X86_SYSTEM_H 3 4#include <asm/asm.h> 5#include <asm/segment.h> 6#include <asm/cpufeature.h> 7#include <asm/cmpxchg.h> 8#include <asm/nops.h> 9 10#include <linux/kernel.h> 11#include <linux/irqflags.h> 12 13/* entries in ARCH_DLINFO: */ 14#ifdef CONFIG_IA32_EMULATION 15# define AT_VECTOR_SIZE_ARCH 2 16#else 17# define AT_VECTOR_SIZE_ARCH 1 18#endif 19 20struct task_struct; /* one of the stranger aspects of C forward declarations */ 21struct task_struct *__switch_to(struct task_struct *prev, 22 struct task_struct *next); 23struct tss_struct; 24void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 25 struct tss_struct *tss); 26 27#ifdef CONFIG_X86_32 28 29#ifdef CONFIG_CC_STACKPROTECTOR 30#define __switch_canary \ 31 "movl %P[task_canary](%[next]), %%ebx\n\t" \ 32 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" 33#define __switch_canary_oparam \ 34 , [stack_canary] "=m" (per_cpu_var(stack_canary)) 35#define __switch_canary_iparam \ 36 , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) 37#else /* CC_STACKPROTECTOR */ 38#define __switch_canary 39#define __switch_canary_oparam 40#define __switch_canary_iparam 41#endif /* CC_STACKPROTECTOR */ 42 43/* 44 * Saving eflags is important. It switches not only IOPL between tasks, 45 * it also protects other tasks from NT leaking through sysenter etc. 46 */ 47#define switch_to(prev, next, last) \ 48do { \ 49 /* \ 50 * Context-switching clobbers all registers, so we clobber \ 51 * them explicitly, via unused output variables. \ 52 * (EAX and EBP is not listed because EBP is saved/restored \ 53 * explicitly for wchan access and EAX is the return value of \ 54 * __switch_to()) \ 55 */ \ 56 unsigned long ebx, ecx, edx, esi, edi; \ 57 \ 58 asm volatile("pushfl\n\t" /* save flags */ \ 59 "pushl %%ebp\n\t" /* save EBP */ \ 60 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ 61 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ 62 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ 63 "pushl %[next_ip]\n\t" /* restore EIP */ \ 64 __switch_canary \ 65 "jmp __switch_to\n" /* regparm call */ \ 66 "1:\t" \ 67 "popl %%ebp\n\t" /* restore EBP */ \ 68 "popfl\n" /* restore flags */ \ 69 \ 70 /* output parameters */ \ 71 : [prev_sp] "=m" (prev->thread.sp), \ 72 [prev_ip] "=m" (prev->thread.ip), \ 73 "=a" (last), \ 74 \ 75 /* clobbered output registers: */ \ 76 "=b" (ebx), "=c" (ecx), "=d" (edx), \ 77 "=S" (esi), "=D" (edi) \ 78 \ 79 __switch_canary_oparam \ 80 \ 81 /* input parameters: */ \ 82 : [next_sp] "m" (next->thread.sp), \ 83 [next_ip] "m" (next->thread.ip), \ 84 \ 85 /* regparm parameters for __switch_to(): */ \ 86 [prev] "a" (prev), \ 87 [next] "d" (next) \ 88 \ 89 __switch_canary_iparam \ 90 \ 91 : /* reloaded segment registers */ \ 92 "memory"); \ 93} while (0) 94 95/* 96 * disable hlt during certain critical i/o operations 97 */ 98#define HAVE_DISABLE_HLT 99#else 100#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" 101#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" 102 103/* frame pointer must be last for get_wchan */ 104#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" 105#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" 106 107#define __EXTRA_CLOBBER \ 108 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ 109 "r12", "r13", "r14", "r15" 110 111#ifdef CONFIG_CC_STACKPROTECTOR 112#define __switch_canary \ 113 "movq %P[task_canary](%%rsi),%%r8\n\t" \ 114 "movq %%r8,"__percpu_arg([gs_canary])"\n\t" 115#define __switch_canary_oparam \ 116 , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) 117#define __switch_canary_iparam \ 118 , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) 119#else /* CC_STACKPROTECTOR */ 120#define __switch_canary 121#define __switch_canary_oparam 122#define __switch_canary_iparam 123#endif /* CC_STACKPROTECTOR */ 124 125/* Save restore flags to clear handle leaking NT */ 126#define switch_to(prev, next, last) \ 127 asm volatile(SAVE_CONTEXT \ 128 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 129 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ 130 "call __switch_to\n\t" \ 131 ".globl thread_return\n" \ 132 "thread_return:\n\t" \ 133 "movq "__percpu_arg([current_task])",%%rsi\n\t" \ 134 __switch_canary \ 135 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 136 "movq %%rax,%%rdi\n\t" \ 137 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ 138 "jnz ret_from_fork\n\t" \ 139 RESTORE_CONTEXT \ 140 : "=a" (last) \ 141 __switch_canary_oparam \ 142 : [next] "S" (next), [prev] "D" (prev), \ 143 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ 144 [ti_flags] "i" (offsetof(struct thread_info, flags)), \ 145 [_tif_fork] "i" (_TIF_FORK), \ 146 [thread_info] "i" (offsetof(struct task_struct, stack)), \ 147 [current_task] "m" (per_cpu_var(current_task)) \ 148 __switch_canary_iparam \ 149 : "memory", "cc" __EXTRA_CLOBBER) 150#endif 151 152#ifdef __KERNEL__ 153#define _set_base(addr, base) do { unsigned long __pr; \ 154__asm__ __volatile__ ("movw %%dx,%1\n\t" \ 155 "rorl $16,%%edx\n\t" \ 156 "movb %%dl,%2\n\t" \ 157 "movb %%dh,%3" \ 158 :"=&d" (__pr) \ 159 :"m" (*((addr)+2)), \ 160 "m" (*((addr)+4)), \ 161 "m" (*((addr)+7)), \ 162 "0" (base) \ 163 ); } while (0) 164 165#define _set_limit(addr, limit) do { unsigned long __lr; \ 166__asm__ __volatile__ ("movw %%dx,%1\n\t" \ 167 "rorl $16,%%edx\n\t" \ 168 "movb %2,%%dh\n\t" \ 169 "andb $0xf0,%%dh\n\t" \ 170 "orb %%dh,%%dl\n\t" \ 171 "movb %%dl,%2" \ 172 :"=&d" (__lr) \ 173 :"m" (*(addr)), \ 174 "m" (*((addr)+6)), \ 175 "0" (limit) \ 176 ); } while (0) 177 178#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) 179#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) 180 181extern void native_load_gs_index(unsigned); 182 183/* 184 * Load a segment. Fall back on loading the zero 185 * segment if something goes wrong.. 186 */ 187#define loadsegment(seg, value) \ 188 asm volatile("\n" \ 189 "1:\t" \ 190 "movl %k0,%%" #seg "\n" \ 191 "2:\n" \ 192 ".section .fixup,\"ax\"\n" \ 193 "3:\t" \ 194 "movl %k1, %%" #seg "\n\t" \ 195 "jmp 2b\n" \ 196 ".previous\n" \ 197 _ASM_EXTABLE(1b,3b) \ 198 : :"r" (value), "r" (0) : "memory") 199 200 201/* 202 * Save a segment register away 203 */ 204#define savesegment(seg, value) \ 205 asm("mov %%" #seg ",%0":"=r" (value) : : "memory") 206 207/* 208 * x86_32 user gs accessors. 209 */ 210#ifdef CONFIG_X86_32 211#ifdef CONFIG_X86_32_LAZY_GS 212#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) 213#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) 214#define task_user_gs(tsk) ((tsk)->thread.gs) 215#define lazy_save_gs(v) savesegment(gs, (v)) 216#define lazy_load_gs(v) loadsegment(gs, (v)) 217#else /* X86_32_LAZY_GS */ 218#define get_user_gs(regs) (u16)((regs)->gs) 219#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) 220#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) 221#define lazy_save_gs(v) do { } while (0) 222#define lazy_load_gs(v) do { } while (0) 223#endif /* X86_32_LAZY_GS */ 224#endif /* X86_32 */ 225 226static inline unsigned long get_limit(unsigned long segment) 227{ 228 unsigned long __limit; 229 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); 230 return __limit + 1; 231} 232 233static inline void native_clts(void) 234{ 235 asm volatile("clts"); 236} 237 238/* 239 * Volatile isn't enough to prevent the compiler from reordering the 240 * read/write functions for the control registers and messing everything up. 241 * A memory clobber would solve the problem, but would prevent reordering of 242 * all loads stores around it, which can hurt performance. Solution is to 243 * use a variable and mimic reads and writes to it to enforce serialization 244 */ 245static unsigned long __force_order; 246 247static inline unsigned long native_read_cr0(void) 248{ 249 unsigned long val; 250 asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); 251 return val; 252} 253 254static inline void native_write_cr0(unsigned long val) 255{ 256 asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); 257} 258 259static inline unsigned long native_read_cr2(void) 260{ 261 unsigned long val; 262 asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); 263 return val; 264} 265 266static inline void native_write_cr2(unsigned long val) 267{ 268 asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); 269} 270 271static inline unsigned long native_read_cr3(void) 272{ 273 unsigned long val; 274 asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); 275 return val; 276} 277 278static inline void native_write_cr3(unsigned long val) 279{ 280 asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); 281} 282 283static inline unsigned long native_read_cr4(void) 284{ 285 unsigned long val; 286 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); 287 return val; 288} 289 290static inline unsigned long native_read_cr4_safe(void) 291{ 292 unsigned long val; 293 /* This could fault if %cr4 does not exist. In x86_64, a cr4 always 294 * exists, so it will never fail. */ 295#ifdef CONFIG_X86_32 296 asm volatile("1: mov %%cr4, %0\n" 297 "2:\n" 298 _ASM_EXTABLE(1b, 2b) 299 : "=r" (val), "=m" (__force_order) : "0" (0)); 300#else 301 val = native_read_cr4(); 302#endif 303 return val; 304} 305 306static inline void native_write_cr4(unsigned long val) 307{ 308 asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); 309} 310 311#ifdef CONFIG_X86_64 312static inline unsigned long native_read_cr8(void) 313{ 314 unsigned long cr8; 315 asm volatile("movq %%cr8,%0" : "=r" (cr8)); 316 return cr8; 317} 318 319static inline void native_write_cr8(unsigned long val) 320{ 321 asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); 322} 323#endif 324 325static inline void native_wbinvd(void) 326{ 327 asm volatile("wbinvd": : :"memory"); 328} 329 330#ifdef CONFIG_PARAVIRT 331#include <asm/paravirt.h> 332#else 333#define read_cr0() (native_read_cr0()) 334#define write_cr0(x) (native_write_cr0(x)) 335#define read_cr2() (native_read_cr2()) 336#define write_cr2(x) (native_write_cr2(x)) 337#define read_cr3() (native_read_cr3()) 338#define write_cr3(x) (native_write_cr3(x)) 339#define read_cr4() (native_read_cr4()) 340#define read_cr4_safe() (native_read_cr4_safe()) 341#define write_cr4(x) (native_write_cr4(x)) 342#define wbinvd() (native_wbinvd()) 343#ifdef CONFIG_X86_64 344#define read_cr8() (native_read_cr8()) 345#define write_cr8(x) (native_write_cr8(x)) 346#define load_gs_index native_load_gs_index 347#endif 348 349/* Clear the 'TS' bit */ 350#define clts() (native_clts()) 351 352#endif/* CONFIG_PARAVIRT */ 353 354#define stts() write_cr0(read_cr0() | X86_CR0_TS) 355 356#endif /* __KERNEL__ */ 357 358static inline void clflush(volatile void *__p) 359{ 360 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); 361} 362 363#define nop() asm volatile ("nop") 364 365void disable_hlt(void); 366void enable_hlt(void); 367 368void cpu_idle_wait(void); 369 370extern unsigned long arch_align_stack(unsigned long sp); 371extern void free_init_pages(char *what, unsigned long begin, unsigned long end); 372 373void default_idle(void); 374 375void stop_this_cpu(void *dummy); 376 377/* 378 * Force strict CPU ordering. 379 * And yes, this is required on UP too when we're talking 380 * to devices. 381 */ 382#ifdef CONFIG_X86_32 383/* 384 * Some non-Intel clones support out of order store. wmb() ceases to be a 385 * nop for these. 386 */ 387#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) 388#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) 389#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) 390#else 391#define mb() asm volatile("mfence":::"memory") 392#define rmb() asm volatile("lfence":::"memory") 393#define wmb() asm volatile("sfence" ::: "memory") 394#endif 395 396/** 397 * read_barrier_depends - Flush all pending reads that subsequents reads 398 * depend on. 399 * 400 * No data-dependent reads from memory-like regions are ever reordered 401 * over this barrier. All reads preceding this primitive are guaranteed 402 * to access memory (but not necessarily other CPUs' caches) before any 403 * reads following this primitive that depend on the data return by 404 * any of the preceding reads. This primitive is much lighter weight than 405 * rmb() on most CPUs, and is never heavier weight than is 406 * rmb(). 407 * 408 * These ordering constraints are respected by both the local CPU 409 * and the compiler. 410 * 411 * Ordering is not guaranteed by anything other than these primitives, 412 * not even by data dependencies. See the documentation for 413 * memory_barrier() for examples and URLs to more information. 414 * 415 * For example, the following code would force ordering (the initial 416 * value of "a" is zero, "b" is one, and "p" is "&a"): 417 * 418 * <programlisting> 419 * CPU 0 CPU 1 420 * 421 * b = 2; 422 * memory_barrier(); 423 * p = &b; q = p; 424 * read_barrier_depends(); 425 * d = *q; 426 * </programlisting> 427 * 428 * because the read of "*q" depends on the read of "p" and these 429 * two reads are separated by a read_barrier_depends(). However, 430 * the following code, with the same initial values for "a" and "b": 431 * 432 * <programlisting> 433 * CPU 0 CPU 1 434 * 435 * a = 2; 436 * memory_barrier(); 437 * b = 3; y = b; 438 * read_barrier_depends(); 439 * x = a; 440 * </programlisting> 441 * 442 * does not enforce ordering, since there is no data dependency between 443 * the read of "a" and the read of "b". Therefore, on some CPUs, such 444 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() 445 * in cases like this where there are no data dependencies. 446 **/ 447 448#define read_barrier_depends() do { } while (0) 449 450#ifdef CONFIG_SMP 451#define smp_mb() mb() 452#ifdef CONFIG_X86_PPRO_FENCE 453# define smp_rmb() rmb() 454#else 455# define smp_rmb() barrier() 456#endif 457#ifdef CONFIG_X86_OOSTORE 458# define smp_wmb() wmb() 459#else 460# define smp_wmb() barrier() 461#endif 462#define smp_read_barrier_depends() read_barrier_depends() 463#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) 464#else 465#define smp_mb() barrier() 466#define smp_rmb() barrier() 467#define smp_wmb() barrier() 468#define smp_read_barrier_depends() do { } while (0) 469#define set_mb(var, value) do { var = value; barrier(); } while (0) 470#endif 471 472/* 473 * Stop RDTSC speculation. This is needed when you need to use RDTSC 474 * (or get_cycles or vread that possibly accesses the TSC) in a defined 475 * code region. 476 * 477 * (Could use an alternative three way for this if there was one.) 478 */ 479static inline void rdtsc_barrier(void) 480{ 481 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); 482 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); 483} 484 485#endif /* _ASM_X86_SYSTEM_H */