/* Linux v2.6.27 */
#ifndef _ASM_X86_SYSTEM_H_
#define _ASM_X86_SYSTEM_H_

#include <asm/asm.h>
#include <asm/segment.h>
#include <asm/cpufeature.h>
#include <asm/cmpxchg.h>
#include <asm/nops.h>

#include <linux/kernel.h>
#include <linux/irqflags.h>

/* entries in ARCH_DLINFO: */
#ifdef CONFIG_IA32_EMULATION
# define AT_VECTOR_SIZE_ARCH 2
#else
# define AT_VECTOR_SIZE_ARCH 1
#endif

#ifdef CONFIG_X86_32

struct task_struct; /* one of the stranger aspects of C forward declarations */
struct task_struct *__switch_to(struct task_struct *prev,
				struct task_struct *next);

/*
 * Saving eflags is important. It switches not only IOPL between tasks,
 * it also protects other tasks from NT leaking through sysenter etc.
 */
#define switch_to(prev, next, last)					\
do {									\
	/*								\
	 * Context-switching clobbers all registers, so we clobber	\
	 * them explicitly, via unused output variables.		\
	 * (EAX and EBP are not listed because EBP is saved/restored	\
	 * explicitly for wchan access and EAX is the return value of	\
	 * __switch_to())						\
	 */								\
	unsigned long ebx, ecx, edx, esi, edi;				\
									\
	asm volatile("pushfl\n\t"		/* save flags */	\
		     "pushl %%ebp\n\t"		/* save EBP */		\
		     "movl %%esp,%[prev_sp]\n\t" /* save ESP */		\
		     "movl %[next_sp],%%esp\n\t" /* restore ESP */	\
		     "movl $1f,%[prev_ip]\n\t"	/* save EIP */		\
		     "pushl %[next_ip]\n\t"	/* restore EIP */	\
		     "jmp __switch_to\n"	/* regparm call */	\
		     "1:\t"						\
		     "popl %%ebp\n\t"		/* restore EBP */	\
		     "popfl\n"			/* restore flags */	\
									\
		     /* output parameters */				\
		     : [prev_sp] "=m" (prev->thread.sp),		\
		       [prev_ip] "=m" (prev->thread.ip),		\
		       "=a" (last),					\
									\
		       /* clobbered output registers: */		\
		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
		       "=S" (esi), "=D" (edi)				\
									\
		       /* input parameters: */				\
		     : [next_sp] "m" (next->thread.sp),			\
		       [next_ip] "m" (next->thread.ip),			\
									\
		       /* regparm parameters for __switch_to(): */	\
		       [prev] "a" (prev),				\
		       [next] "d" (next));				\
} while (0)

/*
 * disable hlt during certain critical i/o operations
 */
#define HAVE_DISABLE_HLT
#else
#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"

/* frame pointer must be last for get_wchan */
#define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"

#define __EXTRA_CLOBBER \
	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
	  "r12", "r13", "r14", "r15"

/* Save and restore flags, to clear and handle a leaking NT flag */
#define switch_to(prev, next, last) \
	asm volatile(SAVE_CONTEXT					   \
	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	   \
	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	   \
	     "call __switch_to\n\t"					   \
	     ".globl thread_return\n"					   \
	     "thread_return:\n\t"					   \
	     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			   \
	     "movq %P[thread_info](%%rsi),%%r8\n\t"			   \
	     LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t"	   \
	     "movq %%rax,%%rdi\n\t"					   \
	     "jc ret_from_fork\n\t"					   \
	     RESTORE_CONTEXT						   \
	     : "=a" (last)						   \
	     : [next] "S" (next), [prev] "D" (prev),			   \
	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	   \
	       [tif_fork] "i" (TIF_FORK),				   \
	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))  \
	     : "memory", "cc" __EXTRA_CLOBBER)
#endif

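/*
 * Usage sketch (illustration only, not part of this header): switch_to()
 * is called from the scheduler's context-switch path.  The third argument
 * is needed because, by the time a task resumes after the switch, the
 * local variables on its stack are stale; __switch_to() returns the task
 * that was actually running just before this task was resumed, and that
 * value lands in "last".  Roughly:
 *
 *	struct task_struct *prev = current;
 *	struct task_struct *last;
 *
 *	switch_to(prev, next, last);
 *	// We only get here after some later switch back to this task;
 *	// "last" then names the task we came from, so post-switch
 *	// cleanup can be charged to the right task.
 */
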
#ifdef __KERNEL__
#define _set_base(addr, base) do { unsigned long __pr; \
__asm__ __volatile__ ("movw %%dx,%1\n\t" \
	"rorl $16,%%edx\n\t" \
	"movb %%dl,%2\n\t" \
	"movb %%dh,%3" \
	:"=&d" (__pr) \
	:"m" (*((addr)+2)), \
	 "m" (*((addr)+4)), \
	 "m" (*((addr)+7)), \
	 "0" (base) \
	); } while (0)

#define _set_limit(addr, limit) do { unsigned long __lr; \
__asm__ __volatile__ ("movw %%dx,%1\n\t" \
	"rorl $16,%%edx\n\t" \
	"movb %2,%%dh\n\t" \
	"andb $0xf0,%%dh\n\t" \
	"orb %%dh,%%dl\n\t" \
	"movb %%dl,%2" \
	:"=&d" (__lr) \
	:"m" (*(addr)), \
	 "m" (*((addr)+6)), \
	 "0" (limit) \
	); } while (0)

#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))

extern void native_load_gs_index(unsigned);

/*
 * Load a segment. Fall back on loading the zero
 * segment if something goes wrong.
 */
#define loadsegment(seg, value)			\
	asm volatile("\n"			\
		     "1:\t"			\
		     "movl %k0,%%" #seg "\n"	\
		     "2:\n"			\
		     ".section .fixup,\"ax\"\n"	\
		     "3:\t"			\
		     "movl %k1, %%" #seg "\n\t"	\
		     "jmp 2b\n"			\
		     ".previous\n"		\
		     _ASM_EXTABLE(1b,3b)	\
		     : :"r" (value), "r" (0) : "memory")

/*
 * Save a segment register away
 */
#define savesegment(seg, value)				\
	asm("mov %%" #seg ",%0":"=r" (value) : : "memory")

static inline unsigned long get_limit(unsigned long segment)
{
	unsigned long __limit;
	asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
	return __limit + 1;
}

static inline void native_clts(void)
{
	asm volatile("clts");
}

/*
 * Volatile isn't enough to prevent the compiler from reordering the
 * read/write functions for the control registers and messing everything up.
 * A memory clobber would solve the problem, but would prevent reordering of
 * all loads/stores around it, which can hurt performance. The solution is to
 * use a variable and mimic reads and writes to it to enforce serialization.
 */
static unsigned long __force_order;

static inline unsigned long native_read_cr0(void)
{
	unsigned long val;
	asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
	return val;
}

static inline void native_write_cr0(unsigned long val)
{
	asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
}

static inline unsigned long native_read_cr2(void)
{
	unsigned long val;
	asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
	return val;
}

static inline void native_write_cr2(unsigned long val)
{
	asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
}

static inline unsigned long native_read_cr3(void)
{
	unsigned long val;
	asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
	return val;
}

static inline void native_write_cr3(unsigned long val)
{
	asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
}

static inline unsigned long native_read_cr4(void)
{
	unsigned long val;
	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
	return val;
}

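/*
 * Illustrative note (not from the original source): the dummy
 * __force_order operand in the accessors above gives the compiler a
 * visible dependency between successive control-register accesses, so
 * that, for example,
 *
 *	write_cr3(pgd_pa);	// hypothetical caller
 *	cr0 = read_cr0();
 *
 * stays in program order without paying for a full "memory" clobber on
 * every access.
 */
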
static inline unsigned long native_read_cr4_safe(void)
{
	unsigned long val;
	/*
	 * This could fault if %cr4 does not exist. On x86_64, CR4 always
	 * exists, so it will never fail.
	 */
#ifdef CONFIG_X86_32
	asm volatile("1: mov %%cr4, %0\n"
		     "2:\n"
		     _ASM_EXTABLE(1b, 2b)
		     : "=r" (val), "=m" (__force_order) : "0" (0));
#else
	val = native_read_cr4();
#endif
	return val;
}

static inline void native_write_cr4(unsigned long val)
{
	asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
}

#ifdef CONFIG_X86_64
static inline unsigned long native_read_cr8(void)
{
	unsigned long cr8;
	asm volatile("movq %%cr8,%0" : "=r" (cr8));
	return cr8;
}

static inline void native_write_cr8(unsigned long val)
{
	asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
}
#endif

static inline void native_wbinvd(void)
{
	asm volatile("wbinvd": : :"memory");
}

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define read_cr0()	(native_read_cr0())
#define write_cr0(x)	(native_write_cr0(x))
#define read_cr2()	(native_read_cr2())
#define write_cr2(x)	(native_write_cr2(x))
#define read_cr3()	(native_read_cr3())
#define write_cr3(x)	(native_write_cr3(x))
#define read_cr4()	(native_read_cr4())
#define read_cr4_safe()	(native_read_cr4_safe())
#define write_cr4(x)	(native_write_cr4(x))
#define wbinvd()	(native_wbinvd())
#ifdef CONFIG_X86_64
#define read_cr8()	(native_read_cr8())
#define write_cr8(x)	(native_write_cr8(x))
#define load_gs_index	native_load_gs_index
#endif

/* Clear the 'TS' bit */
#define clts()		(native_clts())

#endif /* CONFIG_PARAVIRT */

#define stts() write_cr0(read_cr0() | X86_CR0_TS)

#endif /* __KERNEL__ */

static inline void clflush(volatile void *__p)
{
	asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
}

#define nop() asm volatile ("nop")

void disable_hlt(void);
void enable_hlt(void);

void cpu_idle_wait(void);

extern unsigned long arch_align_stack(unsigned long sp);
extern void free_init_pages(char *what, unsigned long begin, unsigned long end);

void default_idle(void);

/*
 * Force strict CPU ordering.
 * And yes, this is required on UP too when we're talking
 * to devices.
 */
#ifdef CONFIG_X86_32
/*
 * Some non-Intel clones support out-of-order store. wmb() ceases to be a
 * nop for these.
 */
#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
#else
#define mb()	asm volatile("mfence":::"memory")
#define rmb()	asm volatile("lfence":::"memory")
#define wmb()	asm volatile("sfence" ::: "memory")
#endif

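/*
 * Example (illustration only; the descriptor and doorbell names are made
 * up): wmb() matters even on a UP machine when the other observer is a
 * device rather than another CPU:
 *
 *	desc->addr = buf_dma;		// publish the DMA descriptor first
 *	desc->len  = len;
 *	wmb();				// stores above visible before the kick
 *	writel(1, dev->doorbell);	// then tell the device to fetch it
 */
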
/**
 * read_barrier_depends - Flush all pending reads that subsequent reads
 * depend on.
 *
 * No data-dependent reads from memory-like regions are ever reordered
 * over this barrier.  All reads preceding this primitive are guaranteed
 * to access memory (but not necessarily other CPUs' caches) before any
 * reads following this primitive that depend on the data returned by
 * any of the preceding reads.  This primitive is much lighter weight than
 * rmb() on most CPUs, and is never heavier weight than rmb().
 *
 * These ordering constraints are respected by both the local CPU
 * and the compiler.
 *
 * Ordering is not guaranteed by anything other than these primitives,
 * not even by data dependencies.  See the documentation for
 * memory_barrier() for examples and URLs to more information.
 *
 * For example, the following code would force ordering (the initial
 * value of "a" is zero, "b" is one, and "p" is "&a"):
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	b = 2;
 *	memory_barrier();
 *	p = &b;				q = p;
 *					read_barrier_depends();
 *					d = *q;
 * </programlisting>
 *
 * because the read of "*q" depends on the read of "p" and these
 * two reads are separated by a read_barrier_depends().  However,
 * the following code, with the same initial values for "a" and "b":
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	a = 2;
 *	memory_barrier();
 *	b = 3;				y = b;
 *					read_barrier_depends();
 *					x = a;
 * </programlisting>
 *
 * does not enforce ordering, since there is no data dependency between
 * the read of "a" and the read of "b".  Therefore, on some CPUs, such
 * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
 * in cases like this where there are no data dependencies.
 **/

#define read_barrier_depends()	do { } while (0)

#ifdef CONFIG_SMP
#define smp_mb()	mb()
#ifdef CONFIG_X86_PPRO_FENCE
# define smp_rmb()	rmb()
#else
# define smp_rmb()	barrier()
#endif
#ifdef CONFIG_X86_OOSTORE
# define smp_wmb()	wmb()
#else
# define smp_wmb()	barrier()
#endif
#define smp_read_barrier_depends()	read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else
#define smp_mb()	barrier()
#define smp_rmb()	barrier()
#define smp_wmb()	barrier()
#define smp_read_barrier_depends()	do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif

/*
 * Stop RDTSC speculation. This is needed when you want to use RDTSC
 * (or get_cycles or vread, which may access the TSC) inside a defined
 * code region.
 *
 * (Could use a three-way alternative() for this if there were one.)
 */
static inline void rdtsc_barrier(void)
{
	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
}

#endif /* _ASM_X86_SYSTEM_H_ */
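
/*
 * Usage sketch for rdtsc_barrier() (illustration only, not part of this
 * header): issuing the barrier on both sides of the TSC read keeps the
 * read from being speculated into, or hoisted out of, the region being
 * measured:
 *
 *	rdtsc_barrier();
 *	t0 = get_cycles();	// get_cycles() comes from <asm/tsc.h>
 *	rdtsc_barrier();
 *	// ... code under measurement ...
 *	rdtsc_barrier();
 *	t1 = get_cycles();
 *	rdtsc_barrier();
 */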