Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] i386: inline asm cleanup

i386 Inline asm cleanup. Use cr/dr accessor functions.

Also, a potential bugfix. Also, some CR accessors really should be volatile.
Reads from CR0 (numeric state may change in an exception handler), writes to
CR4 (flipping CR4.TSD) and reads from CR2 (page fault) prevent instruction
re-ordering. I did not add memory clobber to CR3 / CR4 / CR0 updates, as it
was not there to begin with, and in no case should kernel memory be clobbered,
except when doing a TLB flush, which already has memory clobber.

I noticed that page invalidation does not have a memory clobber. I can't find
a bug as a result, but there is definitely a potential for a bug here:

#define __flush_tlb_single(addr) \
__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Zachary Amsden; committed by Linus Torvalds.
4bb0d3ec 2a0694d1

+84 -83
+6 -6
arch/i386/kernel/cpu/common.c
··· 642 642 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); 643 643 644 644 /* Clear all 6 debug registers: */ 645 - 646 - #define CD(register) set_debugreg(0, register) 647 - 648 - CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); 649 - 650 - #undef CD 645 + set_debugreg(0, 0); 646 + set_debugreg(0, 1); 647 + set_debugreg(0, 2); 648 + set_debugreg(0, 3); 649 + set_debugreg(0, 6); 650 + set_debugreg(0, 7); 651 651 652 652 /* 653 653 * Force FPU initialization:
+3 -9
arch/i386/kernel/cpu/cpufreq/longhaul.c
··· 64 64 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) 65 65 66 66 67 - #define __hlt() __asm__ __volatile__("hlt": : :"memory") 68 - 69 67 /* Clock ratios multiplied by 10 */ 70 68 static int clock_ratio[32]; 71 69 static int eblcr_table[32]; ··· 166 168 outb(0xFE,0x21); /* TMR0 only */ 167 169 outb(0xFF,0x80); /* delay */ 168 170 169 - local_irq_enable(); 170 - 171 - __hlt(); 171 + safe_halt(); 172 172 wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); 173 - __hlt(); 173 + halt(); 174 174 175 175 local_irq_disable(); 176 176 ··· 247 251 bcr2.bits.CLOCKMUL = clock_ratio_index; 248 252 local_irq_disable(); 249 253 wrmsrl (MSR_VIA_BCR2, bcr2.val); 250 - local_irq_enable(); 251 - 252 - __hlt(); 254 + safe_halt(); 253 255 254 256 /* Disable software clock multiplier */ 255 257 rdmsrl (MSR_VIA_BCR2, bcr2.val);
+1 -5
arch/i386/kernel/cpu/cyrix.c
··· 132 132 setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); 133 133 /* set 'Not Write-through' */ 134 134 cr0 = 0x20000000; 135 - __asm__("movl %%cr0,%%eax\n\t" 136 - "orl %0,%%eax\n\t" 137 - "movl %%eax,%%cr0\n" 138 - : : "r" (cr0) 139 - :"ax"); 135 + write_cr0(read_cr0() | cr0); 140 136 /* CCR2 bit 2: lock NW bit and set WT1 */ 141 137 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); 142 138 }
+2 -2
arch/i386/kernel/efi.c
··· 79 79 * directory. If I have PSE, I just need to duplicate one entry in 80 80 * page directory. 81 81 */ 82 - __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); 82 + cr4 = read_cr4(); 83 83 84 84 if (cr4 & X86_CR4_PSE) { 85 85 efi_bak_pg_dir_pointer[0].pgd = ··· 115 115 cpu_gdt_descr[0].address = 116 116 (unsigned long) __va(cpu_gdt_descr[0].address); 117 117 __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); 118 - __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); 118 + cr4 = read_cr4(); 119 119 120 120 if (cr4 & X86_CR4_PSE) { 121 121 swapper_pg_dir[pgd_index(0)].pgd =
+1 -7
arch/i386/kernel/machine_kexec.c
··· 17 17 #include <asm/apic.h> 18 18 #include <asm/cpufeature.h> 19 19 #include <asm/desc.h> 20 - 21 - static inline unsigned long read_cr3(void) 22 - { 23 - unsigned long cr3; 24 - asm volatile("movl %%cr3,%0": "=r"(cr3)); 25 - return cr3; 26 - } 20 + #include <asm/system.h> 27 21 28 22 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) 29 23
+6 -10
arch/i386/kernel/process.c
··· 313 313 printk(" DS: %04x ES: %04x\n", 314 314 0xffff & regs->xds,0xffff & regs->xes); 315 315 316 - __asm__("movl %%cr0, %0": "=r" (cr0)); 317 - __asm__("movl %%cr2, %0": "=r" (cr2)); 318 - __asm__("movl %%cr3, %0": "=r" (cr3)); 319 - /* This could fault if %cr4 does not exist */ 320 - __asm__("1: movl %%cr4, %0 \n" 321 - "2: \n" 322 - ".section __ex_table,\"a\" \n" 323 - ".long 1b,2b \n" 324 - ".previous \n" 325 - : "=r" (cr4): "0" (0)); 316 + cr0 = read_cr0(); 317 + cr2 = read_cr2(); 318 + cr3 = read_cr3(); 319 + if (current_cpu_data.x86 > 4) { 320 + cr4 = read_cr4(); 321 + } 326 322 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); 327 323 show_trace(NULL, &regs->esp); 328 324 }
+1 -1
arch/i386/kernel/smp.c
··· 576 576 local_irq_disable(); 577 577 disable_local_APIC(); 578 578 if (cpu_data[smp_processor_id()].hlt_works_ok) 579 - for(;;) __asm__("hlt"); 579 + for(;;) halt(); 580 580 for (;;); 581 581 } 582 582
+3 -3
arch/i386/mm/fault.c
··· 233 233 int write, si_code; 234 234 235 235 /* get the address */ 236 - __asm__("movl %%cr2,%0":"=r" (address)); 236 + address = read_cr2(); 237 237 238 238 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 239 239 SIGSEGV) == NOTIFY_STOP) ··· 453 453 printk(" at virtual address %08lx\n",address); 454 454 printk(KERN_ALERT " printing eip:\n"); 455 455 printk("%08lx\n", regs->eip); 456 - asm("movl %%cr3,%0":"=r" (page)); 456 + page = read_cr3(); 457 457 page = ((unsigned long *) __va(page))[address >> 22]; 458 458 printk(KERN_ALERT "*pde = %08lx\n", page); 459 459 /* ··· 526 526 pmd_t *pmd, *pmd_k; 527 527 pte_t *pte_k; 528 528 529 - asm("movl %%cr3,%0":"=r" (pgd_paddr)); 529 + pgd_paddr = read_cr3(); 530 530 pgd = index + (pgd_t *)__va(pgd_paddr); 531 531 pgd_k = init_mm.pgd + index; 532 532
+1 -1
arch/i386/mm/pageattr.c
··· 62 62 { 63 63 /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */ 64 64 if (boot_cpu_data.x86_model >= 4) 65 - asm volatile("wbinvd":::"memory"); 65 + wbinvd(); 66 66 /* Flush all to work around Errata in early athlons regarding 67 67 * large page flushing. 68 68 */
+8 -8
arch/i386/power/cpu.c
··· 57 57 /* 58 58 * control registers 59 59 */ 60 - asm volatile ("movl %%cr0, %0" : "=r" (ctxt->cr0)); 61 - asm volatile ("movl %%cr2, %0" : "=r" (ctxt->cr2)); 62 - asm volatile ("movl %%cr3, %0" : "=r" (ctxt->cr3)); 63 - asm volatile ("movl %%cr4, %0" : "=r" (ctxt->cr4)); 60 + ctxt->cr0 = read_cr0(); 61 + ctxt->cr2 = read_cr2(); 62 + ctxt->cr3 = read_cr3(); 63 + ctxt->cr4 = read_cr4(); 64 64 } 65 65 66 66 void save_processor_state(void) ··· 109 109 /* 110 110 * control registers 111 111 */ 112 - asm volatile ("movl %0, %%cr4" :: "r" (ctxt->cr4)); 113 - asm volatile ("movl %0, %%cr3" :: "r" (ctxt->cr3)); 114 - asm volatile ("movl %0, %%cr2" :: "r" (ctxt->cr2)); 115 - asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0)); 112 + write_cr4(ctxt->cr4); 113 + write_cr3(ctxt->cr3); 114 + write_cr2(ctxt->cr2); 115 + write_cr2(ctxt->cr0); 116 116 117 117 /* 118 118 * now restore the descriptor tables to their proper values

NOTE(review): new line 115 above introduces a bug — `write_cr2(ctxt->cr0);` should be `write_cr0(ctxt->cr0);`. As committed, CR0 is never restored on resume and CR2 is instead overwritten with the saved CR0 value. Presumably corrected in a follow-up kernel commit — verify against the repository history.
+1 -1
include/asm-i386/agp.h
··· 19 19 /* Could use CLFLUSH here if the cpu supports it. But then it would 20 20 need to be called for each cacheline of the whole page so it may not be 21 21 worth it. Would need a page for it. */ 22 - #define flush_agp_cache() asm volatile("wbinvd":::"memory") 22 + #define flush_agp_cache() wbinvd() 23 23 24 24 /* Convert a physical address to an address suitable for the GART. */ 25 25 #define phys_to_gart(x) (x)
+4 -1
include/asm-i386/bugs.h
··· 118 118 printk("disabled\n"); 119 119 return; 120 120 } 121 - __asm__ __volatile__("hlt ; hlt ; hlt ; hlt"); 121 + halt(); 122 + halt(); 123 + halt(); 124 + halt(); 122 125 printk("OK.\n"); 123 126 } 124 127
+9 -13
include/asm-i386/processor.h
··· 203 203 return edx; 204 204 } 205 205 206 - #define load_cr3(pgdir) \ 207 - asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir))) 208 - 206 + #define load_cr3(pgdir) write_cr3(__pa(pgdir)) 209 207 210 208 /* 211 209 * Intel CPU features in CR4 ··· 230 232 231 233 static inline void set_in_cr4 (unsigned long mask) 232 234 { 235 + unsigned cr4; 233 236 mmu_cr4_features |= mask; 234 - __asm__("movl %%cr4,%%eax\n\t" 235 - "orl %0,%%eax\n\t" 236 - "movl %%eax,%%cr4\n" 237 - : : "irg" (mask) 238 - :"ax"); 237 + cr4 = read_cr4(); 238 + cr4 |= mask; 239 + write_cr4(cr4); 239 240 } 240 241 241 242 static inline void clear_in_cr4 (unsigned long mask) 242 243 { 244 + unsigned cr4; 243 245 mmu_cr4_features &= ~mask; 244 - __asm__("movl %%cr4,%%eax\n\t" 245 - "andl %0,%%eax\n\t" 246 - "movl %%eax,%%cr4\n" 247 - : : "irg" (~mask) 248 - :"ax"); 246 + cr4 = read_cr4(); 247 + cr4 &= ~mask; 248 + write_cr4(cr4); 249 249 } 250 250 251 251 /*
+25 -3
include/asm-i386/system.h
··· 107 107 #define clts() __asm__ __volatile__ ("clts") 108 108 #define read_cr0() ({ \ 109 109 unsigned int __dummy; \ 110 - __asm__( \ 110 + __asm__ __volatile__( \ 111 111 "movl %%cr0,%0\n\t" \ 112 112 :"=r" (__dummy)); \ 113 113 __dummy; \ 114 114 }) 115 115 #define write_cr0(x) \ 116 - __asm__("movl %0,%%cr0": :"r" (x)); 116 + __asm__ __volatile__("movl %0,%%cr0": :"r" (x)); 117 + 118 + #define read_cr2() ({ \ 119 + unsigned int __dummy; \ 120 + __asm__ __volatile__( \ 121 + "movl %%cr2,%0\n\t" \ 122 + :"=r" (__dummy)); \ 123 + __dummy; \ 124 + }) 125 + #define write_cr2(x) \ 126 + __asm__ __volatile__("movl %0,%%cr2": :"r" (x)); 127 + 128 + #define read_cr3() ({ \ 129 + unsigned int __dummy; \ 130 + __asm__ ( \ 131 + "movl %%cr3,%0\n\t" \ 132 + :"=r" (__dummy)); \ 133 + __dummy; \ 134 + }) 135 + #define write_cr3(x) \ 136 + __asm__ __volatile__("movl %0,%%cr3": :"r" (x)); 117 137 118 138 #define read_cr4() ({ \ 119 139 unsigned int __dummy; \ ··· 143 123 __dummy; \ 144 124 }) 145 125 #define write_cr4(x) \ 146 - __asm__("movl %0,%%cr4": :"r" (x)); 126 + __asm__ __volatile__("movl %0,%%cr4": :"r" (x)); 147 127 #define stts() write_cr0(8 | read_cr0()) 148 128 149 129 #endif /* __KERNEL__ */ ··· 467 447 #define local_irq_enable() __asm__ __volatile__("sti": : :"memory") 468 448 /* used in the idle loop; sti takes one instruction cycle to complete */ 469 449 #define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") 450 + /* used when interrupts are already enabled or to shutdown the processor */ 451 + #define halt() __asm__ __volatile__("hlt": : :"memory") 470 452 471 453 #define irqs_disabled() \ 472 454 ({ \
+13 -13
include/asm-i386/xor.h
··· 535 535 536 536 #define XMMS_SAVE do { \ 537 537 preempt_disable(); \ 538 + cr0 = read_cr0(); \ 539 + clts(); \ 538 540 __asm__ __volatile__ ( \ 539 - "movl %%cr0,%0 ;\n\t" \ 540 - "clts ;\n\t" \ 541 - "movups %%xmm0,(%1) ;\n\t" \ 542 - "movups %%xmm1,0x10(%1) ;\n\t" \ 543 - "movups %%xmm2,0x20(%1) ;\n\t" \ 544 - "movups %%xmm3,0x30(%1) ;\n\t" \ 545 - : "=&r" (cr0) \ 541 + "movups %%xmm0,(%0) ;\n\t" \ 542 + "movups %%xmm1,0x10(%0) ;\n\t" \ 543 + "movups %%xmm2,0x20(%0) ;\n\t" \ 544 + "movups %%xmm3,0x30(%0) ;\n\t" \ 545 + : \ 546 546 : "r" (xmm_save) \ 547 547 : "memory"); \ 548 548 } while(0) ··· 550 550 #define XMMS_RESTORE do { \ 551 551 __asm__ __volatile__ ( \ 552 552 "sfence ;\n\t" \ 553 - "movups (%1),%%xmm0 ;\n\t" \ 554 - "movups 0x10(%1),%%xmm1 ;\n\t" \ 555 - "movups 0x20(%1),%%xmm2 ;\n\t" \ 556 - "movups 0x30(%1),%%xmm3 ;\n\t" \ 557 - "movl %0,%%cr0 ;\n\t" \ 553 + "movups (%0),%%xmm0 ;\n\t" \ 554 + "movups 0x10(%0),%%xmm1 ;\n\t" \ 555 + "movups 0x20(%0),%%xmm2 ;\n\t" \ 556 + "movups 0x30(%0),%%xmm3 ;\n\t" \ 558 557 : \ 559 - : "r" (cr0), "r" (xmm_save) \ 558 + : "r" (xmm_save) \ 560 559 : "memory"); \ 560 + write_cr0(cr0); \ 561 561 preempt_enable(); \ 562 562 } while(0) 563 563