Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
arch/i386/lib/mmx.c at v2.6.15 · 404 lines · 8.5 kB
#include <linux/config.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/module.h>

#include <asm/i387.h>


/*
 *	MMX 3DNow! library helper functions
 *
 *	To do:
 *	We can use MMX just for prefetch in IRQs. This may be a win.
 *		(reported so on K6-III)
 *	We should use a better code-neutral filler for the short jump.
 *		lea ebx,[ebx] is apparently best for K6-2, but Cyrix ??
 *	We also want to clobber the filler register so we don't get any
 *		register forwarding stalls on the filler.
 *
 *	Add *user handling. Checksums are not a win with MMX on any CPU
 *	tested so far for any MMX solution figured.
 *
 *	22/09/2000 - Arjan van de Ven
 *		Improved for non-engineering-sample Athlons
 *
 */

void *_mmx_memcpy(void *to, const void *from, size_t len)
{
	void *p;
	int i;

	if (unlikely(in_interrupt()))
		return __memcpy(to, from, len);

	p = to;
	i = len >> 6;	/* len/64 */

	kernel_fpu_begin();

	__asm__ __volatile__ (
		"1: prefetch (%0)\n"		/* This set is 28 bytes */
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"		/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
		".section __ex_table,\"a\"\n"
		"	.align 4\n"
		"	.long 1b, 3b\n"
		".previous"
		: : "r" (from));

	for (; i > 5; i--) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"		/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
		".section __ex_table,\"a\"\n"
		"	.align 4\n"
		"	.long 1b, 3b\n"
		".previous"
		: : "r" (from), "r" (to) : "memory");
		from += 64;
		to += 64;
	}

	for (; i > 0; i--) {
		__asm__ __volatile__ (
		"   movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
		: : "r" (from), "r" (to) : "memory");
		from += 64;
		to += 64;
	}
	/*
	 * Now do the tail of the block
	 */
	__memcpy(to, from, len & 63);
	kernel_fpu_end();
	return p;
}

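/*
 * Editor's sketch, not part of the original file: the control flow of
 * _mmx_memcpy() above in plain C, with __memcpy() standing in for the
 * MMX load/store groups. Two details are easier to see in isolation:
 * the main loop stops five blocks early, which keeps the 320-byte
 * lookahead prefetch inside the source buffer, and the sub-64-byte tail
 * is finished with __memcpy(). On the fixup constants above: $0x1AEB
 * stored little-endian is the byte pair EB 1A, a short "jmp .+26" that
 * skips the rest of the 28-byte prefetch group, and $0x05EB is EB 05,
 * skipping the rest of the 7-byte "prefetch 320(%0)". So if prefetch
 * traps as an invalid opcode on a CPU without 3DNow!, the __ex_table
 * entry routes the trap to label 3, which patches the prefetch away
 * for every later call.
 */
static void *blocked_memcpy_sketch(void *to, const void *from, size_t len)
{
	char *d = to;
	const char *s = from;
	int i = len >> 6;		/* number of 64-byte blocks */

	for (; i > 5; i--) {		/* body, prefetching 5 blocks ahead */
		__memcpy(d, s, 64);
		d += 64;
		s += 64;
	}
	for (; i > 0; i--) {		/* final blocks, no lookahead */
		__memcpy(d, s, 64);
		d += 64;
		s += 64;
	}
	__memcpy(d, s, len & 63);	/* tail */
	return to;
}
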
#ifdef CONFIG_MK7

/*
 *	The K7 has streaming cache-bypass load/store. The Cyrix III, K6 and
 *	other MMX-using processors do not.
 */

static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin();

	__asm__ __volatile__ (
		"   pxor %%mm0, %%mm0\n" : :
	);

	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"   movntq %%mm0, (%0)\n"
		"   movntq %%mm0, 8(%0)\n"
		"   movntq %%mm0, 16(%0)\n"
		"   movntq %%mm0, 24(%0)\n"
		"   movntq %%mm0, 32(%0)\n"
		"   movntq %%mm0, 40(%0)\n"
		"   movntq %%mm0, 48(%0)\n"
		"   movntq %%mm0, 56(%0)\n"
		: : "r" (page) : "memory");
		page += 64;
	}
	/*
	 * Since movntq is weakly ordered, an "sfence" is needed to make
	 * the stores ordered again.
	 */
	__asm__ __volatile__ (
		"   sfence\n" : :
	);
	kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
	int i;

	kernel_fpu_begin();

	/*
	 * Maybe the prefetch stuff can go before the expensive fnsave...
	 * but that is for later. -AV
	 */
	__asm__ __volatile__ (
		"1: prefetch (%0)\n"
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"		/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
		".section __ex_table,\"a\"\n"
		"	.align 4\n"
		"	.long 1b, 3b\n"
		".previous"
		: : "r" (from));

	for (i = 0; i < (4096-320)/64; i++) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movntq %%mm0, (%1)\n"
		"   movq 8(%0), %%mm1\n"
		"   movntq %%mm1, 8(%1)\n"
		"   movq 16(%0), %%mm2\n"
		"   movntq %%mm2, 16(%1)\n"
		"   movq 24(%0), %%mm3\n"
		"   movntq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm4\n"
		"   movntq %%mm4, 32(%1)\n"
		"   movq 40(%0), %%mm5\n"
		"   movntq %%mm5, 40(%1)\n"
		"   movq 48(%0), %%mm6\n"
		"   movntq %%mm6, 48(%1)\n"
		"   movq 56(%0), %%mm7\n"
		"   movntq %%mm7, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"		/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
		".section __ex_table,\"a\"\n"
		"	.align 4\n"
		"	.long 1b, 3b\n"
		".previous"
		: : "r" (from), "r" (to) : "memory");
		from += 64;
		to += 64;
	}
	for (i = (4096-320)/64; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"2: movq (%0), %%mm0\n"
		"   movntq %%mm0, (%1)\n"
		"   movq 8(%0), %%mm1\n"
		"   movntq %%mm1, 8(%1)\n"
		"   movq 16(%0), %%mm2\n"
		"   movntq %%mm2, 16(%1)\n"
		"   movq 24(%0), %%mm3\n"
		"   movntq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm4\n"
		"   movntq %%mm4, 32(%1)\n"
		"   movq 40(%0), %%mm5\n"
		"   movntq %%mm5, 40(%1)\n"
		"   movq 48(%0), %%mm6\n"
		"   movntq %%mm6, 48(%1)\n"
		"   movq 56(%0), %%mm7\n"
		"   movntq %%mm7, 56(%1)\n"
		: : "r" (from), "r" (to) : "memory");
		from += 64;
		to += 64;
	}
	/*
	 * Since movntq is weakly ordered, an "sfence" is needed to make
	 * the stores ordered again.
	 */
	__asm__ __volatile__ (
		"   sfence\n" : :
	);
	kernel_fpu_end();
}

#else

/*
 * Generic MMX implementation without K7-specific streaming
 */

static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin();

	__asm__ __volatile__ (
		"   pxor %%mm0, %%mm0\n" : :
	);

	for (i = 0; i < 4096/128; i++) {
		__asm__ __volatile__ (
		"   movq %%mm0, (%0)\n"
		"   movq %%mm0, 8(%0)\n"
		"   movq %%mm0, 16(%0)\n"
		"   movq %%mm0, 24(%0)\n"
		"   movq %%mm0, 32(%0)\n"
		"   movq %%mm0, 40(%0)\n"
		"   movq %%mm0, 48(%0)\n"
		"   movq %%mm0, 56(%0)\n"
		"   movq %%mm0, 64(%0)\n"
		"   movq %%mm0, 72(%0)\n"
		"   movq %%mm0, 80(%0)\n"
		"   movq %%mm0, 88(%0)\n"
		"   movq %%mm0, 96(%0)\n"
		"   movq %%mm0, 104(%0)\n"
		"   movq %%mm0, 112(%0)\n"
		"   movq %%mm0, 120(%0)\n"
		: : "r" (page) : "memory");
		page += 128;
	}

	kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
	int i;

	kernel_fpu_begin();

	__asm__ __volatile__ (
		"1: prefetch (%0)\n"
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"		/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
		".section __ex_table,\"a\"\n"
		"	.align 4\n"
		"	.long 1b, 3b\n"
		".previous"
		: : "r" (from));

	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"		/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
		".section __ex_table,\"a\"\n"
		"	.align 4\n"
		"	.long 1b, 3b\n"
		".previous"
		: : "r" (from), "r" (to) : "memory");
		from += 64;
		to += 64;
	}
	kernel_fpu_end();
}

#endif

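/*
 * Editor's sketch, not part of the original file: the same non-temporal
 * clear written with compiler intrinsics, assuming an ordinary userspace
 * build with gcc -msse (the names below are standard <xmmintrin.h>
 * intrinsics, not kernel APIs). _mm_stream_pi() compiles to movntq and
 * _mm_sfence() to sfence, mirroring the CONFIG_MK7 fast_clear_page():
 * the streaming stores bypass the cache and are weakly ordered, so the
 * fence is what makes them globally visible in order.
 */
#include <xmmintrin.h>

static void stream_clear_page_sketch(void *page)
{
	__m64 zero = _mm_setzero_si64();
	__m64 *p = (__m64 *)page;
	int i;

	for (i = 0; i < 4096 / 8; i++)
		_mm_stream_pi(p + i, zero);	/* movntq: cache-bypassing store */
	_mm_sfence();				/* order the streaming stores */
	_mm_empty();				/* emms: hand the FP state back */
}
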
/*
 * Favour MMX for page clear and copy.
 */

static void slow_zero_page(void *page)
{
	int d0, d1;
	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosl"
		: "=&c" (d0), "=&D" (d1)
		: "a" (0), "1" (page), "0" (1024)
		: "memory");
}

void mmx_clear_page(void *page)
{
	if (unlikely(in_interrupt()))
		slow_zero_page(page);
	else
		fast_clear_page(page);
}

static void slow_copy_page(void *to, void *from)
{
	int d0, d1, d2;
	__asm__ __volatile__(
		"cld\n\t"
		"rep ; movsl"
		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
		: "0" (1024), "1" ((long) to), "2" ((long) from)
		: "memory");
}

void mmx_copy_page(void *to, void *from)
{
	if (unlikely(in_interrupt()))
		slow_copy_page(to, from);
	else
		fast_copy_page(to, from);
}

EXPORT_SYMBOL(_mmx_memcpy);
EXPORT_SYMBOL(mmx_clear_page);
EXPORT_SYMBOL(mmx_copy_page);
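
/*
 * Editor's note, not part of the original file: how these exports are
 * reached (recalled from 2.6-era headers, so treat the exact spellings
 * as approximate). When the selected CPU enables CONFIG_X86_USE_3DNOW,
 * include/asm-i386/page.h points the generic page primitives here:
 *
 *	#define clear_page(page)	mmx_clear_page((void *)(page))
 *	#define copy_page(to, from)	mmx_copy_page(to, from)
 *
 * and include/asm-i386/string.h routes large memcpy() calls to
 * _mmx_memcpy(), keeping short copies on the plain __memcpy() path.
 */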