at v2.6.12 482 lines 11 kB view raw
1#ifndef _I386_STRING_H_ 2#define _I386_STRING_H_ 3 4#ifdef __KERNEL__ 5#include <linux/config.h> 6/* 7 * On a 486 or Pentium, we are better off not using the 8 * byte string operations. But on a 386 or a PPro the 9 * byte string ops are faster than doing it by hand 10 * (MUCH faster on a Pentium). 11 */ 12 13/* 14 * This string-include defines all string functions as inline 15 * functions. Use gcc. It also assumes ds=es=data space, this should be 16 * normal. Most of the string-functions are rather heavily hand-optimized, 17 * see especially strsep,strstr,str[c]spn. They should work, but are not 18 * very easy to understand. Everything is done entirely within the register 19 * set, making the functions fast and clean. String instructions have been 20 * used through-out, making for "slightly" unclear code :-) 21 * 22 * NO Copyright (C) 1991, 1992 Linus Torvalds, 23 * consider these trivial functions to be PD. 24 */ 25 26/* AK: in fact I bet it would be better to move this stuff all out of line. 27 */ 28 29#define __HAVE_ARCH_STRCPY 30static inline char * strcpy(char * dest,const char *src) 31{ 32int d0, d1, d2; 33__asm__ __volatile__( 34 "1:\tlodsb\n\t" 35 "stosb\n\t" 36 "testb %%al,%%al\n\t" 37 "jne 1b" 38 : "=&S" (d0), "=&D" (d1), "=&a" (d2) 39 :"0" (src),"1" (dest) : "memory"); 40return dest; 41} 42 43#define __HAVE_ARCH_STRNCPY 44static inline char * strncpy(char * dest,const char *src,size_t count) 45{ 46int d0, d1, d2, d3; 47__asm__ __volatile__( 48 "1:\tdecl %2\n\t" 49 "js 2f\n\t" 50 "lodsb\n\t" 51 "stosb\n\t" 52 "testb %%al,%%al\n\t" 53 "jne 1b\n\t" 54 "rep\n\t" 55 "stosb\n" 56 "2:" 57 : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) 58 :"0" (src),"1" (dest),"2" (count) : "memory"); 59return dest; 60} 61 62#define __HAVE_ARCH_STRCAT 63static inline char * strcat(char * dest,const char * src) 64{ 65int d0, d1, d2, d3; 66__asm__ __volatile__( 67 "repne\n\t" 68 "scasb\n\t" 69 "decl %1\n" 70 "1:\tlodsb\n\t" 71 "stosb\n\t" 72 "testb %%al,%%al\n\t" 73 "jne 1b" 74 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) 75 : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu):"memory"); 76return dest; 77} 78 79#define __HAVE_ARCH_STRNCAT 80static inline char * strncat(char * dest,const char * src,size_t count) 81{ 82int d0, d1, d2, d3; 83__asm__ __volatile__( 84 "repne\n\t" 85 "scasb\n\t" 86 "decl %1\n\t" 87 "movl %8,%3\n" 88 "1:\tdecl %3\n\t" 89 "js 2f\n\t" 90 "lodsb\n\t" 91 "stosb\n\t" 92 "testb %%al,%%al\n\t" 93 "jne 1b\n" 94 "2:\txorl %2,%2\n\t" 95 "stosb" 96 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) 97 : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count) 98 : "memory"); 99return dest; 100} 101 102#define __HAVE_ARCH_STRCMP 103static inline int strcmp(const char * cs,const char * ct) 104{ 105int d0, d1; 106register int __res; 107__asm__ __volatile__( 108 "1:\tlodsb\n\t" 109 "scasb\n\t" 110 "jne 2f\n\t" 111 "testb %%al,%%al\n\t" 112 "jne 1b\n\t" 113 "xorl %%eax,%%eax\n\t" 114 "jmp 3f\n" 115 "2:\tsbbl %%eax,%%eax\n\t" 116 "orb $1,%%al\n" 117 "3:" 118 :"=a" (__res), "=&S" (d0), "=&D" (d1) 119 :"1" (cs),"2" (ct)); 120return __res; 121} 122 123#define __HAVE_ARCH_STRNCMP 124static inline int strncmp(const char * cs,const char * ct,size_t count) 125{ 126register int __res; 127int d0, d1, d2; 128__asm__ __volatile__( 129 "1:\tdecl %3\n\t" 130 "js 2f\n\t" 131 "lodsb\n\t" 132 "scasb\n\t" 133 "jne 3f\n\t" 134 "testb %%al,%%al\n\t" 135 "jne 1b\n" 136 "2:\txorl %%eax,%%eax\n\t" 137 "jmp 4f\n" 138 "3:\tsbbl %%eax,%%eax\n\t" 139 "orb $1,%%al\n" 140 "4:" 141 :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) 142 :"1" (cs),"2" (ct),"3" (count)); 143return __res; 144} 145 146#define __HAVE_ARCH_STRCHR 147static inline char * strchr(const char * s, int c) 148{ 149int d0; 150register char * __res; 151__asm__ __volatile__( 152 "movb %%al,%%ah\n" 153 "1:\tlodsb\n\t" 154 "cmpb %%ah,%%al\n\t" 155 "je 2f\n\t" 156 "testb %%al,%%al\n\t" 157 "jne 1b\n\t" 158 "movl $1,%1\n" 159 "2:\tmovl %1,%0\n\t" 160 "decl %0" 161 :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)); 162return __res; 163} 164 165#define __HAVE_ARCH_STRRCHR 166static inline char * strrchr(const char * s, int c) 167{ 168int d0, d1; 169register char * __res; 170__asm__ __volatile__( 171 "movb %%al,%%ah\n" 172 "1:\tlodsb\n\t" 173 "cmpb %%ah,%%al\n\t" 174 "jne 2f\n\t" 175 "leal -1(%%esi),%0\n" 176 "2:\ttestb %%al,%%al\n\t" 177 "jne 1b" 178 :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)); 179return __res; 180} 181 182#define __HAVE_ARCH_STRLEN 183static inline size_t strlen(const char * s) 184{ 185int d0; 186register int __res; 187__asm__ __volatile__( 188 "repne\n\t" 189 "scasb\n\t" 190 "notl %0\n\t" 191 "decl %0" 192 :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffffu)); 193return __res; 194} 195 196static inline void * __memcpy(void * to, const void * from, size_t n) 197{ 198int d0, d1, d2; 199__asm__ __volatile__( 200 "rep ; movsl\n\t" 201 "movl %4,%%ecx\n\t" 202 "andl $3,%%ecx\n\t" 203#if 1 /* want to pay 2 byte penalty for a chance to skip microcoded rep? */ 204 "jz 1f\n\t" 205#endif 206 "rep ; movsb\n\t" 207 "1:" 208 : "=&c" (d0), "=&D" (d1), "=&S" (d2) 209 : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from) 210 : "memory"); 211return (to); 212} 213 214/* 215 * This looks ugly, but the compiler can optimize it totally, 216 * as the count is constant. 217 */ 218static inline void * __constant_memcpy(void * to, const void * from, size_t n) 219{ 220 long esi, edi; 221 if (!n) return to; 222#if 1 /* want to do small copies with non-string ops? */ 223 switch (n) { 224 case 1: *(char*)to = *(char*)from; return to; 225 case 2: *(short*)to = *(short*)from; return to; 226 case 4: *(int*)to = *(int*)from; return to; 227#if 1 /* including those doable with two moves? */ 228 case 3: *(short*)to = *(short*)from; 229 *((char*)to+2) = *((char*)from+2); return to; 230 case 5: *(int*)to = *(int*)from; 231 *((char*)to+4) = *((char*)from+4); return to; 232 case 6: *(int*)to = *(int*)from; 233 *((short*)to+2) = *((short*)from+2); return to; 234 case 8: *(int*)to = *(int*)from; 235 *((int*)to+1) = *((int*)from+1); return to; 236#endif 237 } 238#endif 239 esi = (long) from; 240 edi = (long) to; 241 if (n >= 5*4) { 242 /* large block: use rep prefix */ 243 int ecx; 244 __asm__ __volatile__( 245 "rep ; movsl" 246 : "=&c" (ecx), "=&D" (edi), "=&S" (esi) 247 : "0" (n/4), "1" (edi),"2" (esi) 248 : "memory" 249 ); 250 } else { 251 /* small block: don't clobber ecx + smaller code */ 252 if (n >= 4*4) __asm__ __volatile__("movsl" 253 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 254 if (n >= 3*4) __asm__ __volatile__("movsl" 255 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 256 if (n >= 2*4) __asm__ __volatile__("movsl" 257 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 258 if (n >= 1*4) __asm__ __volatile__("movsl" 259 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 260 } 261 switch (n % 4) { 262 /* tail */ 263 case 0: return to; 264 case 1: __asm__ __volatile__("movsb" 265 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 266 return to; 267 case 2: __asm__ __volatile__("movsw" 268 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 269 return to; 270 default: __asm__ __volatile__("movsw\n\tmovsb" 271 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 272 return to; 273 } 274} 275 276#define __HAVE_ARCH_MEMCPY 277 278#ifdef CONFIG_X86_USE_3DNOW 279 280#include <asm/mmx.h> 281 282/* 283 * This CPU favours 3DNow strongly (eg AMD Athlon) 284 */ 285 286static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) 287{ 288 if (len < 512) 289 return __constant_memcpy(to, from, len); 290 return _mmx_memcpy(to, from, len); 291} 292 293static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) 294{ 295 if (len < 512) 296 return __memcpy(to, from, len); 297 return _mmx_memcpy(to, from, len); 298} 299 300#define memcpy(t, f, n) \ 301(__builtin_constant_p(n) ? \ 302 __constant_memcpy3d((t),(f),(n)) : \ 303 __memcpy3d((t),(f),(n))) 304 305#else 306 307/* 308 * No 3D Now! 309 */ 310 311#define memcpy(t, f, n) \ 312(__builtin_constant_p(n) ? \ 313 __constant_memcpy((t),(f),(n)) : \ 314 __memcpy((t),(f),(n))) 315 316#endif 317 318#define __HAVE_ARCH_MEMMOVE 319void *memmove(void * dest,const void * src, size_t n); 320 321#define memcmp __builtin_memcmp 322 323#define __HAVE_ARCH_MEMCHR 324static inline void * memchr(const void * cs,int c,size_t count) 325{ 326int d0; 327register void * __res; 328if (!count) 329 return NULL; 330__asm__ __volatile__( 331 "repne\n\t" 332 "scasb\n\t" 333 "je 1f\n\t" 334 "movl $1,%0\n" 335 "1:\tdecl %0" 336 :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count)); 337return __res; 338} 339 340static inline void * __memset_generic(void * s, char c,size_t count) 341{ 342int d0, d1; 343__asm__ __volatile__( 344 "rep\n\t" 345 "stosb" 346 : "=&c" (d0), "=&D" (d1) 347 :"a" (c),"1" (s),"0" (count) 348 :"memory"); 349return s; 350} 351 352/* we might want to write optimized versions of these later */ 353#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) 354 355/* 356 * memset(x,0,y) is a reasonably common thing to do, so we want to fill 357 * things 32 bits at a time even when we don't know the size of the 358 * area at compile-time.. 359 */ 360static inline void * __constant_c_memset(void * s, unsigned long c, size_t count) 361{ 362int d0, d1; 363__asm__ __volatile__( 364 "rep ; stosl\n\t" 365 "testb $2,%b3\n\t" 366 "je 1f\n\t" 367 "stosw\n" 368 "1:\ttestb $1,%b3\n\t" 369 "je 2f\n\t" 370 "stosb\n" 371 "2:" 372 : "=&c" (d0), "=&D" (d1) 373 :"a" (c), "q" (count), "0" (count/4), "1" ((long) s) 374 :"memory"); 375return (s); 376} 377 378/* Added by Gertjan van Wingerde to make minix and sysv module work */ 379#define __HAVE_ARCH_STRNLEN 380static inline size_t strnlen(const char * s, size_t count) 381{ 382int d0; 383register int __res; 384__asm__ __volatile__( 385 "movl %2,%0\n\t" 386 "jmp 2f\n" 387 "1:\tcmpb $0,(%0)\n\t" 388 "je 3f\n\t" 389 "incl %0\n" 390 "2:\tdecl %1\n\t" 391 "cmpl $-1,%1\n\t" 392 "jne 1b\n" 393 "3:\tsubl %2,%0" 394 :"=a" (__res), "=&d" (d0) 395 :"c" (s),"1" (count)); 396return __res; 397} 398/* end of additional stuff */ 399 400#define __HAVE_ARCH_STRSTR 401 402extern char *strstr(const char *cs, const char *ct); 403 404/* 405 * This looks horribly ugly, but the compiler can optimize it totally, 406 * as we by now know that both pattern and count is constant.. 407 */ 408static inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count) 409{ 410 switch (count) { 411 case 0: 412 return s; 413 case 1: 414 *(unsigned char *)s = pattern; 415 return s; 416 case 2: 417 *(unsigned short *)s = pattern; 418 return s; 419 case 3: 420 *(unsigned short *)s = pattern; 421 *(2+(unsigned char *)s) = pattern; 422 return s; 423 case 4: 424 *(unsigned long *)s = pattern; 425 return s; 426 } 427#define COMMON(x) \ 428__asm__ __volatile__( \ 429 "rep ; stosl" \ 430 x \ 431 : "=&c" (d0), "=&D" (d1) \ 432 : "a" (pattern),"0" (count/4),"1" ((long) s) \ 433 : "memory") 434{ 435 int d0, d1; 436 switch (count % 4) { 437 case 0: COMMON(""); return s; 438 case 1: COMMON("\n\tstosb"); return s; 439 case 2: COMMON("\n\tstosw"); return s; 440 default: COMMON("\n\tstosw\n\tstosb"); return s; 441 } 442} 443 444#undef COMMON 445} 446 447#define __constant_c_x_memset(s, c, count) \ 448(__builtin_constant_p(count) ? \ 449 __constant_c_and_count_memset((s),(c),(count)) : \ 450 __constant_c_memset((s),(c),(count))) 451 452#define __memset(s, c, count) \ 453(__builtin_constant_p(count) ? \ 454 __constant_count_memset((s),(c),(count)) : \ 455 __memset_generic((s),(c),(count))) 456 457#define __HAVE_ARCH_MEMSET 458#define memset(s, c, count) \ 459(__builtin_constant_p(c) ? \ 460 __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \ 461 __memset((s),(c),(count))) 462 463/* 464 * find the first occurrence of byte 'c', or 1 past the area if none 465 */ 466#define __HAVE_ARCH_MEMSCAN 467static inline void * memscan(void * addr, int c, size_t size) 468{ 469 if (!size) 470 return addr; 471 __asm__("repnz; scasb\n\t" 472 "jnz 1f\n\t" 473 "dec %%edi\n" 474 "1:" 475 : "=D" (addr), "=c" (size) 476 : "0" (addr), "1" (size), "a" (c)); 477 return addr; 478} 479 480#endif /* __KERNEL__ */ 481 482#endif