at v2.6.16 494 lines 11 kB view raw
1#ifndef _I386_STRING_H_ 2#define _I386_STRING_H_ 3 4#ifdef __KERNEL__ 5#include <linux/config.h> 6/* 7 * On a 486 or Pentium, we are better off not using the 8 * byte string operations. But on a 386 or a PPro the 9 * byte string ops are faster than doing it by hand 10 * (MUCH faster on a Pentium). 11 */ 12 13/* 14 * This string-include defines all string functions as inline 15 * functions. Use gcc. It also assumes ds=es=data space, this should be 16 * normal. Most of the string-functions are rather heavily hand-optimized, 17 * see especially strsep,strstr,str[c]spn. They should work, but are not 18 * very easy to understand. Everything is done entirely within the register 19 * set, making the functions fast and clean. String instructions have been 20 * used through-out, making for "slightly" unclear code :-) 21 * 22 * NO Copyright (C) 1991, 1992 Linus Torvalds, 23 * consider these trivial functions to be PD. 24 */ 25 26/* AK: in fact I bet it would be better to move this stuff all out of line. 27 */ 28 29#define __HAVE_ARCH_STRCPY 30static inline char * strcpy(char * dest,const char *src) 31{ 32int d0, d1, d2; 33__asm__ __volatile__( 34 "1:\tlodsb\n\t" 35 "stosb\n\t" 36 "testb %%al,%%al\n\t" 37 "jne 1b" 38 : "=&S" (d0), "=&D" (d1), "=&a" (d2) 39 :"0" (src),"1" (dest) : "memory"); 40return dest; 41} 42 43#define __HAVE_ARCH_STRNCPY 44static inline char * strncpy(char * dest,const char *src,size_t count) 45{ 46int d0, d1, d2, d3; 47__asm__ __volatile__( 48 "1:\tdecl %2\n\t" 49 "js 2f\n\t" 50 "lodsb\n\t" 51 "stosb\n\t" 52 "testb %%al,%%al\n\t" 53 "jne 1b\n\t" 54 "rep\n\t" 55 "stosb\n" 56 "2:" 57 : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) 58 :"0" (src),"1" (dest),"2" (count) : "memory"); 59return dest; 60} 61 62#define __HAVE_ARCH_STRCAT 63static inline char * strcat(char * dest,const char * src) 64{ 65int d0, d1, d2, d3; 66__asm__ __volatile__( 67 "repne\n\t" 68 "scasb\n\t" 69 "decl %1\n" 70 "1:\tlodsb\n\t" 71 "stosb\n\t" 72 "testb %%al,%%al\n\t" 73 "jne 1b" 74 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) 75 : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu):"memory"); 76return dest; 77} 78 79#define __HAVE_ARCH_STRNCAT 80static inline char * strncat(char * dest,const char * src,size_t count) 81{ 82int d0, d1, d2, d3; 83__asm__ __volatile__( 84 "repne\n\t" 85 "scasb\n\t" 86 "decl %1\n\t" 87 "movl %8,%3\n" 88 "1:\tdecl %3\n\t" 89 "js 2f\n\t" 90 "lodsb\n\t" 91 "stosb\n\t" 92 "testb %%al,%%al\n\t" 93 "jne 1b\n" 94 "2:\txorl %2,%2\n\t" 95 "stosb" 96 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) 97 : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count) 98 : "memory"); 99return dest; 100} 101 102#define __HAVE_ARCH_STRCMP 103static inline int strcmp(const char * cs,const char * ct) 104{ 105int d0, d1; 106register int __res; 107__asm__ __volatile__( 108 "1:\tlodsb\n\t" 109 "scasb\n\t" 110 "jne 2f\n\t" 111 "testb %%al,%%al\n\t" 112 "jne 1b\n\t" 113 "xorl %%eax,%%eax\n\t" 114 "jmp 3f\n" 115 "2:\tsbbl %%eax,%%eax\n\t" 116 "orb $1,%%al\n" 117 "3:" 118 :"=a" (__res), "=&S" (d0), "=&D" (d1) 119 :"1" (cs),"2" (ct) 120 :"memory"); 121return __res; 122} 123 124#define __HAVE_ARCH_STRNCMP 125static inline int strncmp(const char * cs,const char * ct,size_t count) 126{ 127register int __res; 128int d0, d1, d2; 129__asm__ __volatile__( 130 "1:\tdecl %3\n\t" 131 "js 2f\n\t" 132 "lodsb\n\t" 133 "scasb\n\t" 134 "jne 3f\n\t" 135 "testb %%al,%%al\n\t" 136 "jne 1b\n" 137 "2:\txorl %%eax,%%eax\n\t" 138 "jmp 4f\n" 139 "3:\tsbbl %%eax,%%eax\n\t" 140 "orb $1,%%al\n" 141 "4:" 142 :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) 143 :"1" (cs),"2" (ct),"3" (count) 144 :"memory"); 145return __res; 146} 147 148#define __HAVE_ARCH_STRCHR 149static inline char * strchr(const char * s, int c) 150{ 151int d0; 152register char * __res; 153__asm__ __volatile__( 154 "movb %%al,%%ah\n" 155 "1:\tlodsb\n\t" 156 "cmpb %%ah,%%al\n\t" 157 "je 2f\n\t" 158 "testb %%al,%%al\n\t" 159 "jne 1b\n\t" 160 "movl $1,%1\n" 161 "2:\tmovl %1,%0\n\t" 162 "decl %0" 163 :"=a" (__res), "=&S" (d0) 164 :"1" (s),"0" (c) 165 :"memory"); 166return __res; 167} 168 169#define __HAVE_ARCH_STRRCHR 170static inline char * strrchr(const char * s, int c) 171{ 172int d0, d1; 173register char * __res; 174__asm__ __volatile__( 175 "movb %%al,%%ah\n" 176 "1:\tlodsb\n\t" 177 "cmpb %%ah,%%al\n\t" 178 "jne 2f\n\t" 179 "leal -1(%%esi),%0\n" 180 "2:\ttestb %%al,%%al\n\t" 181 "jne 1b" 182 :"=g" (__res), "=&S" (d0), "=&a" (d1) 183 :"0" (0),"1" (s),"2" (c) 184 :"memory"); 185return __res; 186} 187 188#define __HAVE_ARCH_STRLEN 189static inline size_t strlen(const char * s) 190{ 191int d0; 192register int __res; 193__asm__ __volatile__( 194 "repne\n\t" 195 "scasb\n\t" 196 "notl %0\n\t" 197 "decl %0" 198 :"=c" (__res), "=&D" (d0) 199 :"1" (s),"a" (0), "0" (0xffffffffu) 200 :"memory"); 201return __res; 202} 203 204static __always_inline void * __memcpy(void * to, const void * from, size_t n) 205{ 206int d0, d1, d2; 207__asm__ __volatile__( 208 "rep ; movsl\n\t" 209 "movl %4,%%ecx\n\t" 210 "andl $3,%%ecx\n\t" 211#if 1 /* want to pay 2 byte penalty for a chance to skip microcoded rep? */ 212 "jz 1f\n\t" 213#endif 214 "rep ; movsb\n\t" 215 "1:" 216 : "=&c" (d0), "=&D" (d1), "=&S" (d2) 217 : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from) 218 : "memory"); 219return (to); 220} 221 222/* 223 * This looks ugly, but the compiler can optimize it totally, 224 * as the count is constant. 225 */ 226static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n) 227{ 228 long esi, edi; 229 if (!n) return to; 230#if 1 /* want to do small copies with non-string ops? */ 231 switch (n) { 232 case 1: *(char*)to = *(char*)from; return to; 233 case 2: *(short*)to = *(short*)from; return to; 234 case 4: *(int*)to = *(int*)from; return to; 235#if 1 /* including those doable with two moves? */ 236 case 3: *(short*)to = *(short*)from; 237 *((char*)to+2) = *((char*)from+2); return to; 238 case 5: *(int*)to = *(int*)from; 239 *((char*)to+4) = *((char*)from+4); return to; 240 case 6: *(int*)to = *(int*)from; 241 *((short*)to+2) = *((short*)from+2); return to; 242 case 8: *(int*)to = *(int*)from; 243 *((int*)to+1) = *((int*)from+1); return to; 244#endif 245 } 246#endif 247 esi = (long) from; 248 edi = (long) to; 249 if (n >= 5*4) { 250 /* large block: use rep prefix */ 251 int ecx; 252 __asm__ __volatile__( 253 "rep ; movsl" 254 : "=&c" (ecx), "=&D" (edi), "=&S" (esi) 255 : "0" (n/4), "1" (edi),"2" (esi) 256 : "memory" 257 ); 258 } else { 259 /* small block: don't clobber ecx + smaller code */ 260 if (n >= 4*4) __asm__ __volatile__("movsl" 261 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 262 if (n >= 3*4) __asm__ __volatile__("movsl" 263 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 264 if (n >= 2*4) __asm__ __volatile__("movsl" 265 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 266 if (n >= 1*4) __asm__ __volatile__("movsl" 267 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 268 } 269 switch (n % 4) { 270 /* tail */ 271 case 0: return to; 272 case 1: __asm__ __volatile__("movsb" 273 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 274 return to; 275 case 2: __asm__ __volatile__("movsw" 276 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 277 return to; 278 default: __asm__ __volatile__("movsw\n\tmovsb" 279 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 280 return to; 281 } 282} 283 284#define __HAVE_ARCH_MEMCPY 285 286#ifdef CONFIG_X86_USE_3DNOW 287 288#include <asm/mmx.h> 289 290/* 291 * This CPU favours 3DNow strongly (eg AMD Athlon) 292 */ 293 294static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) 295{ 296 if (len < 512) 297 return __constant_memcpy(to, from, len); 298 return _mmx_memcpy(to, from, len); 299} 300 301static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) 302{ 303 if (len < 512) 304 return __memcpy(to, from, len); 305 return _mmx_memcpy(to, from, len); 306} 307 308#define memcpy(t, f, n) \ 309(__builtin_constant_p(n) ? \ 310 __constant_memcpy3d((t),(f),(n)) : \ 311 __memcpy3d((t),(f),(n))) 312 313#else 314 315/* 316 * No 3D Now! 317 */ 318 319#define memcpy(t, f, n) \ 320(__builtin_constant_p(n) ? \ 321 __constant_memcpy((t),(f),(n)) : \ 322 __memcpy((t),(f),(n))) 323 324#endif 325 326#define __HAVE_ARCH_MEMMOVE 327void *memmove(void * dest,const void * src, size_t n); 328 329#define memcmp __builtin_memcmp 330 331#define __HAVE_ARCH_MEMCHR 332static inline void * memchr(const void * cs,int c,size_t count) 333{ 334int d0; 335register void * __res; 336if (!count) 337 return NULL; 338__asm__ __volatile__( 339 "repne\n\t" 340 "scasb\n\t" 341 "je 1f\n\t" 342 "movl $1,%0\n" 343 "1:\tdecl %0" 344 :"=D" (__res), "=&c" (d0) 345 :"a" (c),"0" (cs),"1" (count) 346 :"memory"); 347return __res; 348} 349 350static inline void * __memset_generic(void * s, char c,size_t count) 351{ 352int d0, d1; 353__asm__ __volatile__( 354 "rep\n\t" 355 "stosb" 356 : "=&c" (d0), "=&D" (d1) 357 :"a" (c),"1" (s),"0" (count) 358 :"memory"); 359return s; 360} 361 362/* we might want to write optimized versions of these later */ 363#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) 364 365/* 366 * memset(x,0,y) is a reasonably common thing to do, so we want to fill 367 * things 32 bits at a time even when we don't know the size of the 368 * area at compile-time.. 369 */ 370static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count) 371{ 372int d0, d1; 373__asm__ __volatile__( 374 "rep ; stosl\n\t" 375 "testb $2,%b3\n\t" 376 "je 1f\n\t" 377 "stosw\n" 378 "1:\ttestb $1,%b3\n\t" 379 "je 2f\n\t" 380 "stosb\n" 381 "2:" 382 :"=&c" (d0), "=&D" (d1) 383 :"a" (c), "q" (count), "0" (count/4), "1" ((long) s) 384 :"memory"); 385return (s); 386} 387 388/* Added by Gertjan van Wingerde to make minix and sysv module work */ 389#define __HAVE_ARCH_STRNLEN 390static inline size_t strnlen(const char * s, size_t count) 391{ 392int d0; 393register int __res; 394__asm__ __volatile__( 395 "movl %2,%0\n\t" 396 "jmp 2f\n" 397 "1:\tcmpb $0,(%0)\n\t" 398 "je 3f\n\t" 399 "incl %0\n" 400 "2:\tdecl %1\n\t" 401 "cmpl $-1,%1\n\t" 402 "jne 1b\n" 403 "3:\tsubl %2,%0" 404 :"=a" (__res), "=&d" (d0) 405 :"c" (s),"1" (count) 406 :"memory"); 407return __res; 408} 409/* end of additional stuff */ 410 411#define __HAVE_ARCH_STRSTR 412 413extern char *strstr(const char *cs, const char *ct); 414 415/* 416 * This looks horribly ugly, but the compiler can optimize it totally, 417 * as we by now know that both pattern and count is constant.. 418 */ 419static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count) 420{ 421 switch (count) { 422 case 0: 423 return s; 424 case 1: 425 *(unsigned char *)s = pattern; 426 return s; 427 case 2: 428 *(unsigned short *)s = pattern; 429 return s; 430 case 3: 431 *(unsigned short *)s = pattern; 432 *(2+(unsigned char *)s) = pattern; 433 return s; 434 case 4: 435 *(unsigned long *)s = pattern; 436 return s; 437 } 438#define COMMON(x) \ 439__asm__ __volatile__( \ 440 "rep ; stosl" \ 441 x \ 442 : "=&c" (d0), "=&D" (d1) \ 443 : "a" (pattern),"0" (count/4),"1" ((long) s) \ 444 : "memory") 445{ 446 int d0, d1; 447 switch (count % 4) { 448 case 0: COMMON(""); return s; 449 case 1: COMMON("\n\tstosb"); return s; 450 case 2: COMMON("\n\tstosw"); return s; 451 default: COMMON("\n\tstosw\n\tstosb"); return s; 452 } 453} 454 455#undef COMMON 456} 457 458#define __constant_c_x_memset(s, c, count) \ 459(__builtin_constant_p(count) ? \ 460 __constant_c_and_count_memset((s),(c),(count)) : \ 461 __constant_c_memset((s),(c),(count))) 462 463#define __memset(s, c, count) \ 464(__builtin_constant_p(count) ? \ 465 __constant_count_memset((s),(c),(count)) : \ 466 __memset_generic((s),(c),(count))) 467 468#define __HAVE_ARCH_MEMSET 469#define memset(s, c, count) \ 470(__builtin_constant_p(c) ? \ 471 __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \ 472 __memset((s),(c),(count))) 473 474/* 475 * find the first occurrence of byte 'c', or 1 past the area if none 476 */ 477#define __HAVE_ARCH_MEMSCAN 478static inline void * memscan(void * addr, int c, size_t size) 479{ 480 if (!size) 481 return addr; 482 __asm__("repnz; scasb\n\t" 483 "jnz 1f\n\t" 484 "dec %%edi\n" 485 "1:" 486 : "=D" (addr), "=c" (size) 487 : "0" (addr), "1" (size), "a" (c) 488 : "memory"); 489 return addr; 490} 491 492#endif /* __KERNEL__ */ 493 494#endif