/* include/asm-i386/string.h — Linux kernel v2.6.22 */
#ifndef _I386_STRING_H_
#define _I386_STRING_H_

#ifdef __KERNEL__
/*
 * On a 486 or Pentium, we are better off not using the
 * byte string operations. But on a 386 or a PPro the
 * byte string ops are faster than doing it by hand
 * (MUCH faster on a Pentium).
 */

/*
 * This string-include defines all string functions as inline
 * functions. Use gcc. It also assumes ds=es=data space, this should be
 * normal. Most of the string-functions are rather heavily hand-optimized,
 * see especially strsep,strstr,str[c]spn. They should work, but are not
 * very easy to understand. Everything is done entirely within the register
 * set, making the functions fast and clean. String instructions have been
 * used through-out, making for "slightly" unclear code :-)
 *
 * NO Copyright (C) 1991, 1992 Linus Torvalds,
 * consider these trivial functions to be PD.
 */

/* AK: in fact I bet it would be better to move this stuff all out of line.
 */

/*
 * Copy src, including its NUL terminator, to dest.
 * lodsb/stosb loop runs until the byte just copied (%al) is zero.
 */
#define __HAVE_ARCH_STRCPY
static inline char * strcpy(char * dest,const char *src)
{
int d0, d1, d2;
__asm__ __volatile__(
	"1:\tlodsb\n\t"
	"stosb\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b"
	: "=&S" (d0), "=&D" (d1), "=&a" (d2)
	:"0" (src),"1" (dest) : "memory");
return dest;
}

/*
 * Copy at most count bytes of src to dest.  If src is shorter than
 * count, the remainder of dest is zero-padded ("rep stosb" with %al==0
 * after the copy loop terminates on the NUL).
 */
#define __HAVE_ARCH_STRNCPY
static inline char * strncpy(char * dest,const char *src,size_t count)
{
int d0, d1, d2, d3;
__asm__ __volatile__(
	"1:\tdecl %2\n\t"
	"js 2f\n\t"
	"lodsb\n\t"
	"stosb\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n\t"
	"rep\n\t"
	"stosb\n"
	"2:"
	: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
	:"0" (src),"1" (dest),"2" (count) : "memory");
return dest;
}

/*
 * Append src to dest.  "repne scasb" with %al==0 (and %ecx==-1) locates
 * the terminating NUL of dest; "decl %1" backs %edi onto it before the
 * copy loop overwrites it with src (terminator included).
 */
#define __HAVE_ARCH_STRCAT
static inline char * strcat(char * dest,const char * src)
{
int d0, d1, d2, d3;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"
	"decl %1\n"
	"1:\tlodsb\n\t"
	"stosb\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b"
	: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
	: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu):"memory");
return dest;
}

/*
 * Append at most count bytes of src to dest.  A NUL terminator is
 * always stored afterwards ("xorl %2,%2" then "stosb"), even when the
 * copy stopped because count was exhausted.
 */
#define __HAVE_ARCH_STRNCAT
static inline char * strncat(char * dest,const char * src,size_t count)
{
int d0, d1, d2, d3;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"
	"decl %1\n\t"
	"movl %8,%3\n"
	"1:\tdecl %3\n\t"
	"js 2f\n\t"
	"lodsb\n\t"
	"stosb\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n"
	"2:\txorl %2,%2\n\t"
	"stosb"
	: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
	: "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
	: "memory");
return dest;
}

/*
 * Compare two strings.  Returns 0 on equality; on mismatch,
 * "sbbl %eax,%eax" + "orb $1,%al" yields exactly -1 or 1 depending on
 * the carry from the byte comparison.
 */
#define __HAVE_ARCH_STRCMP
static inline int strcmp(const char * cs,const char * ct)
{
int d0, d1;
register int __res;
__asm__ __volatile__(
	"1:\tlodsb\n\t"
	"scasb\n\t"
	"jne 2f\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n\t"
	"xorl %%eax,%%eax\n\t"
	"jmp 3f\n"
	"2:\tsbbl %%eax,%%eax\n\t"
	"orb $1,%%al\n"
	"3:"
	:"=a" (__res), "=&S" (d0), "=&D" (d1)
	:"1" (cs),"2" (ct)
	:"memory");
return __res;
}

/*
 * Compare at most count bytes of two strings; result convention is the
 * same as strcmp above (0, -1 or 1).
 */
#define __HAVE_ARCH_STRNCMP
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
register int __res;
int d0, d1, d2;
__asm__ __volatile__(
	"1:\tdecl %3\n\t"
	"js 2f\n\t"
	"lodsb\n\t"
	"scasb\n\t"
	"jne 3f\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n"
	"2:\txorl %%eax,%%eax\n\t"
	"jmp 4f\n"
	"3:\tsbbl %%eax,%%eax\n\t"
	"orb $1,%%al\n"
	"4:"
	:"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
	:"1" (cs),"2" (ct),"3" (count)
	:"memory");
return __res;
}

/*
 * Find the first occurrence of byte c in s (the NUL terminator counts
 * as part of the string).  On a miss, %esi is forced to 1 so that the
 * final "movl %1,%0; decl %0" produces NULL; on a hit it produces the
 * address of the match (%esi points one past it after lodsb).
 */
#define __HAVE_ARCH_STRCHR
static inline char * strchr(const char * s, int c)
{
int d0;
register char * __res;
__asm__ __volatile__(
	"movb %%al,%%ah\n"
	"1:\tlodsb\n\t"
	"cmpb %%ah,%%al\n\t"
	"je 2f\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n\t"
	"movl $1,%1\n"
	"2:\tmovl %1,%0\n\t"
	"decl %0"
	:"=a" (__res), "=&S" (d0)
	:"1" (s),"0" (c)
	:"memory");
return __res;
}

/*
 * Find the last occurrence of byte c in s.  The result starts as NULL
 * ("0" (0)) and is updated to the address of each match as the scan
 * walks the whole string.
 */
#define __HAVE_ARCH_STRRCHR
static inline char * strrchr(const char * s, int c)
{
int d0, d1;
register char * __res;
__asm__ __volatile__(
	"movb %%al,%%ah\n"
	"1:\tlodsb\n\t"
	"cmpb %%ah,%%al\n\t"
	"jne 2f\n\t"
	"leal -1(%%esi),%0\n"
	"2:\ttestb %%al,%%al\n\t"
	"jne 1b"
	:"=g" (__res), "=&S" (d0), "=&a" (d1)
	:"0" (0),"1" (s),"2" (c)
	:"memory");
return __res;
}

/*
 * Length of s.  "repne scasb" with %al==0 and %ecx==-1 scans to the
 * terminator; "notl; decl" turns the remaining count into the length.
 */
#define __HAVE_ARCH_STRLEN
static inline size_t strlen(const char * s)
{
int d0;
register int __res;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"
	"notl %0\n\t"
	"decl %0"
	:"=c" (__res), "=&D" (d0)
	:"1" (s),"a" (0), "0" (0xffffffffu)
	:"memory");
return __res;
}

/*
 * memcpy for non-constant sizes: copy n/4 dwords with "rep movsl",
 * then the 0-3 remaining bytes with "rep movsb".
 */
static __always_inline void * __memcpy(void * to, const void * from, size_t n)
{
int d0, d1, d2;
__asm__ __volatile__(
	"rep ; movsl\n\t"
	"movl %4,%%ecx\n\t"
	"andl $3,%%ecx\n\t"
#if 1	/* want to pay 2 byte penalty for a chance to skip microcoded rep? */
	"jz 1f\n\t"
#endif
	"rep ; movsb\n\t"
	"1:"
	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
	: "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from)
	: "memory");
return (to);
}

/*
 * This looks ugly, but the compiler can optimize it totally,
 * as the count is constant.
 */
static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n)
{
	long esi, edi;
	if (!n) return to;
#if 1	/* want to do small copies with non-string ops? */
	switch (n) {
		case 1: *(char*)to = *(char*)from; return to;
		case 2: *(short*)to = *(short*)from; return to;
		case 4: *(int*)to = *(int*)from; return to;
#if 1	/* including those doable with two moves? */
		case 3: *(short*)to = *(short*)from;
			*((char*)to+2) = *((char*)from+2); return to;
		case 5: *(int*)to = *(int*)from;
			*((char*)to+4) = *((char*)from+4); return to;
		case 6: *(int*)to = *(int*)from;
			*((short*)to+2) = *((short*)from+2); return to;
		case 8: *(int*)to = *(int*)from;
			*((int*)to+1) = *((int*)from+1); return to;
#endif
	}
#endif
	esi = (long) from;
	edi = (long) to;
	if (n >= 5*4) {
		/* large block: use rep prefix */
		int ecx;
		__asm__ __volatile__(
			"rep ; movsl"
			: "=&c" (ecx), "=&D" (edi), "=&S" (esi)
			: "0" (n/4), "1" (edi),"2" (esi)
			: "memory"
		);
	} else {
		/* small block: don't clobber ecx + smaller code */
		if (n >= 4*4) __asm__ __volatile__("movsl"
			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
		if (n >= 3*4) __asm__ __volatile__("movsl"
			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
		if (n >= 2*4) __asm__ __volatile__("movsl"
			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
		if (n >= 1*4) __asm__ __volatile__("movsl"
			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
	}
	switch (n % 4) {
		/* tail */
		case 0: return to;
		case 1: __asm__ __volatile__("movsb"
			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
			return to;
		case 2: __asm__ __volatile__("movsw"
			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
			return to;
		default: __asm__ __volatile__("movsw\n\tmovsb"
			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
			return to;
	}
}

#define __HAVE_ARCH_MEMCPY

#ifdef CONFIG_X86_USE_3DNOW

#include <asm/mmx.h>

/*
 *	This CPU favours 3DNow strongly (eg AMD Athlon)
 */

/* Constant-size copy: MMX path only pays off above 512 bytes. */
static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
{
	if (len < 512)
		return __constant_memcpy(to, from, len);
	return _mmx_memcpy(to, from, len);
}

/* Variable-size copy with the same 512-byte MMX cut-over. */
static __inline__ void *__memcpy3d(void *to, const void *from, size_t len)
{
	if (len < 512)
		return __memcpy(to, from, len);
	return _mmx_memcpy(to, from, len);
}

#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy3d((t),(f),(n)) : \
 __memcpy3d((t),(f),(n)))

#else

/*
 *	No 3D Now!
 */

#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy((t),(f),(n)) : \
 __memcpy((t),(f),(n)))

#endif

#define __HAVE_ARCH_MEMMOVE
void *memmove(void * dest,const void * src, size_t n);

#define memcmp __builtin_memcmp

/*
 * Find byte c in the first count bytes of cs; returns NULL when absent
 * (on a miss "movl $1,%0" then "decl %0" produces 0) or when count is 0.
 */
#define __HAVE_ARCH_MEMCHR
static inline void * memchr(const void * cs,int c,size_t count)
{
int d0;
register void * __res;
if (!count)
	return NULL;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"
	"je 1f\n\t"
	"movl $1,%0\n"
	"1:\tdecl %0"
	:"=D" (__res), "=&c" (d0)
	:"a" (c),"0" (cs),"1" (count)
	:"memory");
return __res;
}

/* Plain byte-at-a-time memset: "rep stosb" of c over count bytes. */
static inline void * __memset_generic(void * s, char c,size_t count)
{
int d0, d1;
__asm__ __volatile__(
	"rep\n\t"
	"stosb"
	: "=&c" (d0), "=&D" (d1)
	:"a" (c),"1" (s),"0" (count)
	:"memory");
return s;
}

/* we might want to write optimized versions of these later */
#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))

/*
 * memset(x,0,y) is a reasonably common thing to do, so we want to fill
 * things 32 bits at a time even when we don't know the size of the
 * area at compile-time..
 */
/*
 * c is the fill byte already replicated to all four byte lanes (see the
 * memset macro below).  Fill count/4 dwords with "rep stosl", then use
 * bits 1 and 0 of count to emit the trailing stosw/stosb.
 */
static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
{
int d0, d1;
__asm__ __volatile__(
	"rep ; stosl\n\t"
	"testb $2,%b3\n\t"
	"je 1f\n\t"
	"stosw\n"
	"1:\ttestb $1,%b3\n\t"
	"je 2f\n\t"
	"stosb\n"
	"2:"
	:"=&c" (d0), "=&D" (d1)
	:"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
	:"memory");
return (s);
}

/* Added by Gertjan van Wingerde to make minix and sysv module work */
/*
 * Length of s, but never scan more than count bytes; returns count when
 * no NUL is found within the limit.
 */
#define __HAVE_ARCH_STRNLEN
static inline size_t strnlen(const char * s, size_t count)
{
int d0;
register int __res;
__asm__ __volatile__(
	"movl %2,%0\n\t"
	"jmp 2f\n"
	"1:\tcmpb $0,(%0)\n\t"
	"je 3f\n\t"
	"incl %0\n"
	"2:\tdecl %1\n\t"
	"cmpl $-1,%1\n\t"
	"jne 1b\n"
	"3:\tsubl %2,%0"
	:"=a" (__res), "=&d" (d0)
	:"c" (s),"1" (count)
	:"memory");
return __res;
}
/* end of additional stuff */

#define __HAVE_ARCH_STRSTR

extern char *strstr(const char *cs, const char *ct);

/*
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as we by now know that both pattern and count is constant..
 */
static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
{
	/* tiny fills: plain stores, no string instructions at all */
	switch (count) {
		case 0:
			return s;
		case 1:
			*(unsigned char *)s = pattern;
			return s;
		case 2:
			*(unsigned short *)s = pattern;
			return s;
		case 3:
			*(unsigned short *)s = pattern;
			*(2+(unsigned char *)s) = pattern;
			return s;
		case 4:
			*(unsigned long *)s = pattern;
			return s;
	}
/* dword fill plus a compile-time-selected stosw/stosb tail */
#define COMMON(x) \
__asm__  __volatile__( \
	"rep ; stosl" \
	x \
	: "=&c" (d0), "=&D" (d1) \
	: "a" (pattern),"0" (count/4),"1" ((long) s) \
	: "memory")
{
	int d0, d1;
	switch (count % 4) {
		case 0: COMMON(""); return s;
		case 1: COMMON("\n\tstosb"); return s;
		case 2: COMMON("\n\tstosw"); return s;
		default: COMMON("\n\tstosw\n\tstosb"); return s;
	}
}

#undef COMMON
}

#define __constant_c_x_memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_c_and_count_memset((s),(c),(count)) : \
 __constant_c_memset((s),(c),(count)))

#define __memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_count_memset((s),(c),(count)) : \
 __memset_generic((s),(c),(count)))

/* 0x01010101 * byte replicates the fill byte into all four lanes */
#define __HAVE_ARCH_MEMSET
#define memset(s, c, count) \
(__builtin_constant_p(c) ? \
 __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
 __memset((s),(c),(count)))

/*
 * find the first occurrence of byte 'c', or 1 past the area if none
 */
#define __HAVE_ARCH_MEMSCAN
static inline void * memscan(void * addr, int c, size_t size)
{
	if (!size)
		return addr;
	__asm__("repnz; scasb\n\t"
		"jnz 1f\n\t"
		"dec %%edi\n"
		"1:"
		: "=D" (addr), "=c" (size)
		: "0" (addr), "1" (size), "a" (c)
		: "memory");
	return addr;
}

#endif /* __KERNEL__ */

#endif