#ifndef _M68K_STRING_H_
#define _M68K_STRING_H_

#include <asm/setup.h>
#include <asm/page.h>

#define __HAVE_ARCH_STRCPY
static inline char * strcpy(char * dest,const char *src)
{
	char *xdest = dest;

	__asm__ __volatile__
		("1:\tmoveb %1@+,%0@+\n\t"
		 "jne 1b"
		 : "=a" (dest), "=a" (src)
		 : "0" (dest), "1" (src) : "memory");
	return xdest;
}

#define __HAVE_ARCH_STRNCPY
static inline char * strncpy(char *dest, const char *src, size_t n)
{
	char *xdest = dest;

	if (n == 0)
		return xdest;

	__asm__ __volatile__
		("1:\tmoveb %1@+,%0@+\n\t"
		 "jeq 2f\n\t"
		 "subql #1,%2\n\t"
		 "jne 1b\n\t"
		 "2:"
		 : "=a" (dest), "=a" (src), "=d" (n)
		 : "0" (dest), "1" (src), "2" (n)
		 : "memory");
	return xdest;
}

#define __HAVE_ARCH_STRCAT
static inline char * strcat(char * dest, const char * src)
{
	char *tmp = dest;

	while (*dest)
		dest++;
	while ((*dest++ = *src++))
		;

	return tmp;
}

#define __HAVE_ARCH_STRNCAT
static inline char * strncat(char *dest, const char *src, size_t count)
{
	char *tmp = dest;

	if (count) {
		while (*dest)
			dest++;
		while ((*dest++ = *src++)) {
			if (--count == 0) {
				*dest++='\0';
				break;
			}
		}
	}

	return tmp;
}

#define __HAVE_ARCH_STRCHR
static inline char * strchr(const char * s, int c)
{
	const char ch = c;

	for(; *s != ch; ++s)
		if (*s == '\0')
			return( NULL );
	return( (char *) s);
}

#if 0
#define __HAVE_ARCH_STRPBRK
static inline char *strpbrk(const char *cs,const char *ct)
{
	const char *sc1,*sc2;

	for( sc1 = cs; *sc1 != '\0'; ++sc1)
		for( sc2 = ct; *sc2 != '\0'; ++sc2)
			if (*sc1 == *sc2)
				return((char *) sc1);
	return( NULL );
}
#endif

#if 0
#define __HAVE_ARCH_STRSPN
static inline size_t strspn(const char *s, const char *accept)
{
	const char *p;
	const char *a;
	size_t count = 0;

	for (p = s; *p != '\0'; ++p)
	{
		for (a = accept; *a != '\0'; ++a)
			if (*p == *a)
				break;
		if (*a == '\0')
			return count;
		else
			++count;
	}

	return count;
}
#endif

/* strstr !! */

#define __HAVE_ARCH_STRLEN
static inline size_t strlen(const char * s)
{
	const char *sc;
	for (sc = s; *sc != '\0'; ++sc) ;
	return(sc - s);
}

/* strnlen !! */

#define __HAVE_ARCH_STRCMP
static inline int strcmp(const char * cs,const char * ct)
{
	char __res;

	__asm__
		("1:\tmoveb %0@+,%2\n\t"	/* get *cs */
		 "cmpb %1@+,%2\n\t"		/* compare a byte */
		 "jne 2f\n\t"			/* not equal, break out */
		 "tstb %2\n\t"			/* at end of cs? */
		 "jne 1b\n\t"			/* no, keep going */
		 "jra 3f\n\t"			/* strings are equal */
		 "2:\tsubb %1@-,%2\n\t"		/* *cs - *ct */
		 "3:"
		 : "=a" (cs), "=a" (ct), "=d" (__res)
		 : "0" (cs), "1" (ct));
	return __res;
}

#define __HAVE_ARCH_STRNCMP
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
	char __res;

	if (!count)
		return 0;
	__asm__
		("1:\tmovb %0@+,%3\n\t"		/* get *cs */
		 "cmpb %1@+,%3\n\t"		/* compare a byte */
		 "jne 3f\n\t"			/* not equal, break out */
		 "tstb %3\n\t"			/* at end of cs? */
		 "jeq 4f\n\t"			/* yes, all done */
		 "subql #1,%2\n\t"		/* no, adjust count */
		 "jne 1b\n\t"			/* more to do, keep going */
		 "2:\tmoveq #0,%3\n\t"		/* strings are equal */
		 "jra 4f\n\t"
		 "3:\tsubb %1@-,%3\n\t"		/* *cs - *ct */
		 "4:"
		 : "=a" (cs), "=a" (ct), "=d" (count), "=d" (__res)
		 : "0" (cs), "1" (ct), "2" (count));
	return __res;
}
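
/*
 * For reference only (illustrative sketch, not used by the kernel
 * build): a rough C equivalent of the assembler loop in strncmp()
 * above.  Like the asm version it returns the signed byte difference,
 * truncated to char, at the first mismatch, and 0 if the strings match
 * through a NUL or through the first `count' bytes.  The helper name
 * is made up for the example.
 */
#if 0
static inline int __strncmp_c_sketch(const char *cs, const char *ct, size_t count)
{
	char res;

	if (!count)
		return 0;
	do {
		res = *cs - *ct;	/* like the "3:" branch: *cs - *ct */
		if (res != 0)
			break;		/* bytes differ */
		if (*cs == '\0')
			break;		/* hit the NUL, strings equal, res == 0 */
		cs++;
		ct++;
	} while (--count);		/* count exhausted: last compare left res == 0 */
	return res;
}
#endif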

#define __HAVE_ARCH_MEMSET
/*
 * This is really ugly, but it's highly optimizable by the
 * compiler and is meant as compensation for gcc's missing
 * __builtin_memset(). For the 680[23]0 it might be worth considering
 * the optimal number of misaligned writes compared to the number of
 * tests'n'branches needed to align the destination address. The
 * 680[46]0 doesn't really care due to their copy-back caches.
 * 10/09/96 - Jes Sorensen
 */
static inline void * __memset_g(void * s, int c, size_t count)
{
	void *xs = s;
	size_t temp;

	if (!count)
		return xs;

	c &= 0xff;
	c |= c << 8;
	c |= c << 16;

	if (count < 36){
		long *ls = s;

		/* deliberate fall-through: each case group stores one longword */
		switch(count){
		case 32: case 33: case 34: case 35:
			*ls++ = c;
		case 28: case 29: case 30: case 31:
			*ls++ = c;
		case 24: case 25: case 26: case 27:
			*ls++ = c;
		case 20: case 21: case 22: case 23:
			*ls++ = c;
		case 16: case 17: case 18: case 19:
			*ls++ = c;
		case 12: case 13: case 14: case 15:
			*ls++ = c;
		case 8: case 9: case 10: case 11:
			*ls++ = c;
		case 4: case 5: case 6: case 7:
			*ls++ = c;
			break;
		default:
			break;
		}
		s = ls;
		if (count & 0x02){
			short *ss = s;
			*ss++ = c;
			s = ss;
		}
		if (count & 0x01){
			char *cs = s;
			*cs++ = c;
			s = cs;
		}
		return xs;
	}

	if ((long) s & 1)
	{
		char *cs = s;
		*cs++ = c;
		s = cs;
		count--;
	}
	if (count > 2 && (long) s & 2)
	{
		short *ss = s;
		*ss++ = c;
		s = ss;
		count -= 2;
	}
	temp = count >> 2;
	if (temp)
	{
		long *ls = s;
		temp--;
		do
			*ls++ = c;
		while (temp--);
		s = ls;
	}
	if (count & 2)
	{
		short *ss = s;
		*ss++ = c;
		s = ss;
	}
	if (count & 1)
	{
		char *cs = s;
		*cs = c;
	}
	return xs;
}

/*
 * __memset_page assumes that data is longword aligned. Most, if not
 * all, of these page sized memsets are performed on page aligned
 * areas, thus we do not need to check if the destination is longword
 * aligned. Of course we suffer a serious performance loss if this is
 * not the case but I think the risk of this ever happening is
 * extremely small. We spend a lot of time clearing pages in
 * get_empty_page() so I think it is worth it anyway. Besides, the
 * 680[46]0 do not really care about misaligned writes due to their
 * copy-back cache.
 *
 * The optimized case for the 680[46]0 is implemented using the move16
 * instruction. My tests showed that this implementation is 35-45%
 * faster than the original implementation using movel, the only
 * caveat is that the destination address must be 16-byte aligned.
 * 01/09/96 - Jes Sorensen
 */
static inline void * __memset_page(void * s,int c,size_t count)
{
	unsigned long data, tmp;
	void *xs = s;

	c = c & 255;
	data = c | (c << 8);
	data |= data << 16;

#ifdef CPU_M68040_OR_M68060_ONLY

	if (((unsigned long) s) & 0x0f)
		__memset_g(s, c, count);
	else{
		unsigned long *sp = s;
		*sp++ = data;
		*sp++ = data;
		*sp++ = data;
		*sp++ = data;

		__asm__ __volatile__("1:\t"
				     ".chip 68040\n\t"
				     "move16 %2@+,%0@+\n\t"
				     ".chip 68k\n\t"
				     "subqw #8,%2\n\t"
				     "subqw #8,%2\n\t"
				     "dbra %1,1b\n\t"
				     : "=a" (sp), "=d" (tmp)
				     : "a" (s), "0" (sp), "1" ((count - 16) / 16 - 1)
				     );
	}

#else
	__asm__ __volatile__("1:\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "dbra %1,1b\n\t"
			     : "=a" (s), "=d" (tmp)
			     : "d" (data), "0" (s), "1" (count / 32 - 1)
			     );
#endif

	return xs;
}

extern void *memset(void *,int,__kernel_size_t);

#define __memset_const(s,c,count) \
((count==PAGE_SIZE) ? \
  __memset_page((s),(c),(count)) : \
  __memset_g((s),(c),(count)))

#define memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __memset_const((s),(c),(count)) : \
 __memset_g((s),(c),(count)))
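
/*
 * Illustrative sketch only (not used by the kernel build): how the
 * memset() macro above dispatches at compile time, and how __memset_g()
 * widens the fill byte into a longword pattern (e.g. c = 0x5a becomes
 * 0x5a5a5a5a) so it can store four bytes at a time.  The function and
 * argument names below are made up for the example.
 */
#if 0
static inline void __memset_dispatch_example(void *page, void *buf, size_t len)
{
	memset(page, 0, PAGE_SIZE);	/* constant PAGE_SIZE  -> __memset_page() */
	memset(buf, 0x5a, 100);		/* other constant size -> __memset_g()    */
	memset(buf, 0, len);		/* variable size       -> __memset_g()    */
}
#endif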

#define __HAVE_ARCH_MEMCPY
extern void * memcpy(void *, const void *, size_t );
/*
 * __builtin_memcpy() does not handle page-sized memcpys very well,
 * thus following the same assumptions as for page-sized memsets, this
 * function copies page-sized areas using an unrolled loop, without
 * considering alignment.
 *
 * For the 680[46]0 only kernels we use the move16 instruction instead
 * as it writes through the data-cache, invalidating the cache-lines
 * touched. In this way we do not use up the entire data-cache (well,
 * half of it on the 68060) by copying a page. An unrolled loop of two
 * move16 instructions seems to be the fastest. The only caveat is that
 * both source and destination must be 16-byte aligned, if not we fall
 * back to the generic memcpy function. - Jes
 */
static inline void * __memcpy_page(void * to, const void * from, size_t count)
{
	unsigned long tmp;
	void *xto = to;

#ifdef CPU_M68040_OR_M68060_ONLY

	if (((unsigned long) to | (unsigned long) from) & 0x0f)
		return memcpy(to, from, count);

	__asm__ __volatile__("1:\t"
			     ".chip 68040\n\t"
			     "move16 %1@+,%0@+\n\t"
			     "move16 %1@+,%0@+\n\t"
			     ".chip 68k\n\t"
			     "dbra %2,1b\n\t"
			     : "=a" (to), "=a" (from), "=d" (tmp)
			     : "0" (to), "1" (from) , "2" (count / 32 - 1)
			     );
#else
	__asm__ __volatile__("1:\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "dbra %2,1b\n\t"
			     : "=a" (to), "=a" (from), "=d" (tmp)
			     : "0" (to), "1" (from) , "2" (count / 32 - 1)
			     );
#endif
	return xto;
}

#define __memcpy_const(to, from, n) \
((n==PAGE_SIZE) ? \
  __memcpy_page((to),(from),(n)) : \
  __builtin_memcpy((to),(from),(n)))

#define memcpy(to, from, n) \
(__builtin_constant_p(n) ? \
 __memcpy_const((to),(from),(n)) : \
 memcpy((to),(from),(n)))
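
/*
 * Illustrative sketch only (not used by the kernel build): compile-time
 * dispatch of the memcpy() macro above.  A constant PAGE_SIZE length
 * selects __memcpy_page(); on 680[46]0-only kernels that routine also
 * requires both pointers to be 16-byte aligned for move16 and otherwise
 * falls back to the out-of-line memcpy().  The names below are made up
 * for the example.
 */
#if 0
static inline void __memcpy_dispatch_example(void *dst_page, const void *src_page,
					      void *dst, const void *src, size_t len)
{
	memcpy(dst_page, src_page, PAGE_SIZE);	/* constant PAGE_SIZE -> __memcpy_page()    */
	memcpy(dst, src, 64);			/* other constant     -> __builtin_memcpy() */
	memcpy(dst, src, len);			/* variable length    -> out-of-line memcpy() */
}
#endif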

#define __HAVE_ARCH_MEMMOVE
static inline void * memmove(void * dest,const void * src, size_t n)
{
	void *xdest = dest;
	size_t temp;

	if (!n)
		return xdest;

	if (dest < src)
	{
		if ((long) dest & 1)
		{
			char *cdest = dest;
			const char *csrc = src;
			*cdest++ = *csrc++;
			dest = cdest;
			src = csrc;
			n--;
		}
		if (n > 2 && (long) dest & 2)
		{
			short *sdest = dest;
			const short *ssrc = src;
			*sdest++ = *ssrc++;
			dest = sdest;
			src = ssrc;
			n -= 2;
		}
		temp = n >> 2;
		if (temp)
		{
			long *ldest = dest;
			const long *lsrc = src;
			temp--;
			do
				*ldest++ = *lsrc++;
			while (temp--);
			dest = ldest;
			src = lsrc;
		}
		if (n & 2)
		{
			short *sdest = dest;
			const short *ssrc = src;
			*sdest++ = *ssrc++;
			dest = sdest;
			src = ssrc;
		}
		if (n & 1)
		{
			char *cdest = dest;
			const char *csrc = src;
			*cdest = *csrc;
		}
	}
	else
	{
		dest = (char *) dest + n;
		src = (const char *) src + n;
		if ((long) dest & 1)
		{
			char *cdest = dest;
			const char *csrc = src;
			*--cdest = *--csrc;
			dest = cdest;
			src = csrc;
			n--;
		}
		if (n > 2 && (long) dest & 2)
		{
			short *sdest = dest;
			const short *ssrc = src;
			*--sdest = *--ssrc;
			dest = sdest;
			src = ssrc;
			n -= 2;
		}
		temp = n >> 2;
		if (temp)
		{
			long *ldest = dest;
			const long *lsrc = src;
			temp--;
			do
				*--ldest = *--lsrc;
			while (temp--);
			dest = ldest;
			src = lsrc;
		}
		if (n & 2)
		{
			short *sdest = dest;
			const short *ssrc = src;
			*--sdest = *--ssrc;
			dest = sdest;
			src = ssrc;
		}
		if (n & 1)
		{
			char *cdest = dest;
			const char *csrc = src;
			*--cdest = *--csrc;
		}
	}
	return xdest;
}

#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void * ,const void * ,size_t );
#define memcmp(cs, ct, n) \
(__builtin_constant_p(n) ? \
 __builtin_memcmp((cs),(ct),(n)) : \
 memcmp((cs),(ct),(n)))

#define __HAVE_ARCH_MEMCHR
static inline void *memchr(const void *cs, int c, size_t count)
{
	/* Someone else can optimize this, I don't care - tonym@mac.linux-m68k.org */
	unsigned char *ret = (unsigned char *)cs;
	for(;count>0;count--,ret++)
		if(*ret == c) return ret;

	return NULL;
}

#endif /* _M68K_STRING_H_ */