/* arch/ppc/lib/string.S — string/memory primitives for 32-bit PowerPC (from Linux v2.6.26) */
/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

/*
 * Copy one 16-byte chunk: loads from 4..16(r4), stores to 4..16(r6),
 * leaving r4/r6 advanced by 16 via the update forms (lwzu/stwu).
 * Clobbers r7-r10.
 */
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

/*
 * As COPY_16_BYTES, but each instruction carries a numeric label
 * 8<n>0 .. 8<n>7 so that exception-table entries (built by
 * COPY_16_BYTES_EXCODE below) can identify which load/store faulted.
 */
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

/*
 * Fault fixups for COPY_16_BYTES_WITHEX(n): 9<n>0 handles a fault in
 * the load half (branches to 104f), 9<n>1 the store half (105f); both
 * first subtract the 16*n bytes this chunk accounted for from r5.
 * The __ex_table entries map each labelled instruction to its fixup.
 */
#define COPY_16_BYTES_EXCODE(n)	\
9 ## n ## 0:			\
	addi	r5,r5,-(16 * n);	\
	b	104f;		\
9 ## n ## 1:			\
	addi	r5,r5,-(16 * n);	\
	b	105f;		\
.section __ex_table,"a";	\
	.align	2;		\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text

	.text
	.stabs	"arch/ppc/lib/",N_SO,0,0,0f
	.stabs	"string.S",N_SO,0,0,0f

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * strcpy(dst [r3], src [r4]): copy bytes including the terminating NUL.
 * Returns dst in r3 (r3 is never modified).
 */
_GLOBAL(strcpy)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/* This clears out any unused part of the destination buffer,
   just as the libc version does.  -- paulus */
/*
 * strncpy(dst [r3], src [r4], n [r5]): copy at most n bytes; if src is
 * shorter than n, pad the remainder of dst with NUL bytes.  Returns dst.
 */
_GLOBAL(strncpy)
	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	bnelr			/* if we didn't hit a null char, we're done */
	mfctr	r5
	cmpwi	0,r5,0		/* any space left in destination buffer? */
	beqlr			/* we know r0 == 0 here */
2:	stbu	r0,1(r6)	/* clear it out if so */
	bdnz	2b
	blr

/*
 * strcat(dst [r3], src [r4]): scan dst to its NUL, then copy src
 * (including NUL) at that point.  Returns dst in r3 (unchanged).
 */
_GLOBAL(strcat)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r5)
	cmpwi	0,r0,0
	bne	1b
	addi	r5,r5,-1	/* back up over the NUL so it gets overwritten */
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/*
 * strcmp(s1 [r3], s2 [r4]): returns (*s1 - *s2) for the first
 * differing byte position, 0 if equal (cr1 tracks end-of-string).
 */
_GLOBAL(strcmp)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	beqlr	1		/* hit NUL in s1: return the difference */
	beq	1b
	blr

/*
 * strncmp(s1 [r3], s2 [r4], n [r5]): as strcmp but compares at most
 * n bytes; returns 0 immediately when n == 0.
 */
_GLOBAL(strncmp)
	PPC_LCMPI r5,0
	beqlr
	mtctr	r5
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	beqlr	1
	bdnzt	eq,1b		/* dec ctr; loop while ctr != 0 && bytes equal */
	blr

/*
 * strlen(s [r3]): returns in r3 the distance from s to its NUL byte.
 */
_GLOBAL(strlen)
	addi	r4,r3,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	bne	1b
	subf	r3,r3,r4	/* r3 = &NUL - s */
	blr

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable. -- paulus
 *
 * cacheable_memzero(dst [r3], count [r4]).  Aligns to a word, then to
 * a cache line, zeroing whole lines with dcbz (or explicit stores on
 * 8xx, where dcbz is avoided), then mops up trailing words and bytes.
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4
	li	r4,0
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f		/* fewer than 4 bytes: byte loop only */
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3		/* word-align the destination pointer */
	add	r5,r0,r5
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2		/* words to go until cache-line boundary */
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
#if !defined(CONFIG_8xx)
10:	dcbz	r7,r6		/* zero a whole cache line at once */
#else
10:	stw	r4, 4(r6)
	stw	r4, 8(r6)
	stw	r4, 12(r6)
	stw	r4, 16(r6)
#if CACHE_LINE_SIZE >= 32
	stw	r4, 20(r6)
	stw	r4, 24(r6)
	stw	r4, 28(r6)
	stw	r4, 32(r6)
#endif /* CACHE_LINE_SIZE */
#endif
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4
2:	srwi	r0,r5,2		/* remaining whole words */
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3		/* remaining bytes (0-3) */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * memset(dst [r3], c [r4], count [r5]): replicate the low byte of c
 * through a word, then store word-at-a-time with byte fixups at the
 * edges.  Returns dst in r3 (unchanged).
 */
_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23	/* spread the fill byte ... */
	rlwimi	r4,r4,16,0,15	/* ... into all four byte lanes */
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3		/* word-align the destination */
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
#if !defined(CONFIG_8xx)
	dcbz	r11,r6		/* pre-zero the line so stores don't read it in */
#endif
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2		/* trailing whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3		/* trailing bytes (0-3) */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

/*
 * memmove(dst [r3], src [r4], n [r5]): dispatch on direction; copying
 * backwards only when dst > src, otherwise fall into memcpy.
 */
_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

/*
 * memcpy(dst [r3], src [r4], n [r5]): forward copy, 8 bytes per loop
 * iteration once dst is word-aligned; returns dst in r3 (unchanged).
 */
_GLOBAL(memcpy)
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f		/* if less than 8 bytes to do */
	andi.	r0,r6,3		/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4		/* bytes needed to align dest */
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31	/* r7 = remaining >> 3 */
	beq	2b
	mtctr	r7
	b	1b

/*
 * backwards_memcpy(dst [r3], src [r4], n [r5]): copy from the high end
 * downwards, for overlapping moves with dst > src.
 */
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0		/* align the (end of) destination first */
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

/*
 * memcmp(s1 [r3], s2 [r4], n [r5]): returns 0 if n <= 0 or all bytes
 * equal, else the difference of the first mismatching bytes.
 */
_GLOBAL(memcmp)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	bdnzt	2,1b		/* loop while ctr != 0 && bytes equal */
	blr
2:	li	r3,0
	blr

/*
 * memchr(s [r3], c [r4], n [r5]): returns pointer to the first byte
 * equal to c within n bytes, or 0 if not found (or n <= 0).
 */
_GLOBAL(memchr)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r3,r3,-1
1:	lbzu	r0,1(r3)
	cmpw	0,r0,r4
	bdnzf	2,1b		/* loop while ctr != 0 && no match */
	beqlr			/* found: r3 points at the matching byte */
2:	li	r3,0
	blr

/*
 * __copy_tofrom_user(to [r3], from [r4], size [r5]).
 * Either pointer may fault; every load/store is covered by an
 * __ex_table entry.  Returns in r3 the number of bytes NOT copied
 * (0 on full success).  Uses dcbz + dcbt prefetch on non-8xx parts.
 */
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

#ifdef CONFIG_8xx
	/* Don't use prefetch on 8xx */
	mtctr	r0
	li	r0,0
53:	COPY_16_BYTES_WITHEX(0)
	bdnz	53b

#else /* not CONFIG_8xx */
	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0	/* r8 = lines to copy in the prefetching loop */
	mr	r0,r7		/* r0 = lines left after the loop (prefetched) */
	mtctr	r8

53:	dcbt	r3,r4		/* prefetch a source line ahead */
54:	dcbz	r11,r6		/* pre-zero dest line; may fault -> 105f */
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0		/* any already-prefetched lines left to copy? */
	li	r3,4
	li	r7,0		/* second pass: no further prefetching */
	bne	114b
#endif /* CONFIG_8xx */

63:	srwi.	r0,r5,2		/* trailing whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3		/* trailing bytes (0-3) */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0		/* success: 0 bytes left uncopied */
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2		/* r3 = log2(unit size): words */
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text

/*
 * __clear_user(addr [r3], size [r4]): zero user memory; every store is
 * covered by __ex_table.  Returns in r3 the number of bytes NOT
 * cleared (0 on success), computed by the 90/91/92 fixups.
 */
_GLOBAL(__clear_user)
	addi	r6,r3,-4
	li	r3,0
	li	r5,0
	cmplwi	0,r4,4
	blt	7f
	/* clear a single word */
11:	stwu	r5,4(r6)
	beqlr
	/* clear word sized chunks */
	andi.	r0,r6,3
	add	r4,r0,r4
	subf	r6,r0,r6
	srwi	r0,r4,2
	andi.	r4,r4,3
	mtctr	r0
	bdz	7f
1:	stwu	r5,4(r6)
	bdnz	1b
	/* clear byte sized chunks */
7:	cmpwi	0,r4,0
	beqlr
	mtctr	r4
	addi	r6,r6,3
8:	stbu	r5,1(r6)
	bdnz	8b
	blr
90:	mr	r3,r4		/* fault on first word: nothing cleared yet */
	blr
91:	mfctr	r3		/* fault in word loop: ctr*4 + leftover bytes */
	slwi	r3,r3,2
	add	r3,r3,r4
	blr
92:	mfctr	r3		/* fault in byte loop: ctr bytes remain */
	blr

	.section __ex_table,"a"
	.align	2
	.long	11b,90b
	.long	1b,91b
	.long	8b,92b
	.text

/*
 * __strncpy_from_user(dst [r3], src [r4], count [r5]): copy up to
 * count bytes from user space, stopping after a NUL.  Returns the
 * number of bytes copied excluding the NUL, or -EFAULT on a fault.
 */
_GLOBAL(__strncpy_from_user)
	addi	r6,r3,-1
	addi	r4,r4,-1
	cmpwi	0,r5,0
	beq	2f
	mtctr	r5
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	beq	3f		/* terminated by NUL: don't count it */
2:	addi	r6,r6,1
3:	subf	r3,r3,r6
	blr
99:	li	r3,-EFAULT
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b
	.text

/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
/*
 * __strnlen_user: returns strlen(str) + 1 if a NUL is found within
 * min(len, top - str) bytes, len + 1 if no NUL was found in len bytes,
 * or 0 on a fault (bad address).
 */
_GLOBAL(__strnlen_user)
	addi	r7,r3,-1
	subf	r6,r7,r5	/* top+1 - str */
	cmplw	0,r4,r6
	bge	0f
	mr	r6,r4
0:	mtctr	r6		/* ctr = min(len, top - str) */
1:	lbzu	r0,1(r7)	/* get next byte */
	cmpwi	0,r0,0
	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
	addi	r7,r7,1
	subf	r3,r3,r7	/* number of bytes we have looked at */
	beqlr			/* return if we found a 0 byte */
	cmpw	0,r3,r4		/* did we look at all len bytes? */
	blt	99f		/* if not, must have hit top */
	addi	r3,r4,1		/* return len + 1 to indicate no null found */
	blr
99:	li	r3,0		/* bad address, return 0 */
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b