/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
.section __ex_table,"a";			\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text

	.text
	.stabs	"arch/ppc/lib/",N_SO,0,0,0f
	.stabs	"string.S",N_SO,0,0,0f

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(strcpy)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/* This clears out any unused part of the destination buffer,
   just as the libc version does.  -- paulus */
_GLOBAL(strncpy)
	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	bnelr			/* if we didn't hit a null char, we're done */
	mfctr	r5
	cmpwi	0,r5,0		/* any space left in destination buffer? */
	beqlr			/* we know r0 == 0 here */
2:	stbu	r0,1(r6)	/* clear it out if so */
	bdnz	2b
	blr

_GLOBAL(strcat)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r5)
	cmpwi	0,r0,0
	bne	1b
	addi	r5,r5,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

_GLOBAL(strcmp)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	beqlr	1
	beq	1b
	blr

_GLOBAL(strlen)
	addi	r4,r3,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	bne	1b
	subf	r3,r3,r4
	blr

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4
	li	r4,0
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
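	/*
	 * The next two instructions turn the offset of r6 within its
	 * cache line (r7) into the number of single-word stores needed
	 * before dcbz can work on whole, line-aligned blocks; if none
	 * are needed the alignment loop is skipped.  (stwu
	 * pre-increments, so r6 trails the store address by one word.)
	 */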
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
#if !defined(CONFIG_8xx)
10:	dcbz	r7,r6
#else
10:	stw	r4, 4(r6)
	stw	r4, 8(r6)
	stw	r4, 12(r6)
	stw	r4, 16(r6)
#if CACHE_LINE_SIZE >= 32
	stw	r4, 20(r6)
	stw	r4, 24(r6)
	stw	r4, 28(r6)
	stw	r4, 32(r6)
#endif /* CACHE_LINE_SIZE */
#endif
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4
2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5	/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4		/* cr0.lt &= cr1.lt */
	blt	memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	andi.	r8,r0,3		/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)	/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
#if !defined(CONFIG_8xx)
	dcbz	r11,r6
#endif
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL(memcpy)
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f		/* if less than 8 bytes to do */
	andi.	r0,r6,3		/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
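	/*
	 * The unrolled loop above moves 8 bytes per iteration; the low
	 * three bits of r5 are the 0-7 byte tail, finished off below
	 * first word-wise, then byte-wise.
	 */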
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(memcmp)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	bdnzt	2,1b
	blr
2:	li	r3,0
	blr

_GLOBAL(memchr)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r3,r3,-1
1:	lbzu	r0,1(r3)
	cmpw	0,r0,r4
	bdnzf	2,1b
	beqlr
2:	li	r3,0
	blr

_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	andi.	r8,r0,3		/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)	/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

#ifdef CONFIG_8xx
	/* Don't use prefetch on 8xx */
	mtctr	r0
	li	r0,0
53:	COPY_16_BYTES_WITHEX(0)
	bdnz	53b

#else /* not CONFIG_8xx */
	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
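	/*
	 * dcbt is only a hint and cannot fault, so the prefetches
	 * need no __ex_table entries, unlike the loads, stores and
	 * dcbz in the copy loop itself.
	 */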
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b
#endif /* CONFIG_8xx */

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
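	/*
	 * Word stores have cleared the bulk of the destination; any
	 * remaining 1-3 bytes are cleared singly below.
	 */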
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text

_GLOBAL(__clear_user)
	addi	r6,r3,-4
	li	r3,0
	li	r5,0
	cmplwi	0,r4,4
	blt	7f
	/* clear a single word */
11:	stwu	r5,4(r6)
	beqlr
	/* clear word sized chunks */
	andi.	r0,r6,3
	add	r4,r0,r4
	subf	r6,r0,r6
	srwi	r0,r4,2
	andi.	r4,r4,3
	mtctr	r0
	bdz	7f
1:	stwu	r5,4(r6)
	bdnz	1b
	/* clear byte sized chunks */
7:	cmpwi	0,r4,0
	beqlr
	mtctr	r4
	addi	r6,r6,3
8:	stbu	r5,1(r6)
	bdnz	8b
	blr
90:	mr	r3,r4
	blr
91:	mfctr	r3
	slwi	r3,r3,2
	add	r3,r3,r4
	blr
92:	mfctr	r3
	blr

	.section __ex_table,"a"
	.align	2
	.long	11b,90b
	.long	1b,91b
	.long	8b,92b
	.text

_GLOBAL(__strncpy_from_user)
	addi	r6,r3,-1
	addi	r4,r4,-1
	cmpwi	0,r5,0
	beq	2f
	mtctr	r5
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	beq	3f
2:	addi	r6,r6,1
3:	subf	r3,r3,r6
	blr
99:	li	r3,-EFAULT
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b
	.text

/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
_GLOBAL(__strnlen_user)
	addi	r7,r3,-1
	subf	r6,r7,r5	/* top+1 - str */
	cmplw	0,r4,r6
	bge	0f
	mr	r6,r4
0:	mtctr	r6		/* ctr = min(len, top - str) */
1:	lbzu	r0,1(r7)	/* get next byte */
	cmpwi	0,r0,0
	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
	addi	r7,r7,1
	subf	r3,r3,r7	/* number of bytes we have looked at */
	beqlr			/* return if we found a 0 byte */
	cmpw	0,r3,r4		/* did we look at all len bytes? */
	blt	99f		/* if not, must have hit top */
	addi	r3,r4,1		/* return len + 1 to indicate no null found */
	blr
99:	li	r3,0		/* bad address, return 0 */
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b