Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARC: String library

Hand-optimised asm code for the ARC700 pipeline.
Originally written/optimized by Joern Rennecke

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Joern Rennecke <joern.rennecke@embecosm.com>

+661
+40
arch/arc/include/asm/string.h
··· 1 + /* 2 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * vineetg: May 2011 9 + * -We had half-optimised memset/memcpy, got better versions of those 10 + * -Added memcmp, strchr, strcpy, strcmp, strlen 11 + * 12 + * Amit Bhor: Codito Technologies 2004 13 + */ 14 + 15 + #ifndef _ASM_ARC_STRING_H 16 + #define _ASM_ARC_STRING_H 17 + 18 + #include <linux/types.h> 19 + 20 + #ifdef __KERNEL__ 21 + 22 + #define __HAVE_ARCH_MEMSET 23 + #define __HAVE_ARCH_MEMCPY 24 + #define __HAVE_ARCH_MEMCMP 25 + #define __HAVE_ARCH_STRCHR 26 + #define __HAVE_ARCH_STRCPY 27 + #define __HAVE_ARCH_STRCMP 28 + #define __HAVE_ARCH_STRLEN 29 + 30 + extern void *memset(void *ptr, int, __kernel_size_t); 31 + extern void *memcpy(void *, const void *, __kernel_size_t); 32 + extern void memzero(void *ptr, __kernel_size_t n); 33 + extern int memcmp(const void *, const void *, __kernel_size_t); 34 + extern char *strchr(const char *s, int c); 35 + extern char *strcpy(char *dest, const char *src); 36 + extern int strcmp(const char *cs, const char *ct); 37 + extern __kernel_size_t strlen(const char *); 38 + 39 + #endif /* __KERNEL__ */ 40 + #endif /* _ASM_ARC_STRING_H */
+124
arch/arc/lib/memcmp.S
··· 1 + /* 2 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + #include <asm/linkage.h> 10 + 11 + #ifdef __LITTLE_ENDIAN__ 12 + #define WORD2 r2 13 + #define SHIFT r3 14 + #else /* BIG ENDIAN */ 15 + #define WORD2 r3 16 + #define SHIFT r2 17 + #endif 18 + 19 + ARC_ENTRY memcmp 20 + or r12,r0,r1 21 + asl_s r12,r12,30 22 + sub r3,r2,1 23 + brls r2,r12,.Lbytewise 24 + ld r4,[r0,0] 25 + ld r5,[r1,0] 26 + lsr.f lp_count,r3,3 27 + lpne .Loop_end 28 + ld_s WORD2,[r0,4] 29 + ld_s r12,[r1,4] 30 + brne r4,r5,.Leven 31 + ld.a r4,[r0,8] 32 + ld.a r5,[r1,8] 33 + brne WORD2,r12,.Lodd 34 + .Loop_end: 35 + asl_s SHIFT,SHIFT,3 36 + bhs_s .Last_cmp 37 + brne r4,r5,.Leven 38 + ld r4,[r0,4] 39 + ld r5,[r1,4] 40 + #ifdef __LITTLE_ENDIAN__ 41 + nop_s 42 + ; one more load latency cycle 43 + .Last_cmp: 44 + xor r0,r4,r5 45 + bset r0,r0,SHIFT 46 + sub_s r1,r0,1 47 + bic_s r1,r1,r0 48 + norm r1,r1 49 + b.d .Leven_cmp 50 + and r1,r1,24 51 + .Leven: 52 + xor r0,r4,r5 53 + sub_s r1,r0,1 54 + bic_s r1,r1,r0 55 + norm r1,r1 56 + ; slow track insn 57 + and r1,r1,24 58 + .Leven_cmp: 59 + asl r2,r4,r1 60 + asl r12,r5,r1 61 + lsr_s r2,r2,1 62 + lsr_s r12,r12,1 63 + j_s.d [blink] 64 + sub r0,r2,r12 65 + .balign 4 66 + .Lodd: 67 + xor r0,WORD2,r12 68 + sub_s r1,r0,1 69 + bic_s r1,r1,r0 70 + norm r1,r1 71 + ; slow track insn 72 + and r1,r1,24 73 + asl_s r2,r2,r1 74 + asl_s r12,r12,r1 75 + lsr_s r2,r2,1 76 + lsr_s r12,r12,1 77 + j_s.d [blink] 78 + sub r0,r2,r12 79 + #else /* BIG ENDIAN */ 80 + .Last_cmp: 81 + neg_s SHIFT,SHIFT 82 + lsr r4,r4,SHIFT 83 + lsr r5,r5,SHIFT 84 + ; slow track insn 85 + .Leven: 86 + sub.f r0,r4,r5 87 + mov.ne r0,1 88 + j_s.d [blink] 89 + bset.cs r0,r0,31 90 + .Lodd: 91 + cmp_s WORD2,r12 92 + 93 + mov_s r0,1 94 + j_s.d [blink] 95 + 
bset.cs r0,r0,31 96 + #endif /* ENDIAN */ 97 + .balign 4 98 + .Lbytewise: 99 + breq r2,0,.Lnil 100 + ldb r4,[r0,0] 101 + ldb r5,[r1,0] 102 + lsr.f lp_count,r3 103 + lpne .Lbyte_end 104 + ldb_s r3,[r0,1] 105 + ldb r12,[r1,1] 106 + brne r4,r5,.Lbyte_even 107 + ldb.a r4,[r0,2] 108 + ldb.a r5,[r1,2] 109 + brne r3,r12,.Lbyte_odd 110 + .Lbyte_end: 111 + bcc .Lbyte_even 112 + brne r4,r5,.Lbyte_even 113 + ldb_s r3,[r0,1] 114 + ldb_s r12,[r1,1] 115 + .Lbyte_odd: 116 + j_s.d [blink] 117 + sub r0,r3,r12 118 + .Lbyte_even: 119 + j_s.d [blink] 120 + sub r0,r4,r5 121 + .Lnil: 122 + j_s.d [blink] 123 + mov r0,0 124 + ARC_EXIT memcmp
+66
arch/arc/lib/memcpy-700.S
··· 1 + /* 2 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + #include <asm/linkage.h> 10 + 11 + ARC_ENTRY memcpy 12 + or r3,r0,r1 13 + asl_s r3,r3,30 14 + mov_s r5,r0 15 + brls.d r2,r3,.Lcopy_bytewise 16 + sub.f r3,r2,1 17 + ld_s r12,[r1,0] 18 + asr.f lp_count,r3,3 19 + bbit0.d r3,2,.Lnox4 20 + bmsk_s r2,r2,1 21 + st.ab r12,[r5,4] 22 + ld.a r12,[r1,4] 23 + .Lnox4: 24 + lppnz .Lendloop 25 + ld_s r3,[r1,4] 26 + st.ab r12,[r5,4] 27 + ld.a r12,[r1,8] 28 + st.ab r3,[r5,4] 29 + .Lendloop: 30 + breq r2,0,.Last_store 31 + ld r3,[r5,0] 32 + #ifdef __LITTLE_ENDIAN__ 33 + add3 r2,-1,r2 34 + ; uses long immediate 35 + xor_s r12,r12,r3 36 + bmsk r12,r12,r2 37 + xor_s r12,r12,r3 38 + #else /* BIG ENDIAN */ 39 + sub3 r2,31,r2 40 + ; uses long immediate 41 + xor_s r3,r3,r12 42 + bmsk r3,r3,r2 43 + xor_s r12,r12,r3 44 + #endif /* ENDIAN */ 45 + .Last_store: 46 + j_s.d [blink] 47 + st r12,[r5,0] 48 + 49 + .balign 4 50 + .Lcopy_bytewise: 51 + jcs [blink] 52 + ldb_s r12,[r1,0] 53 + lsr.f lp_count,r3 54 + bhs_s .Lnox1 55 + stb.ab r12,[r5,1] 56 + ldb.a r12,[r1,1] 57 + .Lnox1: 58 + lppnz .Lendbloop 59 + ldb_s r3,[r1,1] 60 + stb.ab r12,[r5,1] 61 + ldb.a r12,[r1,2] 62 + stb.ab r3,[r5,1] 63 + .Lendbloop: 64 + j_s.d [blink] 65 + stb r12,[r5,0] 66 + ARC_EXIT memcpy
+59
arch/arc/lib/memset.S
··· 1 + /* 2 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + #include <asm/linkage.h> 10 + 11 + #define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ 12 + 13 + ARC_ENTRY memset 14 + mov_s r4,r0 15 + or r12,r0,r2 16 + bmsk.f r12,r12,1 17 + extb_s r1,r1 18 + asl r3,r1,8 19 + beq.d .Laligned 20 + or_s r1,r1,r3 21 + brls r2,SMALL,.Ltiny 22 + add r3,r2,r0 23 + stb r1,[r3,-1] 24 + bclr_s r3,r3,0 25 + stw r1,[r3,-2] 26 + bmsk.f r12,r0,1 27 + add_s r2,r2,r12 28 + sub.ne r2,r2,4 29 + stb.ab r1,[r4,1] 30 + and r4,r4,-2 31 + stw.ab r1,[r4,2] 32 + and r4,r4,-4 33 + .Laligned: ; This code address should be aligned for speed. 34 + asl r3,r1,16 35 + lsr.f lp_count,r2,2 36 + or_s r1,r1,r3 37 + lpne .Loop_end 38 + st.ab r1,[r4,4] 39 + .Loop_end: 40 + j_s [blink] 41 + 42 + .balign 4 43 + .Ltiny: 44 + mov.f lp_count,r2 45 + lpne .Ltiny_end 46 + stb.ab r1,[r4,1] 47 + .Ltiny_end: 48 + j_s [blink] 49 + ARC_EXIT memset 50 + 51 + ; memzero: @r0 = mem, @r1 = size_t 52 + ; memset: @r0 = mem, @r1 = char, @r2 = size_t 53 + 54 + ARC_ENTRY memzero 55 + ; adjust bzero args to memset args 56 + mov r2, r1 57 + mov r1, 0 58 + b memset ;tail call so need to tinker with blink 59 + ARC_EXIT memzero
+123
arch/arc/lib/strchr-700.S
··· 1 + /* 2 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + /* ARC700 has a relatively long pipeline and branch prediction, so we want 10 + to avoid branches that are hard to predict. On the other hand, the 11 + presence of the norm instruction makes it easier to operate on whole 12 + words branch-free. */ 13 + 14 + #include <asm/linkage.h> 15 + 16 + ARC_ENTRY strchr 17 + extb_s r1,r1 18 + asl r5,r1,8 19 + bmsk r2,r0,1 20 + or r5,r5,r1 21 + mov_s r3,0x01010101 22 + breq.d r2,r0,.Laligned 23 + asl r4,r5,16 24 + sub_s r0,r0,r2 25 + asl r7,r2,3 26 + ld_s r2,[r0] 27 + #ifdef __LITTLE_ENDIAN__ 28 + asl r7,r3,r7 29 + #else 30 + lsr r7,r3,r7 31 + #endif 32 + or r5,r5,r4 33 + ror r4,r3 34 + sub r12,r2,r7 35 + bic_s r12,r12,r2 36 + and r12,r12,r4 37 + brne.d r12,0,.Lfound0_ua 38 + xor r6,r2,r5 39 + ld.a r2,[r0,4] 40 + sub r12,r6,r7 41 + bic r12,r12,r6 42 + and r7,r12,r4 43 + breq r7,0,.Loop ; For speed, we want this branch to be unaligned. 44 + b .Lfound_char ; Likewise this one. 45 + ; /* We require this code address to be unaligned for speed... */ 46 + .Laligned: 47 + ld_s r2,[r0] 48 + or r5,r5,r4 49 + ror r4,r3 50 + ; /* ... so that this code address is aligned, for itself and ... */ 51 + .Loop: 52 + sub r12,r2,r3 53 + bic_s r12,r12,r2 54 + and r12,r12,r4 55 + brne.d r12,0,.Lfound0 56 + xor r6,r2,r5 57 + ld.a r2,[r0,4] 58 + sub r12,r6,r3 59 + bic r12,r12,r6 60 + and r7,r12,r4 61 + breq r7,0,.Loop /* ... so that this branch is unaligned. */ 62 + ; Found searched-for character. r0 has already advanced to next word. 63 + #ifdef __LITTLE_ENDIAN__ 64 + /* We only need the information about the first matching byte 65 + (i.e. the least significant matching byte) to be exact, 66 + hence there is no problem with carry effects. 
*/ 67 + .Lfound_char: 68 + sub r3,r7,1 69 + bic r3,r3,r7 70 + norm r2,r3 71 + sub_s r0,r0,1 72 + asr_s r2,r2,3 73 + j.d [blink] 74 + sub_s r0,r0,r2 75 + 76 + .balign 4 77 + .Lfound0_ua: 78 + mov r3,r7 79 + .Lfound0: 80 + sub r3,r6,r3 81 + bic r3,r3,r6 82 + and r2,r3,r4 83 + or_s r12,r12,r2 84 + sub_s r3,r12,1 85 + bic_s r3,r3,r12 86 + norm r3,r3 87 + add_s r0,r0,3 88 + asr_s r12,r3,3 89 + asl.f 0,r2,r3 90 + sub_s r0,r0,r12 91 + j_s.d [blink] 92 + mov.pl r0,0 93 + #else /* BIG ENDIAN */ 94 + .Lfound_char: 95 + lsr r7,r7,7 96 + 97 + bic r2,r7,r6 98 + norm r2,r2 99 + sub_s r0,r0,4 100 + asr_s r2,r2,3 101 + j.d [blink] 102 + add_s r0,r0,r2 103 + 104 + .Lfound0_ua: 105 + mov_s r3,r7 106 + .Lfound0: 107 + asl_s r2,r2,7 108 + or r7,r6,r4 109 + bic_s r12,r12,r2 110 + sub r2,r7,r3 111 + or r2,r2,r6 112 + bic r12,r2,r12 113 + bic.f r3,r4,r12 114 + norm r3,r3 115 + 116 + add.pl r3,r3,1 117 + asr_s r12,r3,3 118 + asl.f 0,r2,r3 119 + add_s r0,r0,r12 120 + j_s.d [blink] 121 + mov.mi r0,0 122 + #endif /* ENDIAN */ 123 + ARC_EXIT strchr
+96
arch/arc/lib/strcmp.S
··· 1 + /* 2 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + /* This is optimized primarily for the ARC700. 10 + It would be possible to speed up the loops by one cycle / word 11 + respective one cycle / byte by forcing double source 1 alignment, unrolling 12 + by a factor of two, and speculatively loading the second word / byte of 13 + source 1; however, that would increase the overhead for loop setup / finish, 14 + and strcmp might often terminate early. */ 15 + 16 + #include <asm/linkage.h> 17 + 18 + ARC_ENTRY strcmp 19 + or r2,r0,r1 20 + bmsk_s r2,r2,1 21 + brne r2,0,.Lcharloop 22 + mov_s r12,0x01010101 23 + ror r5,r12 24 + .Lwordloop: 25 + ld.ab r2,[r0,4] 26 + ld.ab r3,[r1,4] 27 + nop_s 28 + sub r4,r2,r12 29 + bic r4,r4,r2 30 + and r4,r4,r5 31 + brne r4,0,.Lfound0 32 + breq r2,r3,.Lwordloop 33 + #ifdef __LITTLE_ENDIAN__ 34 + xor r0,r2,r3 ; mask for difference 35 + sub_s r1,r0,1 36 + bic_s r0,r0,r1 ; mask for least significant difference bit 37 + sub r1,r5,r0 38 + xor r0,r5,r1 ; mask for least significant difference byte 39 + and_s r2,r2,r0 40 + and_s r3,r3,r0 41 + #endif /* LITTLE ENDIAN */ 42 + cmp_s r2,r3 43 + mov_s r0,1 44 + j_s.d [blink] 45 + bset.lo r0,r0,31 46 + 47 + .balign 4 48 + #ifdef __LITTLE_ENDIAN__ 49 + .Lfound0: 50 + xor r0,r2,r3 ; mask for difference 51 + or r0,r0,r4 ; or in zero indicator 52 + sub_s r1,r0,1 53 + bic_s r0,r0,r1 ; mask for least significant difference bit 54 + sub r1,r5,r0 55 + xor r0,r5,r1 ; mask for least significant difference byte 56 + and_s r2,r2,r0 57 + and_s r3,r3,r0 58 + sub.f r0,r2,r3 59 + mov.hi r0,1 60 + j_s.d [blink] 61 + bset.lo r0,r0,31 62 + #else /* BIG ENDIAN */ 63 + /* The zero-detection above can mis-detect 0x01 bytes as zeroes 64 + because of carry-propagateion from 
a lower significant zero byte. 65 + We can compensate for this by checking that bit0 is zero. 66 + This compensation is not necessary in the step where we 67 + get a low estimate for r2, because in any affected bytes 68 + we already have 0x00 or 0x01, which will remain unchanged 69 + when bit 7 is cleared. */ 70 + .balign 4 71 + .Lfound0: 72 + lsr r0,r4,8 73 + lsr_s r1,r2 74 + bic_s r2,r2,r0 ; get low estimate for r2 and get ... 75 + bic_s r0,r0,r1 ; <this is the adjusted mask for zeros> 76 + or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... 77 + cmp_s r3,r2 ; ... be independent of trailing garbage 78 + or_s r2,r2,r0 ; likewise for r3 > r2 79 + bic_s r3,r3,r0 80 + rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 81 + cmp_s r2,r3 82 + j_s.d [blink] 83 + bset.lo r0,r0,31 84 + #endif /* ENDIAN */ 85 + 86 + .balign 4 87 + .Lcharloop: 88 + ldb.ab r2,[r0,1] 89 + ldb.ab r3,[r1,1] 90 + nop_s 91 + breq r2,0,.Lcmpend 92 + breq r2,r3,.Lcharloop 93 + .Lcmpend: 94 + j_s.d [blink] 95 + sub r0,r2,r3 96 + ARC_EXIT strcmp
+70
arch/arc/lib/strcpy-700.S
··· 1 + /* 2 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + /* If dst and src are 4 byte aligned, copy 8 bytes at a time. 10 + If the src is 4, but not 8 byte aligned, we first read 4 bytes to get 11 + it 8 byte aligned. Thus, we can do a little read-ahead, without 12 + dereferencing a cache line that we should not touch. 13 + Note that short and long instructions have been scheduled to avoid 14 + branch stalls. 15 + The beq_s to r3z could be made unaligned & long to avoid a stall 16 + there, but the it is not likely to be taken often, and it 17 + would also be likey to cost an unaligned mispredict at the next call. */ 18 + 19 + #include <asm/linkage.h> 20 + 21 + ARC_ENTRY strcpy 22 + or r2,r0,r1 23 + bmsk_s r2,r2,1 24 + brne.d r2,0,charloop 25 + mov_s r10,r0 26 + ld_s r3,[r1,0] 27 + mov r8,0x01010101 28 + bbit0.d r1,2,loop_start 29 + ror r12,r8 30 + sub r2,r3,r8 31 + bic_s r2,r2,r3 32 + tst_s r2,r12 33 + bne r3z 34 + mov_s r4,r3 35 + .balign 4 36 + loop: 37 + ld.a r3,[r1,4] 38 + st.ab r4,[r10,4] 39 + loop_start: 40 + ld.a r4,[r1,4] 41 + sub r2,r3,r8 42 + bic_s r2,r2,r3 43 + tst_s r2,r12 44 + bne_s r3z 45 + st.ab r3,[r10,4] 46 + sub r2,r4,r8 47 + bic r2,r2,r4 48 + tst r2,r12 49 + beq loop 50 + mov_s r3,r4 51 + #ifdef __LITTLE_ENDIAN__ 52 + r3z: bmsk.f r1,r3,7 53 + lsr_s r3,r3,8 54 + #else 55 + r3z: lsr.f r1,r3,24 56 + asl_s r3,r3,8 57 + #endif 58 + bne.d r3z 59 + stb.ab r1,[r10,1] 60 + j_s [blink] 61 + 62 + .balign 4 63 + charloop: 64 + ldb.ab r3,[r1,1] 65 + 66 + 67 + brne.d r3,0,charloop 68 + stb.ab r3,[r10,1] 69 + j [blink] 70 + ARC_EXIT strcpy
+83
arch/arc/lib/strlen.S
··· 1 + /* 2 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + #include <asm/linkage.h> 10 + 11 + ARC_ENTRY strlen 12 + or r3,r0,7 13 + ld r2,[r3,-7] 14 + ld.a r6,[r3,-3] 15 + mov r4,0x01010101 16 + ; uses long immediate 17 + #ifdef __LITTLE_ENDIAN__ 18 + asl_s r1,r0,3 19 + btst_s r0,2 20 + asl r7,r4,r1 21 + ror r5,r4 22 + sub r1,r2,r7 23 + bic_s r1,r1,r2 24 + mov.eq r7,r4 25 + sub r12,r6,r7 26 + bic r12,r12,r6 27 + or.eq r12,r12,r1 28 + and r12,r12,r5 29 + brne r12,0,.Learly_end 30 + #else /* BIG ENDIAN */ 31 + ror r5,r4 32 + btst_s r0,2 33 + mov_s r1,31 34 + sub3 r7,r1,r0 35 + sub r1,r2,r4 36 + bic_s r1,r1,r2 37 + bmsk r1,r1,r7 38 + sub r12,r6,r4 39 + bic r12,r12,r6 40 + bmsk.ne r12,r12,r7 41 + or.eq r12,r12,r1 42 + and r12,r12,r5 43 + brne r12,0,.Learly_end 44 + #endif /* ENDIAN */ 45 + 46 + .Loop: 47 + ld_s r2,[r3,4] 48 + ld.a r6,[r3,8] 49 + ; stall for load result 50 + sub r1,r2,r4 51 + bic_s r1,r1,r2 52 + sub r12,r6,r4 53 + bic r12,r12,r6 54 + or r12,r12,r1 55 + and r12,r12,r5 56 + breq r12,0,.Loop 57 + .Lend: 58 + and.f r1,r1,r5 59 + sub.ne r3,r3,4 60 + mov.eq r1,r12 61 + #ifdef __LITTLE_ENDIAN__ 62 + sub_s r2,r1,1 63 + bic_s r2,r2,r1 64 + norm r1,r2 65 + sub_s r0,r0,3 66 + lsr_s r1,r1,3 67 + sub r0,r3,r0 68 + j_s.d [blink] 69 + sub r0,r0,r1 70 + #else /* BIG ENDIAN */ 71 + lsr_s r1,r1,7 72 + mov.eq r2,r6 73 + bic_s r1,r1,r2 74 + norm r1,r1 75 + sub r0,r3,r0 76 + lsr_s r1,r1,3 77 + j_s.d [blink] 78 + add r0,r0,r1 79 + #endif /* ENDIAN */ 80 + .Learly_end: 81 + b.d .Lend 82 + sub_s.ne r1,r1,r1 83 + ARC_EXIT strlen