/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * vineetg: May 2011
 *  -We had half-optimised memset/memcpy, got better versions of those
 *  -Added memcmp, strchr, strcpy, strcmp, strlen
 *
 * Amit Bhor: Codito Technologies 2004
 */

#ifndef _ASM_ARC_STRING_H
#define _ASM_ARC_STRING_H

#include <linux/types.h>

#ifdef __KERNEL__

/*
 * Tell the generic string code (lib/string.c) that this architecture
 * supplies its own optimized assembly versions of these routines, so the
 * generic C fallbacks are not compiled in for them.
 */
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMCPY
#define __HAVE_ARCH_MEMCMP
#define __HAVE_ARCH_STRCHR
#define __HAVE_ARCH_STRCPY
#define __HAVE_ARCH_STRCMP
#define __HAVE_ARCH_STRLEN

/* Implemented in arch/arc/lib/*.S; standard C library semantics. */
extern void *memset(void *ptr, int, __kernel_size_t);
extern void *memcpy(void *, const void *, __kernel_size_t);
/* Non-standard helper: zero-fill n bytes at ptr (wraps memset). */
extern void memzero(void *ptr, __kernel_size_t n);
extern int memcmp(const void *, const void *, __kernel_size_t);
extern char *strchr(const char *s, int c);
extern char *strcpy(char *dest, const char *src);
extern int strcmp(const char *cs, const char *ct);
extern __kernel_size_t strlen(const char *);

#endif /* __KERNEL__ */
#endif /* _ASM_ARC_STRING_H */
+124
arch/arc/lib/memcmp.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/linkage.h>

/* The loop below compares 8 bytes (two words) per iteration.  WORD2 names
   the register holding the second word of the pair; SHIFT ends up holding
   the residual byte count, later scaled to a bit-shift for the tail
   compare.  The roles swap with endianness. */
#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* BIG ENDIAN */
#define WORD2 r3
#define SHIFT r2
#endif

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:  r0 = s1, r1 = s2, r2 = n (bytes)
 * Out: r0 = 0 if equal; otherwise nonzero whose sign reflects the first
 *      differing byte pair (standard memcmp contract, per the prototype
 *      in asm/string.h)
 *
 * Fast path: both pointers word-aligned -> compare two words per
 * zero-overhead-loop iteration.  Misaligned or zero-length input falls
 * back to .Lbytewise.
 */
ARC_ENTRY memcmp
        or      r12,r0,r1
        asl_s   r12,r12,30      ; low 2 bits of either ptr -> top of r12
        sub     r3,r2,1
        brls    r2,r12,.Lbytewise ; misaligned (r12 huge) or n == 0
        ld      r4,[r0,0]
        ld      r5,[r1,0]
        lsr.f   lp_count,r3,3   ; number of 8-byte iterations
        lpne    .Loop_end       ; zero-overhead loop; skipped if count == 0
        ld_s    WORD2,[r0,4]
        ld_s    r12,[r1,4]
        brne    r4,r5,.Leven    ; mismatch in first word of the pair
        ld.a    r4,[r0,8]
        ld.a    r5,[r1,8]
        brne    WORD2,r12,.Lodd ; mismatch in second word of the pair
.Loop_end:
        asl_s   SHIFT,SHIFT,3   ; residual bytes -> bit count
        bhs_s   .Last_cmp
        brne    r4,r5,.Leven
        ld      r4,[r0,4]
        ld      r5,[r1,4]
#ifdef __LITTLE_ENDIAN__
        nop_s
        ; one more load latency cycle
.Last_cmp:
        xor     r0,r4,r5        ; set bits mark differing byte positions
        bset    r0,r0,SHIFT     ; sentinel bit so bytes past n cannot win
        sub_s   r1,r0,1
        bic_s   r1,r1,r0        ; isolate least significant set bit
        norm    r1,r1           ; position of that bit (leading-bit count)
        b.d     .Leven_cmp
        and     r1,r1,24        ; round to the containing byte (0/8/16/24)
.Leven:
        xor     r0,r4,r5
        sub_s   r1,r0,1
        bic_s   r1,r1,r0
        norm    r1,r1
        ; slow track insn
        and     r1,r1,24
.Leven_cmp:
        asl     r2,r4,r1        ; shift first differing byte to the top
        asl     r12,r5,r1
        lsr_s   r2,r2,1         ; avoid sign issues in the subtract below
        lsr_s   r12,r12,1
        j_s.d   [blink]
        sub     r0,r2,r12       ; result: difference of differing bytes
        .balign 4
.Lodd:
        xor     r0,WORD2,r12
        sub_s   r1,r0,1
        bic_s   r1,r1,r0
        norm    r1,r1
        ; slow track insn
        and     r1,r1,24
        asl_s   r2,r2,r1
        asl_s   r12,r12,r1
        lsr_s   r2,r2,1
        lsr_s   r12,r12,1
        j_s.d   [blink]
        sub     r0,r2,r12
#else /* BIG ENDIAN */
.Last_cmp:
        neg_s   SHIFT,SHIFT     ; big-endian: shift out the bytes past n
        lsr     r4,r4,SHIFT
        lsr     r5,r5,SHIFT
        ; slow track insn
.Leven:
        sub.f   r0,r4,r5
        mov.ne  r0,1            ; r4 > r5 -> +1
        j_s.d   [blink]
        bset.cs r0,r0,31        ; borrow (r4 < r5) -> negative result
.Lodd:
        cmp_s   WORD2,r12

        mov_s   r0,1
        j_s.d   [blink]
        bset.cs r0,r0,31
#endif /* ENDIAN */
        .balign 4
.Lbytewise:
        breq    r2,0,.Lnil      ; n == 0 -> equal
        ldb     r4,[r0,0]
        ldb     r5,[r1,0]
        lsr.f   lp_count,r3     ; byte pairs per iteration
        lpne    .Lbyte_end
        ldb_s   r3,[r0,1]
        ldb     r12,[r1,1]
        brne    r4,r5,.Lbyte_even
        ldb.a   r4,[r0,2]
        ldb.a   r5,[r1,2]
        brne    r3,r12,.Lbyte_odd
.Lbyte_end:
        bcc     .Lbyte_even     ; even残? carry clear: even byte left over
        brne    r4,r5,.Lbyte_even
        ldb_s   r3,[r0,1]
        ldb_s   r12,[r1,1]
.Lbyte_odd:
        j_s.d   [blink]
        sub     r0,r3,r12
.Lbyte_even:
        j_s.d   [blink]
        sub     r0,r4,r5
.Lnil:
        j_s.d   [blink]
        mov     r0,0
ARC_EXIT memcmp
+66
arch/arc/lib/memcpy-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/linkage.h>

/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * In:  r0 = dst, r1 = src, r2 = n (bytes)
 * Out: r0 = dst (r0 is never modified; r5 is used as the store cursor)
 *
 * Fast path: both pointers word-aligned -> software-pipelined
 * zero-overhead loop copying two words per iteration, with the final
 * partial word merged into existing destination memory.  Misaligned
 * input takes .Lcopy_bytewise.
 */
ARC_ENTRY memcpy
        or      r3,r0,r1
        asl_s   r3,r3,30        ; nonzero (huge) iff either ptr misaligned
        mov_s   r5,r0           ; r5 = store cursor, keep r0 as return value
        brls.d  r2,r3,.Lcopy_bytewise ; misaligned, or n == 0
        sub.f   r3,r2,1         ; delay slot: n-1, carry set iff n == 0
        ld_s    r12,[r1,0]      ; prime the software pipeline
        asr.f   lp_count,r3,3   ; 8-byte iterations
        bbit0.d r3,2,.Lnox4     ; odd number of words? copy one up front
        bmsk_s  r2,r2,1         ; r2 = n & 3 = trailing partial-word bytes
        st.ab   r12,[r5,4]
        ld.a    r12,[r1,4]
.Lnox4:
        lppnz   .Lendloop       ; zero-overhead loop, two words/iteration
        ld_s    r3,[r1,4]
        st.ab   r12,[r5,4]
        ld.a    r12,[r1,8]
        st.ab   r3,[r5,4]
.Lendloop:
        breq    r2,0,.Last_store ; no partial tail word
        ld      r3,[r5,0]       ; existing dest word, to merge with
#ifdef __LITTLE_ENDIAN__
        add3    r2,-1,r2        ; highest bit index to take from src
        ; uses long immediate
        xor_s   r12,r12,r3
        bmsk    r12,r12,r2      ; keep low r2+1.. bits from src
        xor_s   r12,r12,r3      ; rest from original dest
#else /* BIG ENDIAN */
        sub3    r2,31,r2        ; big-endian: keep the high-order bytes
        ; uses long immediate
        xor_s   r3,r3,r12
        bmsk    r3,r3,r2
        xor_s   r12,r12,r3
#endif /* ENDIAN */
.Last_store:
        j_s.d   [blink]
        st      r12,[r5,0]      ; final (possibly merged) word

        .balign 4
.Lcopy_bytewise:
        jcs     [blink]         ; carry from sub.f above: n == 0, done
        ldb_s   r12,[r1,0]      ; prime the byte pipeline
        lsr.f   lp_count,r3     ; byte pairs per iteration
        bhs_s   .Lnox1
        stb.ab  r12,[r5,1]      ; odd count: copy one byte up front
        ldb.a   r12,[r1,1]
.Lnox1:
        lppnz   .Lendbloop      ; two bytes per iteration
        ldb_s   r3,[r1,1]
        stb.ab  r12,[r5,1]
        ldb.a   r12,[r1,2]
        stb.ab  r3,[r5,1]
.Lendbloop:
        j_s.d   [blink]
        stb     r12,[r5,0]      ; last byte
ARC_EXIT memcpy
+59
arch/arc/lib/memset.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/linkage.h>

#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues. */

/*
 * void *memset(void *ptr, int ch, size_t n)
 *
 * In:  r0 = ptr, r1 = fill value (low byte used), r2 = n (bytes)
 * Out: r0 = ptr (unchanged; r4 is used as the store cursor)
 *
 * Replicates the fill byte across a word, handles unaligned head/tail
 * bytes with overlapping byte/halfword stores, then fills word-at-a-time
 * in a zero-overhead loop.  Counts <= SMALL go through the .Ltiny byte
 * loop instead.
 */
ARC_ENTRY memset
        mov_s   r4,r0           ; r4 = cursor, preserve r0 for return
        or      r12,r0,r2
        bmsk.f  r12,r12,1       ; Z set iff ptr and n are both 4-aligned
        extb_s  r1,r1           ; keep only the fill byte
        asl     r3,r1,8
        beq.d   .Laligned       ; fully aligned: skip head/tail fixup
        or_s    r1,r1,r3        ; delay slot: byte -> halfword pattern
        brls    r2,SMALL,.Ltiny
        add     r3,r2,r0        ; r3 = one past end
        stb     r1,[r3,-1]      ; tail byte (may overlap word stores)
        bclr_s  r3,r3,0
        stw     r1,[r3,-2]      ; tail halfword
        bmsk.f  r12,r0,1        ; misalignment of the start pointer
        add_s   r2,r2,r12
        sub.ne  r2,r2,4         ; adjust count for the head fixup below
        stb.ab  r1,[r4,1]       ; head byte, then round cursor down ...
        and     r4,r4,-2
        stw.ab  r1,[r4,2]       ; ... head halfword ...
        and     r4,r4,-4        ; ... to a word boundary
.Laligned:	; This code address should be aligned for speed.
        asl     r3,r1,16
        lsr.f   lp_count,r2,2   ; words to store
        or_s    r1,r1,r3        ; halfword -> full word pattern
        lpne    .Loop_end       ; zero-overhead loop; skipped if count == 0
        st.ab   r1,[r4,4]
.Loop_end:
        j_s     [blink]

        .balign 4
.Ltiny:
        mov.f   lp_count,r2
        lpne    .Ltiny_end      ; byte loop for tiny counts
        stb.ab  r1,[r4,1]
.Ltiny_end:
        j_s     [blink]
ARC_EXIT memset

; memzero: @r0 = mem, @r1 = size_t
; memset: @r0 = mem, @r1 = char, @r2 = size_t

/* void memzero(void *ptr, size_t n) - zero-fill via tail call to memset. */
ARC_ENTRY memzero
        ; adjust bzero args to memset args
        mov     r2, r1          ; size moves from arg1 to arg2 slot
        mov     r1, 0           ; fill value = 0
        b       memset          ;tail call so need to tinker with blink
ARC_EXIT memzero
+123
arch/arc/lib/strchr-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* ARC700 has a relatively long pipeline and branch prediction, so we want
   to avoid branches that are hard to predict.  On the other hand, the
   presence of the norm instruction makes it easier to operate on whole
   words branch-free.  */

#include <asm/linkage.h>

/*
 * char *strchr(const char *s, int c)
 *
 * In:  r0 = s, r1 = c (only the low byte is used)
 * Out: r0 = pointer to first occurrence of c, or 0 when the NUL
 *      terminator is reached first (r0 is cleared via mov.pl/mov.mi)
 *
 * Word-at-a-time scan using the classic masks:
 *   r3/r12 pattern 0x01010101 and r4 = 0x80808080 (ror of 0x01010101),
 * so (word - 0x01010101) & ~word & 0x80808080 flags zero bytes, and the
 * same test on word ^ repl(c) flags bytes equal to c.
 */
ARC_ENTRY strchr
        extb_s  r1,r1
        asl     r5,r1,8
        bmsk    r2,r0,1         ; r2 = misalignment of s (0..3)
        or      r5,r5,r1        ; c replicated into a halfword
        mov_s   r3,0x01010101
        breq.d  r2,r0,.Laligned
        asl     r4,r5,16
        sub_s   r0,r0,r2        ; round s down to a word boundary
        asl     r7,r2,3         ; misaligned bytes -> bit count
        ld_s    r2,[r0]
#ifdef __LITTLE_ENDIAN__
        asl     r7,r3,r7        ; mask out bytes before the real start
#else
        lsr     r7,r3,r7
#endif
        or      r5,r5,r4        ; c replicated into a full word
        ror     r4,r3           ; r4 = 0x80808080
        sub     r12,r2,r7
        bic_s   r12,r12,r2
        and     r12,r12,r4      ; zero-byte test, first (partial) word
        brne.d  r12,0,.Lfound0_ua
        xor     r6,r2,r5        ; bytes equal to c become zero in r6
        ld.a    r2,[r0,4]
        sub     r12,r6,r7
        bic     r12,r12,r6
        and     r7,r12,r4       ; c-byte test, first (partial) word
        breq    r7,0,.Loop      ; For speed, we want this branch to be unaligned.
        b       .Lfound_char    ; Likewise this one.
; /* We require this code address to be unaligned for speed... */
.Laligned:
        ld_s    r2,[r0]
        or      r5,r5,r4        ; c replicated into a full word
        ror     r4,r3           ; r4 = 0x80808080
; /* ... so that this code address is aligned, for itself and ... */
.Loop:
        sub     r12,r2,r3
        bic_s   r12,r12,r2
        and     r12,r12,r4      ; any zero byte in this word?
        brne.d  r12,0,.Lfound0
        xor     r6,r2,r5
        ld.a    r2,[r0,4]
        sub     r12,r6,r3
        bic     r12,r12,r6
        and     r7,r12,r4       ; any byte equal to c?
        breq    r7,0,.Loop      /* ... so that this branch is unaligned.  */
        ; Found searched-for character.  r0 has already advanced to next word.
#ifdef __LITTLE_ENDIAN__
/* We only need the information about the first matching byte
   (i.e. the least significant matching byte) to be exact,
   hence there is no problem with carry effects.  */
.Lfound_char:
        sub     r3,r7,1
        bic     r3,r3,r7        ; isolate lowest match bit
        norm    r2,r3           ; -> bit position
        sub_s   r0,r0,1
        asr_s   r2,r2,3         ; -> byte offset from the word end
        j.d     [blink]
        sub_s   r0,r0,r2        ; back up to the matching byte

        .balign 4
.Lfound0_ua:
        mov     r3,r7           ; unaligned entry: use the adjusted mask
.Lfound0:
        sub     r3,r6,r3        ; zero/char tests combined below decide
        bic     r3,r3,r6        ; whether NUL or c comes first
        and     r2,r3,r4
        or_s    r12,r12,r2
        sub_s   r3,r12,1
        bic_s   r3,r3,r12
        norm    r3,r3
        add_s   r0,r0,3
        asr_s   r12,r3,3
        asl.f   0,r2,r3         ; sign flag: was the first hit c (vs NUL)?
        sub_s   r0,r0,r12
        j_s.d   [blink]
        mov.pl  r0,0            ; NUL came first -> return NULL
#else /* BIG ENDIAN */
.Lfound_char:
        lsr     r7,r7,7

        bic     r2,r7,r6
        norm    r2,r2           ; leading-bit count -> byte index
        sub_s   r0,r0,4
        asr_s   r2,r2,3
        j.d     [blink]
        add_s   r0,r0,r2
.Lfound0_ua:
        mov_s   r3,r7
.Lfound0:
        asl_s   r2,r2,7
        or      r7,r6,r4
        bic_s   r12,r12,r2
        sub     r2,r7,r3
        or      r2,r2,r6
        bic     r12,r2,r12
        bic.f   r3,r4,r12       ; flags decide: c found vs NUL first
        norm    r3,r3

        add.pl  r3,r3,1
        asr_s   r12,r3,3
        asl.f   0,r2,r3
        add_s   r0,r0,r12
        j_s.d   [blink]
        mov.mi  r0,0            ; NUL came first -> return NULL
#endif /* ENDIAN */
ARC_EXIT strchr
+96
arch/arc/lib/strcmp.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   respective one cycle / byte by forcing double source 1 alignment, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup / finish,
   and strcmp might often terminate early.  */

#include <asm/linkage.h>

/*
 * int strcmp(const char *cs, const char *ct)
 *
 * In:  r0 = cs, r1 = ct
 * Out: r0 = 0 if equal; positive/negative (sign only, not the byte
 *      difference) per the ordering of the first differing byte.
 *      Negative results are produced by setting bit 31 of +1.
 *
 * Word-at-a-time loop when both strings are word aligned, using
 * r12 = 0x01010101 / r5 = ror(r12) = 0x80808080 zero-byte detection;
 * otherwise the simple .Lcharloop byte loop.
 */
ARC_ENTRY strcmp
        or      r2,r0,r1
        bmsk_s  r2,r2,1
        brne    r2,0,.Lcharloop ; either pointer misaligned -> byte loop
        mov_s   r12,0x01010101
        ror     r5,r12          ; r5 = 0x80808080
.Lwordloop:
        ld.ab   r2,[r0,4]
        ld.ab   r3,[r1,4]
        nop_s                   ; load-use spacing
        sub     r4,r2,r12
        bic     r4,r4,r2
        and     r4,r4,r5        ; r4 != 0 iff cs-word contains a NUL
        brne    r4,0,.Lfound0
        breq    r2,r3,.Lwordloop
        ; Words differ and neither contains NUL: locate the first
        ; differing byte and compare.
#ifdef __LITTLE_ENDIAN__
        xor     r0,r2,r3        ; mask for difference
        sub_s   r1,r0,1
        bic_s   r0,r0,r1        ; mask for least significant difference bit
        sub     r1,r5,r0
        xor     r0,r5,r1        ; mask for least significant difference byte
        and_s   r2,r2,r0
        and_s   r3,r3,r0
#endif /* LITTLE ENDIAN */
        cmp_s   r2,r3
        mov_s   r0,1
        j_s.d   [blink]
        bset.lo r0,r0,31        ; cs < ct -> negative
        .balign 4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
        xor     r0,r2,r3        ; mask for difference
        or      r0,r0,r4        ; or in zero indicator
        sub_s   r1,r0,1
        bic_s   r0,r0,r1        ; mask for least significant difference bit
        sub     r1,r5,r0
        xor     r0,r5,r1        ; mask for least significant difference byte
        and_s   r2,r2,r0
        and_s   r3,r3,r0
        sub.f   r0,r2,r3
        mov.hi  r0,1
        j_s.d   [blink]
        bset.lo r0,r0,31
#else /* BIG ENDIAN */
        /* The zero-detection above can mis-detect 0x01 bytes as zeroes
           because of carry propagation from a less significant zero byte.
           We can compensate for this by checking that bit0 is zero.
           This compensation is not necessary in the step where we
           get a low estimate for r2, because in any affected bytes
           we already have 0x00 or 0x01, which will remain unchanged
           when bit 7 is cleared.  */
        .balign 4
.Lfound0:
        lsr     r0,r4,8
        lsr_s   r1,r2
        bic_s   r2,r2,r0        ; get low estimate for r2 and get ...
        bic_s   r0,r0,r1        ; <this is the adjusted mask for zeros>
        or_s    r3,r3,r0        ; ... high estimate r3 so that r2 > r3 will ...
        cmp_s   r3,r2           ; ... be independent of trailing garbage
        or_s    r2,r2,r0        ; likewise for r3 > r2
        bic_s   r3,r3,r0
        rlc     r0,0            ; r0 := r2 > r3 ? 1 : 0
        cmp_s   r2,r3
        j_s.d   [blink]
        bset.lo r0,r0,31
#endif /* ENDIAN */

        .balign 4
.Lcharloop:
        ldb.ab  r2,[r0,1]
        ldb.ab  r3,[r1,1]
        nop_s                   ; load-use spacing
        breq    r2,0,.Lcmpend   ; end of cs
        breq    r2,r3,.Lcharloop
.Lcmpend:
        j_s.d   [blink]
        sub     r0,r2,r3        ; byte difference (0 if both ended equal)
ARC_EXIT strcmp
+70
arch/arc/lib/strcpy-700.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
   it 8 byte aligned.  Thus, we can do a little read-ahead, without
   dereferencing a cache line that we should not touch.
   Note that short and long instructions have been scheduled to avoid
   branch stalls.
   The beq_s to r3z could be made unaligned & long to avoid a stall
   there, but it is not likely to be taken often, and it
   would also be likely to cost an unaligned mispredict at the next call.  */

/*
 * char *strcpy(char *dest, const char *src)
 *
 * In:  r0 = dest, r1 = src
 * Out: r0 = dest (unchanged; r10 is used as the store cursor)
 *
 * Word loop uses r8 = 0x01010101 / r12 = ror(r8) = 0x80808080 to detect
 * a NUL byte inside each word; misaligned inputs use the byte loop.
 */

#include <asm/linkage.h>

ARC_ENTRY strcpy
        or      r2,r0,r1
        bmsk_s  r2,r2,1
        brne.d  r2,0,charloop   ; either pointer misaligned -> byte loop
        mov_s   r10,r0          ; r10 = cursor, preserve r0 for return
        ld_s    r3,[r1,0]
        mov     r8,0x01010101
        bbit0.d r1,2,loop_start
        ror     r12,r8          ; r12 = 0x80808080
        sub     r2,r3,r8        ; zero-byte test on the first word
        bic_s   r2,r2,r3
        tst_s   r2,r12
        bne     r3z             ; NUL in the very first word
        mov_s   r4,r3
        .balign 4
loop:
        ld.a    r3,[r1,4]
        st.ab   r4,[r10,4]
loop_start:
        ld.a    r4,[r1,4]       ; read ahead one word
        sub     r2,r3,r8
        bic_s   r2,r2,r3
        tst_s   r2,r12          ; NUL in r3?
        bne_s   r3z
        st.ab   r3,[r10,4]
        sub     r2,r4,r8
        bic     r2,r2,r4
        tst     r2,r12          ; NUL in r4?
        beq     loop
        mov_s   r3,r4           ; tail word goes through r3z below
#ifdef __LITTLE_ENDIAN__
r3z:	bmsk.f	r1,r3,7         ; extract lowest byte; Z set if it is NUL
	lsr_s	r3,r3,8
#else
r3z:	lsr.f	r1,r3,24        ; extract highest byte; Z set if it is NUL
	asl_s	r3,r3,8
#endif
        bne.d   r3z             ; store bytes up to and incl. the NUL
        stb.ab  r1,[r10,1]
        j_s     [blink]

        .balign 4
charloop:
        ldb.ab  r3,[r1,1]

        ; copy one byte per iteration, including the terminating NUL
        brne.d  r3,0,charloop
        stb.ab  r3,[r10,1]
        j       [blink]
ARC_EXIT strcpy
+83
arch/arc/lib/strlen.S
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/linkage.h>

/*
 * size_t strlen(const char *s)
 *
 * In:  r0 = s
 * Out: r0 = length of the string (bytes before the NUL)
 *
 * Reads two words per iteration, starting from the enclosing 8-byte
 * aligned block (bytes before s are masked out of the zero test).
 * Uses r4 = 0x01010101 / r5 = ror(r4) = 0x80808080 zero-byte detection.
 */
ARC_ENTRY strlen
        or      r3,r0,7         ; r3+1 = next 8-byte boundary
        ld      r2,[r3,-7]      ; first word of the aligned block
        ld.a    r6,[r3,-3]      ; second word
        mov     r4,0x01010101
        ; uses long immediate
#ifdef __LITTLE_ENDIAN__
        asl_s   r1,r0,3         ; misaligned bits of s, as a bit count
        btst_s  r0,2            ; does s start in the second word?
        asl     r7,r4,r1        ; 0x01010101 shifted past the skipped bytes
        ror     r5,r4           ; r5 = 0x80808080
        sub     r1,r2,r7
        bic_s   r1,r1,r2
        mov.eq  r7,r4           ; s starts in first word: full mask for word 2
        sub     r12,r6,r7
        bic     r12,r12,r6
        or.eq   r12,r12,r1      ; combine both words' zero tests
        and     r12,r12,r5
        brne    r12,0,.Learly_end
#else /* BIG ENDIAN */
        ror     r5,r4           ; r5 = 0x80808080
        btst_s  r0,2
        mov_s   r1,31
        sub3    r7,r1,r0        ; mask length for the skipped leading bytes
        sub     r1,r2,r4
        bic_s   r1,r1,r2
        bmsk    r1,r1,r7        ; drop matches before the real start of s
        sub     r12,r6,r4
        bic     r12,r12,r6
        bmsk.ne r12,r12,r7
        or.eq   r12,r12,r1
        and     r12,r12,r5
        brne    r12,0,.Learly_end
#endif /* ENDIAN */

.Loop:
        ld_s    r2,[r3,4]
        ld.a    r6,[r3,8]
        ; stall for load result
        sub     r1,r2,r4
        bic_s   r1,r1,r2
        sub     r12,r6,r4
        bic     r12,r12,r6
        or      r12,r12,r1      ; nonzero iff either word holds a NUL
        and     r12,r12,r5
        breq    r12,0,.Loop
.Lend:
        and.f   r1,r1,r5        ; NUL in the first word of the pair?
        sub.ne  r3,r3,4         ; yes: step back one word
        mov.eq  r1,r12          ; no: it was in the second word
#ifdef __LITTLE_ENDIAN__
        sub_s   r2,r1,1
        bic_s   r2,r2,r1        ; isolate lowest NUL-flag bit
        norm    r1,r2           ; -> bit position
        sub_s   r0,r0,3
        lsr_s   r1,r1,3         ; -> byte index within the word
        sub     r0,r3,r0        ; bytes scanned up to r3, minus the start
        j_s.d   [blink]
        sub     r0,r0,r1
#else /* BIG ENDIAN */
        lsr_s   r1,r1,7
        mov.eq  r2,r6
        bic_s   r1,r1,r2        ; compensate 0x01-byte false positives
        norm    r1,r1           ; leading-bit count -> byte index
        sub     r0,r3,r0
        lsr_s   r1,r1,3
        j_s.d   [blink]
        add     r0,r0,r1
#endif /* ENDIAN */
.Learly_end:
        b.d     .Lend
        sub_s.ne r1,r1,r1       ; NUL was in word 2: clear word-1 flags
ARC_EXIT strlen