Blackfin: optimize strncpy a bit

Add a little strncpy optimization which can easily cut boot time by 20%.

When the kernel boots with an initramfs, it builds up the filesystem
from a cpio archive. For every file in the archive (and there can be
many), fs/namei.c's do_getname() calls strncpy_from_user() with a
length of PATH_MAX (1024). Since the names are typically much shorter
than that, the destination of the strncpy gets padded with many NUL
bytes.

With this optimization, most of that NUL padding is done by a call to
memset(), which is already optimized for filling memory quickly; the
hardware loop helps a little bit as well.

Boot time was measured with 'loglevel=0' so UART speed doesn't get in the way.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

authored by Robin Getz and committed by Mike Frysinger 648eee52 c70dcabc

+47 -13
+1
arch/blackfin/lib/memset.S
··· 20 * R1 = filler byte 21 * R2 = count 22 * Favours word aligned data. 23 */ 24 25 ENTRY(_memset)
··· 20 * R1 = filler byte 21 * R2 = count 22 * Favours word aligned data. 23 + * The strncpy assumes that I0 and I1 are not used in this function 24 */ 25 26 ENTRY(_memset)
+46 -13
arch/blackfin/lib/strncpy.S
··· 5 */ 6 7 #include <linux/linkage.h> 8 9 /* void *strncpy(char *dest, const char *src, size_t n); 10 * R0 = address (dest) 11 * R1 = address (src) 12 * R2 = size 13 - * Returns a pointer to the destination string dest 14 */ 15 16 #ifdef CONFIG_STRNCPY_L1 ··· 26 ENTRY(_strncpy) 27 CC = R2 == 0; 28 if CC JUMP 4f; 29 P0 = R0 ; /* dst*/ 30 P1 = R1 ; /* src*/ 31 32 1: 33 R1 = B [P1++] (Z); 34 B [P0++] = R1; 35 - CC = R1; 36 - if ! cc jump 2f; 37 - R2 += -1; 38 - CC = R2 == 0; 39 - if ! cc jump 1b (bp); 40 - jump 4f; 41 2: 42 - /* if src is shorter than n, we need to null pad bytes in dest */ 43 - R1 = 0; 44 3: 45 R2 += -1; 46 - CC = R2 == 0; 47 - if cc jump 4f; 48 - B [P0++] = R1; 49 - jump 3b; 50 51 4: 52 RTS; 53 54 ENDPROC(_strncpy)
··· 5 */ 6 7 #include <linux/linkage.h> 8 + #include <asm/context.S> 9 10 /* void *strncpy(char *dest, const char *src, size_t n); 11 * R0 = address (dest) 12 * R1 = address (src) 13 * R2 = size 14 + * Returns a pointer (R0) to the destination string dest 15 + * we do this by not changing R0 16 */ 17 18 #ifdef CONFIG_STRNCPY_L1 ··· 24 ENTRY(_strncpy) 25 CC = R2 == 0; 26 if CC JUMP 4f; 27 + 28 + P2 = R2 ; /* size */ 29 P0 = R0 ; /* dst*/ 30 P1 = R1 ; /* src*/ 31 32 + LSETUP (1f, 2f) LC0 = P2; 33 1: 34 R1 = B [P1++] (Z); 35 B [P0++] = R1; 36 + CC = R1 == 0; 37 2: 38 + if CC jump 3f; 39 + 40 + RTS; 41 + 42 + /* if src is shorter than n, we need to null pad bytes in dest 43 + * but, we can get here when the last byte is zero, and we don't 44 + * want to copy an extra byte at the end, so we need to check 45 + */ 46 3: 47 + R2 = LC0; 48 + CC = R2 49 + if ! CC jump 6f; 50 + 51 + /* if the required null padded portion is small, do it here, rather than 52 + * handling the overhead of memset (which is OK when things are big). 53 + */ 54 + R3 = 0x20; 55 + CC = R2 < R3; 56 + IF CC jump 4f; 57 + 58 R2 += -1; 59 + 60 + /* Set things up for memset 61 + * R0 = address 62 + * R1 = filler byte (this case it's zero, set above) 63 + * R2 = count (set above) 64 + */ 65 + 66 + I1 = R0; 67 + R0 = RETS; 68 + I0 = R0; 69 + R0 = P0; 70 + pseudo_long_call _memset, p0; 71 + R0 = I0; 72 + RETS = R0; 73 + R0 = I1; 74 + RTS; 75 76 4: 77 + LSETUP(5f, 5f) LC0; 78 + 5: 79 + B [P0++] = R1; 80 + 6: 81 RTS; 82 83 ENDPROC(_strncpy)