Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc: memcpy optimization for 64bit LE

Unaligned stores take alignment exceptions on POWER7 running in little-endian.
This is a dumb little-endian base memcpy that prevents unaligned stores.
Once booted the feature fixup code switches over to the VMX copy loops
(which are already endian safe).

The question is what we do before that switch over. The base 64bit
memcpy takes alignment exceptions on POWER7 so we can't use it as is.
Fixing the causes of alignment exceptions would slow it down, because
we'd need to ensure all loads and stores are aligned either through
rotate tricks or bytewise loads and stores. Either would be bad for
all other 64bit platforms.

[ I simplified the loop a bit - Anton ]

Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

authored by

Philippe Bergheaud and committed by
Benjamin Herrenschmidt
00f554fa 48ce3b7c

+16 -8
-4
arch/powerpc/include/asm/string.h
··· 10 10 #define __HAVE_ARCH_STRNCMP 11 11 #define __HAVE_ARCH_STRCAT 12 12 #define __HAVE_ARCH_MEMSET 13 - #ifdef __BIG_ENDIAN__ 14 13 #define __HAVE_ARCH_MEMCPY 15 - #endif 16 14 #define __HAVE_ARCH_MEMMOVE 17 15 #define __HAVE_ARCH_MEMCMP 18 16 #define __HAVE_ARCH_MEMCHR ··· 22 24 extern int strncmp(const char *, const char *, __kernel_size_t); 23 25 extern char * strcat(char *, const char *); 24 26 extern void * memset(void *,int,__kernel_size_t); 25 - #ifdef __BIG_ENDIAN__ 26 27 extern void * memcpy(void *,const void *,__kernel_size_t); 27 - #endif 28 28 extern void * memmove(void *,const void *,__kernel_size_t); 29 29 extern int memcmp(const void *,const void *,__kernel_size_t); 30 30 extern void * memchr(const void *,int,__kernel_size_t);
-2
arch/powerpc/kernel/ppc_ksyms.c
··· 155 155 #endif 156 156 long long __bswapdi2(long long); 157 157 EXPORT_SYMBOL(__bswapdi2); 158 - #ifdef __BIG_ENDIAN__ 159 158 EXPORT_SYMBOL(memcpy); 160 - #endif 161 159 EXPORT_SYMBOL(memset); 162 160 EXPORT_SYMBOL(memmove); 163 161 EXPORT_SYMBOL(memcmp);
-2
arch/powerpc/lib/Makefile
··· 23 23 obj-$(CONFIG_PPC64) += checksum_wrappers_64.o 24 24 endif 25 25 26 - ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),) 27 26 obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o 28 - endif 29 27 30 28 obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o 31 29
+16
arch/powerpc/lib/memcpy_64.S
··· 12 12 .align 7 13 13 _GLOBAL(memcpy) 14 14 BEGIN_FTR_SECTION 15 + #ifdef __LITTLE_ENDIAN__ 16 + cmpdi cr7,r5,0 17 + #else 15 18 std r3,48(r1) /* save destination pointer for return value */ 19 + #endif 16 20 FTR_SECTION_ELSE 17 21 #ifndef SELFTEST 18 22 b memcpy_power7 19 23 #endif 20 24 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) 25 + #ifdef __LITTLE_ENDIAN__ 26 + /* dumb little-endian memcpy that will get replaced at runtime */ 27 + addi r9,r3,-1 28 + addi r4,r4,-1 29 + beqlr cr7 30 + mtctr r5 31 + 1: lbzu r10,1(r4) 32 + stbu r10,1(r9) 33 + bdnz 1b 34 + blr 35 + #else 21 36 PPC_MTOCRF(0x01,r5) 22 37 cmpldi cr1,r5,16 23 38 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry ··· 218 203 stb r0,0(r3) 219 204 4: ld r3,48(r1) /* return dest pointer */ 220 205 blr 206 + #endif