Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Stop using weak symbols for __iowrite32_copy()

Start switching the iomap_copy routines over to use #define and
arch-provided inline/macro functions instead of weak symbols.

Inline functions allow more compiler optimization and this is often a
driver hot path.

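x86 has the only arch-specific override of the weak generic
__iowrite32_copy(), so replace it with a static inline containing the same
single-instruction ("rep movsl") inline assembly. The compiler now loads the
count register itself, replacing the explicit "movl %edx,%ecx" from the old
assembly, and can do so in a more optimal way.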

Remove iomap_copy_64.S, which is no longer needed.

Link: https://lore.kernel.org/r/1-v3-1893cd8b9369+1925-mlx5_arm_wc_jgg@nvidia.com
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

+24 -20
+17
arch/x86/include/asm/io.h
··· 209 209 #define memcpy_toio memcpy_toio 210 210 #define memset_io memset_io 211 211 212 + #ifdef CONFIG_X86_64 213 + /* 214 + * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for 215 + * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy 216 + * loop. 217 + */ 218 + static inline void __iowrite32_copy(void __iomem *to, const void *from, 219 + size_t count) 220 + { 221 + asm volatile("rep ; movsl" 222 + : "=&c"(count), "=&D"(to), "=&S"(from) 223 + : "0"(count), "1"(to), "2"(from) 224 + : "memory"); 225 + } 226 + #define __iowrite32_copy __iowrite32_copy 227 + #endif 228 + 212 229 /* 213 230 * ISA space is 'always mapped' on a typical x86 system, no need to 214 231 * explicitly ioremap() it. The fact that the ISA IO space is mapped
-1
arch/x86/lib/Makefile
··· 53 53 lib-y += atomic64_386_32.o 54 54 endif 55 55 else 56 - obj-y += iomap_copy_64.o 57 56 ifneq ($(CONFIG_GENERIC_CSUM),y) 58 57 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o 59 58 endif
-15
arch/x86/lib/iomap_copy_64.S
··· 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 2 - /* 3 - * Copyright 2006 PathScale, Inc. All Rights Reserved. 4 - */ 5 - 6 - #include <linux/linkage.h> 7 - 8 - /* 9 - * override generic version in lib/iomap_copy.c 10 - */ 11 - SYM_FUNC_START(__iowrite32_copy) 12 - movl %edx,%ecx 13 - rep movsl 14 - RET 15 - SYM_FUNC_END(__iowrite32_copy)
+4 -1
include/linux/io.h
··· 16 16 struct device; 17 17 struct resource; 18 18 19 - __visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count); 19 + #ifndef __iowrite32_copy 20 + void __iowrite32_copy(void __iomem *to, const void *from, size_t count); 21 + #endif 22 + 20 23 void __ioread32_copy(void *to, const void __iomem *from, size_t count); 21 24 void __iowrite64_copy(void __iomem *to, const void *from, size_t count); 22 25
+3 -3
lib/iomap_copy.c
··· 16 16 * time. Order of access is not guaranteed, nor is a memory barrier 17 17 * performed afterwards. 18 18 */ 19 - void __attribute__((weak)) __iowrite32_copy(void __iomem *to, 20 - const void *from, 21 - size_t count) 19 + #ifndef __iowrite32_copy 20 + void __iowrite32_copy(void __iomem *to, const void *from, size_t count) 22 21 { 23 22 u32 __iomem *dst = to; 24 23 const u32 *src = from; ··· 27 28 __raw_writel(*src++, dst++); 28 29 } 29 30 EXPORT_SYMBOL_GPL(__iowrite32_copy); 31 + #endif 30 32 31 33 /** 32 34 * __ioread32_copy - copy data from MMIO space, in 32-bit units