Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Fix constant folding and poor optimization in byte swapping code

Constant folding does not work for the swabXX() byte swapping functions,
and the C versions optimize poorly.

Attempting to initialize a global variable to swab16(0x1234), or to put
something like "case swab32(42):" in a switch statement, will not compile.
This can work; swab.h just isn't doing it correctly. This patch fixes that.

Contrary to the comment in asm-i386/byteorder.h, gcc does not recognize the
"C" version of swab16 and turn it into efficient code. gcc can do this,
just not with the current code. The simple function:

u16 foo(u16 x) { return swab16(x); }

Would compile to:
movzwl %ax, %eax
movl %eax, %edx
shrl $8, %eax
sall $8, %edx
orl %eax, %edx

With this patch, it will compile to:
rolw $8, %ax

I also attempted to document the maze of different macros/inline functions
that are used to create the final product.

Signed-off-by: Trent Piepho <xyzzy@speakeasy.org>
Cc: Francois-Rene Rideau <fare@tunes.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Trent Piepho and committed by
Linus Torvalds
8e2c2002 02fb6149

+69 -39
+69 -39
include/linux/byteorder/swab.h
··· 10 10 * separated swab functions from cpu_to_XX, 11 11 * to clean up support for bizarre-endian architectures. 12 12 * 13 + * Trent Piepho <xyzzy@speakeasy.org> 2007114 14 + * make constant-folding work, provide C versions that 15 + * gcc can optimize better, explain different versions 16 + * 13 17 * See asm-i386/byteorder.h and suches for examples of how to provide 14 18 * architecture-dependent optimized versions 15 19 * ··· 21 17 22 18 #include <linux/compiler.h> 23 19 20 + /* Functions/macros defined, there are a lot: 21 + * 22 + * ___swabXX 23 + * Generic C versions of the swab functions. 24 + * 25 + * ___constant_swabXX 26 + * C versions that gcc can fold into a compile-time constant when 27 + * the argument is a compile-time constant. 28 + * 29 + * __arch__swabXX[sp]? 30 + * Architecture optimized versions of all the swab functions 31 + * (including the s and p versions). These can be defined in 32 + * asm-arch/byteorder.h. Any which are not, are defined here. 33 + * __arch__swabXXs() is defined in terms of __arch__swabXXp(), which 34 + * is defined in terms of __arch__swabXX(), which is in turn defined 35 + * in terms of ___swabXX(x). 36 + * These must be macros. They may be unsafe for arguments with 37 + * side-effects. 38 + * 39 + * __fswabXX 40 + * Inline function versions of the __arch__ macros. These _are_ safe 41 + * if the arguments have side-effects. Note there are no s and p 42 + * versions of these. 43 + * 44 + * __swabXX[sb] 45 + * There are the ones you should actually use. The __swabXX versions 46 + * will be a constant given a constant argument and use the arch 47 + * specific code (if any) for non-constant arguments. The s and p 48 + * versions always use the arch specific code (constant folding 49 + * doesn't apply). They are safe to use with arguments with 50 + * side-effects. 51 + * 52 + * swabXX[sb] 53 + * Nicknames for __swabXX[sb] to use in the kernel. 
54 + */ 55 + 24 56 /* casts are necessary for constants, because we never know how for sure 25 57 * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. 26 58 */ 27 - #define ___swab16(x) \ 28 - ({ \ 29 - __u16 __x = (x); \ 30 - ((__u16)( \ 31 - (((__u16)(__x) & (__u16)0x00ffU) << 8) | \ 32 - (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \ 33 - }) 34 59 35 - #define ___swab32(x) \ 36 - ({ \ 37 - __u32 __x = (x); \ 38 - ((__u32)( \ 39 - (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \ 40 - (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \ 41 - (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \ 42 - (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \ 43 - }) 44 - 45 - #define ___swab64(x) \ 46 - ({ \ 47 - __u64 __x = (x); \ 48 - ((__u64)( \ 49 - (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \ 50 - (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \ 51 - (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \ 52 - (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \ 53 - (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \ 54 - (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ 55 - (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \ 56 - (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \ 57 - }) 60 + static __inline__ __attribute_const__ __u16 ___swab16(__u16 x) 61 + { 62 + return x<<8 | x>>8; 63 + } 64 + static __inline__ __attribute_const__ __u32 ___swab32(__u32 x) 65 + { 66 + return x<<24 | x>>24 | 67 + (x & (__u32)0x0000ff00UL)<<8 | 68 + (x & (__u32)0x00ff0000UL)>>8; 69 + } 70 + static __inline__ __attribute_const__ __u64 ___swab64(__u64 x) 71 + { 72 + return x<<56 | x>>56 | 73 + (x & (__u64)0x000000000000ff00ULL)<<40 | 74 + (x & (__u64)0x0000000000ff0000ULL)<<24 | 75 + (x & (__u64)0x00000000ff000000ULL)<< 8 | 76 + (x & (__u64)0x000000ff00000000ULL)>> 8 | 77 + (x & (__u64)0x0000ff0000000000ULL)>>24 | 78 + (x & (__u64)0x00ff000000000000ULL)>>40; 79 + 
} 58 80 59 81 #define ___constant_swab16(x) \ 60 82 ((__u16)( \ ··· 107 77 * provide defaults when no architecture-specific optimization is detected 108 78 */ 109 79 #ifndef __arch__swab16 110 - # define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); }) 80 + # define __arch__swab16(x) ___swab16(x) 111 81 #endif 112 82 #ifndef __arch__swab32 113 - # define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); }) 83 + # define __arch__swab32(x) ___swab32(x) 114 84 #endif 115 85 #ifndef __arch__swab64 116 - # define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); }) 86 + # define __arch__swab64(x) ___swab64(x) 117 87 #endif 118 88 119 89 #ifndef __arch__swab16p ··· 127 97 #endif 128 98 129 99 #ifndef __arch__swab16s 130 - # define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0) 100 + # define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x))) 131 101 #endif 132 102 #ifndef __arch__swab32s 133 - # define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0) 103 + # define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x))) 134 104 #endif 135 105 #ifndef __arch__swab64s 136 - # define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0) 106 + # define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x))) 137 107 #endif 138 108 139 109 ··· 143 113 #if defined(__GNUC__) && defined(__OPTIMIZE__) 144 114 # define __swab16(x) \ 145 115 (__builtin_constant_p((__u16)(x)) ? \ 146 - ___swab16((x)) : \ 116 + ___constant_swab16((x)) : \ 147 117 __fswab16((x))) 148 118 # define __swab32(x) \ 149 119 (__builtin_constant_p((__u32)(x)) ? \ 150 - ___swab32((x)) : \ 120 + ___constant_swab32((x)) : \ 151 121 __fswab32((x))) 152 122 # define __swab64(x) \ 153 123 (__builtin_constant_p((__u64)(x)) ? \ 154 - ___swab64((x)) : \ 124 + ___constant_swab64((x)) : \ 155 125 __fswab64((x))) 156 126 #else 157 127 # define __swab16(x) __fswab16(x)