Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'core-hweight-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-hweight-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, hweight: Use a 32-bit popcnt for __arch_hweight32()
arch, hweight: Fix compilation errors
x86: Add optimized popcnt variants
bitops: Optimize hweight() by making use of compile-time evaluation

+188 -62
+10 -8
arch/alpha/include/asm/bitops.h
··· 405 405 406 406 #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) 407 407 /* Whee. EV67 can calculate it directly. */ 408 - static inline unsigned long hweight64(unsigned long w) 408 + static inline unsigned long __arch_hweight64(unsigned long w) 409 409 { 410 410 return __kernel_ctpop(w); 411 411 } 412 412 413 - static inline unsigned int hweight32(unsigned int w) 413 + static inline unsigned int __arch_hweight32(unsigned int w) 414 414 { 415 - return hweight64(w); 415 + return __arch_hweight64(w); 416 416 } 417 417 418 - static inline unsigned int hweight16(unsigned int w) 418 + static inline unsigned int __arch_hweight16(unsigned int w) 419 419 { 420 - return hweight64(w & 0xffff); 420 + return __arch_hweight64(w & 0xffff); 421 421 } 422 422 423 - static inline unsigned int hweight8(unsigned int w) 423 + static inline unsigned int __arch_hweight8(unsigned int w) 424 424 { 425 - return hweight64(w & 0xff); 425 + return __arch_hweight64(w & 0xff); 426 426 } 427 427 #else 428 - #include <asm-generic/bitops/hweight.h> 428 + #include <asm-generic/bitops/arch_hweight.h> 429 429 #endif 430 + 431 + #include <asm-generic/bitops/const_hweight.h> 430 432 431 433 #endif /* __KERNEL__ */ 432 434
+6 -5
arch/ia64/include/asm/bitops.h
··· 437 437 * hweightN: returns the hamming weight (i.e. the number 438 438 * of bits set) of a N-bit word 439 439 */ 440 - static __inline__ unsigned long 441 - hweight64 (unsigned long x) 440 + static __inline__ unsigned long __arch_hweight64(unsigned long x) 442 441 { 443 442 unsigned long result; 444 443 result = ia64_popcnt(x); 445 444 return result; 446 445 } 447 446 448 - #define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful) 449 - #define hweight16(x) (unsigned int) hweight64((x) & 0xfffful) 450 - #define hweight8(x) (unsigned int) hweight64((x) & 0xfful) 447 + #define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful)) 448 + #define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful)) 449 + #define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful)) 450 + 451 + #include <asm-generic/bitops/const_hweight.h> 451 452 452 453 #endif /* __KERNEL__ */ 453 454
+6 -5
arch/sparc/include/asm/bitops_64.h
··· 44 44 45 45 #ifdef ULTRA_HAS_POPULATION_COUNT 46 46 47 - static inline unsigned int hweight64(unsigned long w) 47 + static inline unsigned int __arch_hweight64(unsigned long w) 48 48 { 49 49 unsigned int res; 50 50 ··· 52 52 return res; 53 53 } 54 54 55 - static inline unsigned int hweight32(unsigned int w) 55 + static inline unsigned int __arch_hweight32(unsigned int w) 56 56 { 57 57 unsigned int res; 58 58 ··· 60 60 return res; 61 61 } 62 62 63 - static inline unsigned int hweight16(unsigned int w) 63 + static inline unsigned int __arch_hweight16(unsigned int w) 64 64 { 65 65 unsigned int res; 66 66 ··· 68 68 return res; 69 69 } 70 70 71 - static inline unsigned int hweight8(unsigned int w) 71 + static inline unsigned int __arch_hweight8(unsigned int w) 72 72 { 73 73 unsigned int res; 74 74 ··· 78 78 79 79 #else 80 80 81 - #include <asm-generic/bitops/hweight.h> 81 + #include <asm-generic/bitops/arch_hweight.h> 82 82 83 83 #endif 84 + #include <asm-generic/bitops/const_hweight.h> 84 85 #include <asm-generic/bitops/lock.h> 85 86 #endif /* __KERNEL__ */ 86 87
+5
arch/x86/Kconfig
··· 237 237 def_bool y 238 238 depends on X86_32 && !CC_STACKPROTECTOR 239 239 240 + config ARCH_HWEIGHT_CFLAGS 241 + string 242 + default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 243 + default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 244 + 240 245 config KTIME_SCALAR 241 246 def_bool X86_32 242 247 source "init/Kconfig"
+6 -3
arch/x86/include/asm/alternative.h
··· 42 42 #define LOCK_PREFIX "" 43 43 #endif 44 44 45 - /* This must be included *after* the definition of LOCK_PREFIX */ 46 - #include <asm/cpufeature.h> 47 - 48 45 struct alt_instr { 49 46 u8 *instr; /* original instruction */ 50 47 u8 *replacement; ··· 94 97 ".section .altinstr_replacement, \"ax\"\n" \ 95 98 "663:\n\t" newinstr "\n664:\n" /* replacement */ \ 96 99 ".previous" 100 + 101 + /* 102 + * This must be included *after* the definition of ALTERNATIVE due to 103 + * <asm/arch_hweight.h> 104 + */ 105 + #include <asm/cpufeature.h> 97 106 98 107 /* 99 108 * Alternative instructions for different CPU types or capabilities.
+61
arch/x86/include/asm/arch_hweight.h
··· 1 + #ifndef _ASM_X86_HWEIGHT_H 2 + #define _ASM_X86_HWEIGHT_H 3 + 4 + #ifdef CONFIG_64BIT 5 + /* popcnt %edi, %eax -- redundant REX prefix for alignment */ 6 + #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" 7 + /* popcnt %rdi, %rax */ 8 + #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" 9 + #define REG_IN "D" 10 + #define REG_OUT "a" 11 + #else 12 + /* popcnt %eax, %eax */ 13 + #define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" 14 + #define REG_IN "a" 15 + #define REG_OUT "a" 16 + #endif 17 + 18 + /* 19 + * __sw_hweightXX are called from within the alternatives below 20 + * and callee-clobbered registers need to be taken care of. See 21 + * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective 22 + * compiler switches. 23 + */ 24 + static inline unsigned int __arch_hweight32(unsigned int w) 25 + { 26 + unsigned int res = 0; 27 + 28 + asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) 29 + : "="REG_OUT (res) 30 + : REG_IN (w)); 31 + 32 + return res; 33 + } 34 + 35 + static inline unsigned int __arch_hweight16(unsigned int w) 36 + { 37 + return __arch_hweight32(w & 0xffff); 38 + } 39 + 40 + static inline unsigned int __arch_hweight8(unsigned int w) 41 + { 42 + return __arch_hweight32(w & 0xff); 43 + } 44 + 45 + static inline unsigned long __arch_hweight64(__u64 w) 46 + { 47 + unsigned long res = 0; 48 + 49 + #ifdef CONFIG_X86_32 50 + return __arch_hweight32((u32)w) + 51 + __arch_hweight32((u32)(w >> 32)); 52 + #else 53 + asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) 54 + : "="REG_OUT (res) 55 + : REG_IN (w)); 56 + #endif /* CONFIG_X86_32 */ 57 + 58 + return res; 59 + } 60 + 61 + #endif
+3 -1
arch/x86/include/asm/bitops.h
··· 444 444 445 445 #define ARCH_HAS_FAST_MULTIPLIER 1 446 446 447 - #include <asm-generic/bitops/hweight.h> 447 + #include <asm/arch_hweight.h> 448 + 449 + #include <asm-generic/bitops/const_hweight.h> 448 450 449 451 #endif /* __KERNEL__ */ 450 452
+25
include/asm-generic/bitops/arch_hweight.h
··· 1 + #ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ 2 + #define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ 3 + 4 + #include <asm/types.h> 5 + 6 + static inline unsigned int __arch_hweight32(unsigned int w) 7 + { 8 + return __sw_hweight32(w); 9 + } 10 + 11 + static inline unsigned int __arch_hweight16(unsigned int w) 12 + { 13 + return __sw_hweight16(w); 14 + } 15 + 16 + static inline unsigned int __arch_hweight8(unsigned int w) 17 + { 18 + return __sw_hweight8(w); 19 + } 20 + 21 + static inline unsigned long __arch_hweight64(__u64 w) 22 + { 23 + return __sw_hweight64(w); 24 + } 25 + #endif /* _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ */
+42
include/asm-generic/bitops/const_hweight.h
··· 1 + #ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ 2 + #define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ 3 + 4 + /* 5 + * Compile time versions of __arch_hweightN() 6 + */ 7 + #define __const_hweight8(w) \ 8 + ( (!!((w) & (1ULL << 0))) + \ 9 + (!!((w) & (1ULL << 1))) + \ 10 + (!!((w) & (1ULL << 2))) + \ 11 + (!!((w) & (1ULL << 3))) + \ 12 + (!!((w) & (1ULL << 4))) + \ 13 + (!!((w) & (1ULL << 5))) + \ 14 + (!!((w) & (1ULL << 6))) + \ 15 + (!!((w) & (1ULL << 7))) ) 16 + 17 + #define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 )) 18 + #define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) 19 + #define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) 20 + 21 + /* 22 + * Generic interface. 23 + */ 24 + #define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w)) 25 + #define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w)) 26 + #define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w)) 27 + #define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w)) 28 + 29 + /* 30 + * Interface for known constant arguments 31 + */ 32 + #define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w)) 33 + #define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w)) 34 + #define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w)) 35 + #define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w)) 36 + 37 + /* 38 + * Type invariant interface to the compile time constant hweight functions. 39 + */ 40 + #define HWEIGHT(w) HWEIGHT64((u64)(w)) 41 + 42 + #endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */
+2 -6
include/asm-generic/bitops/hweight.h
··· 1 1 #ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ 2 2 #define _ASM_GENERIC_BITOPS_HWEIGHT_H_ 3 3 4 - #include <asm/types.h> 5 - 6 - extern unsigned int hweight32(unsigned int w); 7 - extern unsigned int hweight16(unsigned int w); 8 - extern unsigned int hweight8(unsigned int w); 9 - extern unsigned long hweight64(__u64 w); 4 + #include <asm-generic/bitops/arch_hweight.h> 5 + #include <asm-generic/bitops/const_hweight.h> 10 6 11 7 #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
+5 -25
include/linux/bitops.h
··· 10 10 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) 11 11 #endif 12 12 13 + extern unsigned int __sw_hweight8(unsigned int w); 14 + extern unsigned int __sw_hweight16(unsigned int w); 15 + extern unsigned int __sw_hweight32(unsigned int w); 16 + extern unsigned long __sw_hweight64(__u64 w); 17 + 13 18 /* 14 19 * Include this here because some architectures need generic_ffs/fls in 15 20 * scope ··· 48 43 { 49 44 return sizeof(w) == 4 ? hweight32(w) : hweight64(w); 50 45 } 51 - 52 - /* 53 - * Clearly slow versions of the hweightN() functions, their benefit is 54 - * of course compile time evaluation of constant arguments. 55 - */ 56 - #define HWEIGHT8(w) \ 57 - ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \ 58 - (!!((w) & (1ULL << 0))) + \ 59 - (!!((w) & (1ULL << 1))) + \ 60 - (!!((w) & (1ULL << 2))) + \ 61 - (!!((w) & (1ULL << 3))) + \ 62 - (!!((w) & (1ULL << 4))) + \ 63 - (!!((w) & (1ULL << 5))) + \ 64 - (!!((w) & (1ULL << 6))) + \ 65 - (!!((w) & (1ULL << 7))) ) 66 - 67 - #define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8)) 68 - #define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16)) 69 - #define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32)) 70 - 71 - /* 72 - * Type invariant version that simply casts things to the 73 - * largest type. 74 - */ 75 - #define HWEIGHT(w) HWEIGHT64((u64)(w)) 76 46 77 47 /** 78 48 * rol32 - rotate a 32-bit value left
+3
lib/Makefile
··· 39 39 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o 40 40 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o 41 41 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o 42 + 43 + CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) 42 44 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o 45 + 43 46 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o 44 47 obj-$(CONFIG_BTREE) += btree.o 45 48 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
+10 -9
lib/hweight.c
··· 9 9 * The Hamming Weight of a number is the total number of bits set in it. 10 10 */ 11 11 12 - unsigned int hweight32(unsigned int w) 12 + unsigned int __sw_hweight32(unsigned int w) 13 13 { 14 14 #ifdef ARCH_HAS_FAST_MULTIPLIER 15 15 w -= (w >> 1) & 0x55555555; ··· 24 24 return (res + (res >> 16)) & 0x000000FF; 25 25 #endif 26 26 } 27 - EXPORT_SYMBOL(hweight32); 27 + EXPORT_SYMBOL(__sw_hweight32); 28 28 29 - unsigned int hweight16(unsigned int w) 29 + unsigned int __sw_hweight16(unsigned int w) 30 30 { 31 31 unsigned int res = w - ((w >> 1) & 0x5555); 32 32 res = (res & 0x3333) + ((res >> 2) & 0x3333); 33 33 res = (res + (res >> 4)) & 0x0F0F; 34 34 return (res + (res >> 8)) & 0x00FF; 35 35 } 36 - EXPORT_SYMBOL(hweight16); 36 + EXPORT_SYMBOL(__sw_hweight16); 37 37 38 - unsigned int hweight8(unsigned int w) 38 + unsigned int __sw_hweight8(unsigned int w) 39 39 { 40 40 unsigned int res = w - ((w >> 1) & 0x55); 41 41 res = (res & 0x33) + ((res >> 2) & 0x33); 42 42 return (res + (res >> 4)) & 0x0F; 43 43 } 44 - EXPORT_SYMBOL(hweight8); 44 + EXPORT_SYMBOL(__sw_hweight8); 45 45 46 - unsigned long hweight64(__u64 w) 46 + unsigned long __sw_hweight64(__u64 w) 47 47 { 48 48 #if BITS_PER_LONG == 32 49 - return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); 49 + return __sw_hweight32((unsigned int)(w >> 32)) + 50 + __sw_hweight32((unsigned int)w); 50 51 #elif BITS_PER_LONG == 64 51 52 #ifdef ARCH_HAS_FAST_MULTIPLIER 52 53 w -= (w >> 1) & 0x5555555555555555ul; ··· 64 63 #endif 65 64 #endif 66 65 } 67 - EXPORT_SYMBOL(hweight64); 66 + EXPORT_SYMBOL(__sw_hweight64);
+4
scripts/Makefile.lib
··· 245 245 cmd_lzo = (cat $(filter-out FORCE,$^) | \ 246 246 lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ 247 247 (rm -f $@ ; false) 248 + 249 + # misc stuff 250 + # --------------------------------------------------------------------------- 251 + quote:="