Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'hash' of git://ftp.sciencehorizons.net/linux

Pull string hash improvements from George Spelvin:
"This series does several related things:

- Makes the dcache hash (fs/namei.c) useful for general kernel use.

(Thanks to Bruce for noticing the zero-length corner case)

- Converts the string hashes in <linux/sunrpc/svcauth.h> to use the
above.

- Avoids 64-bit multiplies in hash_64() on 32-bit platforms. Two
32-bit multiplies will do well enough.

- Rids the world of the bad hash multipliers in hash_32.

This finishes the job started in commit 689de1d6ca95 ("Minimal
fix-up of bad hashing behavior of hash_64()")

The vast majority of Linux architectures have hardware support for
32x32-bit multiply and so derive no benefit from "simplified"
multipliers.

The few processors that do not (68000, h8/300 and some models of
Microblaze) have arch-specific implementations added. Those
patches are last in the series.

- Overhauls the dcache hash mixing.

The patch in commit 0fed3ac866ea ("namei: Improve hash mixing if
CONFIG_DCACHE_WORD_ACCESS") was an off-the-cuff suggestion.
Replaced with a much more careful design that's simultaneously
faster and better. (My own invention, as there was nothing suitable
in the literature I could find. Comments welcome!)

- Modify the hash_name() loop to skip the initial HASH_MIX(). This
would let us salt the hash if we ever wanted to.

- Sort out partial_name_hash().

The hash function is declared as using a long state, even though
it's truncated to 32 bits at the end and the extra internal state
contributes nothing to the result. And some callers do odd things:

- fs/hfs/string.c only allocates 32 bits of state
- fs/hfsplus/unicode.c uses it to hash 16-bit unicode symbols not bytes

- Modify bytemask_from_count to handle inputs of 1..sizeof(long)
rather than 0..sizeof(long)-1. This would simplify users other
than full_name_hash"

Special thanks to Bruce Fields for testing and finding bugs in v1. (I
learned some humbling lessons about "obviously correct" code.)

On the arch-specific front, the m68k assembly has been tested in a
standalone test harness, I've been in contact with the Microblaze
maintainers who mostly don't care, as the hardware multiplier is never
omitted in real-world applications, and I haven't heard anything from
the H8/300 world"

* 'hash' of git://ftp.sciencehorizons.net/linux:
h8300: Add <asm/hash.h>
microblaze: Add <asm/hash.h>
m68k: Add <asm/hash.h>
<linux/hash.h>: Add support for architecture-specific functions
fs/namei.c: Improve dcache hash function
Eliminate bad hash multipliers from hash_32() and hash_64()
Change hash_64() return value to 32 bits
<linux/sunrpc/svcauth.h>: Define hash_str() in terms of hashlen_string()
fs/namei.c: Add hashlen_string() function
Pull out string hash to <linux/stringhash.h>

+738 -154
+8
arch/Kconfig
··· 598 598 Architecture supports the 'objtool check' host tool command, which 599 599 performs compile-time stack metadata validation. 600 600 601 + config HAVE_ARCH_HASH 602 + bool 603 + default n 604 + help 605 + If this is set, the architecture provides an <asm/hash.h> 606 + file which provides platform-specific implementations of some 607 + functions in <linux/hash.h> or fs/namei.c. 608 + 601 609 # 602 610 # ABI hall of shame 603 611 #
+1
arch/h8300/Kconfig
··· 20 20 select HAVE_KERNEL_GZIP 21 21 select HAVE_KERNEL_LZO 22 22 select HAVE_ARCH_KGDB 23 + select HAVE_ARCH_HASH 23 24 select CPU_NO_EFFICIENT_FFS 24 25 25 26 config RWSEM_GENERIC_SPINLOCK
+53
arch/h8300/include/asm/hash.h
··· 1 + #ifndef _ASM_HASH_H 2 + #define _ASM_HASH_H 3 + 4 + /* 5 + * The later H8SX models have a 32x32-bit multiply, but the H8/300H 6 + * and H8S have only 16x16->32. Since it's tolerably compact, this is 7 + * basically an inlined version of the __mulsi3 code. Since the inputs 8 + * are not expected to be small, it's also simplfied by skipping the 9 + * early-out checks. 10 + * 11 + * (Since neither CPU has any multi-bit shift instructions, a 12 + * shift-and-add version is a non-starter.) 13 + * 14 + * TODO: come up with an arch-specific version of the hashing in fs/namei.c, 15 + * since that is heavily dependent on rotates. Which, as mentioned, suck 16 + * horribly on H8. 17 + */ 18 + 19 + #if defined(CONFIG_CPU_H300H) || defined(CONFIG_CPU_H8S) 20 + 21 + #define HAVE_ARCH__HASH_32 1 22 + 23 + /* 24 + * Multiply by k = 0x61C88647. Fitting this into three registers requires 25 + * one extra instruction, but reducing register pressure will probably 26 + * make that back and then some. 27 + * 28 + * GCC asm note: %e1 is the high half of operand %1, while %f1 is the 29 + * low half. So if %1 is er4, then %e1 is e4 and %f1 is r4. 30 + * 31 + * This has been designed to modify x in place, since that's the most 32 + * common usage, but preserve k, since hash_64() makes two calls in 33 + * quick succession. 34 + */ 35 + static inline u32 __attribute_const__ __hash_32(u32 x) 36 + { 37 + u32 temp; 38 + 39 + asm( "mov.w %e1,%f0" 40 + "\n mulxu.w %f2,%0" /* klow * xhigh */ 41 + "\n mov.w %f0,%e1" /* The extra instruction */ 42 + "\n mov.w %f1,%f0" 43 + "\n mulxu.w %e2,%0" /* khigh * xlow */ 44 + "\n add.w %e1,%f0" 45 + "\n mulxu.w %f2,%1" /* klow * xlow */ 46 + "\n add.w %f0,%e1" 47 + : "=&r" (temp), "=r" (x) 48 + : "%r" (GOLDEN_RATIO_32), "1" (x)); 49 + return x; 50 + } 51 + 52 + #endif 53 + #endif /* _ASM_HASH_H */
+1
arch/m68k/Kconfig.cpu
··· 41 41 select CPU_HAS_NO_UNALIGNED 42 42 select GENERIC_CSUM 43 43 select CPU_NO_EFFICIENT_FFS 44 + select HAVE_ARCH_HASH 44 45 help 45 46 The Freescale (was Motorola) 68000 CPU is the first generation of 46 47 the well known M68K family of processors. The CPU core as well as
+59
arch/m68k/include/asm/hash.h
··· 1 + #ifndef _ASM_HASH_H 2 + #define _ASM_HASH_H 3 + 4 + /* 5 + * If CONFIG_M68000=y (original mc68000/010), this file is #included 6 + * to work around the lack of a MULU.L instruction. 7 + */ 8 + 9 + #define HAVE_ARCH__HASH_32 1 10 + /* 11 + * While it would be legal to substitute a different hash operation 12 + * entirely, let's keep it simple and just use an optimized multiply 13 + * by GOLDEN_RATIO_32 = 0x61C88647. 14 + * 15 + * The best way to do that appears to be to multiply by 0x8647 with 16 + * shifts and adds, and use mulu.w to multiply the high half by 0x61C8. 17 + * 18 + * Because the 68000 has multi-cycle shifts, this addition chain is 19 + * chosen to minimise the shift distances. 20 + * 21 + * Despite every attempt to spoon-feed it simple operations, GCC 22 + * 6.1.1 doggedly insists on doing annoying things like converting 23 + * "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles). 24 + * 25 + * It also likes to notice two shifts in a row, like "a = x << 2" and 26 + * "a <<= 7", and convert that to "a = x << 9". But shifts longer 27 + * than 8 bits are extra-slow on m68k, so that's a lose. 28 + * 29 + * Since the 68000 is a very simple in-order processor with no 30 + * instruction scheduling effects on execution time, we can safely 31 + * take it out of GCC's hands and write one big asm() block. 32 + * 33 + * Without calling overhead, this operation is 30 bytes (14 instructions 34 + * plus one immediate constant) and 166 cycles. 35 + * 36 + * (Because %2 is fetched twice, it can't be postincrement, and thus it 37 + * can't be a fully general "g" or "m". Register is preferred, but 38 + * offsettable memory or immediate will work.) 
39 + */ 40 + static inline u32 __attribute_const__ __hash_32(u32 x) 41 + { 42 + u32 a, b; 43 + 44 + asm( "move.l %2,%0" /* a = x * 0x0001 */ 45 + "\n lsl.l #2,%0" /* a = x * 0x0004 */ 46 + "\n move.l %0,%1" 47 + "\n lsl.l #7,%0" /* a = x * 0x0200 */ 48 + "\n add.l %2,%0" /* a = x * 0x0201 */ 49 + "\n add.l %0,%1" /* b = x * 0x0205 */ 50 + "\n add.l %0,%0" /* a = x * 0x0402 */ 51 + "\n add.l %0,%1" /* b = x * 0x0607 */ 52 + "\n lsl.l #5,%0" /* a = x * 0x8040 */ 53 + : "=&d,d" (a), "=&r,r" (b) 54 + : "r,roi?" (x)); /* a+b = x*0x8647 */ 55 + 56 + return ((u16)(x*0x61c8) << 16) + a + b; 57 + } 58 + 59 + #endif /* _ASM_HASH_H */
+1
arch/microblaze/Kconfig
··· 16 16 select GENERIC_IRQ_SHOW 17 17 select GENERIC_PCI_IOMAP 18 18 select GENERIC_SCHED_CLOCK 19 + select HAVE_ARCH_HASH 19 20 select HAVE_ARCH_KGDB 20 21 select HAVE_DEBUG_KMEMLEAK 21 22 select HAVE_DMA_API_DEBUG
+81
arch/microblaze/include/asm/hash.h
··· 1 + #ifndef _ASM_HASH_H 2 + #define _ASM_HASH_H 3 + 4 + /* 5 + * Fortunately, most people who want to run Linux on Microblaze enable 6 + * both multiplier and barrel shifter, but omitting them is technically 7 + * a supported configuration. 8 + * 9 + * With just a barrel shifter, we can implement an efficient constant 10 + * multiply using shifts and adds. GCC can find a 9-step solution, but 11 + * this 6-step solution was found by Yevgen Voronenko's implementation 12 + * of the Hcub algorithm at http://spiral.ece.cmu.edu/mcm/gen.html. 13 + * 14 + * That software is really not designed for a single multiplier this large, 15 + * but if you run it enough times with different seeds, it'll find several 16 + * 6-shift, 6-add sequences for computing x * 0x61C88647. They are all 17 + * c = (x << 19) + x; 18 + * a = (x << 9) + c; 19 + * b = (x << 23) + a; 20 + * return (a<<11) + (b<<6) + (c<<3) - b; 21 + * with variations on the order of the final add. 22 + * 23 + * Without even a shifter, it's hopless; any hash function will suck. 24 + */ 25 + 26 + #if CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL == 0 27 + 28 + #define HAVE_ARCH__HASH_32 1 29 + 30 + /* Multiply by GOLDEN_RATIO_32 = 0x61C88647 */ 31 + static inline u32 __attribute_const__ __hash_32(u32 a) 32 + { 33 + #if CONFIG_XILINX_MICROBLAZE0_USE_BARREL 34 + unsigned int b, c; 35 + 36 + /* Phase 1: Compute three intermediate values */ 37 + b = a << 23; 38 + c = (a << 19) + a; 39 + a = (a << 9) + c; 40 + b += a; 41 + 42 + /* Phase 2: Compute (a << 11) + (b << 6) + (c << 3) - b */ 43 + a <<= 5; 44 + a += b; /* (a << 5) + b */ 45 + a <<= 3; 46 + a += c; /* (a << 8) + (b << 3) + c */ 47 + a <<= 3; 48 + return a - b; /* (a << 11) + (b << 6) + (c << 3) - b */ 49 + #else 50 + /* 51 + * "This is really going to hurt." 52 + * 53 + * Without a barrel shifter, left shifts are implemented as 54 + * repeated additions, and the best we can do is an optimal 55 + * addition-subtraction chain. 
This one is not known to be 56 + * optimal, but at 37 steps, it's decent for a 31-bit multiplier. 57 + * 58 + * Question: given its size (37*4 = 148 bytes per instance), 59 + * and slowness, is this worth having inline? 60 + */ 61 + unsigned int b, c, d; 62 + 63 + b = a << 4; /* 4 */ 64 + c = b << 1; /* 1 5 */ 65 + b += a; /* 1 6 */ 66 + c += b; /* 1 7 */ 67 + c <<= 3; /* 3 10 */ 68 + c -= a; /* 1 11 */ 69 + d = c << 7; /* 7 18 */ 70 + d += b; /* 1 19 */ 71 + d <<= 8; /* 8 27 */ 72 + d += a; /* 1 28 */ 73 + d <<= 1; /* 1 29 */ 74 + d += b; /* 1 30 */ 75 + d <<= 6; /* 6 36 */ 76 + return d + c; /* 1 37 total instructions*/ 77 + #endif 78 + } 79 + 80 + #endif /* !CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL */ 81 + #endif /* _ASM_HASH_H */
+2
drivers/media/usb/dvb-usb-v2/af9015.c
··· 398 398 } 399 399 400 400 #define AF9015_EEPROM_SIZE 256 401 + /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ 402 + #define GOLDEN_RATIO_PRIME_32 0x9e370001UL 401 403 402 404 /* hash (and dump) eeprom */ 403 405 static int af9015_eeprom_hash(struct dvb_usb_device *d)
+1 -2
fs/dcache.c
··· 1670 1670 struct qstr q; 1671 1671 1672 1672 q.name = name; 1673 - q.len = strlen(name); 1674 - q.hash = full_name_hash(q.name, q.len); 1673 + q.hash_len = hashlen_string(name); 1675 1674 return d_alloc(parent, &q); 1676 1675 } 1677 1676 EXPORT_SYMBOL(d_alloc_name);
+124 -38
fs/namei.c
··· 35 35 #include <linux/fs_struct.h> 36 36 #include <linux/posix_acl.h> 37 37 #include <linux/hash.h> 38 + #include <linux/bitops.h> 38 39 #include <asm/uaccess.h> 39 40 40 41 #include "internal.h" ··· 1798 1797 1799 1798 #include <asm/word-at-a-time.h> 1800 1799 1801 - #ifdef CONFIG_64BIT 1800 + #ifdef HASH_MIX 1802 1801 1803 - static inline unsigned int fold_hash(unsigned long hash) 1804 - { 1805 - return hash_64(hash, 32); 1806 - } 1802 + /* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */ 1803 + 1804 + #elif defined(CONFIG_64BIT) 1805 + /* 1806 + * Register pressure in the mixing function is an issue, particularly 1807 + * on 32-bit x86, but almost any function requires one state value and 1808 + * one temporary. Instead, use a function designed for two state values 1809 + * and no temporaries. 1810 + * 1811 + * This function cannot create a collision in only two iterations, so 1812 + * we have two iterations to achieve avalanche. In those two iterations, 1813 + * we have six layers of mixing, which is enough to spread one bit's 1814 + * influence out to 2^6 = 64 state bits. 1815 + * 1816 + * Rotate constants are scored by considering either 64 one-bit input 1817 + * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the 1818 + * probability of that delta causing a change to each of the 128 output 1819 + * bits, using a sample of random initial states. 1820 + * 1821 + * The Shannon entropy of the computed probabilities is then summed 1822 + * to produce a score. Ideally, any input change has a 50% chance of 1823 + * toggling any given output bit. 
1824 + * 1825 + * Mixing scores (in bits) for (12,45): 1826 + * Input delta: 1-bit 2-bit 1827 + * 1 round: 713.3 42542.6 1828 + * 2 rounds: 2753.7 140389.8 1829 + * 3 rounds: 5954.1 233458.2 1830 + * 4 rounds: 7862.6 256672.2 1831 + * Perfect: 8192 258048 1832 + * (64*128) (64*63/2 * 128) 1833 + */ 1834 + #define HASH_MIX(x, y, a) \ 1835 + ( x ^= (a), \ 1836 + y ^= x, x = rol64(x,12),\ 1837 + x += y, y = rol64(y,45),\ 1838 + y *= 9 ) 1807 1839 1808 1840 /* 1809 - * This is George Marsaglia's XORSHIFT generator. 1810 - * It implements a maximum-period LFSR in only a few 1811 - * instructions. It also has the property (required 1812 - * by hash_name()) that mix_hash(0) = 0. 1841 + * Fold two longs into one 32-bit hash value. This must be fast, but 1842 + * latency isn't quite as critical, as there is a fair bit of additional 1843 + * work done before the hash value is used. 1813 1844 */ 1814 - static inline unsigned long mix_hash(unsigned long hash) 1845 + static inline unsigned int fold_hash(unsigned long x, unsigned long y) 1815 1846 { 1816 - hash ^= hash << 13; 1817 - hash ^= hash >> 7; 1818 - hash ^= hash << 17; 1819 - return hash; 1847 + y ^= x * GOLDEN_RATIO_64; 1848 + y *= GOLDEN_RATIO_64; 1849 + return y >> 32; 1820 1850 } 1821 1851 1822 1852 #else /* 32-bit case */ 1823 1853 1824 - #define fold_hash(x) (x) 1854 + /* 1855 + * Mixing scores (in bits) for (7,20): 1856 + * Input delta: 1-bit 2-bit 1857 + * 1 round: 330.3 9201.6 1858 + * 2 rounds: 1246.4 25475.4 1859 + * 3 rounds: 1907.1 31295.1 1860 + * 4 rounds: 2042.3 31718.6 1861 + * Perfect: 2048 31744 1862 + * (32*64) (32*31/2 * 64) 1863 + */ 1864 + #define HASH_MIX(x, y, a) \ 1865 + ( x ^= (a), \ 1866 + y ^= x, x = rol32(x, 7),\ 1867 + x += y, y = rol32(y,20),\ 1868 + y *= 9 ) 1825 1869 1826 - static inline unsigned long mix_hash(unsigned long hash) 1870 + static inline unsigned int fold_hash(unsigned long x, unsigned long y) 1827 1871 { 1828 - hash ^= hash << 13; 1829 - hash ^= hash >> 17; 1830 - hash ^= 
hash << 5; 1831 - return hash; 1872 + /* Use arch-optimized multiply if one exists */ 1873 + return __hash_32(y ^ __hash_32(x)); 1832 1874 } 1833 1875 1834 1876 #endif 1835 1877 1836 - unsigned int full_name_hash(const unsigned char *name, unsigned int len) 1878 + /* 1879 + * Return the hash of a string of known length. This is carfully 1880 + * designed to match hash_name(), which is the more critical function. 1881 + * In particular, we must end by hashing a final word containing 0..7 1882 + * payload bytes, to match the way that hash_name() iterates until it 1883 + * finds the delimiter after the name. 1884 + */ 1885 + unsigned int full_name_hash(const char *name, unsigned int len) 1837 1886 { 1838 - unsigned long a, hash = 0; 1887 + unsigned long a, x = 0, y = 0; 1839 1888 1840 1889 for (;;) { 1890 + if (!len) 1891 + goto done; 1841 1892 a = load_unaligned_zeropad(name); 1842 1893 if (len < sizeof(unsigned long)) 1843 1894 break; 1844 - hash = mix_hash(hash + a); 1895 + HASH_MIX(x, y, a); 1845 1896 name += sizeof(unsigned long); 1846 1897 len -= sizeof(unsigned long); 1847 - if (!len) 1848 - goto done; 1849 1898 } 1850 - hash += a & bytemask_from_count(len); 1899 + x ^= a & bytemask_from_count(len); 1851 1900 done: 1852 - return fold_hash(hash); 1901 + return fold_hash(x, y); 1853 1902 } 1854 1903 EXPORT_SYMBOL(full_name_hash); 1904 + 1905 + /* Return the "hash_len" (hash and length) of a null-terminated string */ 1906 + u64 hashlen_string(const char *name) 1907 + { 1908 + unsigned long a = 0, x = 0, y = 0, adata, mask, len; 1909 + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; 1910 + 1911 + len = -sizeof(unsigned long); 1912 + do { 1913 + HASH_MIX(x, y, a); 1914 + len += sizeof(unsigned long); 1915 + a = load_unaligned_zeropad(name+len); 1916 + } while (!has_zero(a, &adata, &constants)); 1917 + 1918 + adata = prep_zero_mask(a, adata, &constants); 1919 + mask = create_zero_mask(adata); 1920 + x ^= a & zero_bytemask(mask); 1921 + 1922 + return 
hashlen_create(fold_hash(x, y), len + find_zero(mask)); 1923 + } 1924 + EXPORT_SYMBOL(hashlen_string); 1855 1925 1856 1926 /* 1857 1927 * Calculate the length and hash of the path component, and ··· 1930 1858 */ 1931 1859 static inline u64 hash_name(const char *name) 1932 1860 { 1933 - unsigned long a, b, adata, bdata, mask, hash, len; 1861 + unsigned long a = 0, b, x = 0, y = 0, adata, bdata, mask, len; 1934 1862 const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; 1935 1863 1936 - hash = a = 0; 1937 1864 len = -sizeof(unsigned long); 1938 1865 do { 1939 - hash = mix_hash(hash + a); 1866 + HASH_MIX(x, y, a); 1940 1867 len += sizeof(unsigned long); 1941 1868 a = load_unaligned_zeropad(name+len); 1942 1869 b = a ^ REPEAT_BYTE('/'); ··· 1943 1872 1944 1873 adata = prep_zero_mask(a, adata, &constants); 1945 1874 bdata = prep_zero_mask(b, bdata, &constants); 1946 - 1947 1875 mask = create_zero_mask(adata | bdata); 1876 + x ^= a & zero_bytemask(mask); 1948 1877 1949 - hash += a & zero_bytemask(mask); 1950 - len += find_zero(mask); 1951 - return hashlen_create(fold_hash(hash), len); 1878 + return hashlen_create(fold_hash(x, y), len + find_zero(mask)); 1952 1879 } 1953 1880 1954 - #else 1881 + #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ 1955 1882 1956 - unsigned int full_name_hash(const unsigned char *name, unsigned int len) 1883 + /* Return the hash of a string of known length */ 1884 + unsigned int full_name_hash(const char *name, unsigned int len) 1957 1885 { 1958 1886 unsigned long hash = init_name_hash(); 1959 1887 while (len--) 1960 - hash = partial_name_hash(*name++, hash); 1888 + hash = partial_name_hash((unsigned char)*name++, hash); 1961 1889 return end_name_hash(hash); 1962 1890 } 1963 1891 EXPORT_SYMBOL(full_name_hash); 1892 + 1893 + /* Return the "hash_len" (hash and length) of a null-terminated string */ 1894 + u64 hash_string(const char *name) 1895 + { 1896 + unsigned long hash = init_name_hash(); 1897 + unsigned long 
len = 0, c; 1898 + 1899 + c = (unsigned char)*name; 1900 + do { 1901 + len++; 1902 + hash = partial_name_hash(c, hash); 1903 + c = (unsigned char)name[len]; 1904 + } while (c); 1905 + return hashlen_create(end_name_hash(hash), len); 1906 + } 1907 + EXPORT_SYMBOL(hash_string); 1964 1908 1965 1909 /* 1966 1910 * We know there's a real path component here of at least ··· 2020 1934 int type; 2021 1935 2022 1936 err = may_lookup(nd); 2023 - if (err) 1937 + if (err) 2024 1938 return err; 2025 1939 2026 1940 hash_len = hash_name(name);
+1 -26
include/linux/dcache.h
··· 10 10 #include <linux/cache.h> 11 11 #include <linux/rcupdate.h> 12 12 #include <linux/lockref.h> 13 + #include <linux/stringhash.h> 13 14 14 15 struct path; 15 16 struct vfsmount; ··· 53 52 }; 54 53 55 54 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } 56 - #define hashlen_hash(hashlen) ((u32) (hashlen)) 57 - #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) 58 - #define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash)) 59 55 60 56 struct dentry_stat_t { 61 57 long nr_dentry; ··· 62 64 long dummy[2]; 63 65 }; 64 66 extern struct dentry_stat_t dentry_stat; 65 - 66 - /* Name hashing routines. Initial hash value */ 67 - /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ 68 - #define init_name_hash() 0 69 - 70 - /* partial hash update function. Assume roughly 4 bits per character */ 71 - static inline unsigned long 72 - partial_name_hash(unsigned long c, unsigned long prevhash) 73 - { 74 - return (prevhash + (c << 4) + (c >> 4)) * 11; 75 - } 76 - 77 - /* 78 - * Finally: cut down the number of bits to a int value (and try to avoid 79 - * losing bits) 80 - */ 81 - static inline unsigned long end_name_hash(unsigned long hash) 82 - { 83 - return (unsigned int) hash; 84 - } 85 - 86 - /* Compute the hash for a name string. */ 87 - extern unsigned int full_name_hash(const unsigned char *, unsigned int); 88 67 89 68 /* 90 69 * Try to keep struct dentry aligned on 64 byte cachelines (this will
+59 -57
include/linux/hash.h
··· 3 3 /* Fast hashing routine for ints, longs and pointers. 4 4 (C) 2002 Nadia Yvette Chambers, IBM */ 5 5 6 - /* 7 - * Knuth recommends primes in approximately golden ratio to the maximum 8 - * integer representable by a machine word for multiplicative hashing. 9 - * Chuck Lever verified the effectiveness of this technique: 10 - * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf 11 - * 12 - * These primes are chosen to be bit-sparse, that is operations on 13 - * them can use shifts and additions instead of multiplications for 14 - * machines where multiplications are slow. 15 - */ 16 - 17 6 #include <asm/types.h> 18 7 #include <linux/compiler.h> 19 8 20 - /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ 21 - #define GOLDEN_RATIO_PRIME_32 0x9e370001UL 22 - /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ 23 - #define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL 24 - 9 + /* 10 + * The "GOLDEN_RATIO_PRIME" is used in ifs/btrfs/brtfs_inode.h and 11 + * fs/inode.c. It's not actually prime any more (the previous primes 12 + * were actively bad for hashing), but the name remains. 13 + */ 25 14 #if BITS_PER_LONG == 32 26 - #define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32 15 + #define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32 27 16 #define hash_long(val, bits) hash_32(val, bits) 28 17 #elif BITS_PER_LONG == 64 29 18 #define hash_long(val, bits) hash_64(val, bits) 30 - #define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64 19 + #define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64 31 20 #else 32 21 #error Wordsize not 32 or 64 33 22 #endif 34 23 35 24 /* 36 - * The above primes are actively bad for hashing, since they are 37 - * too sparse. The 32-bit one is mostly ok, the 64-bit one causes 38 - * real problems. Besides, the "prime" part is pointless for the 39 - * multiplicative hash. 25 + * This hash multiplies the input by a large odd number and takes the 26 + * high bits. 
Since multiplication propagates changes to the most 27 + * significant end only, it is essential that the high bits of the 28 + * product be used for the hash value. 29 + * 30 + * Chuck Lever verified the effectiveness of this technique: 31 + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf 40 32 * 41 33 * Although a random odd number will do, it turns out that the golden 42 34 * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice 43 - * properties. 35 + * properties. (See Knuth vol 3, section 6.4, exercise 9.) 44 36 * 45 - * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2. 46 - * (See Knuth vol 3, section 6.4, exercise 9.) 37 + * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2, 38 + * which is very slightly easier to multiply by and makes no 39 + * difference to the hash distribution. 47 40 */ 48 41 #define GOLDEN_RATIO_32 0x61C88647 49 42 #define GOLDEN_RATIO_64 0x61C8864680B583EBull 50 43 51 - static __always_inline u64 hash_64(u64 val, unsigned int bits) 52 - { 53 - u64 hash = val; 54 - 55 - #if BITS_PER_LONG == 64 56 - hash = hash * GOLDEN_RATIO_64; 57 - #else 58 - /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ 59 - u64 n = hash; 60 - n <<= 18; 61 - hash -= n; 62 - n <<= 33; 63 - hash -= n; 64 - n <<= 3; 65 - hash += n; 66 - n <<= 3; 67 - hash -= n; 68 - n <<= 4; 69 - hash += n; 70 - n <<= 2; 71 - hash += n; 44 + #ifdef CONFIG_HAVE_ARCH_HASH 45 + /* This header may use the GOLDEN_RATIO_xx constants */ 46 + #include <asm/hash.h> 72 47 #endif 73 48 74 - /* High bits are more random, so use them. */ 75 - return hash >> (64 - bits); 76 - } 77 - 78 - static inline u32 hash_32(u32 val, unsigned int bits) 49 + /* 50 + * The _generic versions exist only so lib/test_hash.c can compare 51 + * the arch-optimized versions with the generic. 
52 + * 53 + * Note that if you change these, any <asm/hash.h> that aren't updated 54 + * to match need to have their HAVE_ARCH_* define values updated so the 55 + * self-test will not false-positive. 56 + */ 57 + #ifndef HAVE_ARCH__HASH_32 58 + #define __hash_32 __hash_32_generic 59 + #endif 60 + static inline u32 __hash_32_generic(u32 val) 79 61 { 80 - /* On some cpus multiply is faster, on others gcc will do shifts */ 81 - u32 hash = val * GOLDEN_RATIO_PRIME_32; 82 - 83 - /* High bits are more random, so use them. */ 84 - return hash >> (32 - bits); 62 + return val * GOLDEN_RATIO_32; 85 63 } 86 64 87 - static inline unsigned long hash_ptr(const void *ptr, unsigned int bits) 65 + #ifndef HAVE_ARCH_HASH_32 66 + #define hash_32 hash_32_generic 67 + #endif 68 + static inline u32 hash_32_generic(u32 val, unsigned int bits) 69 + { 70 + /* High bits are more random, so use them. */ 71 + return __hash_32(val) >> (32 - bits); 72 + } 73 + 74 + #ifndef HAVE_ARCH_HASH_64 75 + #define hash_64 hash_64_generic 76 + #endif 77 + static __always_inline u32 hash_64_generic(u64 val, unsigned int bits) 78 + { 79 + #if BITS_PER_LONG == 64 80 + /* 64x64-bit multiply is efficient on all 64-bit processors */ 81 + return val * GOLDEN_RATIO_64 >> (64 - bits); 82 + #else 83 + /* Hash 64 bits using only 32x32-bit multiply. */ 84 + return hash_32((u32)val ^ __hash_32(val >> 32), bits); 85 + #endif 86 + } 87 + 88 + static inline u32 hash_ptr(const void *ptr, unsigned int bits) 88 89 { 89 90 return hash_long((unsigned long)ptr, bits); 90 91 } 91 92 93 + /* This really should be called fold32_ptr; it does no hashing to speak of. */ 92 94 static inline u32 hash32_ptr(const void *ptr) 93 95 { 94 96 unsigned long val = (unsigned long)ptr;
+76
include/linux/stringhash.h
··· 1 + #ifndef __LINUX_STRINGHASH_H 2 + #define __LINUX_STRINGHASH_H 3 + 4 + #include <linux/compiler.h> /* For __pure */ 5 + #include <linux/types.h> /* For u32, u64 */ 6 + 7 + /* 8 + * Routines for hashing strings of bytes to a 32-bit hash value. 9 + * 10 + * These hash functions are NOT GUARANTEED STABLE between kernel 11 + * versions, architectures, or even repeated boots of the same kernel. 12 + * (E.g. they may depend on boot-time hardware detection or be 13 + * deliberately randomized.) 14 + * 15 + * They are also not intended to be secure against collisions caused by 16 + * malicious inputs; much slower hash functions are required for that. 17 + * 18 + * They are optimized for pathname components, meaning short strings. 19 + * Even if a majority of files have longer names, the dynamic profile of 20 + * pathname components skews short due to short directory names. 21 + * (E.g. /usr/lib/libsesquipedalianism.so.3.141.) 22 + */ 23 + 24 + /* 25 + * Version 1: one byte at a time. Example of use: 26 + * 27 + * unsigned long hash = init_name_hash; 28 + * while (*p) 29 + * hash = partial_name_hash(tolower(*p++), hash); 30 + * hash = end_name_hash(hash); 31 + * 32 + * Although this is designed for bytes, fs/hfsplus/unicode.c 33 + * abuses it to hash 16-bit values. 34 + */ 35 + 36 + /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ 37 + #define init_name_hash() 0 38 + 39 + /* partial hash update function. Assume roughly 4 bits per character */ 40 + static inline unsigned long 41 + partial_name_hash(unsigned long c, unsigned long prevhash) 42 + { 43 + return (prevhash + (c << 4) + (c >> 4)) * 11; 44 + } 45 + 46 + /* 47 + * Finally: cut down the number of bits to a int value (and try to avoid 48 + * losing bits) 49 + */ 50 + static inline unsigned long end_name_hash(unsigned long hash) 51 + { 52 + return (unsigned int)hash; 53 + } 54 + 55 + /* 56 + * Version 2: One word (32 or 64 bits) at a time. 
57 + * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h> 58 + * exists, which describes major Linux platforms like x86 and ARM), then 59 + * this computes a different hash function much faster. 60 + * 61 + * If not set, this falls back to a wrapper around the preceding. 62 + */ 63 + extern unsigned int __pure full_name_hash(const char *, unsigned int); 64 + 65 + /* 66 + * A hash_len is a u64 with the hash of a string in the low 67 + * half and the length in the high half. 68 + */ 69 + #define hashlen_hash(hashlen) ((u32)(hashlen)) 70 + #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) 71 + #define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) 72 + 73 + /* Return the "hash_len" (hash and length) of a null-terminated string */ 74 + extern u64 __pure hashlen_string(const char *name); 75 + 76 + #endif /* __LINUX_STRINGHASH_H */
+9 -31
include/linux/sunrpc/svcauth.h
··· 16 16 #include <linux/sunrpc/cache.h> 17 17 #include <linux/sunrpc/gss_api.h> 18 18 #include <linux/hash.h> 19 + #include <linux/stringhash.h> 19 20 #include <linux/cred.h> 20 21 21 22 struct svc_cred { ··· 166 165 extern int unix_gid_cache_create(struct net *net); 167 166 extern void unix_gid_cache_destroy(struct net *net); 168 167 169 - static inline unsigned long hash_str(char *name, int bits) 168 + /* 169 + * The <stringhash.h> functions are good enough that we don't need to 170 + * use hash_32() on them; just extracting the high bits is enough. 171 + */ 172 + static inline unsigned long hash_str(char const *name, int bits) 170 173 { 171 - unsigned long hash = 0; 172 - unsigned long l = 0; 173 - int len = 0; 174 - unsigned char c; 175 - do { 176 - if (unlikely(!(c = *name++))) { 177 - c = (char)len; len = -1; 178 - } 179 - l = (l << 8) | c; 180 - len++; 181 - if ((len & (BITS_PER_LONG/8-1))==0) 182 - hash = hash_long(hash^l, BITS_PER_LONG); 183 - } while (len); 184 - return hash >> (BITS_PER_LONG - bits); 174 + return hashlen_hash(hashlen_string(name)) >> (32 - bits); 185 175 } 186 176 187 - static inline unsigned long hash_mem(char *buf, int length, int bits) 177 + static inline unsigned long hash_mem(char const *buf, int length, int bits) 188 178 { 189 - unsigned long hash = 0; 190 - unsigned long l = 0; 191 - int len = 0; 192 - unsigned char c; 193 - do { 194 - if (len == length) { 195 - c = (char)len; len = -1; 196 - } else 197 - c = *buf++; 198 - l = (l << 8) | c; 199 - len++; 200 - if ((len & (BITS_PER_LONG/8-1))==0) 201 - hash = hash_long(hash^l, BITS_PER_LONG); 202 - } while (len); 203 - return hash >> (BITS_PER_LONG - bits); 179 + return full_name_hash(buf, length) >> (32 - bits); 204 180 } 205 181 206 182 #endif /* __KERNEL__ */
+11
lib/Kconfig.debug
··· 1849 1849 1850 1850 If unsure, say N. 1851 1851 1852 + config TEST_HASH 1853 + tristate "Perform selftest on hash functions" 1854 + default n 1855 + help 1856 + Enable this option to test the kernel's integer (<linux/hash.h>) 1857 + and string (<linux/stringhash.h>) hash functions on boot 1858 + (or module load). 1859 + 1860 + This is intended to help people writing architecture-specific 1861 + optimized versions. If unsure, say N. 1862 + 1852 1863 endmenu # runtime tests 1853 1864 1854 1865 config PROVIDE_OHCI1394_DMA_INIT
+1
lib/Makefile
··· 48 48 obj-y += kstrtox.o 49 49 obj-$(CONFIG_TEST_BPF) += test_bpf.o 50 50 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o 51 + obj-$(CONFIG_TEST_HASH) += test_hash.o 51 52 obj-$(CONFIG_TEST_KASAN) += test_kasan.o 52 53 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o 53 54 obj-$(CONFIG_TEST_LKM) += test_module.o
+250
lib/test_hash.c
··· 1 + /* 2 + * Test cases for <linux/hash.h> and <linux/stringhash.h> 3 + * This just verifies that various ways of computing a hash 4 + * produce the same thing and, for cases where a k-bit hash 5 + * value is requested, is of the requested size. 6 + * 7 + * We fill a buffer with a 255-byte null-terminated string, 8 + * and use both full_name_hash() and hashlen_string() to hash the 9 + * substrings from i to j, where 0 <= i < j < 256. 10 + * 11 + * The returned values are used to check that __hash_32() and 12 + * __hash_32_generic() compute the same thing. Likewise hash_32() 13 + * and hash_64(). 14 + */ 15 + 16 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n" 17 + 18 + #include <linux/compiler.h> 19 + #include <linux/types.h> 20 + #include <linux/module.h> 21 + #include <linux/hash.h> 22 + #include <linux/stringhash.h> 23 + #include <linux/printk.h> 24 + 25 + /* 32-bit XORSHIFT generator. Seed must not be zero. */ 26 + static u32 __init __attribute_const__ 27 + xorshift(u32 seed) 28 + { 29 + seed ^= seed << 13; 30 + seed ^= seed >> 17; 31 + seed ^= seed << 5; 32 + return seed; 33 + } 34 + 35 + /* Given a non-zero x, returns a non-zero byte. */ 36 + static u8 __init __attribute_const__ 37 + mod255(u32 x) 38 + { 39 + x = (x & 0xffff) + (x >> 16); /* 1 <= x <= 0x1fffe */ 40 + x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x2fd */ 41 + x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x100 */ 42 + x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0xff */ 43 + return x; 44 + } 45 + 46 + /* Fill the buffer with non-zero bytes. */ 47 + static void __init 48 + fill_buf(char *buf, size_t len, u32 seed) 49 + { 50 + size_t i; 51 + 52 + for (i = 0; i < len; i++) { 53 + seed = xorshift(seed); 54 + buf[i] = mod255(seed); 55 + } 56 + } 57 + 58 + /* 59 + * Test the various integer hash functions. h64 (or its low-order bits) 60 + * is the integer to hash. hash_or accumulates the OR of the hash values, 61 + * which are later checked to see that they cover all the requested bits. 
62 + * 63 + * Because these functions (as opposed to the string hashes) are all 64 + * inline, the code being tested is actually in the module, and you can 65 + * recompile and re-test the module without rebooting. 66 + */ 67 + static bool __init 68 + test_int_hash(unsigned long long h64, u32 hash_or[2][33]) 69 + { 70 + int k; 71 + u32 h0 = (u32)h64, h1, h2; 72 + 73 + /* Test __hash32 */ 74 + hash_or[0][0] |= h1 = __hash_32(h0); 75 + #ifdef HAVE_ARCH__HASH_32 76 + hash_or[1][0] |= h2 = __hash_32_generic(h0); 77 + #if HAVE_ARCH__HASH_32 == 1 78 + if (h1 != h2) { 79 + pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x", 80 + h0, h1, h2); 81 + return false; 82 + } 83 + #endif 84 + #endif 85 + 86 + /* Test k = 1..32 bits */ 87 + for (k = 1; k <= 32; k++) { 88 + u32 const m = ((u32)2 << (k-1)) - 1; /* Low k bits set */ 89 + 90 + /* Test hash_32 */ 91 + hash_or[0][k] |= h1 = hash_32(h0, k); 92 + if (h1 > m) { 93 + pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m); 94 + return false; 95 + } 96 + #ifdef HAVE_ARCH_HASH_32 97 + h2 = hash_32_generic(h0, k); 98 + #if HAVE_ARCH_HASH_32 == 1 99 + if (h1 != h2) { 100 + pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() " 101 + " = %#x", h0, k, h1, h2); 102 + return false; 103 + } 104 + #else 105 + if (h2 > m) { 106 + pr_err("hash_32_generic(%#x, %d) = %#x > %#x", 107 + h0, k, h1, m); 108 + return false; 109 + } 110 + #endif 111 + #endif 112 + /* Test hash_64 */ 113 + hash_or[1][k] |= h1 = hash_64(h64, k); 114 + if (h1 > m) { 115 + pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m); 116 + return false; 117 + } 118 + #ifdef HAVE_ARCH_HASH_64 119 + h2 = hash_64_generic(h64, k); 120 + #if HAVE_ARCH_HASH_64 == 1 121 + if (h1 != h2) { 122 + pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() " 123 + "= %#x", h64, k, h1, h2); 124 + return false; 125 + } 126 + #else 127 + if (h2 > m) { 128 + pr_err("hash_64_generic(%#llx, %d) = %#x > %#x", 129 + h64, k, h1, m); 130 + return false; 131 + } 132 + #endif 133 + #endif 134 + 
} 135 + 136 + (void)h2; /* Suppress unused variable warning */ 137 + return true; 138 + } 139 + 140 + #define SIZE 256 /* Run time is cubic in SIZE */ 141 + 142 + static int __init 143 + test_hash_init(void) 144 + { 145 + char buf[SIZE+1]; 146 + u32 string_or = 0, hash_or[2][33] = { 0 }; 147 + unsigned tests = 0; 148 + unsigned long long h64 = 0; 149 + int i, j; 150 + 151 + fill_buf(buf, SIZE, 1); 152 + 153 + /* Test every possible non-empty substring in the buffer. */ 154 + for (j = SIZE; j > 0; --j) { 155 + buf[j] = '\0'; 156 + 157 + for (i = 0; i <= j; i++) { 158 + u64 hashlen = hashlen_string(buf+i); 159 + u32 h0 = full_name_hash(buf+i, j-i); 160 + 161 + /* Check that hashlen_string gets the length right */ 162 + if (hashlen_len(hashlen) != j-i) { 163 + pr_err("hashlen_string(%d..%d) returned length" 164 + " %u, expected %d", 165 + i, j, hashlen_len(hashlen), j-i); 166 + return -EINVAL; 167 + } 168 + /* Check that the hashes match */ 169 + if (hashlen_hash(hashlen) != h0) { 170 + pr_err("hashlen_string(%d..%d) = %08x != " 171 + "full_name_hash() = %08x", 172 + i, j, hashlen_hash(hashlen), h0); 173 + return -EINVAL; 174 + } 175 + 176 + string_or |= h0; 177 + h64 = h64 << 32 | h0; /* For use with hash_64 */ 178 + if (!test_int_hash(h64, hash_or)) 179 + return -EINVAL; 180 + tests++; 181 + } /* i */ 182 + } /* j */ 183 + 184 + /* The OR of all the hash values should cover all the bits */ 185 + if (~string_or) { 186 + pr_err("OR of all string hash results = %#x != %#x", 187 + string_or, -1u); 188 + return -EINVAL; 189 + } 190 + if (~hash_or[0][0]) { 191 + pr_err("OR of all __hash_32 results = %#x != %#x", 192 + hash_or[0][0], -1u); 193 + return -EINVAL; 194 + } 195 + #ifdef HAVE_ARCH__HASH_32 196 + #if HAVE_ARCH__HASH_32 != 1 /* Test is pointless if results match */ 197 + if (~hash_or[1][0]) { 198 + pr_err("OR of all __hash_32_generic results = %#x != %#x", 199 + hash_or[1][0], -1u); 200 + return -EINVAL; 201 + } 202 + #endif 203 + #endif 204 + 205 + /* Likewise 
for all the i-bit hash values */ 206 + for (i = 1; i <= 32; i++) { 207 + u32 const m = ((u32)2 << (i-1)) - 1; /* Low i bits set */ 208 + 209 + if (hash_or[0][i] != m) { 210 + pr_err("OR of all hash_32(%d) results = %#x " 211 + "(%#x expected)", i, hash_or[0][i], m); 212 + return -EINVAL; 213 + } 214 + if (hash_or[1][i] != m) { 215 + pr_err("OR of all hash_64(%d) results = %#x " 216 + "(%#x expected)", i, hash_or[1][i], m); 217 + return -EINVAL; 218 + } 219 + } 220 + 221 + /* Issue notices about skipped tests. */ 222 + #ifndef HAVE_ARCH__HASH_32 223 + pr_info("__hash_32() has no arch implementation to test."); 224 + #elif HAVE_ARCH__HASH_32 != 1 225 + pr_info("__hash_32() is arch-specific; not compared to generic."); 226 + #endif 227 + #ifndef HAVE_ARCH_HASH_32 228 + pr_info("hash_32() has no arch implementation to test."); 229 + #elif HAVE_ARCH_HASH_32 != 1 230 + pr_info("hash_32() is arch-specific; not compared to generic."); 231 + #endif 232 + #ifndef HAVE_ARCH_HASH_64 233 + pr_info("hash_64() has no arch implementation to test."); 234 + #elif HAVE_ARCH_HASH_64 != 1 235 + pr_info("hash_64() is arch-specific; not compared to generic."); 236 + #endif 237 + 238 + pr_notice("%u tests passed.", tests); 239 + 240 + return 0; 241 + } 242 + 243 + static void __exit test_hash_exit(void) 244 + { 245 + } 246 + 247 + module_init(test_hash_init); /* Does everything */ 248 + module_exit(test_hash_exit); /* Does nothing */ 249 + 250 + MODULE_LICENSE("GPL");