Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Make the "word-at-a-time" helper functions more commonly usable

I have a new optimized x86 "strncpy_from_user()" that will use these
same helper functions for all the same reasons the name lookup code uses
them. This is preparation for that.

This moves them into an architecture-specific header file. It's
architecture-specific for two reasons:

- some of the functions are likely to want architecture-specific
implementations. Even if the current code happens to be "generic" in
the sense that it should work on any little-endian machine, it's
likely that the "multiply by a big constant and shift" implementation
is less than optimal for an architecture that has a guaranteed fast
bit count instruction, for example.

- I expect that if architectures like sparc want to start playing
around with this, we'll need to abstract out a few more details (in
particular the actual unaligned accesses). So we're likely to have
more architecture-specific stuff if non-x86 architectures start using
this.

(and if it turns out that non-x86 architectures don't start using
this, then having it in an architecture-specific header is still the
right thing to do, of course)

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+49 -32
+46
arch/x86/include/asm/word-at-a-time.h
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H

/*
 * Word-at-a-time string helpers.
 *
 * Largely generic for little-endian machines, but counting the bytes
 * selected by a bytemask is the part most likely to want a per-arch
 * implementation: a reliably fast bit-count instruction may well beat
 * the multiply-and-shift used below.
 */

#ifdef CONFIG_64BIT

/*
 * Jan Achrenius on G+: microoptimized version of
 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
 * that works for the bytemasks without having to
 * mask them first.
 */
static inline long count_masked_bytes(unsigned long mask)
{
	return mask*0x0001020304050608ul >> 56;
}

#else	/* 32-bit case */

/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long a = (0x0ff0001+mask) >> 23;
	/* Fix the 1 for 00 case */
	return a & mask;
}

#endif

/* Repeat the byte value 'x' in every byte of an unsigned long */
#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))

/* Return the high bit set in the first byte that is a zero */
static inline unsigned long has_zero(unsigned long a)
{
	return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
}

#endif /* _ASM_WORD_AT_A_TIME_H */
+3 -32
fs/namei.c
··· 1407 1407 */ 1408 1408 #ifdef CONFIG_DCACHE_WORD_ACCESS 1409 1409 1410 - #ifdef CONFIG_64BIT 1410 + #include <asm/word-at-a-time.h> 1411 1411 1412 - /* 1413 - * Jan Achrenius on G+: microoptimized version of 1414 - * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" 1415 - * that works for the bytemasks without having to 1416 - * mask them first. 1417 - */ 1418 - static inline long count_masked_bytes(unsigned long mask) 1419 - { 1420 - return mask*0x0001020304050608ul >> 56; 1421 - } 1412 + #ifdef CONFIG_64BIT 1422 1413 1423 1414 static inline unsigned int fold_hash(unsigned long hash) 1424 1415 { ··· 1418 1427 } 1419 1428 1420 1429 #else /* 32-bit case */ 1421 - 1422 - /* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ 1423 - static inline long count_masked_bytes(long mask) 1424 - { 1425 - /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ 1426 - long a = (0x0ff0001+mask) >> 23; 1427 - /* Fix the 1 for 00 case */ 1428 - return a & mask; 1429 - } 1430 1430 1431 1431 #define fold_hash(x) (x) 1432 1432 ··· 1446 1464 } 1447 1465 EXPORT_SYMBOL(full_name_hash); 1448 1466 1449 - #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) 1450 - #define ONEBYTES REPEAT_BYTE(0x01) 1451 - #define SLASHBYTES REPEAT_BYTE('/') 1452 - #define HIGHBITS REPEAT_BYTE(0x80) 1453 - 1454 - /* Return the high bit set in the first byte that is a zero */ 1455 - static inline unsigned long has_zero(unsigned long a) 1456 - { 1457 - return ((a - ONEBYTES) & ~a) & HIGHBITS; 1458 - } 1459 - 1460 1467 /* 1461 1468 * Calculate the length and hash of the path component, and 1462 1469 * return the length of the component; ··· 1461 1490 len += sizeof(unsigned long); 1462 1491 a = *(unsigned long *)(name+len); 1463 1492 /* Do we have any NUL or '/' bytes in this word? */ 1464 - mask = has_zero(a) | has_zero(a ^ SLASHBYTES); 1493 + mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/')); 1465 1494 } while (!mask); 1466 1495 1467 1496 /* The mask *below* the first high bit set */