Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] x86-64: bitops fix for -Os

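This fixes the x86-64 find_[first|next]_zero_bit() functions for the
end-of-range case. They tested the size in bits rather than in words, so
a size that rounds to zero words let "rep scas" execute no iterations at
all, and the conditional jump that follows then tested stale flags.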

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Alexandre Oliva and committed by Linus Torvalds
06024f21 cfa024f4

+50 -16
+50 -16
arch/x86_64/lib/bitops.c
··· 5 5 #undef find_first_bit 6 6 #undef find_next_bit 7 7 8 - /** 9 - * find_first_zero_bit - find the first zero bit in a memory region 10 - * @addr: The address to start the search at 11 - * @size: The maximum size to search 12 - * 13 - * Returns the bit-number of the first zero bit, not the number of the byte 14 - * containing a bit. 15 - */ 16 - inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) 8 + static inline long 9 + __find_first_zero_bit(const unsigned long * addr, unsigned long size) 17 10 { 18 11 long d0, d1, d2; 19 12 long res; 20 13 14 + /* 15 + * We must test the size in words, not in bits, because 16 + * otherwise incoming sizes in the range -63..-1 will not run 17 + * any scasq instructions, and then the flags used by the je 18 + * instruction will have whatever random value was in place 19 + * before. Nobody should call us like that, but 20 + * find_next_zero_bit() does when offset and size are at the 21 + * same word and it fails to find a zero itself. 22 + */ 23 + size += 63; 24 + size >>= 6; 21 25 if (!size) 22 26 return 0; 23 27 asm volatile( ··· 34 30 " shlq $3,%%rdi\n" 35 31 " addq %%rdi,%%rdx" 36 32 :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) 37 - :"0" (0ULL), "1" ((size + 63) >> 6), "2" (addr), "3" (-1ULL), 38 - [addr] "r" (addr) : "memory"); 33 + :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL), 34 + [addr] "S" (addr) : "memory"); 35 + /* 36 + * Any register would do for [addr] above, but GCC tends to 37 + * prefer rbx over rsi, even though rsi is readily available 38 + * and doesn't have to be saved. 39 + */ 39 40 return res; 41 + } 42 + 43 + /** 44 + * find_first_zero_bit - find the first zero bit in a memory region 45 + * @addr: The address to start the search at 46 + * @size: The maximum size to search 47 + * 48 + * Returns the bit-number of the first zero bit, not the number of the byte 49 + * containing a bit. 
50 + */ 51 + long find_first_zero_bit(const unsigned long * addr, unsigned long size) 52 + { 53 + return __find_first_zero_bit (addr, size); 40 54 } 41 55 42 56 /** ··· 65 43 */ 66 44 long find_next_zero_bit (const unsigned long * addr, long size, long offset) 67 45 { 68 - unsigned long * p = ((unsigned long *) addr) + (offset >> 6); 46 + const unsigned long * p = addr + (offset >> 6); 69 47 unsigned long set = 0; 70 48 unsigned long res, bit = offset&63; 71 49 ··· 85 63 /* 86 64 * No zero yet, search remaining full words for a zero 87 65 */ 88 - res = find_first_zero_bit ((const unsigned long *)p, 89 - size - 64 * (p - (unsigned long *) addr)); 66 + res = __find_first_zero_bit (p, size - 64 * (p - addr)); 67 + 90 68 return (offset + set + res); 91 69 } 92 70 ··· 96 74 long d0, d1; 97 75 long res; 98 76 77 + /* 78 + * We must test the size in words, not in bits, because 79 + * otherwise incoming sizes in the range -63..-1 will not run 80 + * any scasq instructions, and then the flags used by the jz 81 + * instruction will have whatever random value was in place 82 + * before. Nobody should call us like that, but 83 + * find_next_bit() does when offset and size are at the same 84 + * word and it fails to find a one itself. 85 + */ 86 + size += 63; 87 + size >>= 6; 88 + if (!size) 89 + return 0; 99 90 asm volatile( 100 91 " repe; scasq\n" 101 92 " jz 1f\n" ··· 118 83 " shlq $3,%%rdi\n" 119 84 " addq %%rdi,%%rax" 120 85 :"=a" (res), "=&c" (d0), "=&D" (d1) 121 - :"0" (0ULL), 122 - "1" ((size + 63) >> 6), "2" (addr), 86 + :"0" (0ULL), "1" (size), "2" (addr), 123 87 [addr] "r" (addr) : "memory"); 124 88 return res; 125 89 }