Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lib: add find_nth{,_and,_andnot}_bit()

The kernel lacks a function that searches for the Nth set bit in a bitmap.
Usually people do it like this:
	for_each_set_bit(bit, mask, size)
		if (n-- == 0)
			return bit;

We can do it more efficiently, if we:
1. find a word containing Nth bit, using hweight(); and
2. find the bit, using a helper fns(), that works similarly to
__ffs() and ffz().

fns() is implemented as a simple loop. On x86_64 there is a PDEP instruction
that could do this: ret = clz(pdep(1 << idx, num)). However, for large bitmaps
most of the improvement comes from using hweight(), so I kept fns() simple.

The new find_nth_bit() is ~70 times faster on x86_64/kvm in the find_bit benchmark:
find_nth_bit: 7154190 ns, 16411 iterations
for_each_bit: 505493126 ns, 16315 iterations

With all that, a family of 3 new functions is added, and used where
appropriate in the following patches.

Signed-off-by: Yury Norov <yury.norov@gmail.com>

+149
+19
include/linux/bitops.h
··· 248 248 } 249 249 250 250 /** 251 + * fns - find N'th set bit in a word 252 + * @word: The word to search 253 + * @n: Bit to find 254 + */ 255 + static inline unsigned long fns(unsigned long word, unsigned int n) 256 + { 257 + unsigned int bit; 258 + 259 + while (word) { 260 + bit = __ffs(word); 261 + if (n-- == 0) 262 + return bit; 263 + __clear_bit(bit, &word); 264 + } 265 + 266 + return BITS_PER_LONG; 267 + } 268 + 269 + /** 251 270 * assign_bit - Assign value to a bit in memory 252 271 * @nr: the bit to set 253 272 * @addr: the address to start counting from
+86
include/linux/find.h
unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
					 unsigned long start);
extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n);
unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
				unsigned long size, unsigned long n);
unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
					unsigned long size, unsigned long n);
extern unsigned long _find_first_and_bit(const unsigned long *addr1,
					 const unsigned long *addr2, unsigned long size);
extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);

/**
 * find_nth_bit - find N'th set bit in a memory region
 * @addr: The address to start the search at
 * @size: The maximum number of bits to search
 * @n: The number of set bit, which position is needed, counting from 0
 *
 * The following is semantically equivalent:
 *	 idx = find_nth_bit(addr, size, 0);
 *	 idx = find_first_bit(addr, size);
 *
 * Returns the bit number of the N'th set bit.
 * If no such, returns @size.
 */
static inline
unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n)
{
	/* A region of @size bits cannot hold a set bit with index >= @size. */
	if (n >= size)
		return size;

	if (small_const_nbits(size)) {
		unsigned long val = *addr & GENMASK(size - 1, 0);
		unsigned long bit = fns(val, n);

		/*
		 * fns() reports a miss as BITS_PER_LONG; clamp it so that
		 * "not found" is always exactly @size, as documented.
		 */
		return bit < size ? bit : size;
	}

	return __find_nth_bit(addr, size, n);
}

/**
 * find_nth_and_bit - find N'th set bit in 2 memory regions
 * @addr1: The 1st address to start the search at
 * @addr2: The 2nd address to start the search at
 * @size: The maximum number of bits to search
 * @n: The number of set bit, which position is needed, counting from 0
 *
 * The search is done over the bitwise AND of the two regions.
 *
 * Returns the bit number of the N'th set bit.
 * If no such, returns @size.
 */
static inline
unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
				unsigned long size, unsigned long n)
{
	/* A region of @size bits cannot hold a set bit with index >= @size. */
	if (n >= size)
		return size;

	if (small_const_nbits(size)) {
		unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
		unsigned long bit = fns(val, n);

		/* Clamp the BITS_PER_LONG "miss" value of fns() to @size. */
		return bit < size ? bit : size;
	}

	return __find_nth_and_bit(addr1, addr2, size, n);
}

/**
 * find_nth_andnot_bit - find N'th set bit in 2 memory regions,
 *			 flipping bits in 2nd region
 * @addr1: The 1st address to start the search at
 * @addr2: The 2nd address to start the search at
 * @size: The maximum number of bits to search
 * @n: The number of set bit, which position is needed, counting from 0
 *
 * The search is done over the bits that are set in the 1st region and
 * clear in the 2nd one.
 *
 * Returns the bit number of the N'th set bit.
 * If no such, returns @size.
 */
static inline
unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
				unsigned long size, unsigned long n)
{
	/* A region of @size bits cannot hold a set bit with index >= @size. */
	if (n >= size)
		return size;

	if (small_const_nbits(size)) {
		unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0);
		unsigned long bit = fns(val, n);

		/* Clamp the BITS_PER_LONG "miss" value of fns() to @size. */
		return bit < size ? bit : size;
	}

	return __find_nth_andnot_bit(addr1, addr2, size, n);
}
+44
lib/find_bit.c
··· 68 68 sz; \ 69 69 }) 70 70 71 + #define FIND_NTH_BIT(FETCH, size, num) \ 72 + ({ \ 73 + unsigned long sz = (size), nr = (num), idx, w, tmp; \ 74 + \ 75 + for (idx = 0; (idx + 1) * BITS_PER_LONG <= sz; idx++) { \ 76 + if (idx * BITS_PER_LONG + nr >= sz) \ 77 + goto out; \ 78 + \ 79 + tmp = (FETCH); \ 80 + w = hweight_long(tmp); \ 81 + if (w > nr) \ 82 + goto found; \ 83 + \ 84 + nr -= w; \ 85 + } \ 86 + \ 87 + if (sz % BITS_PER_LONG) \ 88 + tmp = (FETCH) & BITMAP_LAST_WORD_MASK(sz); \ 89 + found: \ 90 + sz = min(idx * BITS_PER_LONG + fns(tmp, nr), sz); \ 91 + out: \ 92 + sz; \ 93 + }) 94 + 71 95 #ifndef find_first_bit 72 96 /* 73 97 * Find the first set bit in a memory region. ··· 134 110 } 135 111 EXPORT_SYMBOL(_find_next_bit); 136 112 #endif 113 + 114 + unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) 115 + { 116 + return FIND_NTH_BIT(addr[idx], size, n); 117 + } 118 + EXPORT_SYMBOL(__find_nth_bit); 119 + 120 + unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, 121 + unsigned long size, unsigned long n) 122 + { 123 + return FIND_NTH_BIT(addr1[idx] & addr2[idx], size, n); 124 + } 125 + EXPORT_SYMBOL(__find_nth_and_bit); 126 + 127 + unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, 128 + unsigned long size, unsigned long n) 129 + { 130 + return FIND_NTH_BIT(addr1[idx] & ~addr2[idx], size, n); 131 + } 132 + EXPORT_SYMBOL(__find_nth_andnot_bit); 137 133 138 134 #ifndef find_next_and_bit 139 135 unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2,