Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bitmap: introduce generic optimized bitmap_size()

The number of times yet another open-coded
`BITS_TO_LONGS(nbits) * sizeof(long)` can be spotted is huge.
Some generic helper is long overdue.

Add one, bitmap_size(), but with one detail.
BITS_TO_LONGS() uses DIV_ROUND_UP(). The latter works well when both
dividend and divisor are compile-time constants or when the divisor
is not a pow-of-2. When it is, however, compilers sometimes tend
to generate suboptimal code (GCC 13):

48 83 c0 3f add $0x3f,%rax
48 c1 e8 06 shr $0x6,%rax
48 8d 14 c5 00 00 00 00 lea 0x0(,%rax,8),%rdx

%BITS_PER_LONG is always a pow-of-2 (either 32 or 64), but GCC still does
full division of `nbits + 63` by it and then multiplication by 8.
Instead of BITS_TO_LONGS(), use ALIGN() and then divide by 8. GCC:

8d 50 3f lea 0x3f(%rax),%edx
c1 ea 03 shr $0x3,%edx
81 e2 f8 ff ff 1f and $0x1ffffff8,%edx

Now it shifts `nbits + 63` by 3 positions (IOW performs fast division
by 8) and then masks bits[2:0]. bloat-o-meter:

add/remove: 0/0 grow/shrink: 20/133 up/down: 156/-773 (-617)

Clang does it better and generates the same code before/after starting
from -O1, except that with the ALIGN() approach it uses %edx and thus
still saves some bytes:

add/remove: 0/0 grow/shrink: 9/133 up/down: 18/-538 (-520)

Note that we can't expand DIV_ROUND_UP() by adding a check and using
this approach there, as it's used in array declarations where
expressions are not allowed.
Add this helper to tools/ as well.

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Acked-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Alexander Lobakin and committed by
David S. Miller
a37fbe66 10a04ff0

+11 -15
-5
drivers/md/dm-clone-metadata.c
··· 465 465 466 466 /*---------------------------------------------------------------------------*/ 467 467 468 - static size_t bitmap_size(unsigned long nr_bits) 469 - { 470 - return BITS_TO_LONGS(nr_bits) * sizeof(long); 471 - } 472 - 473 468 static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words, 474 469 unsigned long nr_regions) 475 470 {
+1 -1
drivers/s390/cio/idset.c
··· 18 18 19 19 static inline unsigned long idset_bitmap_size(int num_ssid, int num_id) 20 20 { 21 - return BITS_TO_LONGS(num_ssid * num_id) * sizeof(unsigned long); 21 + return bitmap_size(size_mul(num_ssid, num_id)); 22 22 } 23 23 24 24 static struct idset *idset_new(int num_ssid, int num_id)
+5 -3
include/linux/bitmap.h
··· 226 226 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) 227 227 #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) 228 228 229 + #define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) 230 + 229 231 static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) 230 232 { 231 - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); 233 + unsigned int len = bitmap_size(nbits); 232 234 233 235 if (small_const_nbits(nbits)) 234 236 *dst = 0; ··· 240 238 241 239 static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) 242 240 { 243 - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); 241 + unsigned int len = bitmap_size(nbits); 244 242 245 243 if (small_const_nbits(nbits)) 246 244 *dst = ~0UL; ··· 251 249 static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, 252 250 unsigned int nbits) 253 251 { 254 - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); 252 + unsigned int len = bitmap_size(nbits); 255 253 256 254 if (small_const_nbits(nbits)) 257 255 *dst = *src;
+1 -1
include/linux/cpumask.h
··· 853 853 */ 854 854 static inline unsigned int cpumask_size(void) 855 855 { 856 - return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long); 856 + return bitmap_size(large_cpumask_bits); 857 857 } 858 858 859 859 /*
-2
lib/math/prime_numbers.c
··· 6 6 #include <linux/prime_numbers.h> 7 7 #include <linux/slab.h> 8 8 9 - #define bitmap_size(nbits) (BITS_TO_LONGS(nbits) * sizeof(unsigned long)) 10 - 11 9 struct primes { 12 10 struct rcu_head rcu; 13 11 unsigned long last, sz;
+4 -3
tools/include/linux/bitmap.h
··· 26 26 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) 27 27 #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) 28 28 29 + #define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) 30 + 29 31 static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) 30 32 { 31 33 if (small_const_nbits(nbits)) 32 34 *dst = 0UL; 33 35 else { 34 - int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); 35 - memset(dst, 0, len); 36 + memset(dst, 0, bitmap_size(nbits)); 36 37 } 37 38 } 38 39 ··· 85 84 */ 86 85 static inline unsigned long *bitmap_zalloc(int nbits) 87 86 { 88 - return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); 87 + return calloc(1, bitmap_size(nbits)); 89 88 } 90 89 91 90 /*