Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cpumask: Introduce cpumask_weighted_or()

CID management ORs two cpumasks and then calculates the weight of the
result. That's inefficient as it has to walk the same bitmap data twice. As
this is done with the runqueue lock held, there is a real benefit in speeding
this up. Depending on the system this results in 10-20% fewer cycles spent
with the runqueue lock held for a 4K cpumask.

Provide cpumask_weighted_or() and the corresponding bitmap functions which
return the weight of the OR result right away.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20251119172549.448263340@linutronix.de

Authored by Thomas Gleixner and committed by Peter Zijlstra
437cb3de 0d032a43

+37
+15
include/linux/bitmap.h
··· 45 45 * bitmap_copy(dst, src, nbits) *dst = *src 46 46 * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 47 47 * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 48 + * bitmap_weighted_or(dst, src1, src2, nbits) *dst = *src1 | *src2. Returns Hamming Weight of dst 48 49 * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 49 50 * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) 50 51 * bitmap_complement(dst, src, nbits) *dst = ~(*src) ··· 166 165 const unsigned long *bitmap2, unsigned int nbits); 167 166 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, 168 167 const unsigned long *bitmap2, unsigned int nbits); 168 + unsigned int __bitmap_weighted_or(unsigned long *dst, const unsigned long *bitmap1, 169 + const unsigned long *bitmap2, unsigned int nbits); 169 170 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, 170 171 const unsigned long *bitmap2, unsigned int nbits); 171 172 bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, ··· 338 335 *dst = *src1 | *src2; 339 336 else 340 337 __bitmap_or(dst, src1, src2, nbits); 338 + } 339 + 340 + static __always_inline 341 + unsigned int bitmap_weighted_or(unsigned long *dst, const unsigned long *src1, 342 + const unsigned long *src2, unsigned int nbits) 343 + { 344 + if (small_const_nbits(nbits)) { 345 + *dst = *src1 | *src2; 346 + return hweight_long(*dst & BITMAP_LAST_WORD_MASK(nbits)); 347 + } else { 348 + return __bitmap_weighted_or(dst, src1, src2, nbits); 349 + } 341 350 } 342 351 343 352 static __always_inline
+16
include/linux/cpumask.h
··· 729 729 } 730 730 731 731 /** 732 + * cpumask_weighted_or - *dstp = *src1p | *src2p and return the weight of the result 733 + * @dstp: the cpumask result 734 + * @src1p: the first input 735 + * @src2p: the second input 736 + * 737 + * Return: The number of bits set in the resulting cpumask @dstp 738 + */ 739 + static __always_inline 740 + unsigned int cpumask_weighted_or(struct cpumask *dstp, const struct cpumask *src1p, 741 + const struct cpumask *src2p) 742 + { 743 + return bitmap_weighted_or(cpumask_bits(dstp), cpumask_bits(src1p), 744 + cpumask_bits(src2p), small_cpumask_bits); 745 + } 746 + 747 + /** 732 748 * cpumask_xor - *dstp = *src1p ^ *src2p 733 749 * @dstp: the cpumask result 734 750 * @src1p: the first input
+6
lib/bitmap.c
··· 355 355 } 356 356 EXPORT_SYMBOL(__bitmap_weight_andnot); 357 357 358 + unsigned int __bitmap_weighted_or(unsigned long *dst, const unsigned long *bitmap1, 359 + const unsigned long *bitmap2, unsigned int bits) 360 + { 361 + return BITMAP_WEIGHT(({dst[idx] = bitmap1[idx] | bitmap2[idx]; dst[idx]; }), bits); 362 + } 363 + 358 364 void __bitmap_set(unsigned long *map, unsigned int start, int len) 359 365 { 360 366 unsigned long *p = map + BIT_WORD(start);