/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MIN_HEAP_H
#define _LINUX_MIN_HEAP_H

#include <linux/bug.h>
#include <linux/string.h>
#include <linux/types.h>

/**
 * Data structure to hold a min-heap.
 * @nr: Number of elements currently in the heap.
 * @size: Maximum number of elements that can be held in current storage.
 * @data: Pointer to the start of array holding the heap elements.
 * @preallocated: Start of the static preallocated array holding the heap elements.
 */
#define MIN_HEAP_PREALLOCATED(_type, _name, _nr)	\
struct _name {	\
	int nr;	\
	int size;	\
	_type *data;	\
	_type preallocated[_nr];	\
}

#define DEFINE_MIN_HEAP(_type, _name) MIN_HEAP_PREALLOCATED(_type, _name, 0)

typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char;

#define __minheap_cast(_heap)		(typeof((_heap)->data[0]) *)
#define __minheap_obj_size(_heap)	sizeof((_heap)->data[0])

/**
 * struct min_heap_callbacks - Data/functions to customise the min_heap.
 * @less: Partial order function for this heap.
 * @swp: Swap elements function.
 */
struct min_heap_callbacks {
	bool (*less)(const void *lhs, const void *rhs, void *args);
	void (*swp)(void *lhs, void *rhs, void *args);
};
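
/*
 * Example usage (an illustrative sketch, not part of this header; the heap
 * type "min_heap_int", the callback "less_than" and the surrounding function
 * are hypothetical):
 *
 *	DEFINE_MIN_HEAP(int, min_heap_int);
 *
 *	static bool less_than(const void *lhs, const void *rhs, void *args)
 *	{
 *		return *(const int *)lhs < *(const int *)rhs;
 *	}
 *
 *	static void example(void)
 *	{
 *		const struct min_heap_callbacks cb = {
 *			.less = less_than,
 *			.swp = NULL,	// NULL selects a built-in swap for the element size
 *		};
 *		int storage[8];
 *		struct min_heap_int heap;
 *		int val = 42, *min;
 *
 *		min_heap_init(&heap, storage, 8);
 *		min_heap_push(&heap, &val, &cb, NULL);
 *		min = min_heap_peek(&heap);	// smallest element, or NULL if empty
 *		min_heap_pop(&heap, &cb, NULL);
 *	}
 *
 * MIN_HEAP_PREALLOCATED() instead embeds fixed-size storage in the struct;
 * passing NULL as the data pointer to min_heap_init() makes the heap use that
 * preallocated array.  The *_inline variants defined below trade code size
 * for avoiding the out-of-line library calls declared at the end of this
 * header.
 */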

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address must
 * be as well if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	unsigned char lsbits = (unsigned char)size;

	(void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	lsbits |= (unsigned char)(uintptr_t)base;
#endif
	return (lsbits & (align - 1)) == 0;
}

/**
 * swap_words_32 - swap two elements in 32-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 4)
 *
 * Exchange the two objects in memory. This exploits base+index addressing,
 * which basically all CPUs have, to minimize loop overhead computations.
 *
 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 * bottom of the loop, even though the zero flag is still valid from the
 * subtract (since the intervening mov instructions don't alter the flags).
 * Gcc 8.1.0 doesn't have that problem.
 */
static __always_inline
void swap_words_32(void *a, void *b, size_t n)
{
	do {
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
	} while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory. This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible. If they're not, emulating
 * one requires base+index+4 addressing which x86 has but most other
 * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
 * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
 * x32 ABI). Are there any cases the kernel needs to worry about?
 */
static __always_inline
void swap_words_64(void *a, void *b, size_t n)
{
	do {
#ifdef CONFIG_64BIT
		u64 t = *(u64 *)(a + (n -= 8));
		*(u64 *)(a + n) = *(u64 *)(b + n);
		*(u64 *)(b + n) = t;
#else
		/* Use two 32-bit transfers to avoid base+index+4 addressing */
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;

		t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
#endif
	} while (n);
}

/**
 * swap_bytes - swap two elements a byte at a time
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size
 *
 * This is the fallback if alignment doesn't allow using larger chunks.
 */
static __always_inline
void swap_bytes(void *a, void *b, size_t n)
{
	do {
		char t = ((char *)a)[--n];
		((char *)a)[n] = ((char *)b)[n];
		((char *)b)[n] = t;
	} while (n);
}

/*
 * The values are arbitrary as long as they can't be confused with
 * a pointer, but small integers make for the smallest compare
 * instructions.
 */
#define SWAP_WORDS_64 ((void (*)(void *, void *, void *))0)
#define SWAP_WORDS_32 ((void (*)(void *, void *, void *))1)
#define SWAP_BYTES    ((void (*)(void *, void *, void *))2)

/*
 * Selects the appropriate swap function based on the element size.
 */
static __always_inline
void *select_swap_func(const void *base, size_t size)
{
	if (is_aligned(base, size, 8))
		return SWAP_WORDS_64;
	else if (is_aligned(base, size, 4))
		return SWAP_WORDS_32;
	else
		return SWAP_BYTES;
}

static __always_inline
void do_swap(void *a, void *b, size_t size, void (*swap_func)(void *lhs, void *rhs, void *args),
	     void *priv)
{
	if (swap_func == SWAP_WORDS_64)
		swap_words_64(a, b, size);
	else if (swap_func == SWAP_WORDS_32)
		swap_words_32(a, b, size);
	else if (swap_func == SWAP_BYTES)
		swap_bytes(a, b, size);
	else
		swap_func(a, b, priv);
}

/**
 * parent - given the offset of the child, find the offset of the parent.
 * @i: the offset of the heap element whose parent is sought.  Non-zero.
 * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
 * @size: size of each element
 *
 * In terms of array indexes, the parent of element j = @i/@size is simply
 * (j-1)/2.  But when working in byte offsets, we can't use implicit
 * truncation of integer divides.
 *
 * Fortunately, we only need one bit of the quotient, not the full divide.
 * @size has a least significant bit.  That bit will be clear if @i is
 * an even multiple of @size, and set if it's an odd multiple.
 *
 * Logically, we're doing "if (i & lsbit) i -= size;", but since the
 * branch is unpredictable, it's done with a bit of clever branch-free
 * code instead.
 */
__attribute_const__ __always_inline
static size_t parent(size_t i, unsigned int lsbit, size_t size)
{
	i -= size;
	i -= size & -(i & lsbit);
	return i / 2;
}
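
/*
 * Worked example for parent() (illustrative numbers, not taken from any
 * particular user of this header): with elem_size == 12, lsbit == 4.
 *
 *	Child at array index 5, byte offset i = 60:
 *		i -= 12			-> 48	(4 * 12, even multiple, so 48 & 4 == 0)
 *		i -= 12 & -(48 & 4)	-> 48	(subtracts 0)
 *		i / 2			-> 24	(array index 2 == (5 - 1) / 2)
 *
 *	Child at array index 4, byte offset i = 48:
 *		i -= 12			-> 36	(3 * 12, odd multiple, so 36 & 4 == 4)
 *		i -= 12 & -(36 & 4)	-> 24	(subtracts 12)
 *		i / 2			-> 12	(array index 1 == (4 - 1) / 2)
 */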

/* Initialize a min-heap. */
static __always_inline
void __min_heap_init_inline(min_heap_char *heap, void *data, int size)
{
	heap->nr = 0;
	heap->size = size;
	if (data)
		heap->data = data;
	else
		heap->data = heap->preallocated;
}

#define min_heap_init_inline(_heap, _data, _size)	\
	__min_heap_init_inline((min_heap_char *)_heap, _data, _size)

/* Get the minimum element from the heap. */
static __always_inline
void *__min_heap_peek_inline(struct min_heap_char *heap)
{
	return heap->nr ? heap->data : NULL;
}

#define min_heap_peek_inline(_heap)	\
	(__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap))

/* Check if the heap is full. */
static __always_inline
bool __min_heap_full_inline(min_heap_char *heap)
{
	return heap->nr == heap->size;
}

#define min_heap_full_inline(_heap)	\
	__min_heap_full_inline((min_heap_char *)_heap)

/* Sift the element at pos down the heap. */
static __always_inline
void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
				 const struct min_heap_callbacks *func, void *args)
{
	const unsigned long lsbit = elem_size & -elem_size;
	void *data = heap->data;
	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
	/* pre-scale counters for performance */
	size_t a = pos * elem_size;
	size_t b, c, d;
	size_t n = heap->nr * elem_size;

	if (!swp)
		swp = select_swap_func(data, elem_size);

	/* Find the sift-down path all the way to the leaves. */
	for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;)
		b = func->less(data + c, data + d, args) ? c : d;

	/* Special case for the last leaf with no sibling. */
	if (d == n)
		b = c;

	/* Backtrack to the correct location. */
	while (b != a && func->less(data + a, data + b, args))
		b = parent(b, lsbit, elem_size);

	/* Shift the element into its correct place. */
	c = b;
	while (b != a) {
		b = parent(b, lsbit, elem_size);
		do_swap(data + b, data + c, elem_size, swp, args);
	}
}

#define min_heap_sift_down_inline(_heap, _pos, _func, _args)	\
	__min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap),	\
				    _func, _args)

/* Sift up ith element from the heap, O(log2(nr)). */
static __always_inline
void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx,
			       const struct min_heap_callbacks *func, void *args)
{
	const unsigned long lsbit = elem_size & -elem_size;
	void *data = heap->data;
	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
	/* pre-scale counters for performance */
	size_t a = idx * elem_size, b;

	if (!swp)
		swp = select_swap_func(data, elem_size);

	while (a) {
		b = parent(a, lsbit, elem_size);
		if (func->less(data + b, data + a, args))
			break;
		do_swap(data + a, data + b, elem_size, swp, args);
		a = b;
	}
}

#define min_heap_sift_up_inline(_heap, _idx, _func, _args)	\
	__min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx,	\
				  _func, _args)
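
/*
 * Illustrative sketch of an in-place key update (hypothetical code, reusing
 * the int heap "heap" and callbacks "cb" from the example near the top of
 * this header; "idx" and "new_key" are assumed): after overwriting the
 * element, a single sift in the appropriate direction restores heap order.
 *
 *	int old = heap.data[idx];
 *
 *	heap.data[idx] = new_key;
 *	if (new_key < old)
 *		min_heap_sift_up_inline(&heap, idx, &cb, NULL);		// may move toward the root
 *	else
 *		min_heap_sift_down_inline(&heap, idx, &cb, NULL);	// may move toward the leaves
 */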

/* Floyd's approach to heapification that is O(nr). */
static __always_inline
void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size,
			      const struct min_heap_callbacks *func, void *args)
{
	int i;

	for (i = heap->nr / 2 - 1; i >= 0; i--)
		__min_heap_sift_down_inline(heap, i, elem_size, func, args);
}

#define min_heapify_all_inline(_heap, _func, _args)	\
	__min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)

/* Remove minimum element from the heap, O(log2(nr)). */
static __always_inline
bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
			   const struct min_heap_callbacks *func, void *args)
{
	void *data = heap->data;

	if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
		return false;

	/* Place last element at the root (position 0) and then sift down. */
	heap->nr--;
	memcpy(data, data + (heap->nr * elem_size), elem_size);
	__min_heap_sift_down_inline(heap, 0, elem_size, func, args);

	return true;
}

#define min_heap_pop_inline(_heap, _func, _args)	\
	__min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)

/*
 * Remove the minimum element and then push the given element. The
 * implementation performs 1 sift (O(log2(nr))) and is therefore more
 * efficient than a pop followed by a push that does 2.
 */
static __always_inline
void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
				const struct min_heap_callbacks *func, void *args)
{
	memcpy(heap->data, element, elem_size);
	__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
}

#define min_heap_pop_push_inline(_heap, _element, _func, _args)	\
	__min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap),	\
				   _func, _args)

/* Push an element on to the heap, O(log2(nr)). */
static __always_inline
bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
			    const struct min_heap_callbacks *func, void *args)
{
	void *data = heap->data;
	int pos;

	if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap"))
		return false;

	/* Place at the end of data. */
	pos = heap->nr;
	memcpy(data + (pos * elem_size), element, elem_size);
	heap->nr++;

	/* Sift child at pos up. */
	__min_heap_sift_up_inline(heap, elem_size, pos, func, args);

	return true;
}

#define min_heap_push_inline(_heap, _element, _func, _args)	\
	__min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap),	\
			       _func, _args)
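
/*
 * Illustrative sketch of bounded "k largest" selection built on pop_push
 * (hypothetical code, reusing the int heap type and callbacks from the
 * example near the top of this header): a min-heap holds the best k values
 * seen so far; once it is full, each better candidate replaces the current
 * minimum with a single sift instead of a pop followed by a push.
 *
 *	static void track(struct min_heap_int *heap,
 *			  const struct min_heap_callbacks *cb, int val)
 *	{
 *		if (!min_heap_full_inline(heap))
 *			min_heap_push_inline(heap, &val, cb, NULL);
 *		else if (val > *min_heap_peek_inline(heap))
 *			min_heap_pop_push_inline(heap, &val, cb, NULL);
 *	}
 */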

/* Remove ith element from the heap, O(log2(nr)). */
static __always_inline
bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
			   const struct min_heap_callbacks *func, void *args)
{
	void *data = heap->data;
	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;

	if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
		return false;

	if (!swp)
		swp = select_swap_func(data, elem_size);

	/* Place the last element at the position of the removed element, then restore heap order. */
	heap->nr--;
	if (idx == heap->nr)
		return true;
	do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args);
	__min_heap_sift_up_inline(heap, elem_size, idx, func, args);
	__min_heap_sift_down_inline(heap, idx, elem_size, func, args);

	return true;
}

#define min_heap_del_inline(_heap, _idx, _func, _args)	\
	__min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx,	\
			      _func, _args)

void __min_heap_init(min_heap_char *heap, void *data, int size);
void *__min_heap_peek(struct min_heap_char *heap);
bool __min_heap_full(min_heap_char *heap);
void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
			  const struct min_heap_callbacks *func, void *args);
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
			const struct min_heap_callbacks *func, void *args);
void __min_heapify_all(min_heap_char *heap, size_t elem_size,
		       const struct min_heap_callbacks *func, void *args);
bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
		    const struct min_heap_callbacks *func, void *args);
void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size,
			 const struct min_heap_callbacks *func, void *args);
bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
		     const struct min_heap_callbacks *func, void *args);
bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
		    const struct min_heap_callbacks *func, void *args);

#define min_heap_init(_heap, _data, _size)	\
	__min_heap_init((min_heap_char *)_heap, _data, _size)
#define min_heap_peek(_heap)	\
	(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
#define min_heap_full(_heap)	\
	__min_heap_full((min_heap_char *)_heap)
#define min_heap_sift_down(_heap, _pos, _func, _args)	\
	__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
#define min_heap_sift_up(_heap, _idx, _func, _args)	\
	__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
#define min_heapify_all(_heap, _func, _args)	\
	__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
#define min_heap_pop(_heap, _func, _args)	\
	__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
#define min_heap_pop_push(_heap, _element, _func, _args)	\
	__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap),	\
			    _func, _args)
#define min_heap_push(_heap, _element, _func, _args)	\
	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
#define min_heap_del(_heap, _idx, _func, _args)	\
	__min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)

#endif /* _LINUX_MIN_HEAP_H */