Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

dm vdo: add basic hash map data structures

This patch adds two hash maps, one keyed by integers, the other by
pointers, and also a priority heap. The integer map is used for locking of
logical and physical addresses. The pointer map is used for managing
concurrent writes of the same data, ensuring that those writes are
deduplicated. The priority heap is used to minimize the search time for
free blocks.

Co-developed-by: J. corwin Coburn <corwin@hurlbutnet.net>
Signed-off-by: J. corwin Coburn <corwin@hurlbutnet.net>
Co-developed-by: Michael Sclafani <dm-devel@lists.linux.dev>
Signed-off-by: Michael Sclafani <dm-devel@lists.linux.dev>
Signed-off-by: Matthew Sakai <msakai@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>

Authored by Matthew Sakai, committed by Mike Snitzer
cc46b955 d9e894d9

+1808 lines added

drivers/md/dm-vdo/int-map.c (+718)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

/**
 * DOC:
 *
 * Hash table implementation of a map from integers to pointers, implemented using the Hopscotch
 * Hashing algorithm by Herlihy, Shavit, and Tzafrir (see
 * http://en.wikipedia.org/wiki/Hopscotch_hashing). This implementation does not contain any of the
 * locking/concurrency features of the algorithm, just the collision resolution scheme.
 *
 * Hopscotch Hashing is based on hashing with open addressing and linear probing. All the entries
 * are stored in a fixed array of buckets, with no dynamic allocation for collisions. Unlike linear
 * probing, all the entries that hash to a given bucket are stored within a fixed neighborhood
 * starting at that bucket. Chaining is effectively represented as a bit vector relative to each
 * bucket instead of as pointers or explicit offsets.
 *
 * When an empty bucket cannot be found within a given neighborhood, subsequent neighborhoods are
 * searched, and one or more entries will "hop" into those neighborhoods. When this process works,
 * an empty bucket will move into the desired neighborhood, allowing the entry to be added. When
 * that process fails (typically when the buckets are around 90% full), the table must be resized
 * and all the entries rehashed and added to the expanded table.
 *
 * Unlike linear probing, the number of buckets that must be searched in the worst case has a fixed
 * upper bound (the size of the neighborhood). Those entries occupy a small number of memory cache
 * lines, leading to improved use of the cache (fewer misses on both successful and unsuccessful
 * searches). Hopscotch hashing outperforms linear probing at much higher load factors, so even
 * with the increased memory burden for maintaining the hop vectors, less memory is needed to
 * achieve that performance. Hopscotch is also immune to "contamination" from deleting entries
 * since entries are genuinely removed instead of being replaced by a placeholder.
 *
 * The published description of the algorithm used a bit vector, but the paper alludes to an offset
 * scheme which is used by this implementation. Since the entries in the neighborhood are within N
 * entries of the hash bucket at the start of the neighborhood, a pair of small offset fields each
 * log2(N) bits wide is all that's needed to maintain the hops as a linked list. In order to encode
 * "no next hop" (i.e. NULL) as the natural initial value of zero, the offsets are biased by one
 * (i.e. 0 => NULL, 1 => offset=0, 2 => offset=1, etc.) We can represent neighborhoods of up to 255
 * entries with just 8+8=16 bits per entry. The hop list is sorted by hop offset so the first entry
 * in the list is always the bucket closest to the start of the neighborhood.
 *
 * While individual accesses tend to be very fast, the table resize operations are very, very
 * expensive. If an upper bound on the latency of adding an entry to the table is needed, we either
 * need to ensure the table is pre-sized to be large enough so no resize is ever needed, or we'll
 * need to develop an approach to incrementally resize the table.
 */

#include "int-map.h"

#include <linux/minmax.h>

#include "errors.h"
#include "logger.h"
#include "memory-alloc.h"
#include "numeric.h"
#include "permassert.h"

enum {
	DEFAULT_CAPACITY = 16,	/* the number of neighborhoods in a new table */
	NEIGHBORHOOD = 255,	/* the number of buckets in each neighborhood */
	MAX_PROBES = 1024,	/* limit on the number of probes for a free bucket */
	NULL_HOP_OFFSET = 0,	/* the hop offset value terminating the hop list */
	DEFAULT_LOAD = 75	/* a compromise between memory use and performance */
};

/**
 * struct bucket - hash bucket
 *
 * Buckets are packed together to reduce memory usage and improve cache efficiency. It would be
 * tempting to encode the hop offsets separately and maintain alignment of key/value pairs, but
 * it's crucial to keep the hop fields near the buckets that use them so they'll tend to share
 * cache lines.
 */
struct __packed bucket {
	/**
	 * @first_hop: The biased offset of the first entry in the hop list of the neighborhood
	 * that hashes to this bucket.
	 */
	u8 first_hop;
	/** @next_hop: The biased offset of the next bucket in the hop list. */
	u8 next_hop;
	/** @key: The key stored in this bucket. */
	u64 key;
	/** @value: The value stored in this bucket (NULL if empty). */
	void *value;
};

/**
 * struct int_map - The concrete definition of the opaque int_map type.
 *
 * To avoid having to wrap the neighborhoods of the last entries back around to the start of the
 * bucket array, we allocate a few more buckets at the end of the array instead, which is why
 * capacity and bucket_count are different.
 */
struct int_map {
	/** @size: The number of entries stored in the map. */
	size_t size;
	/** @capacity: The number of neighborhoods in the map. */
	size_t capacity;
	/** @bucket_count: The number of buckets in the bucket array. */
	size_t bucket_count;
	/** @buckets: The array of hash buckets. */
	struct bucket *buckets;
};

/**
 * mix() - The Google CityHash 16-byte hash mixing function.
 * @input1: The first input value.
 * @input2: The second input value.
 *
 * Return: A hash of the two inputs.
 */
static u64 mix(u64 input1, u64 input2)
{
	static const u64 CITY_MULTIPLIER = 0x9ddfea08eb382d69ULL;
	u64 hash = (input1 ^ input2);

	hash *= CITY_MULTIPLIER;
	hash ^= (hash >> 47);
	hash ^= input2;
	hash *= CITY_MULTIPLIER;
	hash ^= (hash >> 47);
	hash *= CITY_MULTIPLIER;
	return hash;
}

/**
 * hash_key() - Calculate a 64-bit non-cryptographic hash value for the provided 64-bit integer
 *              key.
 * @key: The mapping key.
 *
 * The implementation is based on Google's CityHash, only handling the specific case of an 8-byte
 * input.
 *
 * Return: The hash of the mapping key.
 */
static u64 hash_key(u64 key)
{
	/*
	 * Aliasing restrictions forbid us from casting pointer types, so use a union to convert a
	 * single u64 to two u32 values.
	 */
	union {
		u64 u64;
		u32 u32[2];
	} pun = {.u64 = key};

	return mix(sizeof(key) + (((u64) pun.u32[0]) << 3), pun.u32[1]);
}

/**
 * allocate_buckets() - Initialize an int_map.
 * @map: The map to initialize.
 * @capacity: The initial capacity of the map.
 *
 * Return: UDS_SUCCESS or an error code.
 */
static int allocate_buckets(struct int_map *map, size_t capacity)
{
	map->size = 0;
	map->capacity = capacity;

	/*
	 * Allocate NEIGHBORHOOD - 1 extra buckets so the last bucket can have a full neighborhood
	 * without having to wrap back around to element zero.
	 */
	map->bucket_count = capacity + (NEIGHBORHOOD - 1);
	return uds_allocate(map->bucket_count, struct bucket,
			    "struct int_map buckets", &map->buckets);
}

/**
 * vdo_make_int_map() - Allocate and initialize an int_map.
 * @initial_capacity: The number of entries the map should initially be capable of holding (zero
 *                    tells the map to use its own small default).
 * @initial_load: The load factor of the map, expressed as an integer percentage (typically in the
 *                range 50 to 90, with zero telling the map to use its own default).
 * @map_ptr: Output, a pointer to hold the new int_map.
 *
 * Return: UDS_SUCCESS or an error code.
 */
int vdo_make_int_map(size_t initial_capacity, unsigned int initial_load, struct int_map **map_ptr)
{
	struct int_map *map;
	int result;
	size_t capacity;

	/* Use the default initial load if the caller did not specify one. */
	if (initial_load == 0)
		initial_load = DEFAULT_LOAD;
	if (initial_load > 100)
		return UDS_INVALID_ARGUMENT;

	result = uds_allocate(1, struct int_map, "struct int_map", &map);
	if (result != UDS_SUCCESS)
		return result;

	/* Use the default capacity if the caller did not specify one. */
	capacity = (initial_capacity > 0) ? initial_capacity : DEFAULT_CAPACITY;

	/*
	 * Scale up the capacity by the specified initial load factor. (i.e. to hold 1000 entries
	 * at 80% load we need a capacity of 1250)
	 */
	capacity = capacity * 100 / initial_load;

	result = allocate_buckets(map, capacity);
	if (result != UDS_SUCCESS) {
		vdo_free_int_map(uds_forget(map));
		return result;
	}

	*map_ptr = map;
	return UDS_SUCCESS;
}

/**
 * vdo_free_int_map() - Free an int_map.
 * @map: The int_map to free.
 *
 * NOTE: The map does not own the pointer values stored in the map and they are not freed by this
 * call.
 */
void vdo_free_int_map(struct int_map *map)
{
	if (map == NULL)
		return;

	uds_free(uds_forget(map->buckets));
	uds_free(uds_forget(map));
}

/**
 * vdo_int_map_size() - Get the number of entries stored in an int_map.
 * @map: The int_map to query.
 *
 * Return: The number of entries in the map.
 */
size_t vdo_int_map_size(const struct int_map *map)
{
	return map->size;
}

/**
 * dereference_hop() - Convert a biased hop offset within a neighborhood to a pointer to the bucket
 *                     it references.
 * @neighborhood: The first bucket in the neighborhood.
 * @hop_offset: The biased hop offset to the desired bucket.
 *
 * Return: NULL if hop_offset is zero, otherwise a pointer to the bucket in the neighborhood at
 *         hop_offset - 1.
 */
static struct bucket *dereference_hop(struct bucket *neighborhood, unsigned int hop_offset)
{
	BUILD_BUG_ON(NULL_HOP_OFFSET != 0);
	if (hop_offset == NULL_HOP_OFFSET)
		return NULL;

	return &neighborhood[hop_offset - 1];
}

/**
 * insert_in_hop_list() - Add a bucket into the hop list for the neighborhood.
 * @neighborhood: The first bucket in the neighborhood.
 * @new_bucket: The bucket to add to the hop list.
 *
 * The bucket is inserted into the list so the hop list remains sorted by hop offset.
 */
static void insert_in_hop_list(struct bucket *neighborhood, struct bucket *new_bucket)
{
	/* Zero indicates a NULL hop offset, so bias the hop offset by one. */
	int hop_offset = 1 + (new_bucket - neighborhood);

	/* Handle the special case of adding a bucket at the start of the list. */
	int next_hop = neighborhood->first_hop;

	if ((next_hop == NULL_HOP_OFFSET) || (next_hop > hop_offset)) {
		new_bucket->next_hop = next_hop;
		neighborhood->first_hop = hop_offset;
		return;
	}

	/* Search the hop list for the insertion point that maintains the sort order. */
	for (;;) {
		struct bucket *bucket = dereference_hop(neighborhood, next_hop);

		next_hop = bucket->next_hop;

		if ((next_hop == NULL_HOP_OFFSET) || (next_hop > hop_offset)) {
			new_bucket->next_hop = next_hop;
			bucket->next_hop = hop_offset;
			return;
		}
	}
}

/**
 * select_bucket() - Select and return the hash bucket for a given search key.
 * @map: The map to search.
 * @key: The mapping key.
 */
static struct bucket *select_bucket(const struct int_map *map, u64 key)
{
	/*
	 * Calculate a good hash value for the provided key. We want exactly 32 bits, so mask the
	 * result.
	 */
	u64 hash = hash_key(key) & 0xFFFFFFFF;

	/*
	 * Scale the 32-bit hash to a bucket index by treating it as a binary fraction and
	 * multiplying that by the capacity. If the hash is uniformly distributed over [0 ..
	 * 2^32-1], then (hash * capacity / 2^32) should be uniformly distributed over [0 ..
	 * capacity-1]. The multiply and shift is much faster than a divide (modulus) on X86 CPUs.
	 */
	return &map->buckets[(hash * map->capacity) >> 32];
}

/**
 * search_hop_list() - Search the hop list associated with given hash bucket for a given search
 *                     key.
 * @map: The map being searched.
 * @bucket: The map bucket to search for the key.
 * @key: The mapping key.
 * @previous_ptr: Output. If not NULL, a pointer in which to store the bucket in the list preceding
 *                the one that had the matching key.
 *
 * If the key is found, returns a pointer to the entry (bucket or collision), otherwise returns
 * NULL.
 *
 * Return: An entry that matches the key, or NULL if not found.
 */
static struct bucket *search_hop_list(struct int_map *map __always_unused,
				      struct bucket *bucket,
				      u64 key,
				      struct bucket **previous_ptr)
{
	struct bucket *previous = NULL;
	unsigned int next_hop = bucket->first_hop;

	while (next_hop != NULL_HOP_OFFSET) {
		/*
		 * Check the neighboring bucket indexed by the offset for the
		 * desired key.
		 */
		struct bucket *entry = dereference_hop(bucket, next_hop);

		if ((key == entry->key) && (entry->value != NULL)) {
			if (previous_ptr != NULL)
				*previous_ptr = previous;
			return entry;
		}
		next_hop = entry->next_hop;
		previous = entry;
	}

	return NULL;
}

/**
 * vdo_int_map_get() - Retrieve the value associated with a given key from the int_map.
 * @map: The int_map to query.
 * @key: The key to look up.
 *
 * Return: The value associated with the given key, or NULL if the key is not mapped to any value.
 */
void *vdo_int_map_get(struct int_map *map, u64 key)
{
	struct bucket *match = search_hop_list(map, select_bucket(map, key), key, NULL);

	return ((match != NULL) ? match->value : NULL);
}

/**
 * resize_buckets() - Increase the number of hash buckets.
 * @map: The map to resize.
 *
 * Resizes and rehashes all the existing entries, storing them in the new buckets.
 *
 * Return: UDS_SUCCESS or an error code.
 */
static int resize_buckets(struct int_map *map)
{
	int result;
	size_t i;

	/* Copy the top-level map data to the stack. */
	struct int_map old_map = *map;

	/* Re-initialize the map to be empty and 50% larger. */
	size_t new_capacity = map->capacity / 2 * 3;

	uds_log_info("%s: attempting resize from %zu to %zu, current size=%zu",
		     __func__, map->capacity, new_capacity, map->size);
	result = allocate_buckets(map, new_capacity);
	if (result != UDS_SUCCESS) {
		*map = old_map;
		return result;
	}

	/* Populate the new hash table from the entries in the old bucket array. */
	for (i = 0; i < old_map.bucket_count; i++) {
		struct bucket *entry = &old_map.buckets[i];

		if (entry->value == NULL)
			continue;

		result = vdo_int_map_put(map, entry->key, entry->value, true, NULL);
		if (result != UDS_SUCCESS) {
			/* Destroy the new partial map and restore the map from the stack. */
			uds_free(uds_forget(map->buckets));
			*map = old_map;
			return result;
		}
	}

	/* Destroy the old bucket array. */
	uds_free(uds_forget(old_map.buckets));
	return UDS_SUCCESS;
}

/**
 * find_empty_bucket() - Probe the bucket array starting at the given bucket for the next empty
 *                       bucket, returning a pointer to it.
 * @map: The map containing the buckets to search.
 * @bucket: The bucket at which to start probing.
 * @max_probes: The maximum number of buckets to search.
 *
 * NULL will be returned if the search reaches the end of the bucket array or if the number of
 * linear probes exceeds a specified limit.
 *
 * Return: The next empty bucket, or NULL if the search failed.
 */
static struct bucket *
find_empty_bucket(struct int_map *map, struct bucket *bucket, unsigned int max_probes)
{
	/*
	 * Limit the search to either the nearer of the end of the bucket array or a fixed distance
	 * beyond the initial bucket.
	 */
	ptrdiff_t remaining = &map->buckets[map->bucket_count] - bucket;
	struct bucket *sentinel = &bucket[min_t(ptrdiff_t, remaining, max_probes)];
	struct bucket *entry;

	for (entry = bucket; entry < sentinel; entry++) {
		if (entry->value == NULL)
			return entry;
	}

	return NULL;
}

/**
 * move_empty_bucket() - Move an empty bucket closer to the start of the bucket array.
 * @map: The map containing the bucket.
 * @hole: The empty bucket to fill with an entry that precedes it in one of its enclosing
 *        neighborhoods.
 *
 * This searches the neighborhoods that contain the empty bucket for a non-empty bucket closer to
 * the start of the array. If such a bucket is found, this swaps the two buckets by moving the
 * entry to the empty bucket.
 *
 * Return: The bucket that was vacated by moving its entry to the provided hole, or NULL if no
 *         entry could be moved.
 */
static struct bucket *move_empty_bucket(struct int_map *map __always_unused, struct bucket *hole)
{
	/*
	 * Examine every neighborhood that the empty bucket is part of, starting with the one in
	 * which it is the last bucket. No boundary check is needed for the negative array
	 * arithmetic since this function is only called when hole is at least NEIGHBORHOOD cells
	 * deeper into the array than a valid bucket.
	 */
	struct bucket *bucket;

	for (bucket = &hole[1 - NEIGHBORHOOD]; bucket < hole; bucket++) {
		/*
		 * Find the entry that is nearest to the bucket, which means it will be nearest to
		 * the hash bucket whose neighborhood is full.
		 */
		struct bucket *new_hole = dereference_hop(bucket, bucket->first_hop);

		if (new_hole == NULL) {
			/*
			 * There are no buckets in this neighborhood that are in use by this one
			 * (they must all be owned by overlapping neighborhoods).
			 */
			continue;
		}

		/*
		 * Skip this bucket if its first entry is actually further away than the hole that
		 * we're already trying to fill.
		 */
		if (hole < new_hole)
			continue;

		/*
		 * We've found an entry in this neighborhood that we can "hop" further away, moving
		 * the hole closer to the hash bucket, if not all the way into its neighborhood.
		 */

		/*
		 * The entry that will be the new hole is the first bucket in the list, so setting
		 * first_hop is all that's needed to remove it from the list.
		 */
		bucket->first_hop = new_hole->next_hop;
		new_hole->next_hop = NULL_HOP_OFFSET;

		/* Move the entry into the original hole. */
		hole->key = new_hole->key;
		hole->value = new_hole->value;
		new_hole->value = NULL;

		/* Insert the filled hole into the hop list for the neighborhood. */
		insert_in_hop_list(bucket, hole);
		return new_hole;
	}

	/* We couldn't find an entry to relocate to the hole. */
	return NULL;
}

/**
 * update_mapping() - Find and update any existing mapping for a given key, returning the value
 *                    associated with the key in the provided pointer.
 * @map: The int_map to attempt to modify.
 * @neighborhood: The first bucket in the neighborhood that would contain the search key.
 * @key: The key with which to associate the new value.
 * @new_value: The value to be associated with the key.
 * @update: Whether to overwrite an existing value.
 * @old_value_ptr: a pointer in which to store the old value (unmodified if no mapping was found)
 *
 * Return: true if the map contains a mapping for the key, false if it does not.
 */
static bool update_mapping(struct int_map *map,
			   struct bucket *neighborhood,
			   u64 key,
			   void *new_value,
			   bool update,
			   void **old_value_ptr)
{
	struct bucket *bucket = search_hop_list(map, neighborhood, key, NULL);

	if (bucket == NULL) {
		/* There is no bucket containing the key in the neighborhood. */
		return false;
	}

	/*
	 * Return the value of the current mapping (if desired) and update the mapping with the new
	 * value (if desired).
	 */
	if (old_value_ptr != NULL)
		*old_value_ptr = bucket->value;
	if (update)
		bucket->value = new_value;
	return true;
}

/**
 * find_or_make_vacancy() - Find an empty bucket.
 * @map: The int_map to search or modify.
 * @neighborhood: The first bucket in the neighborhood in which an empty bucket is needed for a new
 *                mapping.
 *
 * Find an empty bucket in a specified neighborhood for a new mapping or attempt to re-arrange
 * mappings so there is such a bucket. This operation may fail (returning NULL) if an empty bucket
 * is not available or could not be relocated to the neighborhood.
 *
 * Return: a pointer to an empty bucket in the desired neighborhood, or NULL if a vacancy could not
 *         be found or arranged.
 */
static struct bucket *find_or_make_vacancy(struct int_map *map, struct bucket *neighborhood)
{
	/* Probe within and beyond the neighborhood for the first empty bucket. */
	struct bucket *hole = find_empty_bucket(map, neighborhood, MAX_PROBES);

	/*
	 * Keep trying until the empty bucket is in the bucket's neighborhood or we are unable to
	 * move it any closer by swapping it with a filled bucket.
	 */
	while (hole != NULL) {
		int distance = hole - neighborhood;

		if (distance < NEIGHBORHOOD) {
			/*
			 * We've found or relocated an empty bucket close enough to the initial
			 * hash bucket to be referenced by its hop vector.
			 */
			return hole;
		}

		/*
		 * The nearest empty bucket isn't within the neighborhood that must contain the new
		 * entry, so try to swap it with a bucket that is closer.
		 */
		hole = move_empty_bucket(map, hole);
	}

	return NULL;
}

/**
 * vdo_int_map_put() - Try to associate a value with an integer.
 * @map: The int_map to attempt to modify.
 * @key: The key with which to associate the new value.
 * @new_value: The value to be associated with the key.
 * @update: Whether to overwrite an existing value.
 * @old_value_ptr: A pointer in which to store either the old value (if the key was already mapped)
 *                 or NULL if the map did not contain the key; NULL may be provided if the caller
 *                 does not need to know the old value.
 *
 * Try to associate a value (a pointer) with an integer in an int_map. If the map already contains
 * a mapping for the provided key, the old value is only replaced with the specified value if
 * update is true. In either case the old value is returned. If the map does not already contain a
 * value for the specified key, the new value is added regardless of the value of update.
 *
 * Return: UDS_SUCCESS or an error code.
 */
int vdo_int_map_put(struct int_map *map, u64 key, void *new_value, bool update, void **old_value_ptr)
{
	struct bucket *neighborhood, *bucket;

	if (new_value == NULL)
		return UDS_INVALID_ARGUMENT;

	/*
	 * Select the bucket at the start of the neighborhood that must contain any entry for the
	 * provided key.
	 */
	neighborhood = select_bucket(map, key);

	/*
	 * Check whether the neighborhood already contains an entry for the key, in which case we
	 * optionally update it, returning the old value.
	 */
	if (update_mapping(map, neighborhood, key, new_value, update, old_value_ptr))
		return UDS_SUCCESS;

	/*
	 * Find an empty bucket in the desired neighborhood for the new entry or re-arrange entries
	 * in the map so there is such a bucket. This operation will usually succeed; the loop body
	 * will only be executed on the rare occasions that we have to resize the map.
	 */
	while ((bucket = find_or_make_vacancy(map, neighborhood)) == NULL) {
		int result;

		/*
		 * There is no empty bucket in which to put the new entry in the current map, so
		 * we're forced to allocate a new bucket array with a larger capacity, re-hash all
		 * the entries into those buckets, and try again (a very expensive operation for
		 * large maps).
		 */
		result = resize_buckets(map);
		if (result != UDS_SUCCESS)
			return result;

		/*
		 * Resizing the map invalidates all pointers to buckets, so recalculate the
		 * neighborhood pointer.
		 */
		neighborhood = select_bucket(map, key);
	}

	/* Put the new entry in the empty bucket, adding it to the neighborhood. */
	bucket->key = key;
	bucket->value = new_value;
	insert_in_hop_list(neighborhood, bucket);
	map->size += 1;

	/* There was no existing entry, so there was no old value to be returned. */
	if (old_value_ptr != NULL)
		*old_value_ptr = NULL;
	return UDS_SUCCESS;
}

/**
 * vdo_int_map_remove() - Remove the mapping for a given key from the int_map.
 * @map: The int_map from which to remove the mapping.
 * @key: The key whose mapping is to be removed.
 *
 * Return: the value that was associated with the key, or NULL if it was not mapped.
 */
void *vdo_int_map_remove(struct int_map *map, u64 key)
{
	void *value;

	/* Select the bucket to search and search it for an existing entry. */
	struct bucket *bucket = select_bucket(map, key);
	struct bucket *previous;
	struct bucket *victim = search_hop_list(map, bucket, key, &previous);

	if (victim == NULL) {
		/* There is no matching entry to remove. */
		return NULL;
	}

	/*
	 * We found an entry to remove. Save the mapped value to return later and empty the bucket.
	 */
	map->size -= 1;
	value = victim->value;
	victim->value = NULL;
	victim->key = 0;

	/* The victim bucket is now empty, but it still needs to be spliced out of the hop list. */
	if (previous == NULL) {
		/* The victim is the head of the list, so swing first_hop. */
		bucket->first_hop = victim->next_hop;
	} else {
		previous->next_hop = victim->next_hop;
	}

	victim->next_hop = NULL_HOP_OFFSET;
	return value;
}
drivers/md/dm-vdo/int-map.h (+40)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Red Hat
 */

#ifndef VDO_INT_MAP_H
#define VDO_INT_MAP_H

#include <linux/compiler.h>
#include <linux/types.h>

/**
 * DOC: int_map
 *
 * An int_map associates pointers (void *) with integer keys (u64). NULL pointer values are
 * not supported.
 *
 * The map is implemented as a hash table, which should provide constant-time insert, query, and
 * remove operations, although the insert may occasionally grow the table, which is linear in the
 * number of entries in the map. The table will grow as needed to hold new entries, but will not
 * shrink as entries are removed.
 */

struct int_map;

int __must_check
vdo_make_int_map(size_t initial_capacity, unsigned int initial_load, struct int_map **map_ptr);

void vdo_free_int_map(struct int_map *map);

size_t vdo_int_map_size(const struct int_map *map);

void *vdo_int_map_get(struct int_map *map, u64 key);

int __must_check
vdo_int_map_put(struct int_map *map, u64 key, void *new_value, bool update, void **old_value_ptr);

void *vdo_int_map_remove(struct int_map *map, u64 key);

#endif /* VDO_INT_MAP_H */
drivers/md/dm-vdo/pointer-map.c (+696)
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright 2023 Red Hat 4 + */ 5 + 6 + /** 7 + * DOC: 8 + * 9 + * Hash table implementation of a map from integers to pointers, implemented using the Hopscotch 10 + * Hashing algorithm by Herlihy, Shavit, and Tzafrir (see 11 + * http://en.wikipedia.org/wiki/Hopscotch_hashing). This implementation does not contain any of the 12 + * locking/concurrency features of the algorithm, just the collision resolution scheme. 13 + * 14 + * Hopscotch Hashing is based on hashing with open addressing and linear probing. All the entries 15 + * are stored in a fixed array of buckets, with no dynamic allocation for collisions. Unlike linear 16 + * probing, all the entries that hash to a given bucket are stored within a fixed neighborhood 17 + * starting at that bucket. Chaining is effectively represented as a bit vector relative to each 18 + * bucket instead of as pointers or explicit offsets. 19 + * 20 + * When an empty bucket cannot be found within a given neighborhood, subsequent neighborhoods are 21 + * searched, and one or more entries will "hop" into those neighborhoods. When this process works, 22 + * an empty bucket will move into the desired neighborhood, allowing the entry to be added. When 23 + * that process fails (typically when the buckets are around 90% full), the table must be resized 24 + * and the all entries rehashed and added to the expanded table. 25 + * 26 + * Unlike linear probing, the number of buckets that must be searched in the worst case has a fixed 27 + * upper bound (the size of the neighborhood). Those entries occupy a small number of memory cache 28 + * lines, leading to improved use of the cache (fewer misses on both successful and unsuccessful 29 + * searches). Hopscotch hashing outperforms linear probing at much higher load factors, so even 30 + * with the increased memory burden for maintaining the hop vectors, less memory is needed to 31 + * achieve that performance. 
Hopscotch is also immune to "contamination" from deleting entries 32 + * since entries are genuinely removed instead of being replaced by a placeholder. 33 + * 34 + * The published description of the algorithm used a bit vector, but the paper alludes to an offset 35 + * scheme which is used by this implementation. Since the entries in the neighborhood are within N 36 + * entries of the hash bucket at the start of the neighborhood, a pair of small offset fields each 37 + * log2(N) bits wide is all that's needed to maintain the hops as a linked list. In order to encode 38 + * "no next hop" (i.e. NULL) as the natural initial value of zero, the offsets are biased by one 39 + * (i.e. 0 => NULL, 1 => offset=0, 2 => offset=1, etc.) We can represent neighborhoods of up to 255 40 + * entries with just 8+8=16 bits per entry. The hop list is sorted by hop offset so the first entry 41 + * in the list is always the bucket closest to the start of the neighborhood. 42 + * 43 + * While individual accesses tend to be very fast, the table resize operations are very, very 44 + * expensive. If an upper bound on the latency of adding an entry to the table is needed, we either 45 + * need to ensure the table is pre-sized to be large enough so no resize is ever needed, or we'll 46 + * need to develop an approach to incrementally resize the table. 
47 + */ 48 + 49 + #include "pointer-map.h" 50 + 51 + #include <linux/minmax.h> 52 + 53 + #include "errors.h" 54 + #include "logger.h" 55 + #include "memory-alloc.h" 56 + #include "numeric.h" 57 + #include "permassert.h" 58 + 59 + enum { 60 + DEFAULT_CAPACITY = 16, /* the number of neighborhoods in a new table */ 61 + NEIGHBORHOOD = 255, /* the number of buckets in each neighborhood */ 62 + MAX_PROBES = 1024, /* limit on the number of probes for a free bucket */ 63 + NULL_HOP_OFFSET = 0, /* the hop offset value terminating the hop list */ 64 + DEFAULT_LOAD = 75 /* a compromise between memory use and performance */ 65 + }; 66 + 67 + /** 68 + * struct bucket - Hash buckets. 69 + * 70 + * Buckets are packed together to reduce memory usage and improve cache efficiency. It would be 71 + * tempting to encode the hop offsets separately and maintain alignment of key/value pairs, but 72 + * it's crucial to keep the hop fields near the buckets that use them so they'll tend to share 73 + * cache lines. 74 + */ 75 + struct __packed bucket { 76 + /** 77 + * @first_hop: The biased offset of the first entry in the hop list of the neighborhood 78 + * that hashes to this bucket. 79 + */ 80 + u8 first_hop; 81 + /** @next_hop: The biased offset of the next bucket in the hop list. */ 82 + u8 next_hop; 83 + /** @key: The key stored in this bucket. */ 84 + const void *key; 85 + /** @value: The value stored in this bucket (NULL if empty). */ 86 + void *value; 87 + }; 88 + 89 + /** 90 + * struct pointer_map - The concrete definition of the opaque pointer_map type. 91 + * 92 + * To avoid having to wrap the neighborhoods of the last entries back around to the start of the 93 + * bucket array, we allocate a few more buckets at the end of the array instead, which is why 94 + * capacity and bucket_count are different. 95 + */ 96 + struct pointer_map { 97 + /** @size: The number of entries stored in the map. */ 98 + size_t size; 99 + /** @capacity: The number of neighborhoods in the map. 
*/ 100 + size_t capacity; 101 + /** @bucket_count: The number of buckets in the bucket array. */ 102 + size_t bucket_count; 103 + /** @buckets: The array of hash buckets. */ 104 + struct bucket *buckets; 105 + /** @comparator: The function for comparing keys for equality. */ 106 + pointer_key_comparator *comparator; 107 + /** @hasher: The function for getting a hash code from a key. */ 108 + pointer_key_hasher *hasher; 109 + }; 110 + 111 + /** 112 + * allocate_buckets() - Initialize a pointer_map. 113 + * @map: The map to initialize. 114 + * @capacity: The initial capacity of the map. 115 + * 116 + * Return: UDS_SUCCESS or an error code. 117 + */ 118 + static int allocate_buckets(struct pointer_map *map, size_t capacity) 119 + { 120 + map->size = 0; 121 + map->capacity = capacity; 122 + 123 + /* 124 + * Allocate NEIGHBORHOOD - 1 extra buckets so the last bucket can have a full neighborhood 125 + * without having to wrap back around to element zero. 126 + */ 127 + map->bucket_count = capacity + (NEIGHBORHOOD - 1); 128 + return uds_allocate(map->bucket_count, 129 + struct bucket, 130 + "pointer_map buckets", 131 + &map->buckets); 132 + } 133 + 134 + /** 135 + * vdo_make_pointer_map() - Allocate and initialize a pointer_map. 136 + * @initial_capacity: The number of entries the map should initially be capable of holding (zero 137 + * tells the map to use its own small default). 138 + * @initial_load: The load factor of the map, expressed as an integer percentage (typically in the 139 + * range 50 to 90, with zero telling the map to use its own default). 140 + * @comparator: The function to use to compare the referents of two pointer keys for equality. 141 + * @hasher: The function to use to obtain the hash code associated with each pointer key. 142 + * @map_ptr: A pointer to hold the new pointer_map. 143 + * 144 + * Return: UDS_SUCCESS or an error code. 
145 + */ 146 + int vdo_make_pointer_map(size_t initial_capacity, 147 + unsigned int initial_load, 148 + pointer_key_comparator comparator, 149 + pointer_key_hasher hasher, 150 + struct pointer_map **map_ptr) 151 + { 152 + int result; 153 + struct pointer_map *map; 154 + size_t capacity; 155 + 156 + /* Use the default initial load if the caller did not specify one. */ 157 + if (initial_load == 0) 158 + initial_load = DEFAULT_LOAD; 159 + if (initial_load > 100) 160 + return UDS_INVALID_ARGUMENT; 161 + 162 + result = uds_allocate(1, struct pointer_map, "pointer_map", &map); 163 + if (result != UDS_SUCCESS) 164 + return result; 165 + 166 + map->hasher = hasher; 167 + map->comparator = comparator; 168 + 169 + /* Use the default capacity if the caller did not specify one. */ 170 + capacity = (initial_capacity > 0) ? initial_capacity : DEFAULT_CAPACITY; 171 + 172 + /* 173 + * Scale up the capacity by the specified initial load factor. (i.e. to hold 1000 entries at 174 + * 80% load we need a capacity of 1250) 175 + */ 176 + capacity = capacity * 100 / initial_load; 177 + 178 + result = allocate_buckets(map, capacity); 179 + if (result != UDS_SUCCESS) { 180 + vdo_free_pointer_map(uds_forget(map)); 181 + return result; 182 + } 183 + 184 + *map_ptr = map; 185 + return UDS_SUCCESS; 186 + } 187 + 188 + /** 189 + * vdo_free_pointer_map() - Free a pointer_map. 190 + * @map: The pointer_map to free. 191 + * 192 + * The map does not own the pointer keys and values stored in the map and they are not freed by 193 + * this call. 194 + */ 195 + void vdo_free_pointer_map(struct pointer_map *map) 196 + { 197 + if (map == NULL) 198 + return; 199 + 200 + uds_free(uds_forget(map->buckets)); 201 + uds_free(uds_forget(map)); 202 + } 203 + 204 + /** 205 + * vdo_pointer_map_size() - Get the number of entries stored in a pointer_map. 206 + * @map: The pointer_map to query. 207 + * 208 + * Return: The number of entries in the map. 
209 + */ 210 + size_t vdo_pointer_map_size(const struct pointer_map *map) 211 + { 212 + return map->size; 213 + } 214 + 215 + /** 216 + * dereference_hop() - Convert a biased hop offset within a neighborhood to a pointer to the bucket 217 + * it references. 218 + * @neighborhood: The first bucket in the neighborhood. 219 + * @hop_offset: The biased hop offset to the desired bucket. 220 + * 221 + * Return: NULL if hop_offset is zero, otherwise a pointer to the bucket in the neighborhood at 222 + * hop_offset - 1. 223 + */ 224 + static struct bucket *dereference_hop(struct bucket *neighborhood, unsigned int hop_offset) 225 + { 226 + BUILD_BUG_ON(NULL_HOP_OFFSET != 0); 227 + if (hop_offset == NULL_HOP_OFFSET) 228 + return NULL; 229 + 230 + return &neighborhood[hop_offset - 1]; 231 + } 232 + 233 + /** 234 + * insert_in_hop_list() - Add a bucket into the hop list for the neighborhood, inserting it into 235 + * the list so the hop list remains sorted by hop offset. 236 + * @neighborhood: The first bucket in the neighborhood. 237 + * @new_bucket: The bucket to add to the hop list. 238 + */ 239 + static void insert_in_hop_list(struct bucket *neighborhood, struct bucket *new_bucket) 240 + { 241 + /* Zero indicates a NULL hop offset, so bias the hop offset by one. */ 242 + int hop_offset = 1 + (new_bucket - neighborhood); 243 + 244 + /* Handle the special case of adding a bucket at the start of the list. */ 245 + int next_hop = neighborhood->first_hop; 246 + 247 + if ((next_hop == NULL_HOP_OFFSET) || (next_hop > hop_offset)) { 248 + new_bucket->next_hop = next_hop; 249 + neighborhood->first_hop = hop_offset; 250 + return; 251 + } 252 + 253 + /* Search the hop list for the insertion point that maintains the sort order. 
*/ 254 + for (;;) { 255 + struct bucket *bucket = dereference_hop(neighborhood, next_hop); 256 + 257 + next_hop = bucket->next_hop; 258 + 259 + if ((next_hop == NULL_HOP_OFFSET) || (next_hop > hop_offset)) { 260 + new_bucket->next_hop = next_hop; 261 + bucket->next_hop = hop_offset; 262 + return; 263 + } 264 + } 265 + } 266 + 267 + /** 268 + * select_bucket() - Select and return the hash bucket for a given search key. 269 + * @map: The map to search. 270 + * @key: The mapping key. 271 + */ 272 + static struct bucket *select_bucket(const struct pointer_map *map, const void *key) 273 + { 274 + /* 275 + * Scale the 32-bit hash to a bucket index by treating it as a binary fraction and 276 + * multiplying that by the capacity. If the hash is uniformly distributed over [0 .. 277 + * 2^32-1], then (hash * capacity / 2^32) should be uniformly distributed over [0 .. 278 + * capacity-1]. The multiply and shift is much faster than a divide (modulus) on X86 CPUs. 279 + */ 280 + u64 hash = map->hasher(key); 281 + 282 + return &map->buckets[(hash * map->capacity) >> 32]; 283 + } 284 + 285 + /** 286 + * search_hop_list() - Search the hop list. 287 + * @map: The map being searched. 288 + * @bucket: The map bucket to search for the key. 289 + * @key: The mapping key. 290 + * @previous_ptr: If not NULL, a pointer in which to store the bucket in the list preceding the one 291 + * that had the matching key. 292 + * 293 + * Searches the hop list associated with a given hash bucket for a given search key. If the key is 294 + * found, returns a pointer to the entry (bucket or collision), otherwise returns NULL. 295 + * 296 + * Return: an entry that matches the key, or NULL if not found. 
297 + */ 298 + static struct bucket *search_hop_list(struct pointer_map *map, 299 + struct bucket *bucket, 300 + const void *key, 301 + struct bucket **previous_ptr) 302 + { 303 + struct bucket *previous = NULL; 304 + unsigned int next_hop = bucket->first_hop; 305 + 306 + while (next_hop != NULL_HOP_OFFSET) { 307 + /* Check the neighboring bucket indexed by the offset for the desired key. */ 308 + struct bucket *entry = dereference_hop(bucket, next_hop); 309 + 310 + if ((entry->value != NULL) && map->comparator(key, entry->key)) { 311 + if (previous_ptr != NULL) 312 + *previous_ptr = previous; 313 + return entry; 314 + } 315 + next_hop = entry->next_hop; 316 + previous = entry; 317 + } 318 + return NULL; 319 + } 320 + 321 + /** 322 + * vdo_pointer_map_get() - Retrieve the value associated with a given key from the pointer_map. 323 + * @map: The pointer_map to query. 324 + * @key: The key to look up (may be NULL if the comparator and hasher functions support it). 325 + * 326 + * Return: the value associated with the given key, or NULL if the key is not mapped to any value. 327 + */ 328 + void *vdo_pointer_map_get(struct pointer_map *map, const void *key) 329 + { 330 + struct bucket *match = search_hop_list(map, select_bucket(map, key), key, NULL); 331 + 332 + return ((match != NULL) ? match->value : NULL); 333 + } 334 + 335 + /** 336 + * resize_buckets() - Increase the number of hash buckets and rehash all the existing entries, 337 + * storing them in the new buckets. 338 + * @map: The map to resize. 339 + */ 340 + static int resize_buckets(struct pointer_map *map) 341 + { 342 + int result; 343 + size_t i; 344 + 345 + /* Copy the top-level map data to the stack. */ 346 + struct pointer_map old_map = *map; 347 + 348 + /* Re-initialize the map to be empty and 50% larger. 
*/ 349 + size_t new_capacity = map->capacity / 2 * 3; 350 + 351 + uds_log_info("%s: attempting resize from %zu to %zu, current size=%zu", 352 + __func__, 353 + map->capacity, 354 + new_capacity, 355 + map->size); 356 + result = allocate_buckets(map, new_capacity); 357 + if (result != UDS_SUCCESS) { 358 + *map = old_map; 359 + return result; 360 + } 361 + 362 + /* Populate the new hash table from the entries in the old bucket array. */ 363 + for (i = 0; i < old_map.bucket_count; i++) { 364 + struct bucket *entry = &old_map.buckets[i]; 365 + 366 + if (entry->value == NULL) 367 + continue; 368 + 369 + result = vdo_pointer_map_put(map, entry->key, entry->value, true, NULL); 370 + if (result != UDS_SUCCESS) { 371 + /* Destroy the new partial map and restore the map from the stack. */ 372 + uds_free(uds_forget(map->buckets)); 373 + *map = old_map; 374 + return result; 375 + } 376 + } 377 + 378 + /* Destroy the old bucket array. */ 379 + uds_free(uds_forget(old_map.buckets)); 380 + return UDS_SUCCESS; 381 + } 382 + 383 + /** 384 + * find_empty_bucket() - Probe the bucket array starting at the given bucket for the next empty 385 + * bucket, returning a pointer to it. 386 + * @map: The map containing the buckets to search. 387 + * @bucket: The bucket at which to start probing. 388 + * @max_probes: The maximum number of buckets to search. 389 + * 390 + * NULL will be returned if the search reaches the end of the bucket array or if the number of 391 + * linear probes exceeds a specified limit. 392 + * 393 + * Return: The next empty bucket, or NULL if the search failed. 394 + */ 395 + static struct bucket * 396 + find_empty_bucket(struct pointer_map *map, struct bucket *bucket, unsigned int max_probes) 397 + { 398 + /* 399 + * Limit the search to either the nearer of the end of the bucket array or a fixed distance 400 + * beyond the initial bucket. 
401 + */ 402 + ptrdiff_t remaining = &map->buckets[map->bucket_count] - bucket; 403 + struct bucket *sentinel = &bucket[min_t(ptrdiff_t, remaining, max_probes)]; 404 + struct bucket *entry; 405 + 406 + for (entry = bucket; entry < sentinel; entry++) 407 + if (entry->value == NULL) 408 + return entry; 409 + return NULL; 410 + } 411 + 412 + /** 413 + * move_empty_bucket() - Move an empty bucket closer to the start of the bucket array. 414 + * @map: The map containing the bucket. 415 + 416 + * @hole: The empty bucket to fill with an entry that precedes it in one of its enclosing 417 + * neighborhoods. 418 + * 419 + * This searches the neighborhoods that contain the empty bucket for a non-empty bucket closer to 420 + * the start of the array. If such a bucket is found, this swaps the two buckets by moving the 421 + * entry to the empty bucket. 422 + * 423 + * Return: The bucket that was vacated by moving its entry to the provided hole, or NULL if no 424 + * entry could be moved. 425 + */ 426 + static struct bucket * 427 + move_empty_bucket(struct pointer_map *map __always_unused, struct bucket *hole) 428 + { 429 + /* 430 + * Examine every neighborhood that the empty bucket is part of, starting with the one in 431 + * which it is the last bucket. No boundary check is needed for the negative array 432 + * arithmetic since this function is only called when hole is at least NEIGHBORHOOD cells 433 + * deeper into the array than a valid bucket. 434 + */ 435 + struct bucket *bucket; 436 + 437 + for (bucket = &hole[1 - NEIGHBORHOOD]; bucket < hole; bucket++) { 438 + /* 439 + * Find the entry that is nearest to the bucket, which means it will be nearest to 440 + * the hash bucket whose neighborhood is full. 441 + */ 442 + struct bucket *new_hole = dereference_hop(bucket, bucket->first_hop); 443 + 444 + if (new_hole == NULL) { 445 + /* 446 + * There are no buckets in this neighborhood that are in use by this one 447 + * (they must all be owned by overlapping neighborhoods). 
448 + */ 449 + continue; 450 + } 451 + 452 + /* 453 + * Skip this bucket if its first entry is actually further away than the hole that 454 + * we're already trying to fill. 455 + */ 456 + if (hole < new_hole) 457 + continue; 458 + 459 + /* 460 + * We've found an entry in this neighborhood that we can "hop" further away, moving 461 + * the hole closer to the hash bucket, if not all the way into its neighborhood. 462 + */ 463 + 464 + /* 465 + * The entry that will be the new hole is the first bucket in the list, so setting 466 + * first_hop is all that's needed to remove it from the list. 467 + */ 468 + bucket->first_hop = new_hole->next_hop; 469 + new_hole->next_hop = NULL_HOP_OFFSET; 470 + 471 + /* Move the entry into the original hole. */ 472 + hole->key = new_hole->key; 473 + hole->value = new_hole->value; 474 + new_hole->value = NULL; 475 + 476 + /* Insert the filled hole into the hop list for the neighborhood. */ 477 + insert_in_hop_list(bucket, hole); 478 + return new_hole; 479 + } 480 + 481 + /* We couldn't find an entry to relocate to the hole. */ 482 + return NULL; 483 + } 484 + 485 + /** 486 + * update_mapping() - Find and update any existing mapping for a given key, returning the value 487 + * associated with the key in the provided pointer. 488 + * @map: The pointer_map to attempt to modify. 489 + * @neighborhood: The first bucket in the neighborhood that would contain the search key. 490 + * @key: The key with which to associate the new value. 491 + * @new_value: The value to be associated with the key. 492 + * @update: Whether to overwrite an existing value. 493 + * @old_value_ptr: A pointer in which to store the old value (unmodified if no mapping was found). 494 + * 495 + * Return: true if the map contains a mapping for the key, false if it does not. 
496 + */ 497 + static bool update_mapping(struct pointer_map *map, 498 + struct bucket *neighborhood, 499 + const void *key, 500 + void *new_value, 501 + bool update, 502 + void **old_value_ptr) 503 + { 504 + struct bucket *bucket = search_hop_list(map, neighborhood, key, NULL); 505 + 506 + if (bucket == NULL) { 507 + /* There is no bucket containing the key in the neighborhood. */ 508 + return false; 509 + } 510 + 511 + /* 512 + * Return the value of the current mapping (if desired) and update the mapping with the new 513 + * value (if desired). 514 + */ 515 + if (old_value_ptr != NULL) 516 + *old_value_ptr = bucket->value; 517 + if (update) { 518 + /* 519 + * We're dropping the old key pointer on the floor here, assuming it's a property 520 + * of the value or that it's otherwise safe to just forget. 521 + */ 522 + bucket->key = key; 523 + bucket->value = new_value; 524 + } 525 + return true; 526 + } 527 + 528 + /** 529 + * find_or_make_vacancy() - Find an empty bucket in a specified neighborhood for a new mapping or 530 + * attempt to re-arrange mappings so there is such a bucket. 531 + * @map: The pointer_map to search or modify. 532 + * @neighborhood: The first bucket in the neighborhood in which an empty bucket is needed for a new 533 + * mapping. 534 + * 535 + * This operation may fail (returning NULL) if an empty bucket is not available or could not be 536 + * relocated to the neighborhood. 537 + * 538 + * Return: A pointer to an empty bucket in the desired neighborhood, or NULL if a vacancy could not 539 + * be found or arranged. 540 + */ 541 + static struct bucket *find_or_make_vacancy(struct pointer_map *map, struct bucket *neighborhood) 542 + { 543 + /* Probe within and beyond the neighborhood for the first empty bucket. 
*/ 544 + struct bucket *hole = find_empty_bucket(map, neighborhood, MAX_PROBES); 545 + 546 + /* 547 + * Keep trying until the empty bucket is in the bucket's neighborhood or we are unable to 548 + * move it any closer by swapping it with a filled bucket. 549 + */ 550 + while (hole != NULL) { 551 + int distance = hole - neighborhood; 552 + 553 + if (distance < NEIGHBORHOOD) { 554 + /* 555 + * We've found or relocated an empty bucket close enough to the initial 556 + * hash bucket to be referenced by its hop vector. 557 + */ 558 + return hole; 559 + } 560 + 561 + /* 562 + * The nearest empty bucket isn't within the neighborhood that must contain the new 563 + * entry, so try to swap it with a bucket that is closer. 564 + */ 565 + hole = move_empty_bucket(map, hole); 566 + } 567 + 568 + return NULL; 569 + } 570 + 571 + /** 572 + * vdo_pointer_map_put() - Try to associate a value (a pointer) with a pointer key in a pointer_map. 573 + * @map: The pointer_map to attempt to modify. 574 + * @key: The key with which to associate the new value (may be NULL if the comparator and hasher 575 + * functions support it). 576 + * @new_value: The value to be associated with the key. 577 + * @update: Whether to overwrite an existing value. 578 + * @old_value_ptr: A pointer in which to store either the old value (if the key was already mapped) 579 + * or NULL if the map did not contain the key; NULL may be provided if the caller 580 + * does not need to know the old value. 581 + * 582 + * If the map already contains a mapping for the provided key, the old value is only replaced with 583 + * the specified value if update is true. In either case the old value is returned. If the map does 584 + * not already contain a value for the specified key, the new value is added regardless of the 585 + * value of update. 586 + * 587 + * If the value stored in the map is updated, then the key stored in the map will also be updated 588 + * with the key provided by this call. 
The old key will not be returned due to the memory 589 + * management assumptions described in the interface header comment. 590 + * 591 + * Return: UDS_SUCCESS or an error code. 592 + */ 593 + int vdo_pointer_map_put(struct pointer_map *map, 594 + const void *key, 595 + void *new_value, 596 + bool update, 597 + void **old_value_ptr) 598 + { 599 + struct bucket *neighborhood, *bucket; 600 + 601 + if (new_value == NULL) 602 + return UDS_INVALID_ARGUMENT; 603 + 604 + /* 605 + * Select the bucket at the start of the neighborhood that must contain any entry for the 606 + * provided key. 607 + */ 608 + neighborhood = select_bucket(map, key); 609 + 610 + /* 611 + * Check whether the neighborhood already contains an entry for the key, in which case we 612 + * optionally update it, returning the old value. 613 + */ 614 + if (update_mapping(map, neighborhood, key, new_value, update, old_value_ptr)) 615 + return UDS_SUCCESS; 616 + 617 + /* 618 + * Find an empty bucket in the desired neighborhood for the new entry or re-arrange entries 619 + * in the map so there is such a bucket. This operation will usually succeed; the loop body 620 + * will only be executed on the rare occasions that we have to resize the map. 621 + */ 622 + while ((bucket = find_or_make_vacancy(map, neighborhood)) == NULL) { 623 + /* 624 + * There is no empty bucket in which to put the new entry in the current map, so 625 + * we're forced to allocate a new bucket array with a larger capacity, re-hash all 626 + * the entries into those buckets, and try again (a very expensive operation for 627 + * large maps). 628 + */ 629 + int result = resize_buckets(map); 630 + 631 + if (result != UDS_SUCCESS) 632 + return result; 633 + 634 + /* 635 + * Resizing the map invalidates all pointers to buckets, so 636 + * recalculate the neighborhood pointer. 637 + */ 638 + neighborhood = select_bucket(map, key); 639 + } 640 + 641 + /* Put the new entry in the empty bucket, adding it to the neighborhood. 
*/ 642 + bucket->key = key; 643 + bucket->value = new_value; 644 + insert_in_hop_list(neighborhood, bucket); 645 + map->size += 1; 646 + 647 + /* 648 + * There was no existing entry, so there was no old value to be 649 + * returned. 650 + */ 651 + if (old_value_ptr != NULL) 652 + *old_value_ptr = NULL; 653 + return UDS_SUCCESS; 654 + } 655 + 656 + /** 657 + * vdo_pointer_map_remove() - Remove the mapping for a given key from the pointer_map. 658 + * @map: The pointer_map from which to remove the mapping. 659 + * @key: The key whose mapping is to be removed (may be NULL if the comparator and hasher functions 660 + * support it). 661 + * 662 + * Return: the value that was associated with the key, or NULL if it was not mapped. 663 + */ 664 + void *vdo_pointer_map_remove(struct pointer_map *map, const void *key) 665 + { 666 + void *value; 667 + 668 + /* Select the bucket to search and search it for an existing entry. */ 669 + struct bucket *bucket = select_bucket(map, key); 670 + struct bucket *previous; 671 + struct bucket *victim = search_hop_list(map, bucket, key, &previous); 672 + 673 + if (victim == NULL) { 674 + /* There is no matching entry to remove. */ 675 + return NULL; 676 + } 677 + 678 + /* 679 + * We found an entry to remove. Save the mapped value to return later and empty the bucket. 680 + */ 681 + map->size -= 1; 682 + value = victim->value; 683 + victim->value = NULL; 684 + victim->key = 0; 685 + 686 + /* The victim bucket is now empty, but it still needs to be spliced out of the hop list. */ 687 + if (previous == NULL) { 688 + /* The victim is the head of the list, so swing first_hop. */ 689 + bucket->first_hop = victim->next_hop; 690 + } else { 691 + previous->next_hop = victim->next_hop; 692 + } 693 + 694 + victim->next_hop = NULL_HOP_OFFSET; 695 + return value; 696 + }
+81
drivers/md/dm-vdo/pointer-map.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright 2023 Red Hat 4 + */ 5 + 6 + #ifndef VDO_POINTER_MAP_H 7 + #define VDO_POINTER_MAP_H 8 + 9 + #include <linux/compiler.h> 10 + #include <linux/types.h> 11 + 12 + /* 13 + * A pointer_map associates pointer values (<code>void *</code>) with the data referenced by 14 + * pointer keys (<code>void *</code>). <code>NULL</code> pointer values are not supported. A 15 + * <code>NULL</code> key value is supported when the instance's key comparator and hasher functions 16 + * support it. 17 + * 18 + * The map is implemented as a hash table, which should provide constant-time insert, query, and 19 + * remove operations, although the insert may occasionally grow the table, which is linear in the 20 + * number of entries in the map. The table will grow as needed to hold new entries, but will not 21 + * shrink as entries are removed. 22 + * 23 + * The key and value pointers passed to the map are retained and used by the map, but are not owned 24 + * by the map. Freeing the map does not attempt to free the pointers. The client is entirely 25 + * responsible for the memory management of the keys and values. The current interface and 26 + * implementation assume that keys will be properties of the values, or that keys will not be 27 + * memory managed, or that keys will not need to be freed as a result of being replaced when a key 28 + * is re-mapped. 29 + */ 30 + 31 + struct pointer_map; 32 + 33 + /** 34 + * typedef pointer_key_comparator - The prototype of functions that compare the referents of two 35 + * pointer keys for equality. 36 + * @this_key: The first element to compare. 37 + * @that_key: The second element to compare. 38 + * 39 + * If two keys are equal, then both keys must have the same hash code associated with them by 40 + * the hasher function defined below. 41 + * 42 + * Return: true if and only if the referents of the two key pointers are to be treated as the same 43 + * key by the map. 
44 + */ 45 + typedef bool pointer_key_comparator(const void *this_key, const void *that_key); 46 + 47 + /** 48 + * typedef pointer_key_hasher - The prototype of functions that get or calculate a hash code 49 + * associated with the referent of a pointer key. 50 + * @key: The pointer key to hash. 51 + * 52 + * The hash code must be uniformly distributed over all u32 values. The hash code associated 53 + * with a given key must not change while the key is in the map. If the comparator function says 54 + * two keys are equal, then this function must return the same hash code for both keys. This 55 + * function may be called many times for a key while an entry is stored for it in the map. 56 + * 57 + * Return: The hash code for the key. 58 + */ 59 + typedef u32 pointer_key_hasher(const void *key); 60 + 61 + int __must_check vdo_make_pointer_map(size_t initial_capacity, 62 + unsigned int initial_load, 63 + pointer_key_comparator comparator, 64 + pointer_key_hasher hasher, 65 + struct pointer_map **map_ptr); 66 + 67 + void vdo_free_pointer_map(struct pointer_map *map); 68 + 69 + size_t vdo_pointer_map_size(const struct pointer_map *map); 70 + 71 + void *vdo_pointer_map_get(struct pointer_map *map, const void *key); 72 + 73 + int __must_check vdo_pointer_map_put(struct pointer_map *map, 74 + const void *key, 75 + void *new_value, 76 + bool update, 77 + void **old_value_ptr); 78 + 79 + void *vdo_pointer_map_remove(struct pointer_map *map, const void *key); 80 + 81 + #endif /* VDO_POINTER_MAP_H */
+226
drivers/md/dm-vdo/priority-table.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright 2023 Red Hat 4 + */ 5 + 6 + #include "priority-table.h" 7 + 8 + #include <linux/log2.h> 9 + 10 + #include "errors.h" 11 + #include "memory-alloc.h" 12 + #include "permassert.h" 13 + 14 + #include "status-codes.h" 15 + 16 + /* We use a single 64-bit search vector, so the maximum priority is 63 */ 17 + enum { 18 + MAX_PRIORITY = 63 19 + }; 20 + 21 + /* 22 + * All the entries with the same priority are queued in a circular list in a bucket for that 23 + * priority. The table is essentially an array of buckets. 24 + */ 25 + struct bucket { 26 + /* 27 + * The head of a queue of table entries, all having the same priority 28 + */ 29 + struct list_head queue; 30 + /* The priority of all the entries in this bucket */ 31 + unsigned int priority; 32 + }; 33 + 34 + /* 35 + * A priority table is an array of buckets, indexed by priority. New entries are added to the end 36 + * of the queue in the appropriate bucket. The dequeue operation finds the highest-priority 37 + * non-empty bucket by searching a bit vector represented as a single 8-byte word, which is very 38 + * fast with compiler and CPU support. 39 + */ 40 + struct priority_table { 41 + /* The maximum priority of entries that may be stored in this table */ 42 + unsigned int max_priority; 43 + /* A bit vector flagging all buckets that are currently non-empty */ 44 + u64 search_vector; 45 + /* The array of all buckets, indexed by priority */ 46 + struct bucket buckets[]; 47 + }; 48 + 49 + /** 50 + * vdo_make_priority_table() - Allocate and initialize a new priority_table. 51 + * @max_priority: The maximum priority value for table entries. 52 + * @table_ptr: A pointer to hold the new table. 53 + * 54 + * Return: VDO_SUCCESS or an error code. 
55 + */ 56 + int vdo_make_priority_table(unsigned int max_priority, struct priority_table **table_ptr) 57 + { 58 + struct priority_table *table; 59 + int result; 60 + unsigned int priority; 61 + 62 + if (max_priority > MAX_PRIORITY) 63 + return UDS_INVALID_ARGUMENT; 64 + 65 + result = uds_allocate_extended(struct priority_table, max_priority + 1, 66 + struct bucket, __func__, &table); 67 + if (result != VDO_SUCCESS) 68 + return result; 69 + 70 + for (priority = 0; priority <= max_priority; priority++) { 71 + struct bucket *bucket = &table->buckets[priority]; 72 + 73 + bucket->priority = priority; 74 + INIT_LIST_HEAD(&bucket->queue); 75 + } 76 + 77 + table->max_priority = max_priority; 78 + table->search_vector = 0; 79 + 80 + *table_ptr = table; 81 + return VDO_SUCCESS; 82 + } 83 + 84 + /** 85 + * vdo_free_priority_table() - Free a priority_table. 86 + * @table: The table to free. 87 + * 88 + * The table does not own the entries stored in it and they are not freed by this call. 89 + */ 90 + void vdo_free_priority_table(struct priority_table *table) 91 + { 92 + if (table == NULL) 93 + return; 94 + 95 + /* 96 + * Unlink the buckets from any entries still in the table so the entries won't be left with 97 + * dangling pointers to freed memory. 98 + */ 99 + vdo_reset_priority_table(table); 100 + 101 + uds_free(table); 102 + } 103 + 104 + /** 105 + * vdo_reset_priority_table() - Reset a priority table, leaving it in the same empty state as when 106 + * newly constructed. 107 + * @table: The table to reset. 108 + * 109 + * The table does not own the entries stored in it and they are not freed (or even unlinked from 110 + * each other) by this call. 
111 + */ 112 + void vdo_reset_priority_table(struct priority_table *table) 113 + { 114 + unsigned int priority; 115 + 116 + table->search_vector = 0; 117 + for (priority = 0; priority <= table->max_priority; priority++) 118 + list_del_init(&table->buckets[priority].queue); 119 + } 120 + 121 + /** 122 + * vdo_priority_table_enqueue() - Add a new entry to the priority table, appending it to the queue 123 + * for entries with the specified priority. 124 + * @table: The table in which to store the entry. 125 + * @priority: The priority of the entry. 126 + * @entry: The list_head embedded in the entry to store in the table (the caller must have 127 + * initialized it). 128 + */ 129 + void vdo_priority_table_enqueue(struct priority_table *table, unsigned int priority, 130 + struct list_head *entry) 131 + { 132 + ASSERT_LOG_ONLY((priority <= table->max_priority), 133 + "entry priority must be valid for the table"); 134 + 135 + /* Append the entry to the queue in the specified bucket. */ 136 + list_move_tail(entry, &table->buckets[priority].queue); 137 + 138 + /* Flag the bucket in the search vector since it must be non-empty. */ 139 + table->search_vector |= (1ULL << priority); 140 + } 141 + 142 + static inline void mark_bucket_empty(struct priority_table *table, struct bucket *bucket) 143 + { 144 + table->search_vector &= ~(1ULL << bucket->priority); 145 + } 146 + 147 + /** 148 + * vdo_priority_table_dequeue() - Find the highest-priority entry in the table, remove it from the 149 + * table, and return it. 150 + * @table: The priority table from which to remove an entry. 151 + * 152 + * If there are multiple entries with the same priority, the one that has been in the table with 153 + * that priority the longest will be returned. 154 + * 155 + * Return: The dequeued entry, or NULL if the table is currently empty. 
156 + */ 157 + struct list_head *vdo_priority_table_dequeue(struct priority_table *table) 158 + { 159 + struct bucket *bucket; 160 + struct list_head *entry; 161 + int top_priority; 162 + 163 + if (table->search_vector == 0) { 164 + /* All buckets are empty. */ 165 + return NULL; 166 + } 167 + 168 + /* 169 + * Find the highest priority non-empty bucket by finding the highest-order non-zero bit in 170 + * the search vector. 171 + */ 172 + top_priority = ilog2(table->search_vector); 173 + 174 + /* Dequeue the first entry in the bucket. */ 175 + bucket = &table->buckets[top_priority]; 176 + entry = bucket->queue.next; 177 + list_del_init(entry); 178 + 179 + /* Clear the bit in the search vector if the bucket has been emptied. */ 180 + if (list_empty(&bucket->queue)) 181 + mark_bucket_empty(table, bucket); 182 + 183 + return entry; 184 + } 185 + 186 + /** 187 + * vdo_priority_table_remove() - Remove a specified entry from its priority table. 188 + * @table: The table from which to remove the entry. 189 + * @entry: The entry to remove from the table. 190 + */ 191 + void vdo_priority_table_remove(struct priority_table *table, struct list_head *entry) 192 + { 193 + struct list_head *next_entry; 194 + 195 + /* 196 + * We can't guard against calls where the entry is on a list for a different table, but 197 + * it's easy to deal with an entry not in any table or list. 198 + */ 199 + if (list_empty(entry)) 200 + return; 201 + 202 + /* 203 + * Remove the entry from the bucket list, remembering a pointer to another entry in the 204 + * ring. 205 + */ 206 + next_entry = entry->next; 207 + list_del_init(entry); 208 + 209 + /* 210 + * If the rest of the list is now empty, the next node must be the list head in the bucket 211 + * and we can use it to update the search vector. 
212 + */ 213 + if (list_empty(next_entry)) 214 + mark_bucket_empty(table, list_entry(next_entry, struct bucket, queue)); 215 + } 216 + 217 + /** 218 + * vdo_is_priority_table_empty() - Return whether the priority table is empty. 219 + * @table: The table to check. 220 + * 221 + * Return: true if the table is empty. 222 + */ 223 + bool vdo_is_priority_table_empty(struct priority_table *table) 224 + { 225 + return (table->search_vector == 0); 226 + }
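The core trick in vdo_priority_table_dequeue() is the search vector: bit p of a 64-bit word is set exactly when bucket p is non-empty, so the highest non-empty priority is simply the index of the highest set bit (ilog2() in the kernel). Below is a minimal standalone sketch of that technique, not the kernel code itself; ilog2() is replaced by a portable loop and the helper names are illustrative.

```c
#include <stdint.h>

/*
 * Index of the highest set bit (undefined for v == 0), standing in for the
 * kernel's ilog2().
 */
static int highest_set_bit(uint64_t v)
{
	int bit = 0;

	while (v >>= 1)
		bit++;
	return bit;
}

/* Flag bucket 'priority' as non-empty, as vdo_priority_table_enqueue() does. */
static uint64_t mark_nonempty(uint64_t vector, unsigned int priority)
{
	return vector | ((uint64_t)1 << priority);
}

/* Clear the bucket's bit once its queue drains, as mark_bucket_empty() does. */
static uint64_t mark_empty(uint64_t vector, unsigned int priority)
{
	return vector & ~((uint64_t)1 << priority);
}
```

Because the vector is a single machine word, both updating it and locating the top priority are O(1), which is what lets every table operation stay constant-time. It also implies the table supports at most 64 priority levels (the MAX_PRIORITY bound checked in vdo_make_priority_table()).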
+47
drivers/md/dm-vdo/priority-table.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright 2023 Red Hat 4 + */ 5 + 6 + #ifndef VDO_PRIORITY_TABLE_H 7 + #define VDO_PRIORITY_TABLE_H 8 + 9 + #include <linux/list.h> 10 + 11 + /* 12 + * A priority_table is a simple implementation of a priority queue for entries with priorities that 13 + * are small non-negative integer values. It implements the obvious priority queue operations of 14 + * enqueuing an entry and dequeuing an entry with the maximum priority. It also supports removing 15 + * an arbitrary entry. The priority of an entry already in the table can be changed by removing it 16 + * and re-enqueuing it with a different priority. All operations have O(1) complexity. 17 + * 18 + * The links for the table entries must be embedded in the entries themselves. Lists are used to 19 + * link entries in the table and no wrapper type is declared, so an existing list entry in an 20 + * object can also be used to queue it in a priority_table, assuming the field is not used for 21 + * anything else while so queued. 22 + * 23 + * The table is implemented as an array of queues (circular lists) indexed by priority, along with 24 + * a hint for which queues are non-empty. Steven Skiena calls a very similar structure a "bounded 25 + * height priority queue", but given the resemblance to a hash table, "priority table" seems both 26 + * shorter and more apt, if somewhat novel. 
27 + */ 28 + 29 + struct priority_table; 30 + 31 + int __must_check vdo_make_priority_table(unsigned int max_priority, 32 + struct priority_table **table_ptr); 33 + 34 + void vdo_free_priority_table(struct priority_table *table); 35 + 36 + void vdo_priority_table_enqueue(struct priority_table *table, unsigned int priority, 37 + struct list_head *entry); 38 + 39 + void vdo_reset_priority_table(struct priority_table *table); 40 + 41 + struct list_head * __must_check vdo_priority_table_dequeue(struct priority_table *table); 42 + 43 + void vdo_priority_table_remove(struct priority_table *table, struct list_head *entry); 44 + 45 + bool __must_check vdo_is_priority_table_empty(struct priority_table *table); 46 + 47 + #endif /* VDO_PRIORITY_TABLE_H */
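The comment above describes an array of circular lists indexed by priority, with entries linked through a list_head embedded in the entry itself and a bit-vector hint of which buckets are non-empty. The self-contained sketch below illustrates that shape under stated assumptions: it is not the kernel API, the type and function names are invented for the example, and a hand-rolled doubly linked `struct link` stands in for `struct list_head` from `<linux/list.h>`.

```c
#include <stddef.h>
#include <stdint.h>

/* Stand-in for the kernel's embedded struct list_head. */
struct link {
	struct link *prev, *next;
};

#define MAX_PRIO 63 /* one bucket per bit of the 64-bit search vector */

struct prio_table {
	uint64_t search_vector;            /* bit p set iff bucket p is non-empty */
	struct link buckets[MAX_PRIO + 1]; /* circular list head per priority */
};

static void link_init(struct link *l)
{
	l->prev = l->next = l;
}

static void table_init(struct prio_table *t)
{
	t->search_vector = 0;
	for (int p = 0; p <= MAX_PRIO; p++)
		link_init(&t->buckets[p]);
}

/* Append the entry to the tail of its priority's queue: O(1). */
static void table_enqueue(struct prio_table *t, unsigned int prio, struct link *e)
{
	struct link *head = &t->buckets[prio];

	e->prev = head->prev;
	e->next = head;
	head->prev->next = e;
	head->prev = e;
	t->search_vector |= (uint64_t)1 << prio;
}

/* Remove and return the oldest entry of the highest priority, or NULL. */
static struct link *table_dequeue(struct prio_table *t)
{
	if (t->search_vector == 0)
		return NULL;

	/* Highest set bit = highest non-empty priority (ilog2 in the kernel). */
	int prio = MAX_PRIO;
	while (!(t->search_vector & ((uint64_t)1 << prio)))
		prio--;

	struct link *head = &t->buckets[prio];
	struct link *e = head->next;

	/* Unlink the entry and re-initialize it, like list_del_init(). */
	e->prev->next = e->next;
	e->next->prev = e->prev;
	link_init(e);

	/* Clear the search-vector bit if the bucket just drained. */
	if (head->next == head)
		t->search_vector &= ~((uint64_t)1 << prio);
	return e;
}
```

Entries at the same priority come back in FIFO order because enqueue appends at the tail and dequeue takes from the head, matching the "longest in the table" guarantee documented on vdo_priority_table_dequeue().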