1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2024 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22#include "SDL_hashtable.h"
23
// XXX: We can't use SDL_assert here because it's going to call into hashtable code,
// so this file checks its internal invariants with the C library's assert() instead.
#include <assert.h>
// Invariant check used throughout this file; expands directly to assert(x).
#define HT_ASSERT(x) assert(x)
27
// One slot of the open-addressed table. Slots are allocated zeroed, and a slot
// whose `live` bit is 0 is treated as empty by every routine in this file.
typedef struct SDL_HashItem
{
    // TODO: Splitting off values into a separate array might be more cache-friendly
    const void *key;       // caller-supplied key; handed to the table's keymatch and nuke callbacks
    const void *value;     // caller-supplied value; handed back by lookups and iteration
    Uint32 hash;           // hash as produced by calc_hash(); `hash & hash_mask` is the slot the entry starts probing from
    Uint32 probe_len : 31; // number of slots (with wraparound) between that starting slot and this slot
    Uint32 live : 1;       // 1 when the slot holds an entry, 0 when it is empty
} SDL_HashItem;
37
// Upper bound on sizeof(SDL_HashItem); used below to derive the largest allowed bucket count.
// Must be a power of 2 >= sizeof(SDL_HashItem)
#define MAX_HASHITEM_SIZEOF 32u
SDL_COMPILE_TIME_ASSERT(sizeof_SDL_HashItem, sizeof(SDL_HashItem) <= MAX_HASHITEM_SIZEOF);

// Largest bucket count a table may have: 0x80000000 divided by the per-item size bound.
// Anything larger than this will cause integer overflows
#define MAX_HASHTABLE_SIZE (0x80000000u / (MAX_HASHITEM_SIZEOF))
44
// The hash table itself: one flat array of slots plus the callbacks and
// bookkeeping needed to probe it.
struct SDL_HashTable
{
    SDL_RWLock *lock;                  // only created when the table is requested as threadsafe; NULL otherwise (or if creation failed)
    SDL_HashItem *table;               // array of (hash_mask + 1) slots
    SDL_HashTable_HashFn hash;         // user hash callback, invoked as hash(key, data)
    SDL_HashTable_KeyMatchFn keymatch; // user equality callback, invoked as keymatch(stored_key, key, data)
    SDL_HashTable_NukeFn nuke;         // optional cleanup callback, invoked as nuke(key, value, data); may be NULL
    void *data;                        // opaque pointer passed through to all three callbacks
    Uint32 hash_mask;                  // bucket count minus one; the bucket count is always a power of two
    Uint32 max_probe_len;              // largest probe length recorded by insert_item(); bounds the search in find_item()
    Uint32 num_occupied_slots;         // number of live entries currently stored
    bool stackable;                    // when true, inserting an existing key adds another entry instead of replacing it
};
58
59SDL_HashTable *SDL_CreateHashTable(void *data,
60 Uint32 num_buckets,
61 SDL_HashTable_HashFn hashfn,
62 SDL_HashTable_KeyMatchFn keymatchfn,
63 SDL_HashTable_NukeFn nukefn,
64 bool threadsafe,
65 bool stackable)
66{
67 SDL_HashTable *table;
68
69 // num_buckets must be a power of two so we can derive the bucket index with just a bit-and.
70 if ((num_buckets < 1) || !SDL_HasExactlyOneBitSet32(num_buckets)) {
71 SDL_SetError("num_buckets must be a power of two");
72 return NULL;
73 }
74
75 if (num_buckets > MAX_HASHTABLE_SIZE) {
76 SDL_SetError("num_buckets is too large");
77 return NULL;
78 }
79
80 table = (SDL_HashTable *)SDL_calloc(1, sizeof(SDL_HashTable));
81 if (!table) {
82 return NULL;
83 }
84
85 if (threadsafe) {
86 // Don't fail if we can't create a lock (single threaded environment?)
87 table->lock = SDL_CreateRWLock();
88 }
89
90 table->table = (SDL_HashItem *)SDL_calloc(num_buckets, sizeof(SDL_HashItem));
91 if (!table->table) {
92 SDL_DestroyHashTable(table);
93 return NULL;
94 }
95
96 table->hash_mask = num_buckets - 1;
97 table->stackable = stackable;
98 table->data = data;
99 table->hash = hashfn;
100 table->keymatch = keymatchfn;
101 table->nuke = nukefn;
102 return table;
103}
104
105static SDL_INLINE Uint32 calc_hash(const SDL_HashTable *table, const void *key)
106{
107 const Uint32 BitMixer = 0x9E3779B1u;
108 return table->hash(key, table->data) * BitMixer;
109}
110
111static SDL_INLINE Uint32 get_probe_length(Uint32 zero_idx, Uint32 actual_idx, Uint32 num_buckets)
112{
113 // returns the probe sequence length from zero_idx to actual_idx
114
115 if (actual_idx < zero_idx) {
116 return num_buckets - zero_idx + actual_idx;
117 }
118
119 return actual_idx - zero_idx;
120}
121
122static SDL_HashItem *find_item(const SDL_HashTable *ht, const void *key, Uint32 hash, Uint32 *i, Uint32 *probe_len)
123{
124 Uint32 hash_mask = ht->hash_mask;
125 Uint32 max_probe_len = ht->max_probe_len;
126
127 SDL_HashItem *table = ht->table;
128
129 for (;;) {
130 SDL_HashItem *item = table + *i;
131 Uint32 item_hash = item->hash;
132
133 if (!item->live) {
134 return NULL;
135 }
136
137 if (item_hash == hash && ht->keymatch(item->key, key, ht->data)) {
138 return item;
139 }
140
141 Uint32 item_probe_len = item->probe_len;
142 HT_ASSERT(item_probe_len == get_probe_length(item_hash & hash_mask, (Uint32)(item - table), hash_mask + 1));
143
144 if (*probe_len > item_probe_len) {
145 return NULL;
146 }
147
148 if (++*probe_len > max_probe_len) {
149 return NULL;
150 }
151
152 *i = (*i + 1) & hash_mask;
153 }
154}
155
156static SDL_HashItem *find_first_item(const SDL_HashTable *ht, const void *key, Uint32 hash)
157{
158 Uint32 i = hash & ht->hash_mask;
159 Uint32 probe_len = 0;
160 return find_item(ht, key, hash, &i, &probe_len);
161}
162
// Stores *item_to_insert into `table` (an array of hash_mask + 1 slots).
//
// item_to_insert     the entry to store. It is used as scratch space: whenever an
//                    existing entry is displaced, that entry is copied into
//                    *item_to_insert and becomes the one still looking for a slot.
// table              the slot array to insert into.
// hash_mask          bucket count minus one.
// max_probe_len_ptr  raised whenever an entry is written with a larger probe length.
//
// Returns the slot that received the entry originally passed in.
//
// The loop only exits when it reaches a slot that is not live, so the caller
// must make sure the array has at least one empty slot.
static SDL_HashItem *insert_item(SDL_HashItem *item_to_insert, SDL_HashItem *table, Uint32 hash_mask, Uint32 *max_probe_len_ptr)
{
    Uint32 idx = item_to_insert->hash & hash_mask;
    SDL_HashItem temp_item, *target = NULL;
    Uint32 num_buckets = hash_mask + 1;

    for (;;) {
        SDL_HashItem *candidate = table + idx;

        if (!candidate->live) {
            // Found an empty slot. Put it here and we're done.

            *candidate = *item_to_insert;

            // Only the first write places the caller's entry; later writes place displaced entries.
            if (target == NULL) {
                target = candidate;
            }

            Uint32 probe_len = get_probe_length(candidate->hash & hash_mask, idx, num_buckets);
            candidate->probe_len = probe_len;

            if (*max_probe_len_ptr < probe_len) {
                *max_probe_len_ptr = probe_len;
            }

            break;
        }

        Uint32 candidate_probe_len = candidate->probe_len;
        HT_ASSERT(candidate_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets));
        // Probe length the entry being inserted would have if it were stored at idx.
        Uint32 new_probe_len = get_probe_length(item_to_insert->hash & hash_mask, idx, num_buckets);

        if (candidate_probe_len < new_probe_len) {
            // Robin Hood hashing: the item at idx has a better probe length than our item would at this position.
            // Evict it and put our item in its place, then continue looking for a new spot for the displaced item.
            // This algorithm significantly reduces clustering in the table, making lookups take very few probes.

            temp_item = *candidate;
            *candidate = *item_to_insert;

            if (target == NULL) {
                target = candidate;
            }

            // From here on, the displaced entry is the one being inserted.
            *item_to_insert = temp_item;

            HT_ASSERT(new_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets));
            candidate->probe_len = new_probe_len;

            if (*max_probe_len_ptr < new_probe_len) {
                *max_probe_len_ptr = new_probe_len;
            }
        }

        idx = (idx + 1) & hash_mask;
    }

    return target;
}
222
// Removes `item` (which must point into ht->table) from the table.
//
// The table's nuke callback, if set, is called on the entry's key and value
// first, and num_occupied_slots is decremented. The gap is then closed by
// shifting the following entries back one slot each, until a slot with a probe
// length of zero is reached (an empty slot, or an entry already in its starting
// slot). The last vacated slot is zeroed.
//
// ht->max_probe_len is not lowered by this function.
static void delete_item(SDL_HashTable *ht, SDL_HashItem *item)
{
    Uint32 hash_mask = ht->hash_mask;
    SDL_HashItem *table = ht->table;

    if (ht->nuke) {
        ht->nuke(item->key, item->value, ht->data);
    }
    ht->num_occupied_slots--;

    Uint32 idx = (Uint32)(item - ht->table);

    for (;;) {
        idx = (idx + 1) & hash_mask;
        SDL_HashItem *next_item = table + idx;

        // Zeroed (empty) slots also have probe_len == 0, so this covers both stop conditions.
        if (next_item->probe_len < 1) {
            SDL_zerop(item);
            return;
        }

        // Pull the next entry back into the hole; it is now one step closer to its starting slot.
        *item = *next_item;
        item->probe_len -= 1;
        HT_ASSERT(item->probe_len < ht->max_probe_len);
        item = next_item;
    }
}
250
251static bool resize(SDL_HashTable *ht, Uint32 new_size)
252{
253 SDL_HashItem *old_table = ht->table;
254 Uint32 old_size = ht->hash_mask + 1;
255 Uint32 new_hash_mask = new_size - 1;
256 SDL_HashItem *new_table = SDL_calloc(new_size, sizeof(*new_table));
257
258 if (!new_table) {
259 return false;
260 }
261
262 ht->max_probe_len = 0;
263 ht->hash_mask = new_hash_mask;
264 ht->table = new_table;
265
266 for (Uint32 i = 0; i < old_size; ++i) {
267 SDL_HashItem *item = old_table + i;
268 if (item->live) {
269 insert_item(item, new_table, new_hash_mask, &ht->max_probe_len);
270 }
271 }
272
273 SDL_free(old_table);
274 return true;
275}
276
277static bool maybe_resize(SDL_HashTable *ht)
278{
279 Uint32 capacity = ht->hash_mask + 1;
280
281 if (capacity >= MAX_HASHTABLE_SIZE) {
282 return false;
283 }
284
285 Uint32 max_load_factor = 217; // range: 0-255; 217 is roughly 85%
286 Uint32 resize_threshold = (Uint32)((max_load_factor * (Uint64)capacity) >> 8);
287
288 if (ht->num_occupied_slots > resize_threshold) {
289 return resize(ht, capacity * 2);
290 }
291
292 return true;
293}
294
295bool SDL_InsertIntoHashTable(SDL_HashTable *table, const void *key, const void *value)
296{
297 SDL_HashItem *item;
298 Uint32 hash;
299 bool result = false;
300
301 if (!table) {
302 return false;
303 }
304
305 if (table->lock) {
306 SDL_LockRWLockForWriting(table->lock);
307 }
308
309 hash = calc_hash(table, key);
310 item = find_first_item(table, key, hash);
311
312 if (item && !table->stackable) {
313 // Allow overwrites, this might have been inserted on another thread
314 delete_item(table, item);
315 }
316
317 SDL_HashItem new_item;
318 new_item.key = key;
319 new_item.value = value;
320 new_item.hash = hash;
321 new_item.live = true;
322 new_item.probe_len = 0;
323
324 table->num_occupied_slots++;
325
326 if (!maybe_resize(table)) {
327 table->num_occupied_slots--;
328 goto done;
329 }
330
331 // This never returns NULL
332 insert_item(&new_item, table->table, table->hash_mask, &table->max_probe_len);
333 result = true;
334
335done:
336 if (table->lock) {
337 SDL_UnlockRWLock(table->lock);
338 }
339 return result;
340}
341
342bool SDL_FindInHashTable(const SDL_HashTable *table, const void *key, const void **value)
343{
344 Uint32 hash;
345 SDL_HashItem *i;
346 bool result = false;
347
348 if (!table) {
349 if (value) {
350 *value = NULL;
351 }
352 return false;
353 }
354
355 if (table->lock) {
356 SDL_LockRWLockForReading(table->lock);
357 }
358
359 hash = calc_hash(table, key);
360 i = find_first_item(table, key, hash);
361 if (i) {
362 if (value) {
363 *value = i->value;
364 }
365 result = true;
366 }
367
368 if (table->lock) {
369 SDL_UnlockRWLock(table->lock);
370 }
371 return result;
372}
373
374bool SDL_RemoveFromHashTable(SDL_HashTable *table, const void *key)
375{
376 Uint32 hash;
377 SDL_HashItem *item;
378 bool result = false;
379
380 if (!table) {
381 return false;
382 }
383
384 if (table->lock) {
385 SDL_LockRWLockForWriting(table->lock);
386 }
387
388 // FIXME: what to do for stacking hashtables?
389 // The original code removes just one item.
390 // This hashtable happens to preserve the insertion order of multi-value keys,
391 // so deleting the first one will always delete the least-recently inserted one.
392 // But maybe it makes more sense to remove all matching items?
393
394 hash = calc_hash(table, key);
395 item = find_first_item(table, key, hash);
396 if (!item) {
397 goto done;
398 }
399
400 delete_item(table, item);
401 result = true;
402
403done:
404 if (table->lock) {
405 SDL_UnlockRWLock(table->lock);
406 }
407 return result;
408}
409
410bool SDL_IterateHashTableKey(const SDL_HashTable *table, const void *key, const void **_value, void **iter)
411{
412 SDL_HashItem *item = (SDL_HashItem *)*iter;
413
414 if (!table) {
415 return false;
416 }
417
418 Uint32 i, probe_len, hash;
419
420 if (item) {
421 HT_ASSERT(item >= table->table);
422 HT_ASSERT(item < table->table + (table->hash_mask + 1));
423
424 hash = item->hash;
425 probe_len = item->probe_len + 1;
426 i = ((Uint32)(item - table->table) + 1) & table->hash_mask;
427 item = table->table + i;
428 } else {
429 hash = calc_hash(table, key);
430 i = hash & table->hash_mask;
431 probe_len = 0;
432 }
433
434 item = find_item(table, key, hash, &i, &probe_len);
435
436 if (!item) {
437 *_value = NULL;
438 return false;
439 }
440
441 *_value = item->value;
442 *iter = item;
443
444 return true;
445}
446
447bool SDL_IterateHashTable(const SDL_HashTable *table, const void **_key, const void **_value, void **iter)
448{
449 SDL_HashItem *item = (SDL_HashItem *)*iter;
450
451 if (!table) {
452 return false;
453 }
454
455 if (!item) {
456 item = table->table;
457 } else {
458 item++;
459 }
460
461 HT_ASSERT(item >= table->table);
462 SDL_HashItem *end = table->table + (table->hash_mask + 1);
463
464 while (item < end && !item->live) {
465 ++item;
466 }
467
468 HT_ASSERT(item <= end);
469
470 if (item == end) {
471 if (_key) {
472 *_key = NULL;
473 }
474 if (_value) {
475 *_value = NULL;
476 }
477 return false;
478 }
479
480 if (_key) {
481 *_key = item->key;
482 }
483 if (_value) {
484 *_value = item->value;
485 }
486 *iter = item;
487
488 return true;
489}
490
491bool SDL_HashTableEmpty(SDL_HashTable *table)
492{
493 return !(table && table->num_occupied_slots);
494}
495
496static void nuke_all(SDL_HashTable *table)
497{
498 void *data = table->data;
499 SDL_HashItem *end = table->table + (table->hash_mask + 1);
500 SDL_HashItem *i;
501
502 for (i = table->table; i < end; ++i) {
503 if (i->live) {
504 table->nuke(i->key, i->value, data);
505 }
506 }
507}
508
509void SDL_EmptyHashTable(SDL_HashTable *table)
510{
511 if (table) {
512 SDL_LockRWLockForWriting(table->lock);
513 {
514 if (table->nuke) {
515 nuke_all(table);
516 }
517
518 SDL_memset(table->table, 0, sizeof(*table->table) * (table->hash_mask + 1));
519 table->num_occupied_slots = 0;
520 }
521 SDL_UnlockRWLock(table->lock);
522 }
523}
524
525void SDL_DestroyHashTable(SDL_HashTable *table)
526{
527 if (table) {
528 SDL_EmptyHashTable(table);
529
530 SDL_DestroyRWLock(table->lock);
531 SDL_free(table->table);
532 SDL_free(table);
533 }
534}
535
536// this is djb's xor hashing function.
537static SDL_INLINE Uint32 hash_string_djbxor(const char *str, size_t len)
538{
539 Uint32 hash = 5381;
540 while (len--) {
541 hash = ((hash << 5) + hash) ^ *(str++);
542 }
543 return hash;
544}
545
546Uint32 SDL_HashPointer(const void *key, void *unused)
547{
548 (void)unused;
549 return SDL_murmur3_32(&key, sizeof(key), 0);
550}
551
// Key-match callback for tables keyed by pointer identity: two keys match
// only when they are the same pointer.
bool SDL_KeyMatchPointer(const void *a, const void *b, void *unused)
{
    const bool same_pointer = (a == b);

    (void)unused;
    return same_pointer;
}
557
558Uint32 SDL_HashString(const void *key, void *unused)
559{
560 (void)unused;
561 const char *str = (const char *)key;
562 return hash_string_djbxor(str, SDL_strlen(str));
563}
564
// Key-match callback for tables keyed by NUL-terminated strings. Two NULL
// keys match each other; a NULL key never matches a non-NULL one.
bool SDL_KeyMatchString(const void *a, const void *b, void *unused)
{
    (void)unused;

    // Same pointer (including both NULL): must match.
    if (a == b) {
        return true;
    }

    // Exactly one of them is NULL: cannot match.
    if (!a || !b) {
        return false;
    }

    const char *lhs = (const char *)a;
    const char *rhs = (const char *)b;

    // Cheap rejection on the first character before the full comparison.
    if (lhs[0] != rhs[0]) {
        return false;
    }

    // Check against actual string contents.
    return SDL_strcmp(lhs, rhs) == 0;
}
580
581// We assume we can fit the ID in the key directly
582SDL_COMPILE_TIME_ASSERT(SDL_HashID_KeySize, sizeof(Uint32) <= sizeof(const void *));
583
584Uint32 SDL_HashID(const void *key, void *unused)
585{
586 (void)unused;
587 return (Uint32)(uintptr_t)key;
588}
589
// Key-match callback for tables whose keys are IDs stored directly in the key
// pointer: two keys match when the pointer values are equal.
bool SDL_KeyMatchID(const void *a, const void *b, void *unused)
{
    const bool same_id = (a == b);

    (void)unused;
    return same_id;
}
595
// Nuke callback that releases the entry's key with SDL_free() and leaves the value alone.
void SDL_NukeFreeKey(const void *key, const void *value, void *unused)
{
    void *owned_key = (void *)key;

    (void)value;
    (void)unused;
    SDL_free(owned_key);
}
602
// Nuke callback that releases the entry's value with SDL_free() and leaves the key alone.
void SDL_NukeFreeValue(const void *key, const void *value, void *unused)
{
    void *owned_value = (void *)value;

    (void)key;
    (void)unused;
    SDL_free(owned_value);
}