Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf threads: Switch from rbtree to hashmap

The rbtree provides sorting of entries, but this ordering is unused. Switch to
using hashmap for O(1) rather than O(log n) find/insert/remove
complexity.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-7-irogers@google.com

authored by

Ian Rogers and committed by
Namhyung Kim
412a2ff4 93bb5b0d

+47 -105
+44 -102
tools/perf/util/threads.c
··· 3 3 #include "machine.h" 4 4 #include "thread.h" 5 5 6 - struct thread_rb_node { 7 - struct rb_node rb_node; 8 - struct thread *thread; 9 - }; 10 - 11 6 static struct threads_table_entry *threads__table(struct threads *threads, pid_t tid) 12 7 { 13 8 /* Cast it to handle tid == -1 */ 14 9 return &threads->table[(unsigned int)tid % THREADS__TABLE_SIZE]; 10 + } 11 + 12 + static size_t key_hash(long key, void *ctx __maybe_unused) 13 + { 14 + /* The table lookup removes low bit entropy, but this is just ignored here. */ 15 + return key; 16 + } 17 + 18 + static bool key_equal(long key1, long key2, void *ctx __maybe_unused) 19 + { 20 + return key1 == key2; 15 21 } 16 22 17 23 void threads__init(struct threads *threads) ··· 25 19 for (int i = 0; i < THREADS__TABLE_SIZE; i++) { 26 20 struct threads_table_entry *table = &threads->table[i]; 27 21 28 - table->entries = RB_ROOT_CACHED; 22 + hashmap__init(&table->shard, key_hash, key_equal, NULL); 29 23 init_rwsem(&table->lock); 30 - table->nr = 0; 31 24 table->last_match = NULL; 32 25 } 33 26 } ··· 37 32 for (int i = 0; i < THREADS__TABLE_SIZE; i++) { 38 33 struct threads_table_entry *table = &threads->table[i]; 39 34 35 + hashmap__clear(&table->shard); 40 36 exit_rwsem(&table->lock); 41 37 } 42 38 } ··· 50 44 struct threads_table_entry *table = &threads->table[i]; 51 45 52 46 down_read(&table->lock); 53 - nr += table->nr; 47 + nr += hashmap__size(&table->shard); 54 48 up_read(&table->lock); 55 49 } 56 50 return nr; ··· 92 86 struct thread *threads__find(struct threads *threads, pid_t tid) 93 87 { 94 88 struct threads_table_entry *table = threads__table(threads, tid); 95 - struct rb_node **p; 96 - struct thread *res = NULL; 89 + struct thread *res; 97 90 98 91 down_read(&table->lock); 99 92 res = __threads_table_entry__get_last_match(table, tid); 100 - if (res) 101 - return res; 102 - 103 - p = &table->entries.rb_root.rb_node; 104 - while (*p != NULL) { 105 - struct rb_node *parent = *p; 106 - struct thread *th = 
rb_entry(parent, struct thread_rb_node, rb_node)->thread; 107 - 108 - if (thread__tid(th) == tid) { 109 - res = thread__get(th); 110 - break; 111 - } 112 - 113 - if (tid < thread__tid(th)) 114 - p = &(*p)->rb_left; 115 - else 116 - p = &(*p)->rb_right; 93 + if (!res) { 94 + if (hashmap__find(&table->shard, tid, &res)) 95 + res = thread__get(res); 117 96 } 118 97 up_read(&table->lock); 119 98 if (res) ··· 109 118 struct thread *threads__findnew(struct threads *threads, pid_t pid, pid_t tid, bool *created) 110 119 { 111 120 struct threads_table_entry *table = threads__table(threads, tid); 112 - struct rb_node **p; 113 - struct rb_node *parent = NULL; 114 121 struct thread *res = NULL; 115 - struct thread_rb_node *nd; 116 - bool leftmost = true; 117 122 118 123 *created = false; 119 124 down_write(&table->lock); 120 - p = &table->entries.rb_root.rb_node; 121 - while (*p != NULL) { 122 - struct thread *th; 123 - 124 - parent = *p; 125 - th = rb_entry(parent, struct thread_rb_node, rb_node)->thread; 126 - 127 - if (thread__tid(th) == tid) { 128 - __threads_table_entry__set_last_match(table, th); 129 - res = thread__get(th); 130 - goto out_unlock; 131 - } 132 - 133 - if (tid < thread__tid(th)) 134 - p = &(*p)->rb_left; 135 - else { 136 - leftmost = false; 137 - p = &(*p)->rb_right; 138 - } 139 - } 140 - nd = malloc(sizeof(*nd)); 141 - if (nd == NULL) 142 - goto out_unlock; 143 125 res = thread__new(pid, tid); 144 - if (!res) 145 - free(nd); 146 - else { 147 - *created = true; 148 - nd->thread = thread__get(res); 149 - rb_link_node(&nd->rb_node, parent, p); 150 - rb_insert_color_cached(&nd->rb_node, &table->entries, leftmost); 151 - ++table->nr; 152 - __threads_table_entry__set_last_match(table, res); 126 + if (res) { 127 + if (hashmap__add(&table->shard, tid, res)) { 128 + /* Add failed. Assume a race so find other entry. 
*/ 129 + thread__put(res); 130 + res = NULL; 131 + if (hashmap__find(&table->shard, tid, &res)) 132 + res = thread__get(res); 133 + } else { 134 + res = thread__get(res); 135 + *created = true; 136 + } 137 + if (res) 138 + __threads_table_entry__set_last_match(table, res); 153 139 } 154 - out_unlock: 155 140 up_write(&table->lock); 156 141 return res; 157 142 } ··· 136 169 { 137 170 for (int i = 0; i < THREADS__TABLE_SIZE; i++) { 138 171 struct threads_table_entry *table = &threads->table[i]; 139 - struct rb_node *nd; 172 + struct hashmap_entry *cur, *tmp; 173 + size_t bkt; 140 174 141 175 down_write(&table->lock); 142 176 __threads_table_entry__set_last_match(table, NULL); 143 - nd = rb_first_cached(&table->entries); 144 - while (nd) { 145 - struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node); 177 + hashmap__for_each_entry_safe((&table->shard), cur, tmp, bkt) { 178 + struct thread *old_value; 146 179 147 - nd = rb_next(nd); 148 - thread__put(trb->thread); 149 - rb_erase_cached(&trb->rb_node, &table->entries); 150 - RB_CLEAR_NODE(&trb->rb_node); 151 - --table->nr; 152 - 153 - free(trb); 180 + hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value); 181 + thread__put(old_value); 154 182 } 155 - assert(table->nr == 0); 156 183 up_write(&table->lock); 157 184 } 158 185 } 159 186 160 187 void threads__remove(struct threads *threads, struct thread *thread) 161 188 { 162 - struct rb_node **p; 163 189 struct threads_table_entry *table = threads__table(threads, thread__tid(thread)); 164 - pid_t tid = thread__tid(thread); 190 + struct thread *old_value; 165 191 166 192 down_write(&table->lock); 167 193 if (table->last_match && RC_CHK_EQUAL(table->last_match, thread)) 168 194 __threads_table_entry__set_last_match(table, NULL); 169 195 170 - p = &table->entries.rb_root.rb_node; 171 - while (*p != NULL) { 172 - struct rb_node *parent = *p; 173 - struct thread_rb_node *nd = rb_entry(parent, struct thread_rb_node, rb_node); 174 - struct 
thread *th = nd->thread; 175 - 176 - if (RC_CHK_EQUAL(th, thread)) { 177 - thread__put(nd->thread); 178 - rb_erase_cached(&nd->rb_node, &table->entries); 179 - RB_CLEAR_NODE(&nd->rb_node); 180 - --table->nr; 181 - free(nd); 182 - break; 183 - } 184 - 185 - if (tid < thread__tid(th)) 186 - p = &(*p)->rb_left; 187 - else 188 - p = &(*p)->rb_right; 189 - } 196 + hashmap__delete(&table->shard, thread__tid(thread), /*old_key=*/NULL, &old_value); 197 + thread__put(old_value); 190 198 up_write(&table->lock); 191 199 } 192 200 ··· 171 229 { 172 230 for (int i = 0; i < THREADS__TABLE_SIZE; i++) { 173 231 struct threads_table_entry *table = &threads->table[i]; 174 - struct rb_node *nd; 232 + struct hashmap_entry *cur; 233 + size_t bkt; 175 234 176 235 down_read(&table->lock); 177 - for (nd = rb_first_cached(&table->entries); nd; nd = rb_next(nd)) { 178 - struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node); 179 - int rc = fn(trb->thread, data); 236 + hashmap__for_each_entry((&table->shard), cur, bkt) { 237 + int rc = fn((struct thread *)cur->pvalue, data); 180 238 181 239 if (rc != 0) { 182 240 up_read(&table->lock);
+3 -3
tools/perf/util/threads.h
··· 2 2 #ifndef __PERF_THREADS_H 3 3 #define __PERF_THREADS_H 4 4 5 - #include <linux/rbtree.h> 5 + #include "hashmap.h" 6 6 #include "rwsem.h" 7 7 8 8 struct thread; ··· 11 11 #define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) 12 12 13 13 struct threads_table_entry { 14 - struct rb_root_cached entries; 14 + /* Key is tid, value is struct thread. */ 15 + struct hashmap shard; 15 16 struct rw_semaphore lock; 16 - unsigned int nr; 17 17 struct thread *last_match; 18 18 }; 19 19