Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Simplify event ID and signal slot management

Signal slots are identical to event IDs.

Replace the used_slot_bitmap and events hash table with an IDR to
allocate and lookup event IDs and signal slots more efficiently.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Authored by Felix Kuehling and committed by Oded Gabbay
482f0777 50cb7dd9

+80 -170
+70 -160
drivers/gpu/drm/amd/amdkfd/kfd_events.c
··· 41 41 bool activated; /* Becomes true when event is signaled */ 42 42 }; 43 43 44 - #define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT 45 - #define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE) 46 - 47 44 /* 48 - * Over-complicated pooled allocator for event notification slots. 49 - * 50 45 * Each signal event needs a 64-bit signal slot where the signaler will write 51 - * a 1 before sending an interrupt.l (This is needed because some interrupts 46 + * a 1 before sending an interrupt. (This is needed because some interrupts 52 47 * do not contain enough spare data bits to identify an event.) 53 - * We get whole pages from vmalloc and map them to the process VA. 54 - * Individual signal events are then allocated a slot in a page. 48 + * We get whole pages and map them to the process VA. 49 + * Individual signal events use their event_id as slot index. 55 50 */ 56 - 57 51 struct kfd_signal_page { 58 52 uint64_t *kernel_address; 59 53 uint64_t __user *user_address; 60 - unsigned int free_slots; 61 - unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS]; 62 54 }; 63 55 64 56 /* ··· 65 73 return page->kernel_address; 66 74 } 67 75 68 - static bool allocate_free_slot(struct kfd_process *process, 69 - unsigned int *out_slot_index) 70 - { 71 - struct kfd_signal_page *page = process->signal_page; 72 - unsigned int slot; 73 - 74 - if (!page || page->free_slots == 0) { 75 - pr_debug("No free event signal slots were found for process %p\n", 76 - process); 77 - 78 - return false; 79 - } 80 - 81 - slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE); 82 - 83 - __set_bit(slot, page->used_slot_bitmap); 84 - page->free_slots--; 85 - 86 - page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; 87 - 88 - *out_slot_index = slot; 89 - 90 - pr_debug("Allocated event signal slot in page %p, slot %d\n", 91 - page, slot); 92 - 93 - return true; 94 - } 95 - 96 76 static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) 97 77 { 98 78 void *backing_store; ··· 73 109 page 
= kzalloc(sizeof(*page), GFP_KERNEL); 74 110 if (!page) 75 111 return NULL; 76 - 77 - page->free_slots = SLOTS_PER_PAGE; 78 112 79 113 backing_store = (void *) __get_free_pages(GFP_KERNEL, 80 114 get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); ··· 94 132 return NULL; 95 133 } 96 134 97 - static bool allocate_event_notification_slot(struct kfd_process *p, 98 - unsigned int *signal_slot_index) 135 + static int allocate_event_notification_slot(struct kfd_process *p, 136 + struct kfd_event *ev) 99 137 { 138 + int id; 139 + 100 140 if (!p->signal_page) { 101 141 p->signal_page = allocate_signal_page(p); 102 142 if (!p->signal_page) 103 - return false; 143 + return -ENOMEM; 104 144 } 105 145 106 - return allocate_free_slot(p, signal_slot_index); 107 - } 146 + id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT, 147 + GFP_KERNEL); 148 + if (id < 0) 149 + return id; 108 150 109 - /* Assumes that the process's event_mutex is locked. */ 110 - static void release_event_notification_slot(struct kfd_signal_page *page, 111 - size_t slot_index) 112 - { 113 - __clear_bit(slot_index, page->used_slot_bitmap); 114 - page->free_slots++; 151 + ev->event_id = id; 152 + page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT; 115 153 116 - /* We don't free signal pages, they are retained by the process 117 - * and reused until it exits. 118 - */ 154 + return 0; 119 155 } 120 156 121 157 /* ··· 122 162 */ 123 163 static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) 124 164 { 125 - struct kfd_event *ev; 126 - 127 - hash_for_each_possible(p->events, ev, events, id) 128 - if (ev->event_id == id) 129 - return ev; 130 - 131 - return NULL; 132 - } 133 - 134 - /* 135 - * Produce a kfd event id for a nonsignal event. 136 - * These are arbitrary numbers, so we do a sequential search through 137 - * the hash table for an unused number. 
138 - */ 139 - static u32 make_nonsignal_event_id(struct kfd_process *p) 140 - { 141 - u32 id; 142 - 143 - for (id = p->next_nonsignal_event_id; 144 - id < KFD_LAST_NONSIGNAL_EVENT_ID && 145 - lookup_event_by_id(p, id); 146 - id++) 147 - ; 148 - 149 - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { 150 - 151 - /* 152 - * What if id == LAST_NONSIGNAL_EVENT_ID - 1? 153 - * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so 154 - * the first loop fails immediately and we proceed with the 155 - * wraparound loop below. 156 - */ 157 - p->next_nonsignal_event_id = id + 1; 158 - 159 - return id; 160 - } 161 - 162 - for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; 163 - id < KFD_LAST_NONSIGNAL_EVENT_ID && 164 - lookup_event_by_id(p, id); 165 - id++) 166 - ; 167 - 168 - 169 - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { 170 - p->next_nonsignal_event_id = id + 1; 171 - return id; 172 - } 173 - 174 - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; 175 - return 0; 176 - } 177 - 178 - static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, 179 - unsigned int signal_slot) 180 - { 181 - return lookup_event_by_id(p, signal_slot); 165 + return idr_find(&p->event_idr, id); 182 166 } 183 167 184 168 static int create_signal_event(struct file *devkfd, 185 169 struct kfd_process *p, 186 170 struct kfd_event *ev) 187 171 { 172 + int ret; 173 + 188 174 if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { 189 175 if (!p->signal_event_limit_reached) { 190 176 pr_warn("Signal event wasn't created because limit was reached\n"); 191 177 p->signal_event_limit_reached = true; 192 178 } 193 - return -ENOMEM; 179 + return -ENOSPC; 194 180 } 195 181 196 - if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) { 182 + ret = allocate_event_notification_slot(p, ev); 183 + if (ret) { 197 184 pr_warn("Signal event wasn't created because out of kernel memory\n"); 198 - return -ENOMEM; 185 + return ret; 199 186 } 200 187 201 188 p->signal_event_count++; 202 189 203 - 
ev->user_signal_address = 204 - &p->signal_page->user_address[ev->signal_slot_index]; 205 - 206 - ev->event_id = ev->signal_slot_index; 207 - 190 + ev->user_signal_address = &p->signal_page->user_address[ev->event_id]; 208 191 pr_debug("Signal event number %zu created with id %d, address %p\n", 209 192 p->signal_event_count, ev->event_id, 210 193 ev->user_signal_address); ··· 155 252 return 0; 156 253 } 157 254 158 - /* 159 - * No non-signal events are supported yet. 160 - * We create them as events that never signal. 161 - * Set event calls from user-mode are failed. 162 - */ 163 255 static int create_other_event(struct kfd_process *p, struct kfd_event *ev) 164 256 { 165 - ev->event_id = make_nonsignal_event_id(p); 166 - if (ev->event_id == 0) 167 - return -ENOMEM; 257 + /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an 258 + * intentional integer overflow to -1 without a compiler 259 + * warning. idr_alloc treats a negative value as "maximum 260 + * signed integer". 261 + */ 262 + int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, 263 + (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, 264 + GFP_KERNEL); 265 + 266 + if (id < 0) 267 + return id; 268 + ev->event_id = id; 168 269 169 270 return 0; 170 271 } ··· 176 269 void kfd_event_init_process(struct kfd_process *p) 177 270 { 178 271 mutex_init(&p->event_mutex); 179 - hash_init(p->events); 272 + idr_init(&p->event_idr); 180 273 p->signal_page = NULL; 181 - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; 182 274 p->signal_event_count = 0; 183 275 } 184 276 ··· 190 284 waiter->event = NULL; 191 285 wake_up_all(&ev->wq); 192 286 193 - if ((ev->type == KFD_EVENT_TYPE_SIGNAL || 194 - ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) { 195 - release_event_notification_slot(p->signal_page, 196 - ev->signal_slot_index); 287 + if (ev->type == KFD_EVENT_TYPE_SIGNAL || 288 + ev->type == KFD_EVENT_TYPE_DEBUG) 197 289 p->signal_event_count--; 198 - } 199 290 200 - hash_del(&ev->events); 291 
+ idr_remove(&p->event_idr, ev->event_id); 201 292 kfree(ev); 202 293 } 203 294 204 295 static void destroy_events(struct kfd_process *p) 205 296 { 206 297 struct kfd_event *ev; 207 - struct hlist_node *tmp; 208 - unsigned int hash_bkt; 298 + uint32_t id; 209 299 210 - hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) 300 + idr_for_each_entry(&p->event_idr, ev, id) 211 301 destroy_event(p, ev); 302 + idr_destroy(&p->event_idr); 212 303 } 213 304 214 305 /* ··· 268 365 if (!ret) { 269 366 *event_page_offset = KFD_MMAP_EVENTS_MASK; 270 367 *event_page_offset <<= PAGE_SHIFT; 271 - *event_slot_index = ev->signal_slot_index; 368 + *event_slot_index = ev->event_id; 272 369 } 273 370 break; 274 371 default: ··· 277 374 } 278 375 279 376 if (!ret) { 280 - hash_add(p->events, &ev->events, ev->event_id); 281 - 282 377 *event_id = ev->event_id; 283 378 *event_trigger_data = ev->event_id; 284 379 } else { ··· 370 469 371 470 static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) 372 471 { 373 - page_slots(p->signal_page)[ev->signal_slot_index] = 374 - UNSIGNALED_EVENT_SLOT; 375 - } 376 - 377 - static bool is_slot_signaled(struct kfd_process *p, unsigned int index) 378 - { 379 - if (!p->signal_page) 380 - return false; 381 - else 382 - return page_slots(p->signal_page)[index] != 383 - UNSIGNALED_EVENT_SLOT; 472 + page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; 384 473 } 385 474 386 475 static void set_event_from_interrupt(struct kfd_process *p, ··· 409 518 * ignore it, but we could use any bits we did receive to 410 519 * search faster. 
411 520 */ 412 - unsigned int i; 521 + uint64_t *slots = page_slots(p->signal_page); 522 + uint32_t id; 413 523 414 - for (i = 0; i < SLOTS_PER_PAGE; i++) 415 - if (is_slot_signaled(p, i)) { 416 - ev = lookup_event_by_page_slot(p, i); 417 - set_event_from_interrupt(p, ev); 524 + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { 525 + /* With relatively few events, it's faster to 526 + * iterate over the event IDR 527 + */ 528 + idr_for_each_entry(&p->event_idr, ev, id) { 529 + if (id >= KFD_SIGNAL_EVENT_LIMIT) 530 + break; 531 + 532 + if (slots[id] != UNSIGNALED_EVENT_SLOT) 533 + set_event_from_interrupt(p, ev); 418 534 } 535 + } else { 536 + /* With relatively many events, it's faster to 537 + * iterate over the signal slots and lookup 538 + * only signaled events from the IDR. 539 + */ 540 + for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) 541 + if (slots[id] != UNSIGNALED_EVENT_SLOT) { 542 + ev = lookup_event_by_id(p, id); 543 + set_event_from_interrupt(p, ev); 544 + } 545 + } 419 546 } 420 547 421 548 mutex_unlock(&p->event_mutex); ··· 745 836 { 746 837 struct kfd_hsa_memory_exception_data *ev_data; 747 838 struct kfd_event *ev; 748 - int bkt; 839 + uint32_t id; 749 840 bool send_signal = true; 750 841 751 842 ev_data = (struct kfd_hsa_memory_exception_data *) event_data; 752 843 753 - hash_for_each(p->events, bkt, ev, events) 844 + id = KFD_FIRST_NONSIGNAL_EVENT_ID; 845 + idr_for_each_entry_continue(&p->event_idr, ev, id) 754 846 if (ev->type == type) { 755 847 send_signal = false; 756 848 dev_dbg(kfd_device,
+7 -7
drivers/gpu/drm/amd/amdkfd/kfd_events.h
··· 31 31 #include "kfd_priv.h" 32 32 #include <uapi/linux/kfd_ioctl.h> 33 33 34 - #define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U 35 - #define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK 36 - #define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX 34 + /* 35 + * IDR supports non-negative integer IDs. Small IDs are used for 36 + * signal events to match their signal slot. Use the upper half of the 37 + * ID space for non-signal events. 38 + */ 39 + #define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1) 40 + #define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX 37 41 38 42 /* 39 43 * Written into kfd_signal_slot_t to indicate that the event is not signaled. ··· 51 47 struct signal_page; 52 48 53 49 struct kfd_event { 54 - /* All events in process, rooted at kfd_process.events. */ 55 - struct hlist_node events; 56 - 57 50 u32 event_id; 58 51 59 52 bool signaled; ··· 61 60 wait_queue_head_t wq; /* List of event waiters. */ 62 61 63 62 /* Only for signal events. */ 64 - unsigned int signal_slot_index; 65 63 uint64_t __user *user_signal_address; 66 64 67 65 /* type specific data */
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 31 31 #include <linux/workqueue.h> 32 32 #include <linux/spinlock.h> 33 33 #include <linux/kfd_ioctl.h> 34 + #include <linux/idr.h> 34 35 #include <kgd_kfd_interface.h> 35 36 36 37 #include "amd_shared.h" ··· 539 538 540 539 /* Event-related data */ 541 540 struct mutex event_mutex; 542 - /* All events in process hashed by ID, linked on kfd_event.events. */ 543 - DECLARE_HASHTABLE(events, 4); 541 + /* Event ID allocator and lookup */ 542 + struct idr event_idr; 544 543 /* Event page */ 545 544 struct kfd_signal_page *signal_page; 546 - u32 next_nonsignal_event_id; 547 545 size_t signal_event_count; 548 546 bool signal_event_limit_reached; 549 547 };