Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Simplify events page allocator

The first event page is always big enough to handle all events.
Handling of multiple event pages is not supported by user mode, and
not necessary.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

authored by

Felix Kuehling and committed by
Oded Gabbay
50cb7dd9 74e40716

+73 -135
+71 -132
drivers/gpu/drm/amd/amdkfd/kfd_events.c
··· 41 41 bool activated; /* Becomes true when event is signaled */ 42 42 }; 43 43 44 + #define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT 45 + #define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE) 46 + 44 47 /* 45 48 * Over-complicated pooled allocator for event notification slots. 46 49 * ··· 54 51 * Individual signal events are then allocated a slot in a page. 55 52 */ 56 53 57 - struct signal_page { 58 - struct list_head event_pages; /* kfd_process.signal_event_pages */ 54 + struct kfd_signal_page { 59 55 uint64_t *kernel_address; 60 56 uint64_t __user *user_address; 61 - uint32_t page_index; /* Index into the mmap aperture. */ 62 57 unsigned int free_slots; 63 - unsigned long used_slot_bitmap[0]; 58 + unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS]; 64 59 }; 65 - 66 - #define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT 67 - #define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE) 68 - #define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1) 69 - #define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \ 70 - SLOT_BITMAP_SIZE * sizeof(long)) 71 60 72 61 /* 73 62 * For signal events, the event ID is used as the interrupt user data. 
··· 67 72 */ 68 73 69 74 #define INTERRUPT_DATA_BITS 8 70 - #define SIGNAL_EVENT_ID_SLOT_SHIFT 0 71 75 72 - static uint64_t *page_slots(struct signal_page *page) 76 + static uint64_t *page_slots(struct kfd_signal_page *page) 73 77 { 74 78 return page->kernel_address; 75 79 } 76 80 77 81 static bool allocate_free_slot(struct kfd_process *process, 78 - struct signal_page **out_page, 79 - unsigned int *out_slot_index) 82 + unsigned int *out_slot_index) 80 83 { 81 - struct signal_page *page; 84 + struct kfd_signal_page *page = process->signal_page; 85 + unsigned int slot; 82 86 83 - list_for_each_entry(page, &process->signal_event_pages, event_pages) { 84 - if (page->free_slots > 0) { 85 - unsigned int slot = 86 - find_first_zero_bit(page->used_slot_bitmap, 87 - SLOTS_PER_PAGE); 87 + if (!page || page->free_slots == 0) { 88 + pr_debug("No free event signal slots were found for process %p\n", 89 + process); 88 90 89 - __set_bit(slot, page->used_slot_bitmap); 90 - page->free_slots--; 91 - 92 - page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; 93 - 94 - *out_page = page; 95 - *out_slot_index = slot; 96 - 97 - pr_debug("Allocated event signal slot in page %p, slot %d\n", 98 - page, slot); 99 - 100 - return true; 101 - } 91 + return false; 102 92 } 103 93 104 - pr_debug("No free event signal slots were found for process %p\n", 105 - process); 94 + slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE); 106 95 107 - return false; 96 + __set_bit(slot, page->used_slot_bitmap); 97 + page->free_slots--; 98 + 99 + page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; 100 + 101 + *out_slot_index = slot; 102 + 103 + pr_debug("Allocated event signal slot in page %p, slot %d\n", 104 + page, slot); 105 + 106 + return true; 108 107 } 109 108 110 - #define list_tail_entry(head, type, member) \ 111 - list_entry((head)->prev, type, member) 112 - 113 - static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) 109 + static struct kfd_signal_page 
*allocate_signal_page(struct kfd_process *p) 114 110 { 115 111 void *backing_store; 116 - struct signal_page *page; 112 + struct kfd_signal_page *page; 117 113 118 - page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL); 114 + page = kzalloc(sizeof(*page), GFP_KERNEL); 119 115 if (!page) 120 - goto fail_alloc_signal_page; 116 + return NULL; 121 117 122 118 page->free_slots = SLOTS_PER_PAGE; 123 119 124 - backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, 120 + backing_store = (void *) __get_free_pages(GFP_KERNEL, 125 121 get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); 126 122 if (!backing_store) 127 123 goto fail_alloc_signal_store; 128 124 129 - /* prevent user-mode info leaks */ 125 + /* Initialize all events to unsignaled */ 130 126 memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, 131 - KFD_SIGNAL_EVENT_LIMIT * 8); 127 + KFD_SIGNAL_EVENT_LIMIT * 8); 132 128 133 129 page->kernel_address = backing_store; 134 - 135 - if (list_empty(&p->signal_event_pages)) 136 - page->page_index = 0; 137 - else 138 - page->page_index = list_tail_entry(&p->signal_event_pages, 139 - struct signal_page, 140 - event_pages)->page_index + 1; 141 - 142 130 pr_debug("Allocated new event signal page at %p, for process %p\n", 143 131 page, p); 144 - pr_debug("Page index is %d\n", page->page_index); 145 132 146 - list_add(&page->event_pages, &p->signal_event_pages); 147 - 148 - return true; 133 + return page; 149 134 150 135 fail_alloc_signal_store: 151 136 kfree(page); 152 - fail_alloc_signal_page: 153 - return false; 137 + return NULL; 154 138 } 155 139 156 - static bool allocate_event_notification_slot(struct file *devkfd, 157 - struct kfd_process *p, 158 - struct signal_page **page, 159 - unsigned int *signal_slot_index) 140 + static bool allocate_event_notification_slot(struct kfd_process *p, 141 + unsigned int *signal_slot_index) 160 142 { 161 - bool ret; 162 - 163 - ret = allocate_free_slot(p, page, signal_slot_index); 164 - if (!ret) { 165 - ret = allocate_signal_page(devkfd, p); 
166 - if (ret) 167 - ret = allocate_free_slot(p, page, signal_slot_index); 143 + if (!p->signal_page) { 144 + p->signal_page = allocate_signal_page(p); 145 + if (!p->signal_page) 146 + return false; 168 147 } 169 148 170 - return ret; 149 + return allocate_free_slot(p, signal_slot_index); 171 150 } 172 151 173 152 /* Assumes that the process's event_mutex is locked. */ 174 - static void release_event_notification_slot(struct signal_page *page, 153 + static void release_event_notification_slot(struct kfd_signal_page *page, 175 154 size_t slot_index) 176 155 { 177 156 __clear_bit(slot_index, page->used_slot_bitmap); ··· 154 185 /* We don't free signal pages, they are retained by the process 155 186 * and reused until it exits. 156 187 */ 157 - } 158 - 159 - static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, 160 - unsigned int page_index) 161 - { 162 - struct signal_page *page; 163 - 164 - /* 165 - * This is safe because we don't delete signal pages until the 166 - * process exits. 
167 - */ 168 - list_for_each_entry(page, &p->signal_event_pages, event_pages) 169 - if (page->page_index == page_index) 170 - return page; 171 - 172 - return NULL; 173 188 } 174 189 175 190 /* ··· 169 216 return ev; 170 217 171 218 return NULL; 172 - } 173 - 174 - static u32 make_signal_event_id(struct signal_page *page, 175 - unsigned int signal_slot_index) 176 - { 177 - return page->page_index | 178 - (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); 179 219 } 180 220 181 221 /* ··· 216 270 } 217 271 218 272 static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, 219 - struct signal_page *page, 220 273 unsigned int signal_slot) 221 274 { 222 - return lookup_event_by_id(p, make_signal_event_id(page, signal_slot)); 275 + return lookup_event_by_id(p, signal_slot); 223 276 } 224 277 225 278 static int create_signal_event(struct file *devkfd, ··· 233 288 return -ENOMEM; 234 289 } 235 290 236 - if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, 237 - &ev->signal_slot_index)) { 291 + if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) { 238 292 pr_warn("Signal event wasn't created because out of kernel memory\n"); 239 293 return -ENOMEM; 240 294 } ··· 241 297 p->signal_event_count++; 242 298 243 299 ev->user_signal_address = 244 - &ev->signal_page->user_address[ev->signal_slot_index]; 300 + &p->signal_page->user_address[ev->signal_slot_index]; 245 301 246 - ev->event_id = make_signal_event_id(ev->signal_page, 247 - ev->signal_slot_index); 302 + ev->event_id = ev->signal_slot_index; 248 303 249 304 pr_debug("Signal event number %zu created with id %d, address %p\n", 250 305 p->signal_event_count, ev->event_id, ··· 270 327 { 271 328 mutex_init(&p->event_mutex); 272 329 hash_init(p->events); 273 - INIT_LIST_HEAD(&p->signal_event_pages); 330 + p->signal_page = NULL; 274 331 p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; 275 332 p->signal_event_count = 0; 276 333 } ··· 284 341 waiter->event = NULL; 285 342 
wake_up_all(&ev->wq); 286 343 287 - if (ev->signal_page) { 288 - release_event_notification_slot(ev->signal_page, 344 + if ((ev->type == KFD_EVENT_TYPE_SIGNAL || 345 + ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) { 346 + release_event_notification_slot(p->signal_page, 289 347 ev->signal_slot_index); 290 348 p->signal_event_count--; 291 349 } ··· 309 365 * We assume that the process is being destroyed and there is no need to 310 366 * unmap the pages or keep bookkeeping data in order. 311 367 */ 312 - static void shutdown_signal_pages(struct kfd_process *p) 368 + static void shutdown_signal_page(struct kfd_process *p) 313 369 { 314 - struct signal_page *page, *tmp; 370 + struct kfd_signal_page *page = p->signal_page; 315 371 316 - list_for_each_entry_safe(page, tmp, &p->signal_event_pages, 317 - event_pages) { 372 + if (page) { 318 373 free_pages((unsigned long)page->kernel_address, 319 374 get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); 320 375 kfree(page); ··· 323 380 void kfd_event_free_process(struct kfd_process *p) 324 381 { 325 382 destroy_events(p); 326 - shutdown_signal_pages(p); 383 + shutdown_signal_page(p); 327 384 } 328 385 329 386 static bool event_can_be_gpu_signaled(const struct kfd_event *ev) ··· 363 420 case KFD_EVENT_TYPE_DEBUG: 364 421 ret = create_signal_event(devkfd, p, ev); 365 422 if (!ret) { 366 - *event_page_offset = (ev->signal_page->page_index | 367 - KFD_MMAP_EVENTS_MASK); 423 + *event_page_offset = KFD_MMAP_EVENTS_MASK; 368 424 *event_page_offset <<= PAGE_SHIFT; 369 425 *event_slot_index = ev->signal_slot_index; 370 426 } ··· 469 527 470 528 static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) 471 529 { 472 - page_slots(ev->signal_page)[ev->signal_slot_index] = 530 + page_slots(p->signal_page)[ev->signal_slot_index] = 473 531 UNSIGNALED_EVENT_SLOT; 474 532 } 475 533 476 - static bool is_slot_signaled(struct signal_page *page, unsigned int index) 534 + static bool is_slot_signaled(struct kfd_process *p, unsigned int 
index) 477 535 { 478 - return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT; 536 + if (!p->signal_page) 537 + return false; 538 + else 539 + return page_slots(p->signal_page)[index] != 540 + UNSIGNALED_EVENT_SLOT; 479 541 } 480 542 481 543 static void set_event_from_interrupt(struct kfd_process *p, ··· 512 566 /* Partial ID is a full ID. */ 513 567 ev = lookup_event_by_id(p, partial_id); 514 568 set_event_from_interrupt(p, ev); 515 - } else { 569 + } else if (p->signal_page) { 516 570 /* 517 571 * Partial ID is in fact partial. For now we completely 518 572 * ignore it, but we could use any bits we did receive to 519 573 * search faster. 520 574 */ 521 - struct signal_page *page; 522 575 unsigned int i; 523 576 524 - list_for_each_entry(page, &p->signal_event_pages, event_pages) 525 - for (i = 0; i < SLOTS_PER_PAGE; i++) 526 - if (is_slot_signaled(page, i)) { 527 - ev = lookup_event_by_page_slot(p, 528 - page, i); 529 - set_event_from_interrupt(p, ev); 530 - } 577 + for (i = 0; i < SLOTS_PER_PAGE; i++) 578 + if (is_slot_signaled(p, i)) { 579 + ev = lookup_event_by_page_slot(p, i); 580 + set_event_from_interrupt(p, ev); 581 + } 531 582 } 532 583 533 584 mutex_unlock(&p->event_mutex); ··· 789 846 int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) 790 847 { 791 848 792 - unsigned int page_index; 793 849 unsigned long pfn; 794 - struct signal_page *page; 850 + struct kfd_signal_page *page; 795 851 796 852 /* check required size is logical */ 797 853 if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != ··· 799 857 return -EINVAL; 800 858 } 801 859 802 - page_index = vma->vm_pgoff; 803 - 804 - page = lookup_signal_page_by_index(p, page_index); 860 + page = p->signal_page; 805 861 if (!page) { 806 862 /* Probably KFD bug, but mmap is user-accessible. */ 807 - pr_debug("Signal page could not be found for page_index %u\n", 808 - page_index); 863 + pr_debug("Signal page could not be found\n"); 809 864 return -EINVAL; 810 865 } 811 866
-1
drivers/gpu/drm/amd/amdkfd/kfd_events.h
··· 60 60 wait_queue_head_t wq; /* List of event waiters. */ 61 61 62 62 /* Only for signal events. */ 63 - struct signal_page *signal_page; 64 63 unsigned int signal_slot_index; 65 64 uint64_t __user *user_signal_address; 66 65
+2 -2
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 540 540 struct mutex event_mutex; 541 541 /* All events in process hashed by ID, linked on kfd_event.events. */ 542 542 DECLARE_HASHTABLE(events, 4); 543 - /* struct slot_page_header.event_pages */ 544 - struct list_head signal_event_pages; 543 + /* Event page */ 544 + struct kfd_signal_page *signal_page; 545 545 u32 next_nonsignal_event_id; 546 546 size_t signal_event_count; 547 547 bool signal_event_limit_reached;