Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Use IH context ID for signal lookup

This speeds up signal lookup when the IH ring entry includes a
valid context ID or partial context ID. Only if the context ID is
found to be invalid, fall back to an exhaustive search of all
signaled events.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

authored by

Felix Kuehling and committed by
Oded Gabbay
3f04f961 482f0777

+64 -16
+4 -3
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
··· 47 47 unsigned int pasid; 48 48 const struct cik_ih_ring_entry *ihre = 49 49 (const struct cik_ih_ring_entry *)ih_ring_entry; 50 + uint32_t context_id = ihre->data & 0xfffffff; 50 51 51 52 pasid = (ihre->ring_id & 0xffff0000) >> 16; 52 53 ··· 55 54 return; 56 55 57 56 if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) 58 - kfd_signal_event_interrupt(pasid, 0, 0); 57 + kfd_signal_event_interrupt(pasid, context_id, 28); 59 58 else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) 60 - kfd_signal_event_interrupt(pasid, 0, 0); 59 + kfd_signal_event_interrupt(pasid, context_id, 28); 61 60 else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) 62 - kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8); 61 + kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); 63 62 else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) 64 63 kfd_signal_hw_exception_event(pasid); 65 64 }
+60 -13
drivers/gpu/drm/amd/amdkfd/kfd_events.c
··· 53 53 uint64_t __user *user_address; 54 54 }; 55 55 56 - /* 57 - * For signal events, the event ID is used as the interrupt user data. 58 - * For SQ s_sendmsg interrupts, this is limited to 8 bits. 59 - */ 60 - 61 - #define INTERRUPT_DATA_BITS 8 62 56 63 57 static uint64_t *page_slots(struct kfd_signal_page *page) 64 58 { ··· 117 123 static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) 118 124 { 119 125 return idr_find(&p->event_idr, id); 126 + } 127 + 128 + /** 129 + * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID 130 + * @p: Pointer to struct kfd_process 131 + * @id: ID to look up 132 + * @bits: Number of valid bits in @id 133 + * 134 + * Finds the first signaled event with a matching partial ID. If no 135 + * matching signaled event is found, returns NULL. In that case the 136 + * caller should assume that the partial ID is invalid and do an 137 + * exhaustive search of all signaled events. 138 + * 139 + * If multiple events with the same partial ID signal at the same 140 + * time, they will be found one interrupt at a time, not necessarily 141 + * in the same order the interrupts occurred. As long as the number of 142 + * interrupts is correct, all signaled events will be seen by the 143 + * driver. 144 + */ 145 + static struct kfd_event *lookup_signaled_event_by_partial_id( 146 + struct kfd_process *p, uint32_t id, uint32_t bits) 147 + { 148 + struct kfd_event *ev; 149 + 150 + if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT) 151 + return NULL; 152 + 153 + /* Fast path for the common case that @id is not a partial ID 154 + * and we only need a single lookup. 155 + */ 156 + if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { 157 + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) 158 + return NULL; 159 + 160 + return idr_find(&p->event_idr, id); 161 + } 162 + 163 + /* General case for partial IDs: Iterate over all matching IDs 164 + * and find the first one that has signaled. 
165 + */ 166 + for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) { 167 + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) 168 + continue; 169 + 170 + ev = idr_find(&p->event_idr, id); 171 + } 172 + 173 + return ev; 120 174 } 121 175 122 176 static int create_signal_event(struct file *devkfd, ··· 427 385 void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, 428 386 uint32_t valid_id_bits) 429 387 { 430 - struct kfd_event *ev; 388 + struct kfd_event *ev = NULL; 431 389 432 390 /* 433 391 * Because we are called from arbitrary context (workqueue) as opposed ··· 441 399 442 400 mutex_lock(&p->event_mutex); 443 401 444 - if (valid_id_bits >= INTERRUPT_DATA_BITS) { 445 - /* Partial ID is a full ID. */ 446 - ev = lookup_event_by_id(p, partial_id); 402 + if (valid_id_bits) 403 + ev = lookup_signaled_event_by_partial_id(p, partial_id, 404 + valid_id_bits); 405 + if (ev) { 447 406 set_event_from_interrupt(p, ev); 448 407 } else if (p->signal_page) { 449 408 /* 450 - * Partial ID is in fact partial. For now we completely 451 - * ignore it, but we could use any bits we did receive to 452 - * search faster. 409 + * Partial ID lookup failed. Assume that the event ID 410 + * in the interrupt payload was invalid and do an 411 + * exhaustive search of signaled events. 453 412 */ 454 413 uint64_t *slots = page_slots(p->signal_page); 455 414 uint32_t id; 415 + 416 + if (valid_id_bits) 417 + pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", 418 + partial_id, valid_id_bits); 456 419 457 420 if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { 458 421 /* With relatively few events, it's faster to