Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Add interrupt handling module

This patch adds the interrupt handling module, kfd_interrupt.c, and its
related members in different data structures to the amdkfd driver.

The amdkfd interrupt module maintains an internal interrupt ring
per amdkfd device. The internal interrupt ring contains interrupts
that need further handling. The extra handling is deferred to
a later time through a workqueue.

There's no acknowledgment for the interrupts we use. The hardware
simply queues a new interrupt each time without waiting.

The fixed-size internal queue means that it's possible for us to lose
interrupts because we have no back-pressure to the hardware.

However, only interrupts that are "wanted" by amdkfd are copied into
the amdkfd s/w interrupt ring, in order to minimize the chances
for overflow of the ring.

Signed-off-by: Andrew Lewycky <Andrew.Lewycky@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

authored by

Andrew Lewycky and committed by
Oded Gabbay
2249d558 d36b94fc

+235 -1
+1
drivers/gpu/drm/amd/amdkfd/Makefile
··· 12 12 kfd_kernel_queue_vi.o kfd_packet_manager.o \ 13 13 kfd_process_queue_manager.o kfd_device_queue_manager.o \ 14 14 kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ 15 + kfd_interrupt.o 15 16 16 17 obj-$(CONFIG_HSA_AMD) += amdkfd.o
+21 -1
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 235 235 goto kfd_topology_add_device_error; 236 236 } 237 237 238 + if (kfd_interrupt_init(kfd)) { 239 + dev_err(kfd_device, 240 + "Error initializing interrupts for device (%x:%x)\n", 241 + kfd->pdev->vendor, kfd->pdev->device); 242 + goto kfd_interrupt_error; 243 + } 244 + 238 245 if (!device_iommu_pasid_init(kfd)) { 239 246 dev_err(kfd_device, 240 247 "Error initializing iommuv2 for device (%x:%x)\n", ··· 280 273 device_queue_manager_error: 281 274 amd_iommu_free_device(kfd->pdev); 282 275 device_iommu_pasid_error: 276 + kfd_interrupt_exit(kfd); 277 + kfd_interrupt_error: 283 278 kfd_topology_remove_device(kfd); 284 279 kfd_topology_add_device_error: 285 280 kfd_gtt_sa_fini(kfd); ··· 299 290 if (kfd->init_complete) { 300 291 device_queue_manager_uninit(kfd->dqm); 301 292 amd_iommu_free_device(kfd->pdev); 293 + kfd_interrupt_exit(kfd); 302 294 kfd_topology_remove_device(kfd); 303 295 kfd_gtt_sa_fini(kfd); 304 296 kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); ··· 343 333 /* This is called directly from KGD at ISR. */ 344 334 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) 345 335 { 346 - /* Process interrupts / schedule work as necessary */ 336 + if (!kfd->init_complete) 337 + return; 338 + 339 + spin_lock(&kfd->interrupt_lock); 340 + 341 + if (kfd->interrupts_active 342 + && interrupt_is_wanted(kfd, ih_ring_entry) 343 + && enqueue_ih_ring_entry(kfd, ih_ring_entry)) 344 + schedule_work(&kfd->interrupt_work); 345 + 346 + spin_unlock(&kfd->interrupt_lock); 347 347 } 348 348 349 349 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+15
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 522 522 return 0; 523 523 } 524 524 525 + static void init_interrupts(struct device_queue_manager *dqm) 526 + { 527 + unsigned int i; 528 + 529 + BUG_ON(dqm == NULL); 530 + 531 + for (i = 0 ; i < get_pipes_num(dqm) ; i++) 532 + dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, 533 + i + get_first_pipe(dqm)); 534 + } 535 + 525 536 static int init_scheduler(struct device_queue_manager *dqm) 526 537 { 527 538 int retval; ··· 592 581 593 582 static int start_nocpsch(struct device_queue_manager *dqm) 594 583 { 584 + init_interrupts(dqm); 595 585 return 0; 596 586 } 597 587 ··· 749 737 750 738 dqm->fence_addr = dqm->fence_mem->cpu_ptr; 751 739 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 740 + 741 + init_interrupts(dqm); 742 + 752 743 list_for_each_entry(node, &dqm->queues, list) 753 744 if (node->qpd->pqm->process && dqm->dev) 754 745 kfd_bind_process_to_device(dqm->dev,
+181
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
··· 1 + /* 2 + * Copyright 2014 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + */ 22 + 23 + /* 24 + * KFD Interrupts. 25 + * 26 + * AMD GPUs deliver interrupts by pushing an interrupt description onto the 27 + * interrupt ring and then sending an interrupt. KGD receives the interrupt 28 + * in ISR and sends us a pointer to each new entry on the interrupt ring. 29 + * 30 + * We generally can't process interrupt-signaled events from ISR, so we call 31 + * out to each interrupt client module (currently only the scheduler) to ask if 32 + * each interrupt is interesting. If they return true, then it requires further 33 + * processing so we copy it to an internal interrupt ring and call each 34 + * interrupt client again from a work-queue. 35 + * 36 + * There's no acknowledgment for the interrupts we use. 
The hardware simply 37 + * queues a new interrupt each time without waiting. 38 + * 39 + * The fixed-size internal queue means that it's possible for us to lose 40 + * interrupts because we have no back-pressure to the hardware. 41 + */ 42 + 43 + #include <linux/slab.h> 44 + #include <linux/device.h> 45 + #include "kfd_priv.h" 46 + 47 + #define KFD_INTERRUPT_RING_SIZE 1024 48 + 49 + static void interrupt_wq(struct work_struct *); 50 + 51 + int kfd_interrupt_init(struct kfd_dev *kfd) 52 + { 53 + void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, 54 + kfd->device_info->ih_ring_entry_size, 55 + GFP_KERNEL); 56 + if (!interrupt_ring) 57 + return -ENOMEM; 58 + 59 + kfd->interrupt_ring = interrupt_ring; 60 + kfd->interrupt_ring_size = 61 + KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; 62 + atomic_set(&kfd->interrupt_ring_wptr, 0); 63 + atomic_set(&kfd->interrupt_ring_rptr, 0); 64 + 65 + spin_lock_init(&kfd->interrupt_lock); 66 + 67 + INIT_WORK(&kfd->interrupt_work, interrupt_wq); 68 + 69 + kfd->interrupts_active = true; 70 + 71 + /* 72 + * After this function returns, the interrupt will be enabled. This 73 + * barrier ensures that the interrupt running on a different processor 74 + * sees all the above writes. 75 + */ 76 + smp_wmb(); 77 + 78 + return 0; 79 + } 80 + 81 + void kfd_interrupt_exit(struct kfd_dev *kfd) 82 + { 83 + /* 84 + * Stop the interrupt handler from writing to the ring and scheduling 85 + * workqueue items. The spinlock ensures that any interrupt running 86 + * after we have unlocked sees interrupts_active = false. 87 + */ 88 + unsigned long flags; 89 + 90 + spin_lock_irqsave(&kfd->interrupt_lock, flags); 91 + kfd->interrupts_active = false; 92 + spin_unlock_irqrestore(&kfd->interrupt_lock, flags); 93 + 94 + /* 95 + * Flush_scheduled_work ensures that there are no outstanding 96 + * work-queue items that will access interrupt_ring. New work items 97 + * can't be created because we stopped interrupt handling above. 
98 + */ 99 + flush_scheduled_work(); 100 + 101 + kfree(kfd->interrupt_ring); 102 + } 103 + 104 + /* 105 + * This assumes that it can't be called concurrently with itself 106 + * but only with dequeue_ih_ring_entry. 107 + */ 108 + bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) 109 + { 110 + unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); 111 + unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); 112 + 113 + if ((rptr - wptr) % kfd->interrupt_ring_size == 114 + kfd->device_info->ih_ring_entry_size) { 115 + /* This is very bad, the system is likely to hang. */ 116 + dev_err_ratelimited(kfd_chardev(), 117 + "Interrupt ring overflow, dropping interrupt.\n"); 118 + return false; 119 + } 120 + 121 + memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, 122 + kfd->device_info->ih_ring_entry_size); 123 + 124 + wptr = (wptr + kfd->device_info->ih_ring_entry_size) % 125 + kfd->interrupt_ring_size; 126 + smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ 127 + atomic_set(&kfd->interrupt_ring_wptr, wptr); 128 + 129 + return true; 130 + } 131 + 132 + /* 133 + * This assumes that it can't be called concurrently with itself 134 + * but only with enqueue_ih_ring_entry. 135 + */ 136 + static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) 137 + { 138 + /* 139 + * Assume that wait queues have an implicit barrier, i.e. anything that 140 + * happened in the ISR before it queued work is visible. 141 + */ 142 + 143 + unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); 144 + unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); 145 + 146 + if (rptr == wptr) 147 + return false; 148 + 149 + memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, 150 + kfd->device_info->ih_ring_entry_size); 151 + 152 + rptr = (rptr + kfd->device_info->ih_ring_entry_size) % 153 + kfd->interrupt_ring_size; 154 + 155 + /* 156 + * Ensure the rptr write update is not visible until 157 + * memcpy has finished reading. 
158 + */ 159 + smp_mb(); 160 + atomic_set(&kfd->interrupt_ring_rptr, rptr); 161 + 162 + return true; 163 + } 164 + 165 + static void interrupt_wq(struct work_struct *work) 166 + { 167 + struct kfd_dev *dev = container_of(work, struct kfd_dev, 168 + interrupt_work); 169 + 170 + uint32_t ih_ring_entry[DIV_ROUND_UP( 171 + dev->device_info->ih_ring_entry_size, 172 + sizeof(uint32_t))]; 173 + 174 + while (dequeue_ih_ring_entry(dev, ih_ring_entry)) 175 + ; 176 + } 177 + 178 + bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) 179 + { 180 + return false; 181 + }
+17
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 161 161 unsigned int gtt_sa_chunk_size; 162 162 unsigned int gtt_sa_num_of_chunks; 163 163 164 + /* Interrupts */ 165 + void *interrupt_ring; 166 + size_t interrupt_ring_size; 167 + atomic_t interrupt_ring_rptr; 168 + atomic_t interrupt_ring_wptr; 169 + struct work_struct interrupt_work; 170 + spinlock_t interrupt_lock; 171 + 164 172 /* QCM Device instance */ 165 173 struct device_queue_manager *dqm; 166 174 167 175 bool init_complete; 176 + /* 177 + * Interrupts of interest to KFD are copied 178 + * from the HW ring into a SW ring. 179 + */ 180 + bool interrupts_active; 168 181 }; 169 182 170 183 /* KGD2KFD callbacks */ ··· 568 555 struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); 569 556 570 557 /* Interrupts */ 558 + int kfd_interrupt_init(struct kfd_dev *dev); 559 + void kfd_interrupt_exit(struct kfd_dev *dev); 571 560 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); 561 + bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); 562 + bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry); 572 563 573 564 /* Power Management */ 574 565 void kgd2kfd_suspend(struct kfd_dev *kfd);