Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Queue interrupt work to different CPU

For CPX mode, each KFD node has an interrupt worker to process its ih_fifo
and send events to user space. Currently all interrupt workers of the same
adev queue work to the same CPU, so all workers' execution is actually
serialized, and this causes KFD ih_fifo overflow when CPU usage is high.

Use a per-GPU unbound high-priority workqueue with the number of workers
equal to the number of partitions, and let queue_work select the next CPU
round-robin among the local CPUs of the same NUMA node.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Philip Yang and committed by
Alex Deucher
34db5a32 1b001432

+20 -33
+9 -16
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 649 649 struct kfd_node *knode; 650 650 unsigned int i; 651 651 652 + /* 653 + * flush_work ensures that there are no outstanding 654 + * work-queue items that will access interrupt_ring. New work items 655 + * can't be created because we stopped interrupt handling above. 656 + */ 657 + flush_workqueue(kfd->ih_wq); 658 + destroy_workqueue(kfd->ih_wq); 659 + 652 660 for (i = 0; i < num_nodes; i++) { 653 661 knode = kfd->nodes[i]; 654 662 device_queue_manager_uninit(knode->dqm); ··· 1074 1066 return err; 1075 1067 } 1076 1068 1077 - static inline void kfd_queue_work(struct workqueue_struct *wq, 1078 - struct work_struct *work) 1079 - { 1080 - int cpu, new_cpu; 1081 - 1082 - cpu = new_cpu = smp_processor_id(); 1083 - do { 1084 - new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; 1085 - if (cpu_to_node(new_cpu) == numa_node_id()) 1086 - break; 1087 - } while (cpu != new_cpu); 1088 - 1089 - queue_work_on(new_cpu, wq, work); 1090 - } 1091 - 1092 1069 /* This is called directly from KGD at ISR. */ 1093 1070 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) 1094 1071 { ··· 1099 1106 patched_ihre, &is_patched) 1100 1107 && enqueue_ih_ring_entry(node, 1101 1108 is_patched ? patched_ihre : ih_ring_entry)) { 1102 - kfd_queue_work(node->ih_wq, &node->interrupt_work); 1109 + queue_work(node->kfd->ih_wq, &node->interrupt_work); 1103 1110 spin_unlock_irqrestore(&node->interrupt_lock, flags); 1104 1111 return; 1105 1112 }
+9 -16
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
··· 62 62 return r; 63 63 } 64 64 65 - node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); 66 - if (unlikely(!node->ih_wq)) { 67 - kfifo_free(&node->ih_fifo); 68 - dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); 69 - return -ENOMEM; 65 + if (!node->kfd->ih_wq) { 66 + node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND, 67 + node->kfd->num_nodes); 68 + if (unlikely(!node->kfd->ih_wq)) { 69 + kfifo_free(&node->ih_fifo); 70 + dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); 71 + return -ENOMEM; 72 + } 70 73 } 71 74 spin_lock_init(&node->interrupt_lock); 72 75 ··· 99 96 spin_lock_irqsave(&node->interrupt_lock, flags); 100 97 node->interrupts_active = false; 101 98 spin_unlock_irqrestore(&node->interrupt_lock, flags); 102 - 103 - /* 104 - * flush_work ensures that there are no outstanding 105 - * work-queue items that will access interrupt_ring. New work items 106 - * can't be created because we stopped interrupt handling above. 107 - */ 108 - flush_workqueue(node->ih_wq); 109 - 110 - destroy_workqueue(node->ih_wq); 111 - 112 99 kfifo_free(&node->ih_fifo); 113 100 } 114 101 ··· 148 155 /* If we spent more than a second processing signals, 149 156 * reschedule the worker to avoid soft-lockup warnings 150 157 */ 151 - queue_work(dev->ih_wq, &dev->interrupt_work); 158 + queue_work(dev->kfd->ih_wq, &dev->interrupt_work); 152 159 break; 153 160 } 154 161 }
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 274 274 275 275 /* Interrupts */ 276 276 struct kfifo ih_fifo; 277 - struct workqueue_struct *ih_wq; 278 277 struct work_struct interrupt_work; 279 278 spinlock_t interrupt_lock; 280 279 ··· 365 366 366 367 struct kfd_node *nodes[MAX_KFD_NODES]; 367 368 unsigned int num_nodes; 369 + 370 + struct workqueue_struct *ih_wq; 368 371 369 372 /* Kernel doorbells for KFD device */ 370 373 struct amdgpu_bo *doorbells;