Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Fix circular locking dependency warning

[ 150.887733] ======================================================
[ 150.893903] WARNING: possible circular locking dependency detected
[ 150.905917] ------------------------------------------------------
[ 150.912129] kfdtest/4081 is trying to acquire lock:
[ 150.917002] ffff8f7f3762e118 (&mm->mmap_sem#2){++++}, at:
__might_fault+0x3e/0x90
[ 150.924490]
but task is already holding lock:
[ 150.930320] ffff8f7f49d229e8 (&dqm->lock_hidden){+.+.}, at:
destroy_queue_cpsch+0x29/0x210 [amdgpu]
[ 150.939432]
which lock already depends on the new lock.

[ 150.947603]
the existing dependency chain (in reverse order) is:
[ 150.955074]
-> #3 (&dqm->lock_hidden){+.+.}:
[ 150.960822] __mutex_lock+0xa1/0x9f0
[ 150.964996] evict_process_queues_cpsch+0x22/0x120 [amdgpu]
[ 150.971155] kfd_process_evict_queues+0x3b/0xc0 [amdgpu]
[ 150.977054] kgd2kfd_quiesce_mm+0x25/0x60 [amdgpu]
[ 150.982442] amdgpu_amdkfd_evict_userptr+0x35/0x70 [amdgpu]
[ 150.988615] amdgpu_mn_invalidate_hsa+0x41/0x60 [amdgpu]
[ 150.994448] __mmu_notifier_invalidate_range_start+0xa4/0x240
[ 151.000714] copy_page_range+0xd70/0xd80
[ 151.005159] dup_mm+0x3ca/0x550
[ 151.008816] copy_process+0x1bdc/0x1c70
[ 151.013183] _do_fork+0x76/0x6c0
[ 151.016929] __x64_sys_clone+0x8c/0xb0
[ 151.021201] do_syscall_64+0x4a/0x1d0
[ 151.025404] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 151.030977]
-> #2 (&adev->notifier_lock){+.+.}:
[ 151.036993] __mutex_lock+0xa1/0x9f0
[ 151.041168] amdgpu_mn_invalidate_hsa+0x30/0x60 [amdgpu]
[ 151.047019] __mmu_notifier_invalidate_range_start+0xa4/0x240
[ 151.053277] copy_page_range+0xd70/0xd80
[ 151.057722] dup_mm+0x3ca/0x550
[ 151.061388] copy_process+0x1bdc/0x1c70
[ 151.065748] _do_fork+0x76/0x6c0
[ 151.069499] __x64_sys_clone+0x8c/0xb0
[ 151.073765] do_syscall_64+0x4a/0x1d0
[ 151.077952] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 151.083523]
-> #1 (mmu_notifier_invalidate_range_start){+.+.}:
[ 151.090833] change_protection+0x802/0xab0
[ 151.095448] mprotect_fixup+0x187/0x2d0
[ 151.099801] setup_arg_pages+0x124/0x250
[ 151.104251] load_elf_binary+0x3a4/0x1464
[ 151.108781] search_binary_handler+0x6c/0x210
[ 151.113656] __do_execve_file.isra.40+0x7f7/0xa50
[ 151.118875] do_execve+0x21/0x30
[ 151.122632] call_usermodehelper_exec_async+0x17e/0x190
[ 151.128393] ret_from_fork+0x24/0x30
[ 151.132489]
-> #0 (&mm->mmap_sem#2){++++}:
[ 151.138064] __lock_acquire+0x11a1/0x1490
[ 151.142597] lock_acquire+0x90/0x180
[ 151.146694] __might_fault+0x68/0x90
[ 151.150879] read_sdma_queue_counter+0x5f/0xb0 [amdgpu]
[ 151.156693] update_sdma_queue_past_activity_stats+0x3b/0x90 [amdgpu]
[ 151.163725] destroy_queue_cpsch+0x1ae/0x210 [amdgpu]
[ 151.169373] pqm_destroy_queue+0xf0/0x250 [amdgpu]
[ 151.174762] kfd_ioctl_destroy_queue+0x32/0x70 [amdgpu]
[ 151.180577] kfd_ioctl+0x223/0x400 [amdgpu]
[ 151.185284] ksys_ioctl+0x8f/0xb0
[ 151.189118] __x64_sys_ioctl+0x16/0x20
[ 151.193389] do_syscall_64+0x4a/0x1d0
[ 151.197569] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 151.203141]
other info that might help us debug this:

[ 151.211140] Chain exists of:
&mm->mmap_sem#2 --> &adev->notifier_lock --> &dqm->lock_hidden

[ 151.222535] Possible unsafe locking scenario:

[ 151.228447]        CPU0                    CPU1
[ 151.232971]        ----                    ----
[ 151.237502]   lock(&dqm->lock_hidden);
[ 151.241254]                                lock(&adev->notifier_lock);
[ 151.247774]                                lock(&dqm->lock_hidden);
[ 151.254038]   lock(&mm->mmap_sem#2);

This commit fixes the warning by ensuring get_user() is not called
while reading SDMA stats with dqm_lock held, since get_user() can cause
a page fault, which leads to the circular locking scenario.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Mukul Joshi and committed by
Alex Deucher
d69fd951 7ee78aff

+159 -62
+34 -39
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 153 153 dqm->active_cp_queue_count--; 154 154 } 155 155 156 - int read_sdma_queue_counter(struct queue *q, uint64_t *val) 156 + int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val) 157 157 { 158 158 int ret; 159 159 uint64_t tmp = 0; 160 160 161 - if (!q || !val) 161 + if (!val) 162 162 return -EINVAL; 163 163 /* 164 164 * SDMA activity counter is stored at queue's RPTR + 0x8 location. 165 165 */ 166 - if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr + 166 + if (!access_ok((const void __user *)(q_rptr + 167 167 sizeof(uint64_t)), sizeof(uint64_t))) { 168 168 pr_err("Can't access sdma queue activity counter\n"); 169 169 return -EFAULT; 170 170 } 171 171 172 - ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) + 173 - sizeof(uint64_t))); 172 + ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t))); 174 173 if (!ret) { 175 174 *val = tmp; 176 175 } 177 - 178 - return ret; 179 - } 180 - 181 - static int update_sdma_queue_past_activity_stats(struct kfd_process_device *pdd, 182 - struct queue *q) 183 - { 184 - int ret; 185 - uint64_t val = 0; 186 - 187 - if (!pdd) 188 - return -ENODEV; 189 - 190 - ret = read_sdma_queue_counter(q, &val); 191 - if (ret) { 192 - pr_err("Failed to read SDMA queue counter for queue: %d\n", 193 - q->properties.queue_id); 194 - return ret; 195 - } 196 - 197 - pdd->sdma_past_activity_counter += val; 198 176 199 177 return ret; 200 178 } ··· 511 533 if (retval == -ETIME) 512 534 qpd->reset_wavefronts = true; 513 535 514 - /* Get the SDMA queue stats */ 515 - if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 516 - (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 517 - update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q); 518 - } 519 536 520 537 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 521 538 ··· 546 573 struct queue *q) 547 574 { 548 575 int retval; 576 + uint64_t sdma_val = 0; 577 + struct kfd_process_device *pdd = qpd_to_pdd(qpd); 578 + 579 + /* Get the SDMA queue stats 
*/ 580 + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 581 + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 582 + retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr, 583 + &sdma_val); 584 + if (retval) 585 + pr_err("Failed to read SDMA queue counter for queue: %d\n", 586 + q->properties.queue_id); 587 + } 549 588 550 589 dqm_lock(dqm); 551 590 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 591 + if (!retval) 592 + pdd->sdma_past_activity_counter += sdma_val; 552 593 dqm_unlock(dqm); 553 594 554 595 return retval; ··· 1467 1480 { 1468 1481 int retval; 1469 1482 struct mqd_manager *mqd_mgr; 1483 + uint64_t sdma_val = 0; 1484 + struct kfd_process_device *pdd = qpd_to_pdd(qpd); 1485 + 1486 + /* Get the SDMA queue stats */ 1487 + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1488 + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1489 + retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr, 1490 + &sdma_val); 1491 + if (retval) 1492 + pr_err("Failed to read SDMA queue counter for queue: %d\n", 1493 + q->properties.queue_id); 1494 + } 1470 1495 1471 1496 retval = 0; 1472 1497 ··· 1500 1501 1501 1502 deallocate_doorbell(qpd, q); 1502 1503 1503 - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 1504 + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1505 + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1504 1506 deallocate_sdma_queue(dqm, q); 1505 - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1506 - deallocate_sdma_queue(dqm, q); 1507 + pdd->sdma_past_activity_counter += sdma_val; 1508 + } 1507 1509 1508 1510 list_del(&q->list); 1509 1511 qpd->queue_count--; ··· 1520 1520 } 1521 1521 } 1522 1522 1523 - /* Get the SDMA queue stats */ 1524 - if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1525 - (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1526 - update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q); 1527 - } 1528 1523 /* 1529 1524 * Unconditionally decrement this counter, regardless of the 
queue's 1530 1525 * type
+1 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
··· 251 251 mutex_unlock(&dqm->lock_hidden); 252 252 } 253 253 254 - int read_sdma_queue_counter(struct queue *q, uint64_t *val); 255 - 254 + int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val); 256 255 #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
+124 -21
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 86 86 uint64_t sdma_activity_counter; 87 87 }; 88 88 89 + struct temp_sdma_queue_list { 90 + uint64_t rptr; 91 + uint64_t sdma_val; 92 + unsigned int queue_id; 93 + struct list_head list; 94 + }; 95 + 89 96 static void kfd_sdma_activity_worker(struct work_struct *work) 90 97 { 91 98 struct kfd_sdma_activity_handler_workarea *workarea; ··· 103 96 struct qcm_process_device *qpd; 104 97 struct device_queue_manager *dqm; 105 98 int ret = 0; 99 + struct temp_sdma_queue_list sdma_q_list; 100 + struct temp_sdma_queue_list *sdma_q, *next; 106 101 107 102 workarea = container_of(work, struct kfd_sdma_activity_handler_workarea, 108 103 sdma_activity_work); ··· 118 109 qpd = &pdd->qpd; 119 110 if (!dqm || !qpd) 120 111 return; 112 + /* 113 + * Total SDMA activity is current SDMA activity + past SDMA activity 114 + * Past SDMA count is stored in pdd. 115 + * To get the current activity counters for all active SDMA queues, 116 + * we loop over all SDMA queues and get their counts from user-space. 117 + * 118 + * We cannot call get_user() with dqm_lock held as it can cause 119 + * a circular lock dependency situation. To read the SDMA stats, 120 + * we need to do the following: 121 + * 122 + * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list, 123 + * with dqm_lock/dqm_unlock(). 124 + * 2. Call get_user() for each node in temporary list without dqm_lock. 125 + * Save the SDMA count for each node and also add the count to the total 126 + * SDMA count counter. 127 + * Its possible, during this step, a few SDMA queue nodes got deleted 128 + * from the qpd->queues_list. 129 + * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted. 130 + * If any node got deleted, its SDMA count would be captured in the sdma 131 + * past activity counter. So subtract the SDMA counter stored in step 2 132 + * for this node from the total SDMA count. 
133 + */ 134 + INIT_LIST_HEAD(&sdma_q_list.list); 121 135 122 - mm = get_task_mm(pdd->process->lead_thread); 123 - if (!mm) { 136 + /* 137 + * Create the temp list of all SDMA queues 138 + */ 139 + dqm_lock(dqm); 140 + 141 + list_for_each_entry(q, &qpd->queues_list, list) { 142 + if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) && 143 + (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)) 144 + continue; 145 + 146 + sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL); 147 + if (!sdma_q) { 148 + dqm_unlock(dqm); 149 + goto cleanup; 150 + } 151 + 152 + INIT_LIST_HEAD(&sdma_q->list); 153 + sdma_q->rptr = (uint64_t)q->properties.read_ptr; 154 + sdma_q->queue_id = q->properties.queue_id; 155 + list_add_tail(&sdma_q->list, &sdma_q_list.list); 156 + } 157 + 158 + /* 159 + * If the temp list is empty, then no SDMA queues nodes were found in 160 + * qpd->queues_list. Return the past activity count as the total sdma 161 + * count 162 + */ 163 + if (list_empty(&sdma_q_list.list)) { 164 + workarea->sdma_activity_counter = pdd->sdma_past_activity_counter; 165 + dqm_unlock(dqm); 124 166 return; 125 167 } 126 168 169 + dqm_unlock(dqm); 170 + 171 + /* 172 + * Get the usage count for each SDMA queue in temp_list. 173 + */ 174 + mm = get_task_mm(pdd->process->lead_thread); 175 + if (!mm) 176 + goto cleanup; 177 + 127 178 use_mm(mm); 128 179 180 + list_for_each_entry(sdma_q, &sdma_q_list.list, list) { 181 + val = 0; 182 + ret = read_sdma_queue_counter(sdma_q->rptr, &val); 183 + if (ret) { 184 + pr_debug("Failed to read SDMA queue active counter for queue id: %d", 185 + sdma_q->queue_id); 186 + } else { 187 + sdma_q->sdma_val = val; 188 + workarea->sdma_activity_counter += val; 189 + } 190 + } 191 + 192 + unuse_mm(mm); 193 + mmput(mm); 194 + 195 + /* 196 + * Do a second iteration over qpd_queues_list to check if any SDMA 197 + * nodes got deleted while fetching SDMA counter. 
198 + */ 129 199 dqm_lock(dqm); 130 200 131 - /* 132 - * Total SDMA activity is current SDMA activity + past SDMA activity 133 - */ 134 - workarea->sdma_activity_counter = pdd->sdma_past_activity_counter; 201 + workarea->sdma_activity_counter += pdd->sdma_past_activity_counter; 135 202 136 - /* 137 - * Get the current activity counters for all active SDMA queues 138 - */ 139 203 list_for_each_entry(q, &qpd->queues_list, list) { 140 - if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 141 - (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 142 - val = 0; 143 - ret = read_sdma_queue_counter(q, &val); 144 - if (ret) 145 - pr_debug("Failed to read SDMA queue active " 146 - "counter for queue id: %d", 147 - q->properties.queue_id); 148 - else 149 - workarea->sdma_activity_counter += val; 204 + if (list_empty(&sdma_q_list.list)) 205 + break; 206 + 207 + if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) && 208 + (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)) 209 + continue; 210 + 211 + list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) { 212 + if (((uint64_t)q->properties.read_ptr == sdma_q->rptr) && 213 + (sdma_q->queue_id == q->properties.queue_id)) { 214 + list_del(&sdma_q->list); 215 + kfree(sdma_q); 216 + break; 217 + } 150 218 } 151 219 } 152 220 153 221 dqm_unlock(dqm); 154 - unuse_mm(mm); 155 - mmput(mm); 222 + 223 + /* 224 + * If temp list is not empty, it implies some queues got deleted 225 + * from qpd->queues_list during SDMA usage read. Subtract the SDMA 226 + * count for each node from the total SDMA count. 
227 + */ 228 + list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) { 229 + workarea->sdma_activity_counter -= sdma_q->sdma_val; 230 + list_del(&sdma_q->list); 231 + kfree(sdma_q); 232 + } 233 + 234 + return; 235 + 236 + cleanup: 237 + list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) { 238 + list_del(&sdma_q->list); 239 + kfree(sdma_q); 240 + } 156 241 } 157 242 158 243 static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,