Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Update mapping if range attributes changed

Changing SVM range mapping flags or access attributes doesn't trigger
migration; if the range is already mapped on GPUs, we should update the
GPU mapping and pass the flush_tlb flag as true to amdgpu vm.

Changing SVM range preferred_loc or migration granularity doesn't
require updating the GPU mapping, so skip the validate_and_map.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Philip Yang and committed by
Alex Deucher
601354f3 6b9c63a6

+32 -14
+32 -14
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
··· 685 685 686 686 static void 687 687 svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, 688 - uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) 688 + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, 689 + bool *update_mapping) 689 690 { 690 691 uint32_t i; 691 692 int gpuidx; ··· 702 701 case KFD_IOCTL_SVM_ATTR_ACCESS: 703 702 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: 704 703 case KFD_IOCTL_SVM_ATTR_NO_ACCESS: 704 + *update_mapping = true; 705 705 gpuidx = kfd_process_gpuidx_from_gpuid(p, 706 706 attrs[i].value); 707 707 if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { ··· 717 715 } 718 716 break; 719 717 case KFD_IOCTL_SVM_ATTR_SET_FLAGS: 718 + *update_mapping = true; 720 719 prange->flags |= attrs[i].value; 721 720 break; 722 721 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: 722 + *update_mapping = true; 723 723 prange->flags &= ~attrs[i].value; 724 724 break; 725 725 case KFD_IOCTL_SVM_ATTR_GRANULARITY: ··· 1254 1250 svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, 1255 1251 unsigned long offset, unsigned long npages, bool readonly, 1256 1252 dma_addr_t *dma_addr, struct amdgpu_device *bo_adev, 1257 - struct dma_fence **fence) 1253 + struct dma_fence **fence, bool flush_tlb) 1258 1254 { 1259 1255 struct amdgpu_device *adev = pdd->dev->adev; 1260 1256 struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); ··· 1292 1288 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 
1 : 0, 1293 1289 pte_flags); 1294 1290 1295 - r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL, 1291 + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL, 1296 1292 last_start, prange->start + i, 1297 1293 pte_flags, 1298 1294 last_start - prange->start, ··· 1326 1322 static int 1327 1323 svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, 1328 1324 unsigned long npages, bool readonly, 1329 - unsigned long *bitmap, bool wait) 1325 + unsigned long *bitmap, bool wait, bool flush_tlb) 1330 1326 { 1331 1327 struct kfd_process_device *pdd; 1332 1328 struct amdgpu_device *bo_adev; ··· 1361 1357 1362 1358 r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly, 1363 1359 prange->dma_addr[gpuidx], 1364 - bo_adev, wait ? &fence : NULL); 1360 + bo_adev, wait ? &fence : NULL, 1361 + flush_tlb); 1365 1362 if (r) 1366 1363 break; 1367 1364 ··· 1483 1478 * 5. Release page table (and SVM BO) reservation 1484 1479 */ 1485 1480 static int svm_range_validate_and_map(struct mm_struct *mm, 1486 - struct svm_range *prange, 1487 - int32_t gpuidx, bool intr, bool wait) 1481 + struct svm_range *prange, int32_t gpuidx, 1482 + bool intr, bool wait, bool flush_tlb) 1488 1483 { 1489 1484 struct svm_validate_context ctx; 1490 1485 unsigned long start, end, addr; ··· 1523 1518 prange->bitmap_aip, MAX_GPU_INSTANCE); 1524 1519 } 1525 1520 1526 - if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) 1527 - return 0; 1521 + if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) { 1522 + if (!prange->mapped_to_gpu) 1523 + return 0; 1524 + 1525 + bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); 1526 + } 1528 1527 1529 1528 if (prange->actual_loc && !prange->ttm_res) { 1530 1529 /* This should never happen. 
actual_loc gets set by ··· 1600 1591 } 1601 1592 1602 1593 r = svm_range_map_to_gpus(prange, offset, npages, readonly, 1603 - ctx.bitmap, wait); 1594 + ctx.bitmap, wait, flush_tlb); 1604 1595 1605 1596 unlock_out: 1606 1597 svm_range_unlock(prange); ··· 1696 1687 mutex_lock(&prange->migrate_mutex); 1697 1688 1698 1689 r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, 1699 - false, true); 1690 + false, true, false); 1700 1691 if (r) 1701 1692 pr_debug("failed %d to map 0x%lx to gpus\n", r, 1702 1693 prange->start); ··· 2834 2825 } 2835 2826 } 2836 2827 2837 - r = svm_range_validate_and_map(mm, prange, gpuidx, false, false); 2828 + r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false); 2838 2829 if (r) 2839 2830 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", 2840 2831 r, svms, prange->start, prange->last); ··· 3247 3238 struct svm_range_list *svms; 3248 3239 struct svm_range *prange; 3249 3240 struct svm_range *next; 3241 + bool update_mapping = false; 3242 + bool flush_tlb; 3250 3243 int r = 0; 3251 3244 3252 3245 pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n", ··· 3287 3276 svm_range_add_notifier_locked(mm, prange); 3288 3277 } 3289 3278 list_for_each_entry(prange, &update_list, update_list) { 3290 - svm_range_apply_attrs(p, prange, nattr, attrs); 3279 + svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping); 3291 3280 /* TODO: unmap ranges from GPU that lost access */ 3292 3281 } 3293 3282 list_for_each_entry_safe(prange, next, &remove_list, update_list) { ··· 3320 3309 continue; 3321 3310 } 3322 3311 3312 + if (!migrated && !update_mapping) { 3313 + mutex_unlock(&prange->migrate_mutex); 3314 + continue; 3315 + } 3316 + 3317 + flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; 3318 + 3323 3319 r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, 3324 - true, true); 3320 + true, true, flush_tlb); 3325 3321 if (r) 3326 3322 pr_debug("failed %d to map svm range\n", r); 3327 
3323