Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/amdkfd: map SVM range with correct access permission

When restoring a range for a retry fault or a prefetch, or restoring an
svm range after eviction, map the range to the GPU with the correct read
or write access permission.
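
For illustration, a minimal sketch of the permission handling this
implies, condensed from the patch below (svm_range_get_pte_flags() and
AMDGPU_PTE_WRITEABLE are the kernel symbols the patch uses; the helper
itself is hypothetical):

	/* Hypothetical helper: compute the PTE flags for one slice of the
	 * range, dropping the write bit when the backing CPU VMA is
	 * read-only so the GPU cannot write pages the CPU may not write.
	 */
	static uint64_t svm_slice_pte_flags(struct amdgpu_device *adev,
					    struct svm_range *prange,
					    int domain, bool readonly)
	{
		uint64_t pte_flags = svm_range_get_pte_flags(adev, prange, domain);

		if (readonly)
			pte_flags &= ~AMDGPU_PTE_WRITEABLE;
		return pte_flags;
	}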

A range may span multiple VMAs, so update the GPU page table per VMA,
using the offset into prange and the number of pages covered by each
VMA, according to that VMA's access permission. A rough sketch of the
walk follows below.
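
Condensed from the patch below (find_vma(), VM_WRITE and PAGE_SHIFT are
standard kernel interfaces; the elided comment stands in for the hmm
get-pages, dma-map and map-to-GPU steps):

	/* Walk the CPU VMAs covering [start, end) and map each slice with
	 * its own permission: offset/npages locate the slice within prange,
	 * readonly mirrors the VMA's write permission.
	 */
	for (addr = start; addr < end && !r; addr = next) {
		struct vm_area_struct *vma = find_vma(mm, addr);

		if (!vma || addr < vma->vm_start) {
			r = -EFAULT;	/* hole in the address space */
			break;
		}
		readonly = !(vma->vm_flags & VM_WRITE);
		next = min(vma->vm_end, end);
		npages = (next - addr) >> PAGE_SHIFT;
		offset = (addr - start) >> PAGE_SHIFT;
		/* ... get pages, dma-map, and map this slice to the GPU ... */
	}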

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Philip Yang, committed by Alex Deucher
2f617f4d ff891a2e

+86 -48
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
···
 
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+		      unsigned long offset, unsigned long npages,
 		      unsigned long *hmm_pfns, uint32_t gpuidx)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
···
 		prange->dma_addr[gpuidx] = addr;
 	}
 
-	for (i = 0; i < prange->npages; i++) {
+	addr += offset;
+	for (i = 0; i < npages; i++) {
 		if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
 			      "leaking dma mapping\n"))
 			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
···
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+		  unsigned long offset, unsigned long npages,
 		  unsigned long *hmm_pfns)
 {
 	struct kfd_process *p;
···
 		}
 		adev = (struct amdgpu_device *)pdd->dev->kgd;
 
-		r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
+		r = svm_range_dma_map_dev(adev, prange, offset, npages,
+					  hmm_pfns, gpuidx);
 		if (r)
 			break;
 	}
···
 	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
 
 	pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
-
-	pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
-		 prange->svms, prange->start, prange->last,
-		 (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
-
 	return pte_flags;
 }
···
 
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		     struct svm_range *prange, dma_addr_t *dma_addr,
+		     struct svm_range *prange, unsigned long offset,
+		     unsigned long npages, bool readonly, dma_addr_t *dma_addr,
 		     struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
 	struct amdgpu_bo_va bo_va;
···
 	int r = 0;
 	int64_t i;
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-		 prange->last);
+	last_start = prange->start + offset;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
+		 last_start, last_start + npages - 1, readonly);
 
 	if (prange->svm_bo && prange->ttm_res)
 		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
 
-	last_start = prange->start;
-	for (i = 0; i < prange->npages; i++) {
+	for (i = offset; i < offset + npages; i++) {
 		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
 		dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
 		if ((prange->start + i) < prange->last &&
···
 
 		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
 			 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+
 		pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
-		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
-						last_start,
+		if (readonly)
+			pte_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+		pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
+			 prange->svms, last_start, prange->start + i,
+			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+			 pte_flags);
+
+		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+						NULL, last_start,
 						prange->start + i, pte_flags,
 						last_start - prange->start,
-						NULL,
-						dma_addr,
+						NULL, dma_addr,
 						&vm->last_update,
 						&table_freed);
 		if (r) {
···
 	return r;
 }
 
-static int svm_range_map_to_gpus(struct svm_range *prange,
-				 unsigned long *bitmap, bool wait)
+static int
+svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
+		      unsigned long npages, bool readonly,
+		      unsigned long *bitmap, bool wait)
 {
 	struct kfd_process_device *pdd;
 	struct amdgpu_device *bo_adev;
···
 		}
 
 		r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
-					 prange, prange->dma_addr[gpuidx],
+					 prange, offset, npages, readonly,
+					 prange->dma_addr[gpuidx],
 					 bo_adev, wait ? &fence : NULL);
 		if (r)
 			break;
···
 			   int32_t gpuidx, bool intr, bool wait)
 {
 	struct svm_validate_context ctx;
-	struct hmm_range *hmm_range;
+	unsigned long start, end, addr;
 	struct kfd_process *p;
 	void *owner;
 	int32_t idx;
···
 			break;
 		}
 	}
-	r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
-				       prange->start << PAGE_SHIFT,
-				       prange->npages, &hmm_range,
-				       false, true, owner);
-	if (r) {
-		pr_debug("failed %d to get svm range pages\n", r);
-		goto unreserve_out;
-	}
 
-	r = svm_range_dma_map(prange, ctx.bitmap,
-			      hmm_range->hmm_pfns);
-	if (r) {
-		pr_debug("failed %d to dma map range\n", r);
-		goto unreserve_out;
-	}
+	start = prange->start << PAGE_SHIFT;
+	end = (prange->last + 1) << PAGE_SHIFT;
+	for (addr = start; addr < end && !r; ) {
+		struct hmm_range *hmm_range;
+		struct vm_area_struct *vma;
+		unsigned long next;
+		unsigned long offset;
+		unsigned long npages;
+		bool readonly;
 
-	prange->validated_once = true;
+		vma = find_vma(mm, addr);
+		if (!vma || addr < vma->vm_start) {
+			r = -EFAULT;
+			goto unreserve_out;
+		}
+		readonly = !(vma->vm_flags & VM_WRITE);
 
-	svm_range_lock(prange);
-	if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
-		pr_debug("hmm update the range, need validate again\n");
-		r = -EAGAIN;
-		goto unlock_out;
-	}
-	if (!list_empty(&prange->child_list)) {
-		pr_debug("range split by unmap in parallel, validate again\n");
-		r = -EAGAIN;
-		goto unlock_out;
-	}
+		next = min(vma->vm_end, end);
+		npages = (next - addr) >> PAGE_SHIFT;
+		r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+					       addr, npages, &hmm_range,
+					       readonly, true, owner);
+		if (r) {
+			pr_debug("failed %d to get svm range pages\n", r);
+			goto unreserve_out;
+		}
 
-	r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
+		offset = (addr - start) >> PAGE_SHIFT;
+		r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
+				      hmm_range->hmm_pfns);
+		if (r) {
+			pr_debug("failed %d to dma map range\n", r);
+			goto unreserve_out;
+		}
+
+		svm_range_lock(prange);
+		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+			pr_debug("hmm update the range, need validate again\n");
+			r = -EAGAIN;
+			goto unlock_out;
+		}
+		if (!list_empty(&prange->child_list)) {
+			pr_debug("range split by unmap in parallel, validate again\n");
+			r = -EAGAIN;
+			goto unlock_out;
+		}
+
+		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+					  ctx.bitmap, wait);
 
 unlock_out:
-	svm_range_unlock(prange);
+		svm_range_unlock(prange);
+
+		addr = next;
+	}
+
+	if (addr == end)
+		prange->validated_once = true;
+
 unreserve_out:
 	svm_range_unreserve_bos(&ctx);
 