Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: track unified memory reservation with xnack off

[WHY]
Unified memory with XNACK off should be tracked, as userptr mappings
and legacy allocations are, to avoid oversubscribing system memory
when XNACK is off.
[HOW]
Expose the functions amdgpu_amdkfd_reserve_mem_limit and
amdgpu_amdkfd_unreserve_mem_limit to the SVM API and call them on
every prange creation and free.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Alex Sierra and committed by
Alex Deucher
f9af3c16 06ac561f

+59 -26
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 305 305 void amdgpu_amdkfd_block_mmu_notifications(void *p); 306 306 int amdgpu_amdkfd_criu_resume(void *p); 307 307 bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); 308 + int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, 309 + uint64_t size, u32 alloc_flag); 310 + void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, 311 + uint64_t size, u32 alloc_flag); 308 312 309 313 #if IS_ENABLED(CONFIG_HSA_AMD) 310 314 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+14 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 129 129 * 130 130 * Return: returns -ENOMEM in case of error, ZERO otherwise 131 131 */ 132 - static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, 132 + int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, 133 133 uint64_t size, u32 alloc_flag) 134 134 { 135 135 uint64_t reserved_for_pt = ··· 169 169 kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || 170 170 (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > 171 171 kfd_mem_limit.max_ttm_mem_limit) || 172 - (adev->kfd.vram_used + vram_needed > 172 + (adev && adev->kfd.vram_used + vram_needed > 173 173 adev->gmc.real_vram_size - 174 174 atomic64_read(&adev->vram_pin_size) - 175 175 reserved_for_pt)) { ··· 180 180 /* Update memory accounting by decreasing available system 181 181 * memory, TTM memory and GPU memory as computed above 182 182 */ 183 - adev->kfd.vram_used += vram_needed; 183 + WARN_ONCE(vram_needed && !adev, 184 + "adev reference can't be null when vram is used"); 185 + if (adev) 186 + adev->kfd.vram_used += vram_needed; 184 187 kfd_mem_limit.system_mem_used += system_mem_needed; 185 188 kfd_mem_limit.ttm_mem_used += ttm_mem_needed; 186 189 ··· 192 189 return ret; 193 190 } 194 191 195 - static void unreserve_mem_limit(struct amdgpu_device *adev, 192 + void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, 196 193 uint64_t size, u32 alloc_flag) 197 194 { 198 195 spin_lock(&kfd_mem_limit.mem_limit_lock); ··· 201 198 kfd_mem_limit.system_mem_used -= size; 202 199 kfd_mem_limit.ttm_mem_used -= size; 203 200 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { 204 - adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); 201 + WARN_ONCE(!adev, 202 + "adev reference can't be null when alloc mem flags vram is set"); 203 + if (adev) 204 + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); 205 205 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 206 206 kfd_mem_limit.system_mem_used -= size; 207 207 } else if (!(alloc_flag & ··· 213 
207 pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); 214 208 goto release; 215 209 } 216 - 217 - WARN_ONCE(adev->kfd.vram_used < 0, 210 + WARN_ONCE(adev && adev->kfd.vram_used < 0, 218 211 "KFD VRAM memory accounting unbalanced"); 219 212 WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, 220 213 "KFD TTM memory accounting unbalanced"); ··· 230 225 u32 alloc_flags = bo->kfd_bo->alloc_flags; 231 226 u64 size = amdgpu_bo_size(bo); 232 227 233 - unreserve_mem_limit(adev, size, alloc_flags); 228 + amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags); 234 229 235 230 kfree(bo->kfd_bo); 236 231 } ··· 1754 1749 /* Don't unreserve system mem limit twice */ 1755 1750 goto err_reserve_limit; 1756 1751 err_bo_create: 1757 - unreserve_mem_limit(adev, size, flags); 1752 + amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags); 1758 1753 err_reserve_limit: 1759 1754 mutex_destroy(&(*mem)->lock); 1760 1755 if (gobj)
+41 -17
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
··· 266 266 } 267 267 } 268 268 269 - static void svm_range_free(struct svm_range *prange) 269 + static void svm_range_free(struct svm_range *prange, bool update_mem_usage) 270 270 { 271 + uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT; 272 + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); 273 + 271 274 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, 272 275 prange->start, prange->last); 273 276 274 277 svm_range_vram_node_free(prange); 275 278 svm_range_free_dma_mappings(prange); 279 + 280 + if (update_mem_usage && !p->xnack_enabled) { 281 + pr_debug("unreserve mem limit: %lld\n", size); 282 + amdgpu_amdkfd_unreserve_mem_limit(NULL, size, 283 + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR); 284 + } 276 285 mutex_destroy(&prange->lock); 277 286 mutex_destroy(&prange->migrate_mutex); 278 287 kfree(prange); ··· 300 291 301 292 static struct 302 293 svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, 303 - uint64_t last) 294 + uint64_t last, bool update_mem_usage) 304 295 { 305 296 uint64_t size = last - start + 1; 306 297 struct svm_range *prange; ··· 309 300 prange = kzalloc(sizeof(*prange), GFP_KERNEL); 310 301 if (!prange) 311 302 return NULL; 303 + 304 + p = container_of(svms, struct kfd_process, svms); 305 + if (!p->xnack_enabled && update_mem_usage && 306 + amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT, 307 + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) { 308 + pr_info("SVM mapping failed, exceeds resident system memory limit\n"); 309 + kfree(prange); 310 + return NULL; 311 + } 312 312 prange->npages = size; 313 313 prange->svms = svms; 314 314 prange->start = start; ··· 332 314 mutex_init(&prange->migrate_mutex); 333 315 mutex_init(&prange->lock); 334 316 335 - p = container_of(svms, struct kfd_process, svms); 336 317 if (p->xnack_enabled) 337 318 bitmap_copy(prange->bitmap_access, svms->bitmap_supported, 338 319 MAX_GPU_INSTANCE); ··· 1024 1007 1025 1008 svms = prange->svms; 1026 
1009 if (old_start == start) 1027 - *new = svm_range_new(svms, last + 1, old_last); 1010 + *new = svm_range_new(svms, last + 1, old_last, false); 1028 1011 else 1029 - *new = svm_range_new(svms, old_start, start - 1); 1012 + *new = svm_range_new(svms, old_start, start - 1, false); 1030 1013 if (!*new) 1031 1014 return -ENOMEM; 1032 1015 ··· 1034 1017 if (r) { 1035 1018 pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", 1036 1019 r, old_start, old_last, start, last); 1037 - svm_range_free(*new); 1020 + svm_range_free(*new, false); 1038 1021 *new = NULL; 1039 1022 } 1040 1023 ··· 1869 1852 { 1870 1853 struct svm_range *new; 1871 1854 1872 - new = svm_range_new(old->svms, old->start, old->last); 1855 + new = svm_range_new(old->svms, old->start, old->last, false); 1873 1856 if (!new) 1874 1857 return NULL; 1875 1858 ··· 1922 1905 while (last >= start) { 1923 1906 l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1); 1924 1907 1925 - prange = svm_range_new(svms, start, l); 1908 + prange = svm_range_new(svms, start, l, true); 1926 1909 if (!prange) 1927 1910 return -ENOMEM; 1928 1911 list_add(&prange->list, insert_list); ··· 1973 1956 struct interval_tree_node *node; 1974 1957 struct svm_range *prange; 1975 1958 struct svm_range *tmp; 1959 + struct list_head new_list; 1976 1960 int r = 0; 1977 1961 1978 1962 pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last); ··· 1981 1963 INIT_LIST_HEAD(update_list); 1982 1964 INIT_LIST_HEAD(insert_list); 1983 1965 INIT_LIST_HEAD(remove_list); 1966 + INIT_LIST_HEAD(&new_list); 1984 1967 1985 1968 node = interval_tree_iter_first(&svms->objects, start, last); 1986 1969 while (node) { ··· 2039 2020 if (node->start > start) { 2040 2021 r = svm_range_split_new(svms, start, node->start - 1, 2041 2022 READ_ONCE(max_svm_range_pages), 2042 - insert_list, update_list); 2023 + &new_list, update_list); 2043 2024 if (r) 2044 2025 goto out; 2045 2026 } ··· 2052 2033 if (start <= last) 2053 2034 r = 
svm_range_split_new(svms, start, last, 2054 2035 READ_ONCE(max_svm_range_pages), 2055 - insert_list, update_list); 2036 + &new_list, update_list); 2056 2037 2057 2038 out: 2058 - if (r) 2039 + if (r) { 2059 2040 list_for_each_entry_safe(prange, tmp, insert_list, list) 2060 - svm_range_free(prange); 2041 + svm_range_free(prange, false); 2042 + list_for_each_entry_safe(prange, tmp, &new_list, list) 2043 + svm_range_free(prange, true); 2044 + } else { 2045 + list_splice(&new_list, insert_list); 2046 + } 2061 2047 2062 2048 return r; 2063 2049 } ··· 2109 2085 svms, prange, prange->start, prange->last); 2110 2086 svm_range_unlink(prange); 2111 2087 svm_range_remove_notifier(prange); 2112 - svm_range_free(prange); 2088 + svm_range_free(prange, true); 2113 2089 break; 2114 2090 case SVM_OP_UPDATE_RANGE_NOTIFIER: 2115 2091 pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n", ··· 2672 2648 last = addr; 2673 2649 } 2674 2650 2675 - prange = svm_range_new(&p->svms, start, last); 2651 + prange = svm_range_new(&p->svms, start, last, true); 2676 2652 if (!prange) { 2677 2653 pr_debug("Failed to create prange in address [0x%llx]\n", addr); 2678 2654 return NULL; 2679 2655 } 2680 2656 if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) { 2681 2657 pr_debug("failed to get gpuid from kgd\n"); 2682 - svm_range_free(prange); 2658 + svm_range_free(prange, true); 2683 2659 return NULL; 2684 2660 } 2685 2661 ··· 2979 2955 list_for_each_entry_safe(prange, next, &p->svms.list, list) { 2980 2956 svm_range_unlink(prange); 2981 2957 svm_range_remove_notifier(prange); 2982 - svm_range_free(prange); 2958 + svm_range_free(prange, true); 2983 2959 } 2984 2960 2985 2961 mutex_destroy(&p->svms.lock); ··· 3395 3371 prange->last); 3396 3372 svm_range_unlink(prange); 3397 3373 svm_range_remove_notifier(prange); 3398 - svm_range_free(prange); 3374 + svm_range_free(prange, false); 3399 3375 } 3400 3376 3401 3377 mmap_write_downgrade(mm);