Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Accounting pdd vram_usage for svm

Process device data pdd->vram_usage is read by rocm-smi via sysfs, this
is currently missing the svm_bo usage accounting, so "rocm-smi
--showpids" per process VRAM usage report is incorrect.

Add pdd->vram_usage accounting when an svm_bo is allocated and released,
and change the field to atomic64_t type because it is now updated outside
the process mutex.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Philip Yang, committed by Alex Deucher
commits: 98c0b0ef bfaf1883

+32 -6
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1148,7 +1148,7 @@

 		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
 			size >>= 1;
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
 	}

 	mutex_unlock(&p->mutex);
@@ -1219,7 +1219,7 @@
 	kfd_process_device_remove_obj_handle(
 		pdd, GET_IDR_HANDLE(args->handle));

-	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+	atomic64_sub(size, &pdd->vram_usage);

 err_unlock:
 err_pdd:
@@ -2347,7 +2347,7 @@
 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		bo_bucket->restored_offset = offset;
 		/* Update the VRAM usage count */
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+		atomic64_add(bo_bucket->size, &pdd->vram_usage);
 	}
 	return 0;
 }
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -775,7 +775,7 @@
 	enum kfd_pdd_bound bound;

 	/* VRAM usage */
-	uint64_t vram_usage;
+	atomic64_t vram_usage;
 	struct attribute attr_vram;
 	char vram_filename[MAX_SYSFS_FILENAME_LEN];
+2 -2
drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -332,7 +332,7 @@
 	} else if (strncmp(attr->name, "vram_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_vram);
-		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+		return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
 	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_sdma);
@@ -1625,7 +1625,7 @@
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
 	pdd->runtime_inuse = false;
-	pdd->vram_usage = 0;
+	atomic64_set(&pdd->vram_usage, 0);
 	pdd->sdma_past_activity_counter = 0;
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
+26
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -405,6 +405,27 @@
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
+
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		struct kfd_process_device *pdd;
+		struct kfd_process *p;
+		struct mm_struct *mm;
+
+		mm = svm_bo->eviction_fence->mm;
+		/*
+		 * The forked child process takes svm_bo device pages ref, svm_bo could be
+		 * released after parent process is gone.
+		 */
+		p = kfd_lookup_process_by_mm(mm);
+		if (p) {
+			pdd = kfd_get_process_device_data(svm_bo->node, p);
+			if (pdd)
+				atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+			kfd_unref_process(p);
+		}
+		mmput(mm);
+	}
+
 	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
 		/* We're not in the eviction worker. Signal the fence. */
 		dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -532,6 +553,7 @@
 svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 			bool clear)
 {
+	struct kfd_process_device *pdd;
 	struct amdgpu_bo_param bp;
 	struct svm_range_bo *svm_bo;
 	struct amdgpu_bo_user *ubo;
@@ -622,6 +644,10 @@
 	spin_lock(&svm_bo->list_lock);
 	list_add(&prange->svm_bo_list, &svm_bo->range_list);
 	spin_unlock(&svm_bo->list_lock);
+
+	pdd = svm_range_get_pdd_by_node(prange, node);
+	if (pdd)
+		atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);

 	return 0;