···491491 goto out;492492 }493493494494- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);495495- if (ret) {496496- pr_err("%s: Failed to get user pages: %d\n", __func__, ret);494494+ /* If no restore worker is running concurrently, user_pages495495+ * should not be allocated496496+ */497497+ WARN(mem->user_pages, "Leaking user_pages array");498498+499499+ mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,500500+ sizeof(struct page *),501501+ GFP_KERNEL | __GFP_ZERO);502502+ if (!mem->user_pages) {503503+ pr_err("%s: Failed to allocate pages array\n", __func__);504504+ ret = -ENOMEM;497505 goto unregister_out;498506 }507507+508508+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);509509+ if (ret) {510510+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);511511+ goto free_out;512512+ }513513+514514+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);499515500516 ret = amdgpu_bo_reserve(bo, true);501517 if (ret) {···525509 amdgpu_bo_unreserve(bo);526510527511release_out:528528- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);512512+ if (ret)513513+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);514514+free_out:515515+ kvfree(mem->user_pages);516516+ mem->user_pages = NULL;529517unregister_out:530518 if (ret)531519 amdgpu_mn_unregister(bo);···588568 ctx->kfd_bo.priority = 0;589569 ctx->kfd_bo.tv.bo = &bo->tbo;590570 ctx->kfd_bo.tv.num_shared = 1;571571+ ctx->kfd_bo.user_pages = NULL;591572 list_add(&ctx->kfd_bo.tv.head, &ctx->list);592573593574 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);···652631 ctx->kfd_bo.priority = 0;653632 ctx->kfd_bo.tv.bo = &bo->tbo;654633 ctx->kfd_bo.tv.num_shared = 1;634634+ ctx->kfd_bo.user_pages = NULL;655635 list_add(&ctx->kfd_bo.tv.head, &ctx->list);656636657637 i = 0;···12621240 list_del(&bo_list_entry->head);12631241 mutex_unlock(&process_info->lock);1264124212431243+ /* Free user pages if necessary */12441244+ if (mem->user_pages) {12451245+ pr_debug("%s: Freeing user_pages array\n", __func__);12461246+ if (mem->user_pages[0])12471247+ release_pages(mem->user_pages,12481248+ mem->bo->tbo.ttm->num_pages);12491249+ kvfree(mem->user_pages);12501250+ }12511251+12651252 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);12661253 if (unlikely(ret))12671254 return ret;···1744171317451714 bo = mem->bo;1746171517161716+ if (!mem->user_pages) {17171717+ mem->user_pages =17181718+ kvmalloc_array(bo->tbo.ttm->num_pages,17191719+ sizeof(struct page *),17201720+ GFP_KERNEL | __GFP_ZERO);17211721+ if (!mem->user_pages) {17221722+ pr_err("%s: Failed to allocate pages array\n",17231723+ __func__);17241724+ return -ENOMEM;17251725+ }17261726+ } else if (mem->user_pages[0]) {17271727+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);17281728+ }17291729+17471730 /* Get updated user pages */17481731 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,17491749- bo->tbo.ttm->pages);17321732+ mem->user_pages);17501733 if (ret) {17511751- bo->tbo.ttm->pages[0] = NULL;17341734+ mem->user_pages[0] = NULL;17521735 pr_info("%s: Failed to get user pages: %d\n",17531736 __func__, ret);17541737 /* Pretend it succeeded. It will fail later···17711726 * stalled user mode queues.17721727 */17731728 }17291729+17301730+ /* Mark the BO as valid unless it was invalidated17311731+ * again concurrently17321732+ */17331733+ if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)17341734+ return -EAGAIN;17741735 }1775173617761737 return 0;···18061755 GFP_KERNEL);18071756 if (!pd_bo_list_entries) {18081757 pr_err("%s: Failed to allocate PD BO list entries\n", __func__);18091809- ret = -ENOMEM;18101810- goto out_no_mem;17581758+ return -ENOMEM;18111759 }1812176018131761 INIT_LIST_HEAD(&resv_list);···18301780 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);18311781 WARN(!list_empty(&duplicates), "Duplicates should be empty");18321782 if (ret)18331833- goto out_free;17831783+ goto out;1834178418351785 amdgpu_sync_create(&sync);18361786···1846179618471797 bo = mem->bo;1848179818491849- /* Validate the BO if we got user pages */18501850- if (bo->tbo.ttm->pages[0]) {17991799+ /* Copy pages array and validate the BO if we got user pages */18001800+ if (mem->user_pages[0]) {18011801+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,18021802+ mem->user_pages);18511803 amdgpu_bo_placement_from_domain(bo, mem->domain);18521804 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);18531805 if (ret) {···18581806 }18591807 }1860180818091809+ /* Validate succeeded, now the BO owns the pages, free18101810+ * our copy of the pointer array. Put this BO back on18111811+ * the userptr_valid_list. If we need to revalidate18121812+ * it, we need to start from scratch.18131813+ */18141814+ kvfree(mem->user_pages);18151815+ mem->user_pages = NULL;18611816 list_move_tail(&mem->validate_list.head,18621817 &process_info->userptr_valid_list);18631863-18641864- /* Stop HMM track the userptr update. We dont check the return18651865- * value for concurrent CPU page table update because we will18661866- * reschedule the restore worker if process_info->evicted_bos18671867- * is updated.18681868- */18691869- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);1870181818711819 /* Update mapping. If the BO was not validated18721820 * (because we couldn't get user pages), this will···18971845 ttm_eu_backoff_reservation(&ticket, &resv_list);18981846 amdgpu_sync_wait(&sync, false);18991847 amdgpu_sync_free(&sync);19001900-out_free:18481848+out:19011849 kfree(pd_bo_list_entries);19021902-out_no_mem:19031903- list_for_each_entry_safe(mem, tmp_mem,19041904- &process_info->userptr_inval_list,19051905- validate_list.head) {19061906- bo = mem->bo;19071907- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);19081908- }1909185019101851 return ret;19111852}