Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "drm/amdgpu: replace get_user_pages with HMM mirror helpers"

This reverts commit 915d3eecfa23693bac9e54cdacf84fb4efdcc5c4.

The reverted commit depends on an HMM fix which is not upstream yet.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+281 -185
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 61 61 62 62 atomic_t invalid; 63 63 struct amdkfd_process_info *process_info; 64 + struct page **user_pages; 64 65 65 66 struct amdgpu_sync sync; 66 67
+71 -26
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 491 491 goto out; 492 492 } 493 493 494 - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); 495 - if (ret) { 496 - pr_err("%s: Failed to get user pages: %d\n", __func__, ret); 494 + /* If no restore worker is running concurrently, user_pages 495 + * should not be allocated 496 + */ 497 + WARN(mem->user_pages, "Leaking user_pages array"); 498 + 499 + mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, 500 + sizeof(struct page *), 501 + GFP_KERNEL | __GFP_ZERO); 502 + if (!mem->user_pages) { 503 + pr_err("%s: Failed to allocate pages array\n", __func__); 504 + ret = -ENOMEM; 497 505 goto unregister_out; 498 506 } 507 + 508 + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); 509 + if (ret) { 510 + pr_err("%s: Failed to get user pages: %d\n", __func__, ret); 511 + goto free_out; 512 + } 513 + 514 + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); 499 515 500 516 ret = amdgpu_bo_reserve(bo, true); 501 517 if (ret) { ··· 525 509 amdgpu_bo_unreserve(bo); 526 510 527 511 release_out: 528 - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 512 + if (ret) 513 + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); 514 + free_out: 515 + kvfree(mem->user_pages); 516 + mem->user_pages = NULL; 529 517 unregister_out: 530 518 if (ret) 531 519 amdgpu_mn_unregister(bo); ··· 588 568 ctx->kfd_bo.priority = 0; 589 569 ctx->kfd_bo.tv.bo = &bo->tbo; 590 570 ctx->kfd_bo.tv.num_shared = 1; 571 + ctx->kfd_bo.user_pages = NULL; 591 572 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 592 573 593 574 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); ··· 652 631 ctx->kfd_bo.priority = 0; 653 632 ctx->kfd_bo.tv.bo = &bo->tbo; 654 633 ctx->kfd_bo.tv.num_shared = 1; 634 + ctx->kfd_bo.user_pages = NULL; 655 635 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 656 636 657 637 i = 0; ··· 1262 1240 list_del(&bo_list_entry->head); 1263 1241 mutex_unlock(&process_info->lock); 1264 1242 1243 + /* Free user pages if necessary */ 1244 + if (mem->user_pages) 
{ 1245 + pr_debug("%s: Freeing user_pages array\n", __func__); 1246 + if (mem->user_pages[0]) 1247 + release_pages(mem->user_pages, 1248 + mem->bo->tbo.ttm->num_pages); 1249 + kvfree(mem->user_pages); 1250 + } 1251 + 1265 1252 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); 1266 1253 if (unlikely(ret)) 1267 1254 return ret; ··· 1744 1713 1745 1714 bo = mem->bo; 1746 1715 1716 + if (!mem->user_pages) { 1717 + mem->user_pages = 1718 + kvmalloc_array(bo->tbo.ttm->num_pages, 1719 + sizeof(struct page *), 1720 + GFP_KERNEL | __GFP_ZERO); 1721 + if (!mem->user_pages) { 1722 + pr_err("%s: Failed to allocate pages array\n", 1723 + __func__); 1724 + return -ENOMEM; 1725 + } 1726 + } else if (mem->user_pages[0]) { 1727 + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); 1728 + } 1729 + 1747 1730 /* Get updated user pages */ 1748 1731 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, 1749 - bo->tbo.ttm->pages); 1732 + mem->user_pages); 1750 1733 if (ret) { 1751 - bo->tbo.ttm->pages[0] = NULL; 1734 + mem->user_pages[0] = NULL; 1752 1735 pr_info("%s: Failed to get user pages: %d\n", 1753 1736 __func__, ret); 1754 1737 /* Pretend it succeeded. It will fail later ··· 1771 1726 * stalled user mode queues. 
1772 1727 */ 1773 1728 } 1729 + 1730 + /* Mark the BO as valid unless it was invalidated 1731 + * again concurrently 1732 + */ 1733 + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) 1734 + return -EAGAIN; 1774 1735 } 1775 1736 1776 1737 return 0; ··· 1806 1755 GFP_KERNEL); 1807 1756 if (!pd_bo_list_entries) { 1808 1757 pr_err("%s: Failed to allocate PD BO list entries\n", __func__); 1809 - ret = -ENOMEM; 1810 - goto out_no_mem; 1758 + return -ENOMEM; 1811 1759 } 1812 1760 1813 1761 INIT_LIST_HEAD(&resv_list); ··· 1830 1780 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); 1831 1781 WARN(!list_empty(&duplicates), "Duplicates should be empty"); 1832 1782 if (ret) 1833 - goto out_free; 1783 + goto out; 1834 1784 1835 1785 amdgpu_sync_create(&sync); 1836 1786 ··· 1846 1796 1847 1797 bo = mem->bo; 1848 1798 1849 - /* Validate the BO if we got user pages */ 1850 - if (bo->tbo.ttm->pages[0]) { 1799 + /* Copy pages array and validate the BO if we got user pages */ 1800 + if (mem->user_pages[0]) { 1801 + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, 1802 + mem->user_pages); 1851 1803 amdgpu_bo_placement_from_domain(bo, mem->domain); 1852 1804 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 1853 1805 if (ret) { ··· 1858 1806 } 1859 1807 } 1860 1808 1809 + /* Validate succeeded, now the BO owns the pages, free 1810 + * our copy of the pointer array. Put this BO back on 1811 + * the userptr_valid_list. If we need to revalidate 1812 + * it, we need to start from scratch. 1813 + */ 1814 + kvfree(mem->user_pages); 1815 + mem->user_pages = NULL; 1861 1816 list_move_tail(&mem->validate_list.head, 1862 1817 &process_info->userptr_valid_list); 1863 - 1864 - /* Stop HMM track the userptr update. We dont check the return 1865 - * value for concurrent CPU page table update because we will 1866 - * reschedule the restore worker if process_info->evicted_bos 1867 - * is updated. 
1868 - */ 1869 - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 1870 1818 1871 1819 /* Update mapping. If the BO was not validated 1872 1820 * (because we couldn't get user pages), this will ··· 1897 1845 ttm_eu_backoff_reservation(&ticket, &resv_list); 1898 1846 amdgpu_sync_wait(&sync, false); 1899 1847 amdgpu_sync_free(&sync); 1900 - out_free: 1848 + out: 1901 1849 kfree(pd_bo_list_entries); 1902 - out_no_mem: 1903 - list_for_each_entry_safe(mem, tmp_mem, 1904 - &process_info->userptr_inval_list, 1905 - validate_list.head) { 1906 - bo = mem->bo; 1907 - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 1908 - } 1909 1850 1910 1851 return ret; 1911 1852 }
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
··· 36 36 struct amdgpu_bo_va *bo_va; 37 37 uint32_t priority; 38 38 struct page **user_pages; 39 - bool user_invalidated; 39 + int user_invalidated; 40 40 }; 41 41 42 42 struct amdgpu_bo_list {
+88 -50
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 52 52 p->uf_entry.tv.bo = &bo->tbo; 53 53 /* One for TTM and one for the CS job */ 54 54 p->uf_entry.tv.num_shared = 2; 55 + p->uf_entry.user_pages = NULL; 55 56 56 57 drm_gem_object_put_unlocked(gobj); 57 58 ··· 540 539 if (usermm && usermm != current->mm) 541 540 return -EPERM; 542 541 543 - if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && 544 - lobj->user_invalidated && lobj->user_pages) { 542 + /* Check if we have user pages and nobody bound the BO already */ 543 + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && 544 + lobj->user_pages) { 545 545 amdgpu_bo_placement_from_domain(bo, 546 546 AMDGPU_GEM_DOMAIN_CPU); 547 547 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 548 548 if (r) 549 549 return r; 550 - 551 550 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, 552 551 lobj->user_pages); 553 552 binding_userptr = true; ··· 578 577 struct amdgpu_bo *gds; 579 578 struct amdgpu_bo *gws; 580 579 struct amdgpu_bo *oa; 580 + unsigned tries = 10; 581 581 int r; 582 582 583 583 INIT_LIST_HEAD(&p->validated); ··· 614 612 if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) 615 613 list_add(&p->uf_entry.tv.head, &p->validated); 616 614 617 - /* Get userptr backing pages. 
If pages are updated after registered 618 - * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do 619 - * amdgpu_ttm_backend_bind() to flush and invalidate new pages 620 - */ 621 - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 622 - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); 623 - bool userpage_invalidated = false; 624 - int i; 615 + while (1) { 616 + struct list_head need_pages; 625 617 626 - e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, 627 - sizeof(struct page *), 628 - GFP_KERNEL | __GFP_ZERO); 629 - if (!e->user_pages) { 630 - DRM_ERROR("calloc failure\n"); 631 - return -ENOMEM; 618 + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, 619 + &duplicates); 620 + if (unlikely(r != 0)) { 621 + if (r != -ERESTARTSYS) 622 + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); 623 + goto error_free_pages; 632 624 } 633 625 634 - r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); 635 - if (r) { 636 - kvfree(e->user_pages); 637 - e->user_pages = NULL; 638 - return r; 639 - } 626 + INIT_LIST_HEAD(&need_pages); 627 + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 628 + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); 640 629 641 - for (i = 0; i < bo->tbo.ttm->num_pages; i++) { 642 - if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { 643 - userpage_invalidated = true; 644 - break; 630 + if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, 631 + &e->user_invalidated) && e->user_pages) { 632 + 633 + /* We acquired a page array, but somebody 634 + * invalidated it. 
Free it and try again 635 + */ 636 + release_pages(e->user_pages, 637 + bo->tbo.ttm->num_pages); 638 + kvfree(e->user_pages); 639 + e->user_pages = NULL; 640 + } 641 + 642 + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && 643 + !e->user_pages) { 644 + list_del(&e->tv.head); 645 + list_add(&e->tv.head, &need_pages); 646 + 647 + amdgpu_bo_unreserve(bo); 645 648 } 646 649 } 647 - e->user_invalidated = userpage_invalidated; 648 - } 649 650 650 - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, 651 - &duplicates); 652 - if (unlikely(r != 0)) { 653 - if (r != -ERESTARTSYS) 654 - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); 655 - goto out; 651 + if (list_empty(&need_pages)) 652 + break; 653 + 654 + /* Unreserve everything again. */ 655 + ttm_eu_backoff_reservation(&p->ticket, &p->validated); 656 + 657 + /* We tried too many times, just abort */ 658 + if (!--tries) { 659 + r = -EDEADLK; 660 + DRM_ERROR("deadlock in %s\n", __func__); 661 + goto error_free_pages; 662 + } 663 + 664 + /* Fill the page arrays for all userptrs. */ 665 + list_for_each_entry(e, &need_pages, tv.head) { 666 + struct ttm_tt *ttm = e->tv.bo->ttm; 667 + 668 + e->user_pages = kvmalloc_array(ttm->num_pages, 669 + sizeof(struct page*), 670 + GFP_KERNEL | __GFP_ZERO); 671 + if (!e->user_pages) { 672 + r = -ENOMEM; 673 + DRM_ERROR("calloc failure in %s\n", __func__); 674 + goto error_free_pages; 675 + } 676 + 677 + r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); 678 + if (r) { 679 + DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); 680 + kvfree(e->user_pages); 681 + e->user_pages = NULL; 682 + goto error_free_pages; 683 + } 684 + } 685 + 686 + /* And try again. 
*/ 687 + list_splice(&need_pages, &p->validated); 656 688 } 657 689 658 690 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, ··· 755 719 error_validate: 756 720 if (r) 757 721 ttm_eu_backoff_reservation(&p->ticket, &p->validated); 758 - out: 722 + 723 + error_free_pages: 724 + 725 + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 726 + if (!e->user_pages) 727 + continue; 728 + 729 + release_pages(e->user_pages, e->tv.bo->ttm->num_pages); 730 + kvfree(e->user_pages); 731 + } 732 + 759 733 return r; 760 734 } 761 735 ··· 1224 1178 struct amdgpu_bo_list_entry *e; 1225 1179 struct amdgpu_job *job; 1226 1180 uint64_t seq; 1181 + 1227 1182 int r; 1228 1183 1229 1184 job = p->job; ··· 1234 1187 if (r) 1235 1188 goto error_unlock; 1236 1189 1237 - /* No memory allocation is allowed while holding the mn lock. 1238 - * p->mn is hold until amdgpu_cs_submit is finished and fence is added 1239 - * to BOs. 1240 - */ 1190 + /* No memory allocation is allowed while holding the mn lock */ 1241 1191 amdgpu_mn_lock(p->mn); 1242 - 1243 - /* If userptr are invalidated after amdgpu_cs_parser_bos(), return 1244 - * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. 1245 - */ 1246 1192 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 1247 1193 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); 1248 1194 1249 - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 1250 - } 1251 - if (r) { 1252 - r = -EAGAIN; 1253 - goto error_abort; 1195 + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { 1196 + r = -ERESTARTSYS; 1197 + goto error_abort; 1198 + } 1254 1199 } 1255 1200 1256 1201 job->owner = p->filp; ··· 1338 1299 1339 1300 out: 1340 1301 amdgpu_cs_parser_fini(&parser, r, reserved_buffers); 1341 - 1342 1302 return r; 1343 1303 } 1344 1304
+8 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
··· 329 329 330 330 r = amdgpu_bo_reserve(bo, true); 331 331 if (r) 332 - goto user_pages_done; 332 + goto free_pages; 333 333 334 334 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); 335 335 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 336 336 amdgpu_bo_unreserve(bo); 337 337 if (r) 338 - goto user_pages_done; 338 + goto free_pages; 339 339 } 340 340 341 341 r = drm_gem_handle_create(filp, gobj, &handle); 342 + /* drop reference from allocate - handle holds it now */ 343 + drm_gem_object_put_unlocked(gobj); 342 344 if (r) 343 - goto user_pages_done; 345 + return r; 344 346 345 347 args->handle = handle; 348 + return 0; 346 349 347 - user_pages_done: 348 - if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) 349 - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 350 + free_pages: 351 + release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); 350 352 351 353 release_object: 352 354 drm_gem_object_put_unlocked(gobj);
+2 -23
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
··· 220 220 true, false, MAX_SCHEDULE_TIMEOUT); 221 221 if (r <= 0) 222 222 DRM_ERROR("(%ld) failed to wait for user bo\n", r); 223 + 224 + amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); 223 225 } 224 226 } 225 227 ··· 502 500 mutex_unlock(&adev->mn_lock); 503 501 } 504 502 505 - /* flags used by HMM internal, not related to CPU/GPU PTE flags */ 506 - static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { 507 - (1 << 0), /* HMM_PFN_VALID */ 508 - (1 << 1), /* HMM_PFN_WRITE */ 509 - 0 /* HMM_PFN_DEVICE_PRIVATE */ 510 - }; 511 - 512 - static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { 513 - 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ 514 - 0, /* HMM_PFN_NONE */ 515 - 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ 516 - }; 517 - 518 - void amdgpu_hmm_init_range(struct hmm_range *range) 519 - { 520 - if (range) { 521 - range->flags = hmm_range_flags; 522 - range->values = hmm_range_values; 523 - range->pfn_shift = PAGE_SHIFT; 524 - range->pfns = NULL; 525 - INIT_LIST_HEAD(&range->list); 526 - } 527 - }
+1 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
··· 25 25 #define __AMDGPU_MN_H__ 26 26 27 27 /* 28 - * HMM mirror 28 + * MMU Notifier 29 29 */ 30 30 struct amdgpu_mn; 31 - struct hmm_range; 32 31 33 32 enum amdgpu_mn_type { 34 33 AMDGPU_MN_TYPE_GFX, ··· 41 42 enum amdgpu_mn_type type); 42 43 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); 43 44 void amdgpu_mn_unregister(struct amdgpu_bo *bo); 44 - void amdgpu_hmm_init_range(struct hmm_range *range); 45 45 #else 46 46 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} 47 47 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
+108 -74
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
··· 43 43 #include <linux/pagemap.h> 44 44 #include <linux/debugfs.h> 45 45 #include <linux/iommu.h> 46 - #include <linux/hmm.h> 47 46 #include "amdgpu.h" 48 47 #include "amdgpu_object.h" 49 48 #include "amdgpu_trace.h" ··· 703 704 /* 704 705 * TTM backend functions. 705 706 */ 707 + struct amdgpu_ttm_gup_task_list { 708 + struct list_head list; 709 + struct task_struct *task; 710 + }; 711 + 706 712 struct amdgpu_ttm_tt { 707 713 struct ttm_dma_tt ttm; 708 714 u64 offset; 709 715 uint64_t userptr; 710 716 struct task_struct *usertask; 711 717 uint32_t userflags; 712 - struct hmm_range range; 718 + spinlock_t guptasklock; 719 + struct list_head guptasks; 720 + atomic_t mmu_invalidations; 721 + uint32_t last_set_pages; 713 722 }; 714 723 715 724 /** 716 - * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user 717 - * memory and start HMM tracking CPU page table update 725 + * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR 726 + * pointer to memory 718 727 * 719 - * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only 720 - * once afterwards to stop HMM tracking 728 + * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). 729 + * This provides a wrapper around the get_user_pages() call to provide 730 + * device accessible pages that back user memory. 
721 731 */ 722 732 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) 723 733 { 724 734 struct amdgpu_ttm_tt *gtt = (void *)ttm; 725 735 struct mm_struct *mm = gtt->usertask->mm; 726 - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; 727 - struct hmm_range *range = &gtt->range; 728 - int r = 0, i; 736 + unsigned int flags = 0; 737 + unsigned pinned = 0; 738 + int r; 729 739 730 740 if (!mm) /* Happens during process shutdown */ 731 741 return -ESRCH; 732 742 733 - amdgpu_hmm_init_range(range); 743 + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) 744 + flags |= FOLL_WRITE; 734 745 735 746 down_read(&mm->mmap_sem); 736 747 737 - range->vma = find_vma(mm, gtt->userptr); 738 - if (!range_in_vma(range->vma, gtt->userptr, end)) 739 - r = -EFAULT; 740 - else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && 741 - range->vma->vm_file) 742 - r = -EPERM; 743 - if (r) 744 - goto out; 748 + if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { 749 + /* 750 + * check that we only use anonymous memory to prevent problems 751 + * with writeback 752 + */ 753 + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; 754 + struct vm_area_struct *vma; 745 755 746 - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), 747 - GFP_KERNEL); 748 - if (range->pfns == NULL) { 749 - r = -ENOMEM; 750 - goto out; 756 + vma = find_vma(mm, gtt->userptr); 757 + if (!vma || vma->vm_file || vma->vm_end < end) { 758 + up_read(&mm->mmap_sem); 759 + return -EPERM; 760 + } 751 761 } 752 - range->start = gtt->userptr; 753 - range->end = end; 754 762 755 - range->pfns[0] = range->flags[HMM_PFN_VALID]; 756 - range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? 
757 - 0 : range->flags[HMM_PFN_WRITE]; 758 - for (i = 1; i < ttm->num_pages; i++) 759 - range->pfns[i] = range->pfns[0]; 763 + /* loop enough times using contiguous pages of memory */ 764 + do { 765 + unsigned num_pages = ttm->num_pages - pinned; 766 + uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; 767 + struct page **p = pages + pinned; 768 + struct amdgpu_ttm_gup_task_list guptask; 760 769 761 - /* This may trigger page table update */ 762 - r = hmm_vma_fault(range, true); 763 - if (r) 764 - goto out_free_pfns; 770 + guptask.task = current; 771 + spin_lock(&gtt->guptasklock); 772 + list_add(&guptask.list, &gtt->guptasks); 773 + spin_unlock(&gtt->guptasklock); 774 + 775 + if (mm == current->mm) 776 + r = get_user_pages(userptr, num_pages, flags, p, NULL); 777 + else 778 + r = get_user_pages_remote(gtt->usertask, 779 + mm, userptr, num_pages, 780 + flags, p, NULL, NULL); 781 + 782 + spin_lock(&gtt->guptasklock); 783 + list_del(&guptask.list); 784 + spin_unlock(&gtt->guptasklock); 785 + 786 + if (r < 0) 787 + goto release_pages; 788 + 789 + pinned += r; 790 + 791 + } while (pinned < ttm->num_pages); 765 792 766 793 up_read(&mm->mmap_sem); 767 - 768 - for (i = 0; i < ttm->num_pages; i++) 769 - pages[i] = hmm_pfn_to_page(range, range->pfns[i]); 770 - 771 794 return 0; 772 795 773 - out_free_pfns: 774 - kvfree(range->pfns); 775 - range->pfns = NULL; 776 - out: 796 + release_pages: 797 + release_pages(pages, pinned); 777 798 up_read(&mm->mmap_sem); 778 - return r; 779 - } 780 - 781 - /** 782 - * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change 783 - * Check if the pages backing this ttm range have been invalidated 784 - * 785 - * Returns: true if pages are still valid 786 - */ 787 - bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) 788 - { 789 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 790 - bool r = false; 791 - 792 - if (!gtt || !gtt->userptr) 793 - return false; 794 - 795 - WARN_ONCE(!gtt->range.pfns, "No user pages to 
check\n"); 796 - if (gtt->range.pfns) { 797 - r = hmm_vma_range_done(&gtt->range); 798 - kvfree(gtt->range.pfns); 799 - gtt->range.pfns = NULL; 800 - } 801 - 802 799 return r; 803 800 } 804 801 ··· 807 812 */ 808 813 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) 809 814 { 815 + struct amdgpu_ttm_tt *gtt = (void *)ttm; 810 816 unsigned i; 811 817 812 - for (i = 0; i < ttm->num_pages; ++i) 818 + gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations); 819 + for (i = 0; i < ttm->num_pages; ++i) { 820 + if (ttm->pages[i]) 821 + put_page(ttm->pages[i]); 822 + 813 823 ttm->pages[i] = pages ? pages[i] : NULL; 824 + } 814 825 } 815 826 816 827 /** ··· 901 900 /* unmap the pages mapped to the device */ 902 901 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 903 902 904 - sg_free_table(ttm->sg); 903 + /* mark the pages as dirty */ 904 + amdgpu_ttm_tt_mark_user_pages(ttm); 905 905 906 - if (gtt->range.pfns && 907 - ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0])) 908 - WARN_ONCE(1, "Missing get_user_page_done\n"); 906 + sg_free_table(ttm->sg); 909 907 } 910 908 911 909 int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, ··· 1254 1254 gtt->usertask = current->group_leader; 1255 1255 get_task_struct(gtt->usertask); 1256 1256 1257 + spin_lock_init(&gtt->guptasklock); 1258 + INIT_LIST_HEAD(&gtt->guptasks); 1259 + atomic_set(&gtt->mmu_invalidations, 0); 1260 + gtt->last_set_pages = 0; 1261 + 1257 1262 return 0; 1258 1263 } 1259 1264 ··· 1287 1282 unsigned long end) 1288 1283 { 1289 1284 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1285 + struct amdgpu_ttm_gup_task_list *entry; 1290 1286 unsigned long size; 1291 1287 1292 1288 if (gtt == NULL || !gtt->userptr) ··· 1300 1294 if (gtt->userptr > end || gtt->userptr + size <= start) 1301 1295 return false; 1302 1296 1297 + /* Search the lists of tasks that hold this mapping and see 1298 + * if current is one of them. If it is return false. 
1299 + */ 1300 + spin_lock(&gtt->guptasklock); 1301 + list_for_each_entry(entry, &gtt->guptasks, list) { 1302 + if (entry->task == current) { 1303 + spin_unlock(&gtt->guptasklock); 1304 + return false; 1305 + } 1306 + } 1307 + spin_unlock(&gtt->guptasklock); 1308 + 1309 + atomic_inc(&gtt->mmu_invalidations); 1310 + 1303 1311 return true; 1304 1312 } 1305 1313 1306 1314 /** 1307 - * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? 1315 + * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? 1308 1316 */ 1309 - bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) 1317 + bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, 1318 + int *last_invalidated) 1319 + { 1320 + struct amdgpu_ttm_tt *gtt = (void *)ttm; 1321 + int prev_invalidated = *last_invalidated; 1322 + 1323 + *last_invalidated = atomic_read(&gtt->mmu_invalidations); 1324 + return prev_invalidated != *last_invalidated; 1325 + } 1326 + 1327 + /** 1328 + * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object 1329 + * been invalidated since the last time they've been set? 1330 + */ 1331 + bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) 1310 1332 { 1311 1333 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1312 1334 1313 1335 if (gtt == NULL || !gtt->userptr) 1314 1336 return false; 1315 1337 1316 - return true; 1338 + return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; 1317 1339 } 1318 1340 1319 1341 /**
+1 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
··· 102 102 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); 103 103 104 104 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); 105 - bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); 106 105 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); 107 106 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); 108 107 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, ··· 112 113 unsigned long end); 113 114 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, 114 115 int *last_invalidated); 115 - bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); 116 + bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); 116 117 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); 117 118 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); 118 119 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,