Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: replace get_user_pages with HMM mirror helpers

Use the HMM helper function hmm_vma_fault() to get the physical pages
backing a userptr and start tracking CPU page table updates of those
pages. Then use hmm_vma_range_done() to check whether those pages were
updated before amdgpu_cs_submit for gfx, or before user queues are
resumed for kfd.

If the userptr pages are updated, then for gfx amdgpu_cs_ioctl will
restart from scratch, and for kfd the restore worker is rescheduled to
retry.

HMM simplifies the concurrent CPU page table update check, so remove
the guptasklock, mmu_invalidations, and last_set_pages fields from the
amdgpu_ttm_tt struct.

HMM does not pin pages (i.e. does not increase the page ref count), so
remove the related operations such as release_pages(), put_page(), and
mark_page_dirty().

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Philip Yang and committed by
Alex Deucher
899fbde1 89cd9d23

+183 -279
-1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 61 61 62 62 atomic_t invalid; 63 63 struct amdkfd_process_info *process_info; 64 - struct page **user_pages; 65 64 66 65 struct amdgpu_sync sync; 67 66
+25 -70
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 492 492 goto out; 493 493 } 494 494 495 - /* If no restore worker is running concurrently, user_pages 496 - * should not be allocated 497 - */ 498 - WARN(mem->user_pages, "Leaking user_pages array"); 499 - 500 - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, 501 - sizeof(struct page *), 502 - GFP_KERNEL | __GFP_ZERO); 503 - if (!mem->user_pages) { 504 - pr_err("%s: Failed to allocate pages array\n", __func__); 505 - ret = -ENOMEM; 506 - goto unregister_out; 507 - } 508 - 509 - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); 495 + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); 510 496 if (ret) { 511 497 pr_err("%s: Failed to get user pages: %d\n", __func__, ret); 512 - goto free_out; 498 + goto unregister_out; 513 499 } 514 - 515 - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); 516 500 517 501 ret = amdgpu_bo_reserve(bo, true); 518 502 if (ret) { ··· 510 526 amdgpu_bo_unreserve(bo); 511 527 512 528 release_out: 513 - if (ret) 514 - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); 515 - free_out: 516 - kvfree(mem->user_pages); 517 - mem->user_pages = NULL; 529 + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 518 530 unregister_out: 519 531 if (ret) 520 532 amdgpu_mn_unregister(bo); ··· 569 589 ctx->kfd_bo.priority = 0; 570 590 ctx->kfd_bo.tv.bo = &bo->tbo; 571 591 ctx->kfd_bo.tv.num_shared = 1; 572 - ctx->kfd_bo.user_pages = NULL; 573 592 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 574 593 575 594 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); ··· 632 653 ctx->kfd_bo.priority = 0; 633 654 ctx->kfd_bo.tv.bo = &bo->tbo; 634 655 ctx->kfd_bo.tv.num_shared = 1; 635 - ctx->kfd_bo.user_pages = NULL; 636 656 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 637 657 638 658 i = 0; ··· 1246 1268 list_del(&bo_list_entry->head); 1247 1269 mutex_unlock(&process_info->lock); 1248 1270 1249 - /* Free user pages if necessary */ 1250 - if (mem->user_pages) { 1251 - pr_debug("%s: Freeing user_pages array\n", 
__func__); 1252 - if (mem->user_pages[0]) 1253 - release_pages(mem->user_pages, 1254 - mem->bo->tbo.ttm->num_pages); 1255 - kvfree(mem->user_pages); 1256 - } 1257 - 1258 1271 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); 1259 1272 if (unlikely(ret)) 1260 1273 return ret; ··· 1719 1750 1720 1751 bo = mem->bo; 1721 1752 1722 - if (!mem->user_pages) { 1723 - mem->user_pages = 1724 - kvmalloc_array(bo->tbo.ttm->num_pages, 1725 - sizeof(struct page *), 1726 - GFP_KERNEL | __GFP_ZERO); 1727 - if (!mem->user_pages) { 1728 - pr_err("%s: Failed to allocate pages array\n", 1729 - __func__); 1730 - return -ENOMEM; 1731 - } 1732 - } else if (mem->user_pages[0]) { 1733 - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); 1734 - } 1735 - 1736 1753 /* Get updated user pages */ 1737 1754 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, 1738 - mem->user_pages); 1755 + bo->tbo.ttm->pages); 1739 1756 if (ret) { 1740 - mem->user_pages[0] = NULL; 1757 + bo->tbo.ttm->pages[0] = NULL; 1741 1758 pr_info("%s: Failed to get user pages: %d\n", 1742 1759 __func__, ret); 1743 1760 /* Pretend it succeeded. It will fail later ··· 1732 1777 * stalled user mode queues. 
1733 1778 */ 1734 1779 } 1735 - 1736 - /* Mark the BO as valid unless it was invalidated 1737 - * again concurrently 1738 - */ 1739 - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) 1740 - return -EAGAIN; 1741 1780 } 1742 1781 1743 1782 return 0; ··· 1761 1812 GFP_KERNEL); 1762 1813 if (!pd_bo_list_entries) { 1763 1814 pr_err("%s: Failed to allocate PD BO list entries\n", __func__); 1764 - return -ENOMEM; 1815 + ret = -ENOMEM; 1816 + goto out_no_mem; 1765 1817 } 1766 1818 1767 1819 INIT_LIST_HEAD(&resv_list); ··· 1786 1836 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); 1787 1837 WARN(!list_empty(&duplicates), "Duplicates should be empty"); 1788 1838 if (ret) 1789 - goto out; 1839 + goto out_free; 1790 1840 1791 1841 amdgpu_sync_create(&sync); 1792 1842 ··· 1802 1852 1803 1853 bo = mem->bo; 1804 1854 1805 - /* Copy pages array and validate the BO if we got user pages */ 1806 - if (mem->user_pages[0]) { 1807 - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, 1808 - mem->user_pages); 1855 + /* Validate the BO if we got user pages */ 1856 + if (bo->tbo.ttm->pages[0]) { 1809 1857 amdgpu_bo_placement_from_domain(bo, mem->domain); 1810 1858 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 1811 1859 if (ret) { ··· 1812 1864 } 1813 1865 } 1814 1866 1815 - /* Validate succeeded, now the BO owns the pages, free 1816 - * our copy of the pointer array. Put this BO back on 1817 - * the userptr_valid_list. If we need to revalidate 1818 - * it, we need to start from scratch. 1819 - */ 1820 - kvfree(mem->user_pages); 1821 - mem->user_pages = NULL; 1822 1867 list_move_tail(&mem->validate_list.head, 1823 1868 &process_info->userptr_valid_list); 1869 + 1870 + /* Stop HMM track the userptr update. We dont check the return 1871 + * value for concurrent CPU page table update because we will 1872 + * reschedule the restore worker if process_info->evicted_bos 1873 + * is updated. 
1874 + */ 1875 + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 1824 1876 1825 1877 /* Update mapping. If the BO was not validated 1826 1878 * (because we couldn't get user pages), this will ··· 1851 1903 ttm_eu_backoff_reservation(&ticket, &resv_list); 1852 1904 amdgpu_sync_wait(&sync, false); 1853 1905 amdgpu_sync_free(&sync); 1854 - out: 1906 + out_free: 1855 1907 kfree(pd_bo_list_entries); 1908 + out_no_mem: 1909 + list_for_each_entry_safe(mem, tmp_mem, 1910 + &process_info->userptr_inval_list, 1911 + validate_list.head) { 1912 + bo = mem->bo; 1913 + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 1914 + } 1856 1915 1857 1916 return ret; 1858 1917 }
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
··· 36 36 struct amdgpu_bo_va *bo_va; 37 37 uint32_t priority; 38 38 struct page **user_pages; 39 - int user_invalidated; 39 + bool user_invalidated; 40 40 }; 41 41 42 42 struct amdgpu_bo_list {
+49 -87
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 52 52 p->uf_entry.tv.bo = &bo->tbo; 53 53 /* One for TTM and one for the CS job */ 54 54 p->uf_entry.tv.num_shared = 2; 55 - p->uf_entry.user_pages = NULL; 56 55 57 56 drm_gem_object_put_unlocked(gobj); 58 57 ··· 541 542 if (usermm && usermm != current->mm) 542 543 return -EPERM; 543 544 544 - /* Check if we have user pages and nobody bound the BO already */ 545 - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && 546 - lobj->user_pages) { 545 + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && 546 + lobj->user_invalidated && lobj->user_pages) { 547 547 amdgpu_bo_placement_from_domain(bo, 548 548 AMDGPU_GEM_DOMAIN_CPU); 549 549 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 550 550 if (r) 551 551 return r; 552 + 552 553 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, 553 554 lobj->user_pages); 554 555 binding_userptr = true; ··· 579 580 struct amdgpu_bo *gds; 580 581 struct amdgpu_bo *gws; 581 582 struct amdgpu_bo *oa; 582 - unsigned tries = 10; 583 583 int r; 584 584 585 585 INIT_LIST_HEAD(&p->validated); ··· 614 616 if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) 615 617 list_add(&p->uf_entry.tv.head, &p->validated); 616 618 617 - while (1) { 618 - struct list_head need_pages; 619 + /* Get userptr backing pages. 
If pages are updated after registered 620 + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do 621 + * amdgpu_ttm_backend_bind() to flush and invalidate new pages 622 + */ 623 + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 624 + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); 625 + bool userpage_invalidated = false; 626 + int i; 619 627 620 - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, 621 - &duplicates); 622 - if (unlikely(r != 0)) { 623 - if (r != -ERESTARTSYS) 624 - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); 625 - goto error_free_pages; 628 + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, 629 + sizeof(struct page *), 630 + GFP_KERNEL | __GFP_ZERO); 631 + if (!e->user_pages) { 632 + DRM_ERROR("calloc failure\n"); 633 + return -ENOMEM; 626 634 } 627 635 628 - INIT_LIST_HEAD(&need_pages); 629 - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 630 - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); 631 - 632 - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, 633 - &e->user_invalidated) && e->user_pages) { 634 - 635 - /* We acquired a page array, but somebody 636 - * invalidated it. Free it and try again 637 - */ 638 - release_pages(e->user_pages, 639 - bo->tbo.ttm->num_pages); 640 - kvfree(e->user_pages); 641 - e->user_pages = NULL; 642 - } 643 - 644 - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && 645 - !e->user_pages) { 646 - list_del(&e->tv.head); 647 - list_add(&e->tv.head, &need_pages); 648 - 649 - amdgpu_bo_unreserve(bo); 650 - } 636 + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); 637 + if (r) { 638 + kvfree(e->user_pages); 639 + e->user_pages = NULL; 640 + return r; 651 641 } 652 642 653 - if (list_empty(&need_pages)) 654 - break; 655 - 656 - /* Unreserve everything again. 
*/ 657 - ttm_eu_backoff_reservation(&p->ticket, &p->validated); 658 - 659 - /* We tried too many times, just abort */ 660 - if (!--tries) { 661 - r = -EDEADLK; 662 - DRM_ERROR("deadlock in %s\n", __func__); 663 - goto error_free_pages; 664 - } 665 - 666 - /* Fill the page arrays for all userptrs. */ 667 - list_for_each_entry(e, &need_pages, tv.head) { 668 - struct ttm_tt *ttm = e->tv.bo->ttm; 669 - 670 - e->user_pages = kvmalloc_array(ttm->num_pages, 671 - sizeof(struct page*), 672 - GFP_KERNEL | __GFP_ZERO); 673 - if (!e->user_pages) { 674 - r = -ENOMEM; 675 - DRM_ERROR("calloc failure in %s\n", __func__); 676 - goto error_free_pages; 677 - } 678 - 679 - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); 680 - if (r) { 681 - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); 682 - kvfree(e->user_pages); 683 - e->user_pages = NULL; 684 - goto error_free_pages; 643 + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { 644 + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { 645 + userpage_invalidated = true; 646 + break; 685 647 } 686 648 } 649 + e->user_invalidated = userpage_invalidated; 650 + } 687 651 688 - /* And try again. 
*/ 689 - list_splice(&need_pages, &p->validated); 652 + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, 653 + &duplicates); 654 + if (unlikely(r != 0)) { 655 + if (r != -ERESTARTSYS) 656 + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); 657 + goto out; 690 658 } 691 659 692 660 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, ··· 721 757 error_validate: 722 758 if (r) 723 759 ttm_eu_backoff_reservation(&p->ticket, &p->validated); 724 - 725 - error_free_pages: 726 - 727 - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 728 - if (!e->user_pages) 729 - continue; 730 - 731 - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); 732 - kvfree(e->user_pages); 733 - } 734 - 760 + out: 735 761 return r; 736 762 } 737 763 ··· 1280 1326 struct amdgpu_bo_list_entry *e; 1281 1327 struct amdgpu_job *job; 1282 1328 uint64_t seq; 1283 - 1284 1329 int r; 1285 1330 1286 1331 job = p->job; ··· 1289 1336 if (r) 1290 1337 goto error_unlock; 1291 1338 1292 - /* No memory allocation is allowed while holding the mn lock */ 1339 + /* No memory allocation is allowed while holding the mn lock. 1340 + * p->mn is hold until amdgpu_cs_submit is finished and fence is added 1341 + * to BOs. 1342 + */ 1293 1343 amdgpu_mn_lock(p->mn); 1344 + 1345 + /* If userptr are invalidated after amdgpu_cs_parser_bos(), return 1346 + * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. 
1347 + */ 1294 1348 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 1295 1349 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); 1296 1350 1297 - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { 1298 - r = -ERESTARTSYS; 1299 - goto error_abort; 1300 - } 1351 + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 1352 + } 1353 + if (r) { 1354 + r = -EAGAIN; 1355 + goto error_abort; 1301 1356 } 1302 1357 1303 1358 job->owner = p->filp; ··· 1401 1440 1402 1441 out: 1403 1442 amdgpu_cs_parser_fini(&parser, r, reserved_buffers); 1443 + 1404 1444 return r; 1405 1445 } 1406 1446
+6 -8
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
··· 330 330 331 331 r = amdgpu_bo_reserve(bo, true); 332 332 if (r) 333 - goto free_pages; 333 + goto user_pages_done; 334 334 335 335 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); 336 336 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 337 337 amdgpu_bo_unreserve(bo); 338 338 if (r) 339 - goto free_pages; 339 + goto user_pages_done; 340 340 } 341 341 342 342 r = drm_gem_handle_create(filp, gobj, &handle); 343 - /* drop reference from allocate - handle holds it now */ 344 - drm_gem_object_put_unlocked(gobj); 345 343 if (r) 346 - return r; 344 + goto user_pages_done; 347 345 348 346 args->handle = handle; 349 - return 0; 350 347 351 - free_pages: 352 - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); 348 + user_pages_done: 349 + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) 350 + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); 353 351 354 352 release_object: 355 353 drm_gem_object_put_unlocked(gobj);
+23 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
··· 220 220 true, false, MAX_SCHEDULE_TIMEOUT); 221 221 if (r <= 0) 222 222 DRM_ERROR("(%ld) failed to wait for user bo\n", r); 223 - 224 - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); 225 223 } 226 224 } 227 225 ··· 500 502 mutex_unlock(&adev->mn_lock); 501 503 } 502 504 505 + /* flags used by HMM internal, not related to CPU/GPU PTE flags */ 506 + static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { 507 + (1 << 0), /* HMM_PFN_VALID */ 508 + (1 << 1), /* HMM_PFN_WRITE */ 509 + 0 /* HMM_PFN_DEVICE_PRIVATE */ 510 + }; 511 + 512 + static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { 513 + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ 514 + 0, /* HMM_PFN_NONE */ 515 + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ 516 + }; 517 + 518 + void amdgpu_hmm_init_range(struct hmm_range *range) 519 + { 520 + if (range) { 521 + range->flags = hmm_range_flags; 522 + range->values = hmm_range_values; 523 + range->pfn_shift = PAGE_SHIFT; 524 + range->pfns = NULL; 525 + INIT_LIST_HEAD(&range->list); 526 + } 527 + }
+3 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
··· 25 25 #define __AMDGPU_MN_H__ 26 26 27 27 /* 28 - * MMU Notifier 28 + * HMM mirror 29 29 */ 30 30 struct amdgpu_mn; 31 + struct hmm_range; 31 32 32 33 enum amdgpu_mn_type { 33 34 AMDGPU_MN_TYPE_GFX, ··· 42 41 enum amdgpu_mn_type type); 43 42 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); 44 43 void amdgpu_mn_unregister(struct amdgpu_bo *bo); 44 + void amdgpu_hmm_init_range(struct hmm_range *range); 45 45 #else 46 46 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} 47 47 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
+74 -108
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
··· 43 43 #include <linux/pagemap.h> 44 44 #include <linux/debugfs.h> 45 45 #include <linux/iommu.h> 46 + #include <linux/hmm.h> 46 47 #include "amdgpu.h" 47 48 #include "amdgpu_object.h" 48 49 #include "amdgpu_trace.h" ··· 704 703 /* 705 704 * TTM backend functions. 706 705 */ 707 - struct amdgpu_ttm_gup_task_list { 708 - struct list_head list; 709 - struct task_struct *task; 710 - }; 711 - 712 706 struct amdgpu_ttm_tt { 713 707 struct ttm_dma_tt ttm; 714 708 u64 offset; 715 709 uint64_t userptr; 716 710 struct task_struct *usertask; 717 711 uint32_t userflags; 718 - spinlock_t guptasklock; 719 - struct list_head guptasks; 720 - atomic_t mmu_invalidations; 721 - uint32_t last_set_pages; 712 + struct hmm_range range; 722 713 }; 723 714 724 715 /** 725 - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR 726 - * pointer to memory 716 + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user 717 + * memory and start HMM tracking CPU page table update 727 718 * 728 - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). 729 - * This provides a wrapper around the get_user_pages() call to provide 730 - * device accessible pages that back user memory. 
719 + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only 720 + * once afterwards to stop HMM tracking 731 721 */ 732 722 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) 733 723 { 734 724 struct amdgpu_ttm_tt *gtt = (void *)ttm; 735 725 struct mm_struct *mm = gtt->usertask->mm; 736 - unsigned int flags = 0; 737 - unsigned pinned = 0; 738 - int r; 726 + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; 727 + struct hmm_range *range = &gtt->range; 728 + int r = 0, i; 739 729 740 730 if (!mm) /* Happens during process shutdown */ 741 731 return -ESRCH; 742 732 743 - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) 744 - flags |= FOLL_WRITE; 733 + amdgpu_hmm_init_range(range); 745 734 746 735 down_read(&mm->mmap_sem); 747 736 748 - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { 749 - /* 750 - * check that we only use anonymous memory to prevent problems 751 - * with writeback 752 - */ 753 - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; 754 - struct vm_area_struct *vma; 737 + range->vma = find_vma(mm, gtt->userptr); 738 + if (!range_in_vma(range->vma, gtt->userptr, end)) 739 + r = -EFAULT; 740 + else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && 741 + range->vma->vm_file) 742 + r = -EPERM; 743 + if (r) 744 + goto out; 755 745 756 - vma = find_vma(mm, gtt->userptr); 757 - if (!vma || vma->vm_file || vma->vm_end < end) { 758 - up_read(&mm->mmap_sem); 759 - return -EPERM; 760 - } 746 + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), 747 + GFP_KERNEL); 748 + if (range->pfns == NULL) { 749 + r = -ENOMEM; 750 + goto out; 761 751 } 752 + range->start = gtt->userptr; 753 + range->end = end; 762 754 763 - /* loop enough times using contiguous pages of memory */ 764 - do { 765 - unsigned num_pages = ttm->num_pages - pinned; 766 - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; 767 - struct page **p = pages + pinned; 768 - struct amdgpu_ttm_gup_task_list guptask; 
755 + range->pfns[0] = range->flags[HMM_PFN_VALID]; 756 + range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? 757 + 0 : range->flags[HMM_PFN_WRITE]; 758 + for (i = 1; i < ttm->num_pages; i++) 759 + range->pfns[i] = range->pfns[0]; 769 760 770 - guptask.task = current; 771 - spin_lock(&gtt->guptasklock); 772 - list_add(&guptask.list, &gtt->guptasks); 773 - spin_unlock(&gtt->guptasklock); 774 - 775 - if (mm == current->mm) 776 - r = get_user_pages(userptr, num_pages, flags, p, NULL); 777 - else 778 - r = get_user_pages_remote(gtt->usertask, 779 - mm, userptr, num_pages, 780 - flags, p, NULL, NULL); 781 - 782 - spin_lock(&gtt->guptasklock); 783 - list_del(&guptask.list); 784 - spin_unlock(&gtt->guptasklock); 785 - 786 - if (r < 0) 787 - goto release_pages; 788 - 789 - pinned += r; 790 - 791 - } while (pinned < ttm->num_pages); 761 + /* This may trigger page table update */ 762 + r = hmm_vma_fault(range, true); 763 + if (r) 764 + goto out_free_pfns; 792 765 793 766 up_read(&mm->mmap_sem); 767 + 768 + for (i = 0; i < ttm->num_pages; i++) 769 + pages[i] = hmm_pfn_to_page(range, range->pfns[i]); 770 + 794 771 return 0; 795 772 796 - release_pages: 797 - release_pages(pages, pinned); 773 + out_free_pfns: 774 + kvfree(range->pfns); 775 + range->pfns = NULL; 776 + out: 798 777 up_read(&mm->mmap_sem); 778 + return r; 779 + } 780 + 781 + /** 782 + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change 783 + * Check if the pages backing this ttm range have been invalidated 784 + * 785 + * Returns: true if pages are still valid 786 + */ 787 + bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) 788 + { 789 + struct amdgpu_ttm_tt *gtt = (void *)ttm; 790 + bool r = false; 791 + 792 + if (!gtt || !gtt->userptr) 793 + return false; 794 + 795 + WARN_ONCE(!gtt->range.pfns, "No user pages to check\n"); 796 + if (gtt->range.pfns) { 797 + r = hmm_vma_range_done(&gtt->range); 798 + kvfree(gtt->range.pfns); 799 + gtt->range.pfns = NULL; 800 + } 801 + 799 802 
return r; 800 803 } 801 804 ··· 812 807 */ 813 808 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) 814 809 { 815 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 816 810 unsigned i; 817 811 818 - gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations); 819 - for (i = 0; i < ttm->num_pages; ++i) { 820 - if (ttm->pages[i]) 821 - put_page(ttm->pages[i]); 822 - 812 + for (i = 0; i < ttm->num_pages; ++i) 823 813 ttm->pages[i] = pages ? pages[i] : NULL; 824 - } 825 814 } 826 815 827 816 /** ··· 900 901 /* unmap the pages mapped to the device */ 901 902 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 902 903 903 - /* mark the pages as dirty */ 904 - amdgpu_ttm_tt_mark_user_pages(ttm); 905 - 906 904 sg_free_table(ttm->sg); 905 + 906 + if (gtt->range.pfns && 907 + ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0])) 908 + WARN_ONCE(1, "Missing get_user_page_done\n"); 907 909 } 908 910 909 911 int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, ··· 1254 1254 gtt->usertask = current->group_leader; 1255 1255 get_task_struct(gtt->usertask); 1256 1256 1257 - spin_lock_init(&gtt->guptasklock); 1258 - INIT_LIST_HEAD(&gtt->guptasks); 1259 - atomic_set(&gtt->mmu_invalidations, 0); 1260 - gtt->last_set_pages = 0; 1261 - 1262 1257 return 0; 1263 1258 } 1264 1259 ··· 1282 1287 unsigned long end) 1283 1288 { 1284 1289 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1285 - struct amdgpu_ttm_gup_task_list *entry; 1286 1290 unsigned long size; 1287 1291 1288 1292 if (gtt == NULL || !gtt->userptr) ··· 1294 1300 if (gtt->userptr > end || gtt->userptr + size <= start) 1295 1301 return false; 1296 1302 1297 - /* Search the lists of tasks that hold this mapping and see 1298 - * if current is one of them. If it is return false. 
1299 - */ 1300 - spin_lock(&gtt->guptasklock); 1301 - list_for_each_entry(entry, &gtt->guptasks, list) { 1302 - if (entry->task == current) { 1303 - spin_unlock(&gtt->guptasklock); 1304 - return false; 1305 - } 1306 - } 1307 - spin_unlock(&gtt->guptasklock); 1308 - 1309 - atomic_inc(&gtt->mmu_invalidations); 1310 - 1311 1303 return true; 1312 1304 } 1313 1305 1314 1306 /** 1315 - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? 1307 + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? 1316 1308 */ 1317 - bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, 1318 - int *last_invalidated) 1319 - { 1320 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1321 - int prev_invalidated = *last_invalidated; 1322 - 1323 - *last_invalidated = atomic_read(&gtt->mmu_invalidations); 1324 - return prev_invalidated != *last_invalidated; 1325 - } 1326 - 1327 - /** 1328 - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object 1329 - * been invalidated since the last time they've been set? 1330 - */ 1331 - bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) 1309 + bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) 1332 1310 { 1333 1311 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1334 1312 1335 1313 if (gtt == NULL || !gtt->userptr) 1336 1314 return false; 1337 1315 1338 - return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; 1316 + return true; 1339 1317 } 1340 1318 1341 1319 /**
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
··· 102 102 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); 103 103 104 104 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); 105 + bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); 105 106 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); 106 107 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); 107 108 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, ··· 113 112 unsigned long end); 114 113 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, 115 114 int *last_invalidated); 116 - bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); 115 + bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); 117 116 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); 118 117 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); 119 118 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,