Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: allocate VM PDs/PTs on demand

Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
them during mapping.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Christian König; committed by Alex Deucher.
0ce15d6f 780637cb

+39 -129
+1 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 410 410 if (p_bo_va_entry) 411 411 *p_bo_va_entry = bo_va_entry; 412 412 413 - /* Allocate new page tables if needed and validate 414 - * them. 415 - */ 416 - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); 417 - if (ret) { 418 - pr_err("Failed to allocate pts, err=%d\n", ret); 419 - goto err_alloc_pts; 420 - } 421 - 413 + /* Allocate validate page tables if needed */ 422 414 ret = vm_validate_pt_pd_bos(vm); 423 415 if (ret) { 424 416 pr_err("validate_pt_pd_bos() failed\n");
-9
drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
··· 92 92 return -ENOMEM; 93 93 } 94 94 95 - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, 96 - size); 97 - if (r) { 98 - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); 99 - amdgpu_vm_bo_rmv(adev, *bo_va); 100 - ttm_eu_backoff_reservation(&ticket, &list); 101 - return r; 102 - } 103 - 104 95 r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, 105 96 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | 106 97 AMDGPU_PTE_EXECUTABLE);
-10
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
··· 625 625 626 626 switch (args->operation) { 627 627 case AMDGPU_VA_OP_MAP: 628 - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, 629 - args->map_size); 630 - if (r) 631 - goto error_backoff; 632 - 633 628 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); 634 629 r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, 635 630 args->offset_in_bo, args->map_size, ··· 640 645 args->map_size); 641 646 break; 642 647 case AMDGPU_VA_OP_REPLACE: 643 - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, 644 - args->map_size); 645 - if (r) 646 - goto error_backoff; 647 - 648 648 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); 649 649 r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, 650 650 args->offset_in_bo, args->map_size,
+38 -98
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 521 521 } 522 522 523 523 /** 524 - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT 525 - * 526 - * @adev: amdgpu_device pointer 527 - * @vm: amdgpu_vm structure 528 - * @start: start addr of the walk 529 - * @cursor: state to initialize 530 - * 531 - * Start a walk and go directly to the leaf node. 532 - */ 533 - static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev, 534 - struct amdgpu_vm *vm, uint64_t start, 535 - struct amdgpu_vm_pt_cursor *cursor) 536 - { 537 - amdgpu_vm_pt_start(adev, vm, start, cursor); 538 - while (amdgpu_vm_pt_descendant(adev, cursor)); 539 - } 540 - 541 - /** 542 - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT 543 - * 544 - * @adev: amdgpu_device pointer 545 - * @cursor: current state 546 - * 547 - * Walk the PD/PT tree to the next leaf node. 548 - */ 549 - static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev, 550 - struct amdgpu_vm_pt_cursor *cursor) 551 - { 552 - amdgpu_vm_pt_next(adev, cursor); 553 - if (cursor->pfn != ~0ll) 554 - while (amdgpu_vm_pt_descendant(adev, cursor)); 555 - } 556 - 557 - /** 558 - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy 559 - */ 560 - #define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \ 561 - for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \ 562 - (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor))) 563 - 564 - /** 565 524 * amdgpu_vm_pt_first_dfs - start a deep first search 566 525 * 567 526 * @adev: amdgpu_device structure ··· 891 932 * Returns: 892 933 * 0 on success, errno otherwise. 
893 934 */ 894 - int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, 895 - struct amdgpu_vm *vm, 896 - uint64_t saddr, uint64_t size) 935 + static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, 936 + struct amdgpu_vm *vm, 937 + struct amdgpu_vm_pt_cursor *cursor) 897 938 { 898 - struct amdgpu_vm_pt_cursor cursor; 939 + struct amdgpu_vm_pt *entry = cursor->entry; 940 + struct amdgpu_bo_param bp; 899 941 struct amdgpu_bo *pt; 900 - uint64_t eaddr; 901 942 int r; 902 943 903 - /* validate the parameters */ 904 - if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) 905 - return -EINVAL; 944 + if (cursor->level < AMDGPU_VM_PTB && !entry->entries) { 945 + unsigned num_entries; 906 946 907 - eaddr = saddr + size - 1; 908 - 909 - saddr /= AMDGPU_GPU_PAGE_SIZE; 910 - eaddr /= AMDGPU_GPU_PAGE_SIZE; 911 - 912 - if (eaddr >= adev->vm_manager.max_pfn) { 913 - dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", 914 - eaddr, adev->vm_manager.max_pfn); 915 - return -EINVAL; 947 + num_entries = amdgpu_vm_num_entries(adev, cursor->level); 948 + entry->entries = kvmalloc_array(num_entries, 949 + sizeof(*entry->entries), 950 + GFP_KERNEL | __GFP_ZERO); 951 + if (!entry->entries) 952 + return -ENOMEM; 916 953 } 917 954 918 - for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) { 919 - struct amdgpu_vm_pt *entry = cursor.entry; 920 - struct amdgpu_bo_param bp; 955 + if (entry->base.bo) 956 + return 0; 921 957 922 - if (cursor.level < AMDGPU_VM_PTB) { 923 - unsigned num_entries; 958 + amdgpu_vm_bo_param(adev, vm, cursor->level, &bp); 924 959 925 - num_entries = amdgpu_vm_num_entries(adev, cursor.level); 926 - entry->entries = kvmalloc_array(num_entries, 927 - sizeof(*entry->entries), 928 - GFP_KERNEL | 929 - __GFP_ZERO); 930 - if (!entry->entries) 931 - return -ENOMEM; 932 - } 960 + r = amdgpu_bo_create(adev, &bp, &pt); 961 + if (r) 962 + return r; 933 963 934 - 935 - if (entry->base.bo) 936 - continue; 937 - 938 - amdgpu_vm_bo_param(adev, vm, 
cursor.level, &bp); 939 - 940 - r = amdgpu_bo_create(adev, &bp, &pt); 941 - if (r) 942 - return r; 943 - 944 - if (vm->use_cpu_for_update) { 945 - r = amdgpu_bo_kmap(pt, NULL); 946 - if (r) 947 - goto error_free_pt; 948 - } 949 - 950 - /* Keep a reference to the root directory to avoid 951 - * freeing them up in the wrong order. 952 - */ 953 - pt->parent = amdgpu_bo_ref(cursor.parent->base.bo); 954 - 955 - amdgpu_vm_bo_base_init(&entry->base, vm, pt); 956 - 957 - r = amdgpu_vm_clear_bo(adev, vm, pt); 964 + if (vm->use_cpu_for_update) { 965 + r = amdgpu_bo_kmap(pt, NULL); 958 966 if (r) 959 967 goto error_free_pt; 960 968 } 969 + 970 + /* Keep a reference to the root directory to avoid 971 + * freeing them up in the wrong order. 972 + */ 973 + pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); 974 + amdgpu_vm_bo_base_init(&entry->base, vm, pt); 975 + 976 + r = amdgpu_vm_clear_bo(adev, vm, pt); 977 + if (r) 978 + goto error_free_pt; 961 979 962 980 return 0; 963 981 ··· 1580 1644 struct amdgpu_vm_pt_cursor cursor; 1581 1645 uint64_t frag_start = start, frag_end; 1582 1646 unsigned int frag; 1647 + int r; 1583 1648 1584 1649 /* figure out the initial fragment */ 1585 1650 amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); ··· 1588 1651 /* walk over the address space and update the PTs */ 1589 1652 amdgpu_vm_pt_start(adev, params->vm, start, &cursor); 1590 1653 while (cursor.pfn < end) { 1591 - struct amdgpu_bo *pt = cursor.entry->base.bo; 1592 1654 unsigned shift, parent_shift, mask; 1593 1655 uint64_t incr, entry_end, pe_start; 1656 + struct amdgpu_bo *pt; 1594 1657 1595 - if (!pt) 1596 - return -ENOENT; 1658 + r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor); 1659 + if (r) 1660 + return r; 1661 + 1662 + pt = cursor.entry->base.bo; 1597 1663 1598 1664 /* The root level can't be a huge page */ 1599 1665 if (cursor.level == adev->vm_manager.root_level) {
-3
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
··· 303 303 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, 304 304 int (*callback)(void *p, struct amdgpu_bo *bo), 305 305 void *param); 306 - int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, 307 - struct amdgpu_vm *vm, 308 - uint64_t saddr, uint64_t size); 309 306 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); 310 307 int amdgpu_vm_update_directories(struct amdgpu_device *adev, 311 308 struct amdgpu_vm *vm);