Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Use partial hmm page walk during buffer validation in SVM

SVM uses an hmm page walk to validate a buffer before mapping it to the gpu vm.
After a partial migration/mapping, validate only the same vm range that the
migration/mapping covers instead of the whole svm range, which can be very
large. This change is expected to improve svm code performance.

Signed-off-by: Xiaogang Chen <xiaogang.chen@amd.com>
Reviewed-by: Philip Yang <philip.yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Xiaogang Chen and committed by
Alex Deucher
006ad514 e48c8cbe

+48 -66
+12 -23
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
··· 260 260 put_page(page); 261 261 } 262 262 263 - static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) 264 - { 265 - unsigned long cpages = 0; 266 - unsigned long i; 267 - 268 - for (i = 0; i < migrate->npages; i++) { 269 - if (migrate->src[i] & MIGRATE_PFN_VALID && 270 - migrate->src[i] & MIGRATE_PFN_MIGRATE) 271 - cpages++; 272 - } 273 - return cpages; 274 - } 275 - 276 263 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) 277 264 { 278 265 unsigned long upages = 0; ··· 389 402 struct dma_fence *mfence = NULL; 390 403 struct migrate_vma migrate = { 0 }; 391 404 unsigned long cpages = 0; 405 + unsigned long mpages = 0; 392 406 dma_addr_t *scratch; 393 407 void *buf; 394 408 int r = -ENOMEM; ··· 438 450 r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); 439 451 migrate_vma_pages(&migrate); 440 452 441 - pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", 442 - svm_migrate_successful_pages(&migrate), cpages, migrate.npages); 443 - 444 453 svm_migrate_copy_done(adev, mfence); 445 454 migrate_vma_finalize(&migrate); 455 + 456 + mpages = cpages - svm_migrate_unsuccessful_pages(&migrate); 457 + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", 458 + mpages, cpages, migrate.npages); 446 459 447 460 kfd_smi_event_migration_end(node, p->lead_thread->pid, 448 461 start >> PAGE_SHIFT, end >> PAGE_SHIFT, ··· 454 465 out_free: 455 466 kvfree(buf); 456 467 out: 457 - if (!r && cpages) { 468 + if (!r && mpages) { 458 469 pdd = svm_range_get_pdd_by_node(prange, node); 459 470 if (pdd) 460 - WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); 471 + WRITE_ONCE(pdd->page_in, pdd->page_in + mpages); 461 472 462 - return cpages; 473 + return mpages; 463 474 } 464 475 return r; 465 476 } ··· 487 498 struct vm_area_struct *vma; 488 499 uint64_t ttm_res_offset; 489 500 struct kfd_node *node; 490 - unsigned long cpages = 0; 501 + unsigned long mpages = 0; 491 502 long r = 0; 492 503 
493 504 if (start_mgr < prange->start || last_mgr > prange->last) { ··· 529 540 pr_debug("failed %ld to migrate\n", r); 530 541 break; 531 542 } else { 532 - cpages += r; 543 + mpages += r; 533 544 } 534 545 ttm_res_offset += next - addr; 535 546 addr = next; 536 547 } 537 548 538 - if (cpages) { 549 + if (mpages) { 539 550 prange->actual_loc = best_loc; 540 - prange->vram_pages = prange->vram_pages + cpages; 551 + prange->vram_pages += mpages; 541 552 } else if (!prange->actual_loc) { 542 553 /* if no page migrated and all pages from prange are at 543 554 * sys ram drop svm_bo got from svm_range_vram_node_new
+36 -43
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
··· 158 158 static int 159 159 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, 160 160 unsigned long offset, unsigned long npages, 161 - unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages) 161 + unsigned long *hmm_pfns, uint32_t gpuidx) 162 162 { 163 163 enum dma_data_direction dir = DMA_BIDIRECTIONAL; 164 164 dma_addr_t *addr = prange->dma_addr[gpuidx]; 165 165 struct device *dev = adev->dev; 166 166 struct page *page; 167 - uint64_t vram_pages_dev; 168 167 int i, r; 169 168 170 169 if (!addr) { ··· 173 174 prange->dma_addr[gpuidx] = addr; 174 175 } 175 176 176 - vram_pages_dev = 0; 177 177 addr += offset; 178 178 for (i = 0; i < npages; i++) { 179 179 if (svm_is_valid_dma_mapping_addr(dev, addr[i])) ··· 182 184 if (is_zone_device_page(page)) { 183 185 struct amdgpu_device *bo_adev = prange->svm_bo->node->adev; 184 186 185 - vram_pages_dev++; 186 187 addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + 187 188 bo_adev->vm_manager.vram_base_offset - 188 189 bo_adev->kfd.pgmap.range.start; ··· 198 201 pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n", 199 202 addr[i] >> PAGE_SHIFT, page_to_pfn(page)); 200 203 } 201 - *vram_pages = vram_pages_dev; 204 + 202 205 return 0; 203 206 } 204 207 205 208 static int 206 209 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, 207 210 unsigned long offset, unsigned long npages, 208 - unsigned long *hmm_pfns, uint64_t *vram_pages) 211 + unsigned long *hmm_pfns) 209 212 { 210 213 struct kfd_process *p; 211 214 uint32_t gpuidx; ··· 224 227 } 225 228 226 229 r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages, 227 - hmm_pfns, gpuidx, vram_pages); 230 + hmm_pfns, gpuidx); 228 231 if (r) 229 232 break; 230 233 } ··· 882 885 883 886 static void * 884 887 svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements, 885 - uint64_t offset) 888 + uint64_t offset, uint64_t *vram_pages) 886 889 { 890 + unsigned char *src = (unsigned char *)psrc + offset; 887 891 
unsigned char *dst; 892 + uint64_t i; 888 893 889 894 dst = kvmalloc_array(num_elements, size, GFP_KERNEL); 890 895 if (!dst) 891 896 return NULL; 892 - memcpy(dst, (unsigned char *)psrc + offset, num_elements * size); 897 + 898 + if (!vram_pages) { 899 + memcpy(dst, src, num_elements * size); 900 + return (void *)dst; 901 + } 902 + 903 + *vram_pages = 0; 904 + for (i = 0; i < num_elements; i++) { 905 + dma_addr_t *temp; 906 + temp = (dma_addr_t *)dst + i; 907 + *temp = *((dma_addr_t *)src + i); 908 + if (*temp&SVM_RANGE_VRAM_DOMAIN) 909 + (*vram_pages)++; 910 + } 893 911 894 912 return (void *)dst; 895 913 } ··· 918 906 if (!src->dma_addr[i]) 919 907 continue; 920 908 dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i], 921 - sizeof(*src->dma_addr[i]), src->npages, 0); 909 + sizeof(*src->dma_addr[i]), src->npages, 0, NULL); 922 910 if (!dst->dma_addr[i]) 923 911 return -ENOMEM; 924 912 } ··· 929 917 static int 930 918 svm_range_split_array(void *ppnew, void *ppold, size_t size, 931 919 uint64_t old_start, uint64_t old_n, 932 - uint64_t new_start, uint64_t new_n) 920 + uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages) 933 921 { 934 922 unsigned char *new, *old, *pold; 935 923 uint64_t d; ··· 941 929 return 0; 942 930 943 931 d = (new_start - old_start) * size; 944 - new = svm_range_copy_array(pold, size, new_n, d); 932 + /* get dma addr array for new range and calculte its vram page number */ 933 + new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages); 945 934 if (!new) 946 935 return -ENOMEM; 947 936 d = (new_start == old_start) ? 
new_n * size : 0; 948 - old = svm_range_copy_array(pold, size, old_n, d); 937 + old = svm_range_copy_array(pold, size, old_n, d, NULL); 949 938 if (!old) { 950 939 kvfree(new); 951 940 return -ENOMEM; ··· 968 955 for (i = 0; i < MAX_GPU_INSTANCE; i++) { 969 956 r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i], 970 957 sizeof(*old->dma_addr[i]), old->start, 971 - npages, new->start, new->npages); 958 + npages, new->start, new->npages, 959 + old->actual_loc ? &new->vram_pages : NULL); 972 960 if (r) 973 961 return r; 974 962 } 963 + if (old->actual_loc) 964 + old->vram_pages -= new->vram_pages; 975 965 976 966 return 0; 977 967 } ··· 997 981 998 982 new->svm_bo = svm_range_bo_ref(old->svm_bo); 999 983 new->ttm_res = old->ttm_res; 1000 - 1001 - /* set new's vram_pages as old range's now, the acurate vram_pages 1002 - * will be updated during mapping 1003 - */ 1004 - new->vram_pages = min(old->vram_pages, new->npages); 1005 984 1006 985 spin_lock(&new->svm_bo->list_lock); 1007 986 list_add(&new->svm_bo_list, &new->svm_bo->range_list); ··· 1120 1109 svm_range_split_tail(struct svm_range *prange, uint64_t new_last, 1121 1110 struct list_head *insert_list, struct list_head *remap_list) 1122 1111 { 1123 - struct svm_range *tail; 1112 + struct svm_range *tail = NULL; 1124 1113 int r = svm_range_split(prange, prange->start, new_last, &tail); 1125 1114 1126 1115 if (!r) { ··· 1135 1124 svm_range_split_head(struct svm_range *prange, uint64_t new_start, 1136 1125 struct list_head *insert_list, struct list_head *remap_list) 1137 1126 { 1138 - struct svm_range *head; 1127 + struct svm_range *head = NULL; 1139 1128 int r = svm_range_split(prange, new_start, prange->last, &head); 1140 1129 1141 1130 if (!r) { ··· 1584 1573 struct svm_validate_context *ctx; 1585 1574 unsigned long start, end, addr; 1586 1575 struct kfd_process *p; 1587 - uint64_t vram_pages; 1588 1576 void *owner; 1589 1577 int32_t idx; 1590 1578 int r = 0; ··· 1658 1648 } 1659 1649 } 1660 1650 1661 - 
vram_pages = 0; 1662 - start = prange->start << PAGE_SHIFT; 1663 - end = (prange->last + 1) << PAGE_SHIFT; 1651 + start = map_start << PAGE_SHIFT; 1652 + end = (map_last + 1) << PAGE_SHIFT; 1664 1653 for (addr = start; !r && addr < end; ) { 1665 1654 struct hmm_range *hmm_range; 1666 1655 unsigned long map_start_vma; 1667 1656 unsigned long map_last_vma; 1668 1657 struct vm_area_struct *vma; 1669 - uint64_t vram_pages_vma; 1670 1658 unsigned long next = 0; 1671 1659 unsigned long offset; 1672 1660 unsigned long npages; ··· 1691 1683 } 1692 1684 1693 1685 if (!r) { 1694 - offset = (addr - start) >> PAGE_SHIFT; 1686 + offset = (addr >> PAGE_SHIFT) - prange->start; 1695 1687 r = svm_range_dma_map(prange, ctx->bitmap, offset, npages, 1696 - hmm_range->hmm_pfns, &vram_pages_vma); 1688 + hmm_range->hmm_pfns); 1697 1689 if (r) 1698 1690 pr_debug("failed %d to dma map range\n", r); 1699 - else 1700 - vram_pages += vram_pages_vma; 1701 1691 } 1702 1692 1703 1693 svm_range_lock(prange); ··· 1726 1720 svm_range_unlock(prange); 1727 1721 1728 1722 addr = next; 1729 - } 1730 - 1731 - if (addr == end) { 1732 - prange->vram_pages = vram_pages; 1733 - 1734 - /* if prange does not include any vram page and it 1735 - * has not released svm_bo drop its svm_bo reference 1736 - * and set its actaul_loc to sys ram 1737 - */ 1738 - if (!vram_pages && prange->ttm_res) { 1739 - prange->actual_loc = 0; 1740 - svm_range_vram_node_free(prange); 1741 - } 1742 1723 } 1743 1724 1744 1725 svm_range_unreserve_bos(ctx);