Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Add DMABuf import functionality

This is used for interoperability between ROCm compute and graphics
APIs. It allows importing graphics driver BOs into the ROCm SVM
address space for zero-copy GPU access.

The API is split into two steps (query and import) to allow user mode
to manage the virtual address space allocation for the imported buffer.

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Felix Kuehling; committed by Alex Deucher.
1dde0ea9 3704d56e

+287 -5
+57
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
··· 26 26 #include "amdgpu.h" 27 27 #include "amdgpu_gfx.h" 28 28 #include <linux/module.h> 29 + #include <linux/dma-buf.h> 29 30 30 31 const struct kgd2kfd_calls *kgd2kfd; 31 32 ··· 432 431 cu_info->wave_front_size = acu_info.wave_front_size; 433 432 cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; 434 433 cu_info->lds_size = acu_info.lds_size; 434 + } 435 + 436 + int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, 437 + struct kgd_dev **dma_buf_kgd, 438 + uint64_t *bo_size, void *metadata_buffer, 439 + size_t buffer_size, uint32_t *metadata_size, 440 + uint32_t *flags) 441 + { 442 + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 443 + struct dma_buf *dma_buf; 444 + struct drm_gem_object *obj; 445 + struct amdgpu_bo *bo; 446 + uint64_t metadata_flags; 447 + int r = -EINVAL; 448 + 449 + dma_buf = dma_buf_get(dma_buf_fd); 450 + if (IS_ERR(dma_buf)) 451 + return PTR_ERR(dma_buf); 452 + 453 + if (dma_buf->ops != &amdgpu_dmabuf_ops) 454 + /* Can't handle non-graphics buffers */ 455 + goto out_put; 456 + 457 + obj = dma_buf->priv; 458 + if (obj->dev->driver != adev->ddev->driver) 459 + /* Can't handle buffers from different drivers */ 460 + goto out_put; 461 + 462 + adev = obj->dev->dev_private; 463 + bo = gem_to_amdgpu_bo(obj); 464 + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | 465 + AMDGPU_GEM_DOMAIN_GTT))) 466 + /* Only VRAM and GTT BOs are supported */ 467 + goto out_put; 468 + 469 + r = 0; 470 + if (dma_buf_kgd) 471 + *dma_buf_kgd = (struct kgd_dev *)adev; 472 + if (bo_size) 473 + *bo_size = amdgpu_bo_size(bo); 474 + if (metadata_size) 475 + *metadata_size = bo->metadata_size; 476 + if (metadata_buffer) 477 + r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, 478 + metadata_size, &metadata_flags); 479 + if (flags) { 480 + *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 
481 + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; 482 + 483 + if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) 484 + *flags |= ALLOC_MEM_FLAGS_PUBLIC; 485 + } 486 + 487 + out_put: 488 + dma_buf_put(dma_buf); 489 + return r; 435 490 } 436 491 437 492 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
+11
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 149 149 150 150 uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd); 151 151 void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); 152 + int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, 153 + struct kgd_dev **dmabuf_kgd, 154 + uint64_t *bo_size, void *metadata_buffer, 155 + size_t buffer_size, uint32_t *metadata_size, 156 + uint32_t *flags); 152 157 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); 153 158 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); 154 159 ··· 204 199 205 200 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, 206 201 struct kfd_vm_fault_info *info); 202 + 203 + int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, 204 + struct dma_buf *dmabuf, 205 + uint64_t va, void *vm, 206 + struct kgd_mem **mem, uint64_t *size, 207 + uint64_t *mmap_offset); 207 208 208 209 void amdgpu_amdkfd_gpuvm_init_mem_limits(void); 209 210 void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+55
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 25 25 #include <linux/list.h> 26 26 #include <linux/pagemap.h> 27 27 #include <linux/sched/mm.h> 28 + #include <linux/dma-buf.h> 28 29 #include <drm/drmP.h> 29 30 #include "amdgpu_object.h" 30 31 #include "amdgpu_vm.h" ··· 1662 1661 mb(); 1663 1662 atomic_set(&adev->gmc.vm_fault_info_updated, 0); 1664 1663 } 1664 + return 0; 1665 + } 1666 + 1667 + int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, 1668 + struct dma_buf *dma_buf, 1669 + uint64_t va, void *vm, 1670 + struct kgd_mem **mem, uint64_t *size, 1671 + uint64_t *mmap_offset) 1672 + { 1673 + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 1674 + struct drm_gem_object *obj; 1675 + struct amdgpu_bo *bo; 1676 + struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; 1677 + 1678 + if (dma_buf->ops != &amdgpu_dmabuf_ops) 1679 + /* Can't handle non-graphics buffers */ 1680 + return -EINVAL; 1681 + 1682 + obj = dma_buf->priv; 1683 + if (obj->dev->dev_private != adev) 1684 + /* Can't handle buffers from other devices */ 1685 + return -EINVAL; 1686 + 1687 + bo = gem_to_amdgpu_bo(obj); 1688 + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | 1689 + AMDGPU_GEM_DOMAIN_GTT))) 1690 + /* Only VRAM and GTT BOs are supported */ 1691 + return -EINVAL; 1692 + 1693 + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); 1694 + if (!*mem) 1695 + return -ENOMEM; 1696 + 1697 + if (size) 1698 + *size = amdgpu_bo_size(bo); 1699 + 1700 + if (mmap_offset) 1701 + *mmap_offset = amdgpu_bo_mmap_offset(bo); 1702 + 1703 + INIT_LIST_HEAD(&(*mem)->bo_va_list); 1704 + mutex_init(&(*mem)->lock); 1705 + (*mem)->mapping_flags = 1706 + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | 1707 + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; 1708 + 1709 + (*mem)->bo = amdgpu_bo_ref(bo); 1710 + (*mem)->va = va; 1711 + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 
1712 + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; 1713 + (*mem)->mapped_to_gpu_memory = 0; 1714 + (*mem)->process_info = avm->process_info; 1715 + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); 1716 + amdgpu_sync_create(&(*mem)->sync); 1717 + 1665 1718 return 0; 1666 1719 } 1667 1720
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
··· 54 54 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); 55 55 int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); 56 56 57 + extern const struct dma_buf_ops amdgpu_dmabuf_ops; 58 + 57 59 /* 58 60 * GEM objects. 59 61 */
+1 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
··· 39 39 #include <drm/amdgpu_drm.h> 40 40 #include <linux/dma-buf.h> 41 41 42 - static const struct dma_buf_ops amdgpu_dmabuf_ops; 43 - 44 42 /** 45 43 * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table 46 44 * implementation ··· 330 332 return ret; 331 333 } 332 334 333 - static const struct dma_buf_ops amdgpu_dmabuf_ops = { 335 + const struct dma_buf_ops amdgpu_dmabuf_ops = { 334 336 .attach = amdgpu_gem_map_attach, 335 337 .detach = amdgpu_gem_map_detach, 336 338 .map_dma_buf = drm_gem_map_dma_buf,
+117 -1
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
··· 33 33 #include <linux/time.h> 34 34 #include <linux/mm.h> 35 35 #include <linux/mman.h> 36 + #include <linux/dma-buf.h> 36 37 #include <asm/processor.h> 37 38 #include "kfd_priv.h" 38 39 #include "kfd_device_queue_manager.h" ··· 1551 1550 return err; 1552 1551 } 1553 1552 1553 + static int kfd_ioctl_get_dmabuf_info(struct file *filep, 1554 + struct kfd_process *p, void *data) 1555 + { 1556 + struct kfd_ioctl_get_dmabuf_info_args *args = data; 1557 + struct kfd_dev *dev = NULL; 1558 + struct kgd_dev *dma_buf_kgd; 1559 + void *metadata_buffer = NULL; 1560 + uint32_t flags; 1561 + unsigned int i; 1562 + int r; 1563 + 1564 + /* Find a KFD GPU device that supports the get_dmabuf_info query */ 1565 + for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) 1566 + if (dev) 1567 + break; 1568 + if (!dev) 1569 + return -EINVAL; 1570 + 1571 + if (args->metadata_ptr) { 1572 + metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL); 1573 + if (!metadata_buffer) 1574 + return -ENOMEM; 1575 + } 1576 + 1577 + /* Get dmabuf info from KGD */ 1578 + r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd, 1579 + &dma_buf_kgd, &args->size, 1580 + metadata_buffer, args->metadata_size, 1581 + &args->metadata_size, &flags); 1582 + if (r) 1583 + goto exit; 1584 + 1585 + /* Reverse-lookup gpu_id from kgd pointer */ 1586 + dev = kfd_device_by_kgd(dma_buf_kgd); 1587 + if (!dev) { 1588 + r = -EINVAL; 1589 + goto exit; 1590 + } 1591 + args->gpu_id = dev->id; 1592 + args->flags = flags; 1593 + 1594 + /* Copy metadata buffer to user mode */ 1595 + if (metadata_buffer) { 1596 + r = copy_to_user((void __user *)args->metadata_ptr, 1597 + metadata_buffer, args->metadata_size); 1598 + if (r != 0) 1599 + r = -EFAULT; 1600 + } 1601 + 1602 + exit: 1603 + kfree(metadata_buffer); 1604 + 1605 + return r; 1606 + } 1607 + 1608 + static int kfd_ioctl_import_dmabuf(struct file *filep, 1609 + struct kfd_process *p, void *data) 1610 + { 1611 + struct kfd_ioctl_import_dmabuf_args *args = data; 
1612 + struct kfd_process_device *pdd; 1613 + struct dma_buf *dmabuf; 1614 + struct kfd_dev *dev; 1615 + int idr_handle; 1616 + uint64_t size; 1617 + void *mem; 1618 + int r; 1619 + 1620 + dev = kfd_device_by_id(args->gpu_id); 1621 + if (!dev) 1622 + return -EINVAL; 1623 + 1624 + dmabuf = dma_buf_get(args->dmabuf_fd); 1625 + if (!dmabuf) 1626 + return -EINVAL; 1627 + 1628 + mutex_lock(&p->mutex); 1629 + 1630 + pdd = kfd_bind_process_to_device(dev, p); 1631 + if (IS_ERR(pdd)) { 1632 + r = PTR_ERR(pdd); 1633 + goto err_unlock; 1634 + } 1635 + 1636 + r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, 1637 + args->va_addr, pdd->vm, 1638 + (struct kgd_mem **)&mem, &size, 1639 + NULL); 1640 + if (r) 1641 + goto err_unlock; 1642 + 1643 + idr_handle = kfd_process_device_create_obj_handle(pdd, mem); 1644 + if (idr_handle < 0) { 1645 + r = -EFAULT; 1646 + goto err_free; 1647 + } 1648 + 1649 + mutex_unlock(&p->mutex); 1650 + 1651 + args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); 1652 + 1653 + return 0; 1654 + 1655 + err_free: 1656 + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); 1657 + err_unlock: 1658 + mutex_unlock(&p->mutex); 1659 + return r; 1660 + } 1661 + 1554 1662 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ 1555 1663 [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ 1556 1664 .cmd_drv = 0, .name = #ioctl} ··· 1745 1635 kfd_ioctl_set_cu_mask, 0), 1746 1636 1747 1637 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, 1748 - kfd_ioctl_get_queue_wave_state, 0) 1638 + kfd_ioctl_get_queue_wave_state, 0), 1639 + 1640 + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO, 1641 + kfd_ioctl_get_dmabuf_info, 0), 1642 + 1643 + AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, 1644 + kfd_ioctl_import_dmabuf, 0), 1749 1645 1750 1646 }; 1751 1647
+1
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 793 793 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); 794 794 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); 795 795 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); 796 + struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd); 796 797 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); 797 798 int kfd_numa_node_to_apic_id(int numa_node_id); 798 799
+18
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
··· 111 111 return device; 112 112 } 113 113 114 + struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) 115 + { 116 + struct kfd_topology_device *top_dev; 117 + struct kfd_dev *device = NULL; 118 + 119 + down_read(&topology_lock); 120 + 121 + list_for_each_entry(top_dev, &topology_device_list, list) 122 + if (top_dev->gpu && top_dev->gpu->kgd == kgd) { 123 + device = top_dev->gpu; 124 + break; 125 + } 126 + 127 + up_read(&topology_lock); 128 + 129 + return device; 130 + } 131 + 114 132 /* Called with write topology_lock acquired */ 115 133 static void kfd_release_topology_device(struct kfd_topology_device *dev) 116 134 {
+25 -1
include/uapi/linux/kfd_ioctl.h
··· 398 398 __u32 n_success; /* to/from KFD */ 399 399 }; 400 400 401 + struct kfd_ioctl_get_dmabuf_info_args { 402 + __u64 size; /* from KFD */ 403 + __u64 metadata_ptr; /* to KFD */ 404 + __u32 metadata_size; /* to KFD (space allocated by user) 405 + * from KFD (actual metadata size) 406 + */ 407 + __u32 gpu_id; /* from KFD */ 408 + __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */ 409 + __u32 dmabuf_fd; /* to KFD */ 410 + }; 411 + 412 + struct kfd_ioctl_import_dmabuf_args { 413 + __u64 va_addr; /* to KFD */ 414 + __u64 handle; /* from KFD */ 415 + __u32 gpu_id; /* to KFD */ 416 + __u32 dmabuf_fd; /* to KFD */ 417 + }; 418 + 401 419 #define AMDKFD_IOCTL_BASE 'K' 402 420 #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) 403 421 #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) ··· 504 486 #define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ 505 487 AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) 506 488 489 + #define AMDKFD_IOC_GET_DMABUF_INFO \ 490 + AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args) 491 + 492 + #define AMDKFD_IOC_IMPORT_DMABUF \ 493 + AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) 494 + 507 495 #define AMDKFD_COMMAND_START 0x01 508 - #define AMDKFD_COMMAND_END 0x1C 496 + #define AMDKFD_COMMAND_END 0x1E 509 497 510 498 #endif