Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Introduce kfd_node struct (v5)

Introduce a new structure, kfd_node, which will now represent
a compute node. kfd_node is carved out of kfd_dev structure.
kfd_dev struct now will become the parent of kfd_node, and will
store common resources such as doorbells, GTT sub-allocator etc.
kfd_node struct will store all resources specific to a compute
node, such as device queue manager, interrupt handling etc.

This is the first step in adding compute partition support in KFD.

v2: introduce kfd_node struct to gc v11 (Hawking)
v3: make reference to kfd_dev struct through kfd_node (Morris)
v4: use kfd_node instead for kfd isr/mqd functions (Morris)
v5: rebase (Alex)

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Morris Zhang <Shiwu.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Mukul Joshi and committed by
Alex Deucher
8dc1db31 5cf16755

+574 -496
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 35 35 #include "amdgpu_dma_buf.h" 36 36 #include <uapi/linux/kfd_ioctl.h> 37 37 #include "amdgpu_xgmi.h" 38 + #include "kfd_priv.h" 38 39 #include "kfd_smi_events.h" 39 40 #include <drm/ttm/ttm_tt.h> 40 41
+2 -2
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
··· 26 26 #include "amdgpu_amdkfd.h" 27 27 #include "kfd_smi_events.h" 28 28 29 - static bool cik_event_interrupt_isr(struct kfd_dev *dev, 29 + static bool cik_event_interrupt_isr(struct kfd_node *dev, 30 30 const uint32_t *ih_ring_entry, 31 31 uint32_t *patched_ihre, 32 32 bool *patched_flag) ··· 85 85 !amdgpu_no_queue_eviction_on_vm_fault); 86 86 } 87 87 88 - static void cik_event_interrupt_wq(struct kfd_dev *dev, 88 + static void cik_event_interrupt_wq(struct kfd_node *dev, 89 89 const uint32_t *ih_ring_entry) 90 90 { 91 91 const struct cik_ih_ring_entry *ihre =
+22 -21
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
··· 293 293 void *data) 294 294 { 295 295 struct kfd_ioctl_create_queue_args *args = data; 296 - struct kfd_dev *dev; 296 + struct kfd_node *dev; 297 297 int err = 0; 298 298 unsigned int queue_id; 299 299 struct kfd_process_device *pdd; ··· 328 328 } 329 329 330 330 if (!pdd->doorbell_index && 331 - kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) { 331 + kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) { 332 332 err = -ENOMEM; 333 333 goto err_alloc_doorbells; 334 334 } ··· 336 336 /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work 337 337 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) 338 338 */ 339 - if (dev->shared_resources.enable_mes && 339 + if (dev->kfd->shared_resources.enable_mes && 340 340 ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) 341 341 >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { 342 342 struct amdgpu_bo_va_mapping *wptr_mapping; ··· 887 887 { 888 888 struct kfd_ioctl_set_scratch_backing_va_args *args = data; 889 889 struct kfd_process_device *pdd; 890 - struct kfd_dev *dev; 890 + struct kfd_node *dev; 891 891 long err; 892 892 893 893 mutex_lock(&p->mutex); ··· 1006 1006 return ret; 1007 1007 } 1008 1008 1009 - bool kfd_dev_is_large_bar(struct kfd_dev *dev) 1009 + bool kfd_dev_is_large_bar(struct kfd_node *dev) 1010 1010 { 1011 1011 if (debug_largebar) { 1012 1012 pr_debug("Simulate large-bar allocation on non large-bar machine\n"); 1013 1013 return true; 1014 1014 } 1015 1015 1016 - if (dev->use_iommu_v2) 1016 + if (dev->kfd->use_iommu_v2) 1017 1017 return false; 1018 1018 1019 - if (dev->local_mem_info.local_mem_size_private == 0 && 1020 - dev->local_mem_info.local_mem_size_public > 0) 1019 + if (dev->kfd->local_mem_info.local_mem_size_private == 0 && 1020 + dev->kfd->local_mem_info.local_mem_size_public > 0) 1021 1021 return true; 1022 1022 return false; 1023 1023 } ··· 1041 1041 struct kfd_ioctl_alloc_memory_of_gpu_args *args = data; 1042 
1042 struct kfd_process_device *pdd; 1043 1043 void *mem; 1044 - struct kfd_dev *dev; 1044 + struct kfd_node *dev; 1045 1045 int idr_handle; 1046 1046 long err; 1047 1047 uint64_t offset = args->mmap_offset; ··· 1105 1105 } 1106 1106 1107 1107 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { 1108 - if (args->size != kfd_doorbell_process_slice(dev)) { 1108 + if (args->size != kfd_doorbell_process_slice(dev->kfd)) { 1109 1109 err = -EINVAL; 1110 1110 goto err_unlock; 1111 1111 } ··· 1231 1231 struct kfd_ioctl_map_memory_to_gpu_args *args = data; 1232 1232 struct kfd_process_device *pdd, *peer_pdd; 1233 1233 void *mem; 1234 - struct kfd_dev *dev; 1234 + struct kfd_node *dev; 1235 1235 long err = 0; 1236 1236 int i; 1237 1237 uint32_t *devices_arr = NULL; ··· 1405 1405 args->n_success = i+1; 1406 1406 } 1407 1407 1408 - flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev); 1408 + flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd); 1409 1409 if (flush_tlb) { 1410 1410 err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev, 1411 1411 (struct kgd_mem *) mem, true); ··· 1445 1445 int retval; 1446 1446 struct kfd_ioctl_alloc_queue_gws_args *args = data; 1447 1447 struct queue *q; 1448 - struct kfd_dev *dev; 1448 + struct kfd_node *dev; 1449 1449 1450 1450 mutex_lock(&p->mutex); 1451 1451 q = pqm_get_user_queue(&p->pqm, args->queue_id); ··· 1482 1482 struct kfd_process *p, void *data) 1483 1483 { 1484 1484 struct kfd_ioctl_get_dmabuf_info_args *args = data; 1485 - struct kfd_dev *dev = NULL; 1485 + struct kfd_node *dev = NULL; 1486 1486 struct amdgpu_device *dmabuf_adev; 1487 1487 void *metadata_buffer = NULL; 1488 1488 uint32_t flags; ··· 1596 1596 struct kfd_ioctl_export_dmabuf_args *args = data; 1597 1597 struct kfd_process_device *pdd; 1598 1598 struct dma_buf *dmabuf; 1599 - struct kfd_dev *dev; 1599 + struct kfd_node *dev; 1600 1600 void *mem; 1601 1601 int ret = 0; 1602 1602 ··· 2178 2178 } 2179 2179 2180 2180 for (i = 0; i < args->num_devices; i++) { 2181 - struct kfd_dev 
*dev; 2181 + struct kfd_node *dev; 2182 2182 struct kfd_process_device *pdd; 2183 2183 struct file *drm_file; 2184 2184 ··· 2240 2240 } 2241 2241 2242 2242 if (!pdd->doorbell_index && 2243 - kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) { 2243 + kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) { 2244 2244 ret = -ENOMEM; 2245 2245 goto exit; 2246 2246 } ··· 2268 2268 u64 offset; 2269 2269 2270 2270 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { 2271 - if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev)) 2271 + if (bo_bucket->size != 2272 + kfd_doorbell_process_slice(pdd->dev->kfd)) 2272 2273 return -EINVAL; 2273 2274 2274 2275 offset = kfd_get_process_doorbells(pdd); ··· 2351 2350 2352 2351 /* now map these BOs to GPU/s */ 2353 2352 for (j = 0; j < p->n_pdds; j++) { 2354 - struct kfd_dev *peer; 2353 + struct kfd_node *peer; 2355 2354 struct kfd_process_device *peer_pdd; 2356 2355 2357 2356 if (!bo_priv->mapped_gpuids[j]) ··· 2948 2947 return retcode; 2949 2948 } 2950 2949 2951 - static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, 2950 + static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process, 2952 2951 struct vm_area_struct *vma) 2953 2952 { 2954 2953 phys_addr_t address; ··· 2982 2981 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) 2983 2982 { 2984 2983 struct kfd_process *process; 2985 - struct kfd_dev *dev = NULL; 2984 + struct kfd_node *dev = NULL; 2986 2985 unsigned long mmap_offset; 2987 2986 unsigned int gpu_id; 2988 2987
+14 -14
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
··· 1405 1405 return i; 1406 1406 } 1407 1407 1408 - int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) 1408 + int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) 1409 1409 { 1410 1410 int num_of_cache_types = 0; 1411 1411 ··· 1524 1524 case IP_VERSION(11, 0, 3): 1525 1525 case IP_VERSION(11, 0, 4): 1526 1526 num_of_cache_types = 1527 - kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); 1527 + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); 1528 1528 break; 1529 1529 default: 1530 1530 *pcache_info = dummy_cache_info; ··· 1858 1858 } 1859 1859 1860 1860 static int kfd_fill_gpu_memory_affinity(int *avail_size, 1861 - struct kfd_dev *kdev, uint8_t type, uint64_t size, 1861 + struct kfd_node *kdev, uint8_t type, uint64_t size, 1862 1862 struct crat_subtype_memory *sub_type_hdr, 1863 1863 uint32_t proximity_domain, 1864 1864 const struct kfd_local_mem_info *local_mem_info) ··· 1887 1887 } 1888 1888 1889 1889 #ifdef CONFIG_ACPI_NUMA 1890 - static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) 1890 + static void kfd_find_numa_node_in_srat(struct kfd_node *kdev) 1891 1891 { 1892 1892 struct acpi_table_header *table_header = NULL; 1893 1893 struct acpi_subtable_header *sub_header = NULL; ··· 1982 1982 * Return 0 if successful else return -ve value 1983 1983 */ 1984 1984 static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, 1985 - struct kfd_dev *kdev, 1985 + struct kfd_node *kdev, 1986 1986 struct crat_subtype_iolink *sub_type_hdr, 1987 1987 uint32_t proximity_domain) 1988 1988 { ··· 2044 2044 } 2045 2045 2046 2046 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, 2047 - struct kfd_dev *kdev, 2048 - struct kfd_dev *peer_kdev, 2047 + struct kfd_node *kdev, 2048 + struct kfd_node *peer_kdev, 2049 2049 struct crat_subtype_iolink *sub_type_hdr, 2050 2050 uint32_t proximity_domain_from, 2051 2051 uint32_t proximity_domain_to) ··· 2081 2081 * [OUT] 
actual size of data filled in crat_image 2082 2082 */ 2083 2083 static int kfd_create_vcrat_image_gpu(void *pcrat_image, 2084 - size_t *size, struct kfd_dev *kdev, 2084 + size_t *size, struct kfd_node *kdev, 2085 2085 uint32_t proximity_domain) 2086 2086 { 2087 2087 struct crat_header *crat_table = (struct crat_header *)pcrat_image; ··· 2153 2153 /* Check if this node supports IOMMU. During parsing this flag will 2154 2154 * translate to HSA_CAP_ATS_PRESENT 2155 2155 */ 2156 - if (!kfd_iommu_check_device(kdev)) 2156 + if (!kfd_iommu_check_device(kdev->kfd)) 2157 2157 cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; 2158 2158 2159 2159 crat_table->length += sub_type_hdr->length; ··· 2164 2164 * report the total FB size (public+private) as a single 2165 2165 * private heap. 2166 2166 */ 2167 - local_mem_info = kdev->local_mem_info; 2167 + local_mem_info = kdev->kfd->local_mem_info; 2168 2168 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 2169 2169 sub_type_hdr->length); 2170 2170 ··· 2216 2216 * (from other GPU to this GPU) will be added 2217 2217 * in kfd_parse_subtype_iolink. 2218 2218 */ 2219 - if (kdev->hive_id) { 2219 + if (kdev->kfd->hive_id) { 2220 2220 for (nid = 0; nid < proximity_domain; ++nid) { 2221 2221 peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid); 2222 2222 if (!peer_dev->gpu) 2223 2223 continue; 2224 - if (peer_dev->gpu->hive_id != kdev->hive_id) 2224 + if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) 2225 2225 continue; 2226 2226 sub_type_hdr = (typeof(sub_type_hdr))( 2227 2227 (char *)sub_type_hdr + ··· 2255 2255 * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU 2256 2256 * -- this option is not currently implemented. 
2257 2257 * The assumption is that all AMD APUs will have CRAT 2258 - * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU 2258 + * @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU 2259 2259 * 2260 2260 * Return 0 if successful else return -ve value 2261 2261 */ 2262 2262 int kfd_create_crat_image_virtual(void **crat_image, size_t *size, 2263 - int flags, struct kfd_dev *kdev, 2263 + int flags, struct kfd_node *kdev, 2264 2264 uint32_t proximity_domain) 2265 2265 { 2266 2266 void *pcrat_image = NULL;
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_crat.h
··· 293 293 294 294 #pragma pack() 295 295 296 - struct kfd_dev; 296 + struct kfd_node; 297 297 298 298 /* Static table to describe GPU Cache information */ 299 299 struct kfd_gpu_cache_info { ··· 305 305 */ 306 306 uint32_t num_cu_shared; 307 307 }; 308 - int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info); 308 + int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info); 309 309 310 310 int kfd_create_crat_image_acpi(void **crat_image, size_t *size); 311 311 void kfd_destroy_crat_image(void *crat_image); 312 312 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, 313 313 uint32_t proximity_domain); 314 314 int kfd_create_crat_image_virtual(void **crat_image, size_t *size, 315 - int flags, struct kfd_dev *kdev, 315 + int flags, struct kfd_node *kdev, 316 316 uint32_t proximity_domain); 317 317 318 318 #endif /* KFD_CRAT_H_INCLUDED */
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
··· 43 43 static ssize_t kfd_debugfs_hang_hws_write(struct file *file, 44 44 const char __user *user_buf, size_t size, loff_t *ppos) 45 45 { 46 - struct kfd_dev *dev; 46 + struct kfd_node *dev; 47 47 char tmp[16]; 48 48 uint32_t gpu_id; 49 49 int ret = -EINVAL;
+158 -102
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 61 61 static void kfd_gtt_sa_fini(struct kfd_dev *kfd); 62 62 63 63 static int kfd_resume_iommu(struct kfd_dev *kfd); 64 - static int kfd_resume(struct kfd_dev *kfd); 64 + static int kfd_resume(struct kfd_node *kfd); 65 65 66 66 static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) 67 67 { ··· 441 441 memset(&kfd->doorbell_available_index, 0, 442 442 sizeof(kfd->doorbell_available_index)); 443 443 444 - atomic_set(&kfd->sram_ecc_flag, 0); 445 - 446 444 ida_init(&kfd->doorbell_ida); 447 445 448 446 return kfd; ··· 487 489 } 488 490 } 489 491 490 - static int kfd_gws_init(struct kfd_dev *kfd) 492 + static int kfd_gws_init(struct kfd_node *node) 491 493 { 492 494 int ret = 0; 495 + struct kfd_dev *kfd = node->kfd; 493 496 494 - if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) 497 + if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) 495 498 return 0; 496 499 497 - if (hws_gws_support || (KFD_IS_SOC15(kfd) && 498 - ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1) 500 + if (hws_gws_support || (KFD_IS_SOC15(node) && 501 + ((KFD_GC_VERSION(node) == IP_VERSION(9, 0, 1) 499 502 && kfd->mec2_fw_version >= 0x81b3) || 500 - (KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0) 503 + (KFD_GC_VERSION(node) <= IP_VERSION(9, 4, 0) 501 504 && kfd->mec2_fw_version >= 0x1b3) || 502 - (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1) 505 + (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 1) 503 506 && kfd->mec2_fw_version >= 0x30) || 504 - (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) 507 + (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2) 505 508 && kfd->mec2_fw_version >= 0x28) || 506 - (KFD_GC_VERSION(kfd) >= IP_VERSION(10, 3, 0) 507 - && KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0) 509 + (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) 510 + && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) 508 511 && kfd->mec2_fw_version >= 0x6b)))) 509 - ret = amdgpu_amdkfd_alloc_gws(kfd->adev, 510 - kfd->adev->gds.gws_size, &kfd->gws); 512 + ret = amdgpu_amdkfd_alloc_gws(node->adev, 513 + 
node->adev->gds.gws_size, &node->gws); 511 514 512 515 return ret; 513 516 } 514 517 515 - static void kfd_smi_init(struct kfd_dev *dev) 518 + static void kfd_smi_init(struct kfd_node *dev) 516 519 { 517 520 INIT_LIST_HEAD(&dev->smi_clients); 518 521 spin_lock_init(&dev->smi_lock); 522 + } 523 + 524 + static int kfd_init_node(struct kfd_node *node) 525 + { 526 + int err = -1; 527 + 528 + if (kfd_interrupt_init(node)) { 529 + dev_err(kfd_device, "Error initializing interrupts\n"); 530 + goto kfd_interrupt_error; 531 + } 532 + 533 + node->dqm = device_queue_manager_init(node); 534 + if (!node->dqm) { 535 + dev_err(kfd_device, "Error initializing queue manager\n"); 536 + goto device_queue_manager_error; 537 + } 538 + 539 + if (kfd_gws_init(node)) { 540 + dev_err(kfd_device, "Could not allocate %d gws\n", 541 + node->adev->gds.gws_size); 542 + goto gws_error; 543 + } 544 + 545 + if (kfd_resume(node)) 546 + goto kfd_resume_error; 547 + 548 + if (kfd_topology_add_device(node)) { 549 + dev_err(kfd_device, "Error adding device to topology\n"); 550 + goto kfd_topology_add_device_error; 551 + } 552 + 553 + kfd_smi_init(node); 554 + 555 + return 0; 556 + 557 + kfd_topology_add_device_error: 558 + kfd_resume_error: 559 + gws_error: 560 + device_queue_manager_uninit(node->dqm); 561 + device_queue_manager_error: 562 + kfd_interrupt_exit(node); 563 + kfd_interrupt_error: 564 + if (node->gws) 565 + amdgpu_amdkfd_free_gws(node->adev, node->gws); 566 + 567 + /* Cleanup the node memory here */ 568 + kfree(node); 569 + return err; 570 + } 571 + 572 + static void kfd_cleanup_node(struct kfd_dev *kfd) 573 + { 574 + struct kfd_node *knode = kfd->node; 575 + 576 + device_queue_manager_uninit(knode->dqm); 577 + kfd_interrupt_exit(knode); 578 + kfd_topology_remove_device(knode); 579 + if (knode->gws) 580 + amdgpu_amdkfd_free_gws(knode->adev, knode->gws); 581 + kfree(knode); 582 + kfd->node = NULL; 519 583 } 520 584 521 585 bool kgd2kfd_device_init(struct kfd_dev *kfd, 522 586 const struct 
kgd2kfd_shared_resources *gpu_resources) 523 587 { 524 588 unsigned int size, map_process_packet_size; 589 + struct kfd_node *node; 590 + uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd; 591 + unsigned int max_proc_per_quantum; 525 592 526 593 kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, 527 594 KGD_ENGINE_MEC1); ··· 596 533 KGD_ENGINE_SDMA1); 597 534 kfd->shared_resources = *gpu_resources; 598 535 599 - kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; 600 - kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; 601 - kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd 602 - - kfd->vm_info.first_vmid_kfd + 1; 536 + first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; 537 + last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; 538 + vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1; 603 539 604 540 /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. 605 541 * 32 and 64-bit requests are possible and must be ··· 619 557 620 558 /* Verify module parameters regarding mapped process number*/ 621 559 if (hws_max_conc_proc >= 0) 622 - kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); 560 + max_proc_per_quantum = min((u32)hws_max_conc_proc, vmid_num_kfd); 623 561 else 624 - kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; 562 + max_proc_per_quantum = vmid_num_kfd; 625 563 626 564 /* calculate max size of mqds needed for queues */ 627 565 size = max_num_of_queues_per_device * ··· 671 609 672 610 kfd->noretry = kfd->adev->gmc.noretry; 673 611 674 - if (kfd_interrupt_init(kfd)) { 675 - dev_err(kfd_device, "Error initializing interrupts\n"); 676 - goto kfd_interrupt_error; 677 - } 678 - 679 - kfd->dqm = device_queue_manager_init(kfd); 680 - if (!kfd->dqm) { 681 - dev_err(kfd_device, "Error initializing queue manager\n"); 682 - goto device_queue_manager_error; 683 - } 684 - 685 - /* If supported on this device, allocate global GWS that is shared 686 - * by all 
KFD processes 687 - */ 688 - if (kfd_gws_init(kfd)) { 689 - dev_err(kfd_device, "Could not allocate %d gws\n", 690 - kfd->adev->gds.gws_size); 691 - goto gws_error; 692 - } 693 - 694 612 /* If CRAT is broken, won't set iommu enabled */ 695 613 kfd_double_confirm_iommu_support(kfd); 696 614 ··· 684 642 685 643 svm_migrate_init(kfd->adev); 686 644 687 - if (kfd_resume_iommu(kfd)) 688 - goto device_iommu_error; 689 - 690 - if (kfd_resume(kfd)) 691 - goto kfd_resume_error; 692 - 693 - amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info); 694 - 695 - if (kfd_topology_add_device(kfd)) { 696 - dev_err(kfd_device, "Error adding device to topology\n"); 697 - goto kfd_topology_add_device_error; 645 + /* Allocate the KFD node */ 646 + node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL); 647 + if (!node) { 648 + dev_err(kfd_device, "Error allocating KFD node\n"); 649 + goto node_alloc_error; 698 650 } 699 651 700 - kfd_smi_init(kfd); 652 + node->adev = kfd->adev; 653 + node->kfd = kfd; 654 + node->kfd2kgd = kfd->kfd2kgd; 655 + node->vm_info.vmid_num_kfd = vmid_num_kfd; 656 + node->vm_info.first_vmid_kfd = first_vmid_kfd; 657 + node->vm_info.last_vmid_kfd = last_vmid_kfd; 658 + node->max_proc_per_quantum = max_proc_per_quantum; 659 + atomic_set(&node->sram_ecc_flag, 0); 660 + 661 + /* Initialize the KFD node */ 662 + if (kfd_init_node(node)) { 663 + dev_err(kfd_device, "Error initializing KFD node\n"); 664 + goto node_init_error; 665 + } 666 + kfd->node = node; 667 + 668 + if (kfd_resume_iommu(kfd)) 669 + goto kfd_resume_iommu_error; 670 + 671 + amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info); 701 672 702 673 kfd->init_complete = true; 703 674 dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, 704 675 kfd->adev->pdev->device); 705 676 706 677 pr_debug("Starting kfd with the following scheduling policy %d\n", 707 - kfd->dqm->sched_policy); 678 + node->dqm->sched_policy); 708 679 709 680 goto out; 710 681 711 - 
kfd_topology_add_device_error: 712 - kfd_resume_error: 682 + kfd_resume_iommu_error: 683 + kfd_cleanup_node(kfd); 684 + node_init_error: 685 + node_alloc_error: 713 686 device_iommu_error: 714 - gws_error: 715 - device_queue_manager_uninit(kfd->dqm); 716 - device_queue_manager_error: 717 - kfd_interrupt_exit(kfd); 718 - kfd_interrupt_error: 719 687 kfd_doorbell_fini(kfd); 720 688 kfd_doorbell_error: 721 689 kfd_gtt_sa_fini(kfd); 722 690 kfd_gtt_sa_init_error: 723 691 amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); 724 692 alloc_gtt_mem_failure: 725 - if (kfd->gws) 726 - amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); 727 693 dev_err(kfd_device, 728 694 "device %x:%x NOT added due to errors\n", 729 695 kfd->adev->pdev->vendor, kfd->adev->pdev->device); ··· 742 692 void kgd2kfd_device_exit(struct kfd_dev *kfd) 743 693 { 744 694 if (kfd->init_complete) { 745 - device_queue_manager_uninit(kfd->dqm); 746 - kfd_interrupt_exit(kfd); 747 - kfd_topology_remove_device(kfd); 695 + kfd_cleanup_node(kfd); 748 696 kfd_doorbell_fini(kfd); 749 697 ida_destroy(&kfd->doorbell_ida); 750 698 kfd_gtt_sa_fini(kfd); 751 699 amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); 752 - if (kfd->gws) 753 - amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); 754 700 } 755 701 756 702 kfree(kfd); ··· 754 708 755 709 int kgd2kfd_pre_reset(struct kfd_dev *kfd) 756 710 { 711 + struct kfd_node *node = kfd->node; 712 + 757 713 if (!kfd->init_complete) 758 714 return 0; 759 715 760 - kfd_smi_event_update_gpu_reset(kfd, false); 716 + kfd_smi_event_update_gpu_reset(node, false); 761 717 762 - kfd->dqm->ops.pre_reset(kfd->dqm); 718 + node->dqm->ops.pre_reset(node->dqm); 763 719 764 720 kgd2kfd_suspend(kfd, false); 765 721 766 - kfd_signal_reset_event(kfd); 722 + kfd_signal_reset_event(node); 767 723 return 0; 768 724 } 769 725 ··· 778 730 int kgd2kfd_post_reset(struct kfd_dev *kfd) 779 731 { 780 732 int ret; 733 + struct kfd_node *node = kfd->node; 781 734 782 735 if (!kfd->init_complete) 783 736 return 0; 
784 737 785 - ret = kfd_resume(kfd); 738 + ret = kfd_resume(node); 786 739 if (ret) 787 740 return ret; 788 741 atomic_dec(&kfd_locked); 789 742 790 - atomic_set(&kfd->sram_ecc_flag, 0); 743 + atomic_set(&node->sram_ecc_flag, 0); 791 744 792 - kfd_smi_event_update_gpu_reset(kfd, true); 745 + kfd_smi_event_update_gpu_reset(node, true); 793 746 794 747 return 0; 795 748 } ··· 802 753 803 754 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) 804 755 { 756 + struct kfd_node *node = kfd->node; 757 + 805 758 if (!kfd->init_complete) 806 759 return; 807 760 ··· 814 763 kfd_suspend_all_processes(); 815 764 } 816 765 817 - kfd->dqm->ops.stop(kfd->dqm); 766 + node->dqm->ops.stop(node->dqm); 818 767 kfd_iommu_suspend(kfd); 819 768 } 820 769 821 770 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) 822 771 { 823 772 int ret, count; 773 + struct kfd_node *node = kfd->node; 824 774 825 775 if (!kfd->init_complete) 826 776 return 0; 827 777 828 - ret = kfd_resume(kfd); 778 + ret = kfd_resume(node); 829 779 if (ret) 830 780 return ret; 831 781 ··· 861 809 return err; 862 810 } 863 811 864 - static int kfd_resume(struct kfd_dev *kfd) 812 + static int kfd_resume(struct kfd_node *node) 865 813 { 866 814 int err = 0; 867 815 868 - err = kfd->dqm->ops.start(kfd->dqm); 816 + err = node->dqm->ops.start(node->dqm); 869 817 if (err) 870 818 dev_err(kfd_device, 871 819 "Error starting queue manager for device %x:%x\n", 872 - kfd->adev->pdev->vendor, kfd->adev->pdev->device); 820 + node->adev->pdev->vendor, node->adev->pdev->device); 873 821 874 822 return err; 875 823 } ··· 895 843 uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; 896 844 bool is_patched = false; 897 845 unsigned long flags; 846 + struct kfd_node *node = kfd->node; 898 847 899 848 if (!kfd->init_complete) 900 849 return; ··· 905 852 return; 906 853 } 907 854 908 - spin_lock_irqsave(&kfd->interrupt_lock, flags); 855 + spin_lock_irqsave(&node->interrupt_lock, flags); 909 856 910 - if (kfd->interrupts_active 911 - && 
interrupt_is_wanted(kfd, ih_ring_entry, 857 + if (node->interrupts_active 858 + && interrupt_is_wanted(node, ih_ring_entry, 912 859 patched_ihre, &is_patched) 913 - && enqueue_ih_ring_entry(kfd, 860 + && enqueue_ih_ring_entry(node, 914 861 is_patched ? patched_ihre : ih_ring_entry)) 915 - kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work); 862 + kfd_queue_work(node->ih_wq, &node->interrupt_work); 916 863 917 - spin_unlock_irqrestore(&kfd->interrupt_lock, flags); 864 + spin_unlock_irqrestore(&node->interrupt_lock, flags); 918 865 } 919 866 920 867 int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger) ··· 1052 999 return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size); 1053 1000 } 1054 1001 1055 - int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, 1002 + int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size, 1056 1003 struct kfd_mem_obj **mem_obj) 1057 1004 { 1058 1005 unsigned int found, start_search, cur_size; 1006 + struct kfd_dev *kfd = node->kfd; 1059 1007 1060 1008 if (size == 0) 1061 1009 return -EINVAL; ··· 1156 1102 return -ENOMEM; 1157 1103 } 1158 1104 1159 - int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) 1105 + int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj) 1160 1106 { 1107 + struct kfd_dev *kfd = node->kfd; 1108 + 1161 1109 /* Act like kfree when trying to free a NULL object */ 1162 1110 if (!mem_obj) 1163 1111 return 0; ··· 1182 1126 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) 1183 1127 { 1184 1128 if (kfd) 1185 - atomic_inc(&kfd->sram_ecc_flag); 1129 + atomic_inc(&kfd->node->sram_ecc_flag); 1186 1130 } 1187 1131 1188 - void kfd_inc_compute_active(struct kfd_dev *kfd) 1132 + void kfd_inc_compute_active(struct kfd_node *node) 1189 1133 { 1190 - if (atomic_inc_return(&kfd->compute_profile) == 1) 1191 - amdgpu_amdkfd_set_compute_idle(kfd->adev, false); 1134 + if (atomic_inc_return(&node->kfd->compute_profile) == 1) 1135 + 
amdgpu_amdkfd_set_compute_idle(node->adev, false); 1192 1136 } 1193 1137 1194 - void kfd_dec_compute_active(struct kfd_dev *kfd) 1138 + void kfd_dec_compute_active(struct kfd_node *node) 1195 1139 { 1196 - int count = atomic_dec_return(&kfd->compute_profile); 1140 + int count = atomic_dec_return(&node->kfd->compute_profile); 1197 1141 1198 1142 if (count == 0) 1199 - amdgpu_amdkfd_set_compute_idle(kfd->adev, true); 1143 + amdgpu_amdkfd_set_compute_idle(node->adev, true); 1200 1144 WARN_ONCE(count < 0, "Compute profile ref. count error"); 1201 1145 } 1202 1146 1203 1147 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) 1204 1148 { 1205 1149 if (kfd && kfd->init_complete) 1206 - kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); 1150 + kfd_smi_event_update_thermal_throttling(kfd->node, throttle_bitmask); 1207 1151 } 1208 1152 1209 1153 /* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and ··· 1211 1155 * When the device has more than two engines, we reserve two for PCIe to enable 1212 1156 * full-duplex and the rest are used as XGMI. 
1213 1157 */ 1214 - unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev) 1158 + unsigned int kfd_get_num_sdma_engines(struct kfd_node *node) 1215 1159 { 1216 1160 /* If XGMI is not supported, all SDMA engines are PCIe */ 1217 - if (!kdev->adev->gmc.xgmi.supported) 1218 - return kdev->adev->sdma.num_instances; 1161 + if (!node->adev->gmc.xgmi.supported) 1162 + return node->adev->sdma.num_instances; 1219 1163 1220 - return min(kdev->adev->sdma.num_instances, 2); 1164 + return min(node->adev->sdma.num_instances, 2); 1221 1165 } 1222 1166 1223 - unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev) 1167 + unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node) 1224 1168 { 1225 1169 /* After reserved for PCIe, the rest of engines are XGMI */ 1226 - return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev); 1170 + return node->adev->sdma.num_instances - kfd_get_num_sdma_engines(node); 1227 1171 } 1228 1172 1229 1173 #if defined(CONFIG_DEBUG_FS) ··· 1231 1175 /* This function will send a package to HIQ to hang the HWS 1232 1176 * which will trigger a GPU reset and bring the HWS back to normal state 1233 1177 */ 1234 - int kfd_debugfs_hang_hws(struct kfd_dev *dev) 1178 + int kfd_debugfs_hang_hws(struct kfd_node *dev) 1235 1179 { 1236 1180 if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { 1237 1181 pr_err("HWS is not enabled");
+50 -50
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 74 74 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 75 75 { 76 76 int i; 77 - int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec 78 - + pipe) * dqm->dev->shared_resources.num_queue_per_pipe; 77 + int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec 78 + + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; 79 79 80 80 /* queue is available for KFD usage if bit is 1 */ 81 - for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) 81 + for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i) 82 82 if (test_bit(pipe_offset + i, 83 - dqm->dev->shared_resources.cp_queue_bitmap)) 83 + dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 84 84 return true; 85 85 return false; 86 86 } 87 87 88 88 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 89 89 { 90 - return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, 90 + return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, 91 91 KGD_MAX_QUEUES); 92 92 } 93 93 94 94 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 95 95 { 96 - return dqm->dev->shared_resources.num_queue_per_pipe; 96 + return dqm->dev->kfd->shared_resources.num_queue_per_pipe; 97 97 } 98 98 99 99 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 100 100 { 101 - return dqm->dev->shared_resources.num_pipe_per_mec; 101 + return dqm->dev->kfd->shared_resources.num_pipe_per_mec; 102 102 } 103 103 104 104 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) ··· 110 110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 111 111 { 112 112 return kfd_get_num_sdma_engines(dqm->dev) * 113 - dqm->dev->device_info.num_sdma_queues_per_engine; 113 + dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 114 114 } 115 115 116 116 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 117 117 { 118 118 return 
kfd_get_num_xgmi_sdma_engines(dqm->dev) * 119 - dqm->dev->device_info.num_sdma_queues_per_engine; 119 + dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 120 120 } 121 121 122 122 static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm) 123 123 { 124 - return dqm->dev->device_info.reserved_sdma_queues_bitmap; 124 + return dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap; 125 125 } 126 126 127 127 void program_sh_mem_settings(struct device_queue_manager *dqm, ··· 330 330 struct queue *q, 331 331 uint32_t const *restore_id) 332 332 { 333 - struct kfd_dev *dev = qpd->dqm->dev; 333 + struct kfd_node *dev = qpd->dqm->dev; 334 334 335 335 if (!KFD_IS_SOC15(dev)) { 336 336 /* On pre-SOC15 chips we need to use the queue ID to ··· 349 349 * for a SDMA engine is 512. 350 350 */ 351 351 352 - uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx; 352 + uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx; 353 353 uint32_t valid_id = idx_offset[q->properties.sdma_engine_id] 354 354 + (q->properties.sdma_queue_id & 1) 355 355 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET ··· 382 382 } 383 383 384 384 q->properties.doorbell_off = 385 - kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), 385 + kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd), 386 386 q->doorbell_id); 387 387 return 0; 388 388 } ··· 391 391 struct queue *q) 392 392 { 393 393 unsigned int old; 394 - struct kfd_dev *dev = qpd->dqm->dev; 394 + struct kfd_node *dev = qpd->dqm->dev; 395 395 396 396 if (!KFD_IS_SOC15(dev) || 397 397 q->properties.type == KFD_QUEUE_TYPE_SDMA || ··· 441 441 442 442 program_sh_mem_settings(dqm, qpd); 443 443 444 - if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) 444 + if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled) 445 445 program_trap_handler_settings(dqm, qpd); 446 446 447 447 /* qpd->page_table_base is set earlier when register_process() ··· 460 460 return 0; 461 461 } 462 462 463 - static int 
flush_texture_cache_nocpsch(struct kfd_dev *kdev, 463 + static int flush_texture_cache_nocpsch(struct kfd_node *kdev, 464 464 struct qcm_process_device *qpd) 465 465 { 466 466 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; ··· 661 661 #define SQ_IND_CMD_CMD_KILL 0x00000003 662 662 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 663 663 664 - static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 664 + static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p) 665 665 { 666 666 int status = 0; 667 667 unsigned int vmid; ··· 837 837 838 838 /* Make sure the queue is unmapped before updating the MQD */ 839 839 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 840 - if (!dqm->dev->shared_resources.enable_mes) 840 + if (!dqm->dev->kfd->shared_resources.enable_mes) 841 841 retval = unmap_queues_cpsch(dqm, 842 842 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); 843 843 else if (prev_active) ··· 858 858 } 859 859 860 860 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 861 - (dqm->dev->cwsr_enabled ? 861 + (dqm->dev->kfd->cwsr_enabled ? 862 862 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 863 863 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 864 864 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); ··· 895 895 } 896 896 897 897 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 898 - if (!dqm->dev->shared_resources.enable_mes) 898 + if (!dqm->dev->kfd->shared_resources.enable_mes) 899 899 retval = map_queues_cpsch(dqm); 900 900 else if (q->properties.is_active) 901 901 retval = add_queue_mes(dqm, q, &pdd->qpd); ··· 951 951 continue; 952 952 953 953 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 954 - (dqm->dev->cwsr_enabled ? 954 + (dqm->dev->kfd->cwsr_enabled ? 
955 955 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 956 956 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 957 957 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); ··· 993 993 q->properties.is_active = false; 994 994 decrement_queue_count(dqm, qpd, q); 995 995 996 - if (dqm->dev->shared_resources.enable_mes) { 996 + if (dqm->dev->kfd->shared_resources.enable_mes) { 997 997 retval = remove_queue_mes(dqm, q, qpd); 998 998 if (retval) { 999 999 pr_err("Failed to evict queue %d\n", ··· 1003 1003 } 1004 1004 } 1005 1005 pdd->last_evict_timestamp = get_jiffies_64(); 1006 - if (!dqm->dev->shared_resources.enable_mes) 1006 + if (!dqm->dev->kfd->shared_resources.enable_mes) 1007 1007 retval = execute_queues_cpsch(dqm, 1008 1008 qpd->is_debug ? 1009 1009 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : ··· 1132 1132 q->properties.is_active = true; 1133 1133 increment_queue_count(dqm, &pdd->qpd, q); 1134 1134 1135 - if (dqm->dev->shared_resources.enable_mes) { 1135 + if (dqm->dev->kfd->shared_resources.enable_mes) { 1136 1136 retval = add_queue_mes(dqm, q, qpd); 1137 1137 if (retval) { 1138 1138 pr_err("Failed to restore queue %d\n", ··· 1141 1141 } 1142 1142 } 1143 1143 } 1144 - if (!dqm->dev->shared_resources.enable_mes) 1144 + if (!dqm->dev->kfd->shared_resources.enable_mes) 1145 1145 retval = execute_queues_cpsch(dqm, 1146 1146 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1147 1147 qpd->evicted = 0; ··· 1282 1282 1283 1283 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1284 1284 if (test_bit(pipe_offset + queue, 1285 - dqm->dev->shared_resources.cp_queue_bitmap)) 1285 + dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1286 1286 dqm->allocated_queues[pipe] |= 1 << queue; 1287 1287 } 1288 1288 ··· 1426 1426 int i, mec; 1427 1427 struct scheduling_resources res; 1428 1428 1429 - res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; 1429 + res.vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 1430 1430 1431 1431 res.queue_mask = 0; 1432 1432 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 
1433 - mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) 1434 - / dqm->dev->shared_resources.num_pipe_per_mec; 1433 + mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 1434 + / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 1435 1435 1436 - if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) 1436 + if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1437 1437 continue; 1438 1438 1439 1439 /* only acquire queues from the first MEC */ ··· 1489 1489 1490 1490 dqm_lock(dqm); 1491 1491 1492 - if (!dqm->dev->shared_resources.enable_mes) { 1492 + if (!dqm->dev->kfd->shared_resources.enable_mes) { 1493 1493 retval = pm_init(&dqm->packet_mgr, dqm); 1494 1494 if (retval) 1495 1495 goto fail_packet_manager_init; ··· 1516 1516 dqm->is_hws_hang = false; 1517 1517 dqm->is_resetting = false; 1518 1518 dqm->sched_running = true; 1519 - if (!dqm->dev->shared_resources.enable_mes) 1519 + if (!dqm->dev->kfd->shared_resources.enable_mes) 1520 1520 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1521 1521 dqm_unlock(dqm); 1522 1522 1523 1523 return 0; 1524 1524 fail_allocate_vidmem: 1525 1525 fail_set_sched_resources: 1526 - if (!dqm->dev->shared_resources.enable_mes) 1526 + if (!dqm->dev->kfd->shared_resources.enable_mes) 1527 1527 pm_uninit(&dqm->packet_mgr, false); 1528 1528 fail_packet_manager_init: 1529 1529 dqm_unlock(dqm); ··· 1541 1541 } 1542 1542 1543 1543 if (!dqm->is_hws_hang) { 1544 - if (!dqm->dev->shared_resources.enable_mes) 1544 + if (!dqm->dev->kfd->shared_resources.enable_mes) 1545 1545 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); 1546 1546 else 1547 1547 remove_all_queues_mes(dqm); ··· 1550 1550 hanging = dqm->is_hws_hang || dqm->is_resetting; 1551 1551 dqm->sched_running = false; 1552 1552 1553 - if (!dqm->dev->shared_resources.enable_mes) 1553 + if (!dqm->dev->kfd->shared_resources.enable_mes) 1554 1554 pm_release_ib(&dqm->packet_mgr); 1555 1555 1556 1556 
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1557 - if (!dqm->dev->shared_resources.enable_mes) 1557 + if (!dqm->dev->kfd->shared_resources.enable_mes) 1558 1558 pm_uninit(&dqm->packet_mgr, hanging); 1559 1559 dqm_unlock(dqm); 1560 1560 ··· 1673 1673 if (q->properties.is_active) { 1674 1674 increment_queue_count(dqm, qpd, q); 1675 1675 1676 - if (!dqm->dev->shared_resources.enable_mes) 1676 + if (!dqm->dev->kfd->shared_resources.enable_mes) 1677 1677 retval = execute_queues_cpsch(dqm, 1678 1678 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1679 1679 else ··· 1893 1893 list_del(&q->list); 1894 1894 qpd->queue_count--; 1895 1895 if (q->properties.is_active) { 1896 - if (!dqm->dev->shared_resources.enable_mes) { 1896 + if (!dqm->dev->kfd->shared_resources.enable_mes) { 1897 1897 decrement_queue_count(dqm, qpd, q); 1898 1898 retval = execute_queues_cpsch(dqm, 1899 1899 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); ··· 2056 2056 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2057 2057 2058 2058 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2059 - q->properties.is_active || !q->device->cwsr_enabled || 2059 + q->properties.is_active || !q->device->kfd->cwsr_enabled || 2060 2060 !mqd_mgr->get_wave_state) { 2061 2061 dqm_unlock(dqm); 2062 2062 return -EINVAL; ··· 2105 2105 2106 2106 dqm_lock(dqm); 2107 2107 2108 - if (q->properties.is_active || !q->device->cwsr_enabled) { 2108 + if (q->properties.is_active || !q->device->kfd->cwsr_enabled) { 2109 2109 r = -EINVAL; 2110 2110 goto dqm_unlock; 2111 2111 } ··· 2158 2158 if (q->properties.is_active) { 2159 2159 decrement_queue_count(dqm, qpd, q); 2160 2160 2161 - if (dqm->dev->shared_resources.enable_mes) { 2161 + if (dqm->dev->kfd->shared_resources.enable_mes) { 2162 2162 retval = remove_queue_mes(dqm, q, qpd); 2163 2163 if (retval) 2164 2164 pr_err("Failed to remove queue %d\n", ··· 2180 2180 } 2181 2181 } 2182 2182 2183 - if (!dqm->dev->shared_resources.enable_mes) 2183 + if (!dqm->dev->kfd->shared_resources.enable_mes) 2184 2184 
retval = execute_queues_cpsch(dqm, filter, 0); 2185 2185 2186 2186 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { ··· 2242 2242 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2243 2243 { 2244 2244 int retval; 2245 - struct kfd_dev *dev = dqm->dev; 2245 + struct kfd_node *dev = dqm->dev; 2246 2246 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2247 2247 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2248 2248 get_num_all_sdma_engines(dqm) * 2249 - dev->device_info.num_sdma_queues_per_engine + 2249 + dev->kfd->device_info.num_sdma_queues_per_engine + 2250 2250 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; 2251 2251 2252 2252 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, ··· 2256 2256 return retval; 2257 2257 } 2258 2258 2259 - struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) 2259 + struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) 2260 2260 { 2261 2261 struct device_queue_manager *dqm; 2262 2262 ··· 2373 2373 if (init_mqd_managers(dqm)) 2374 2374 goto out_free; 2375 2375 2376 - if (!dev->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 2376 + if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 2377 2377 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); 2378 2378 goto out_free; 2379 2379 } ··· 2386 2386 return NULL; 2387 2387 } 2388 2388 2389 - static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, 2389 + static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, 2390 2390 struct kfd_mem_obj *mqd) 2391 2391 { 2392 2392 WARN(!mqd, "No hiq sdma mqd trunk to free"); ··· 2397 2397 void device_queue_manager_uninit(struct device_queue_manager *dqm) 2398 2398 { 2399 2399 dqm->ops.uninitialize(dqm); 2400 - if (!dqm->dev->shared_resources.enable_mes) 2400 + if (!dqm->dev->kfd->shared_resources.enable_mes) 2401 2401 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 2402 2402 kfree(dqm); 2403 2403 } ··· 2479 2479 2480 
2480 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 2481 2481 if (!test_bit(pipe_offset + queue, 2482 - dqm->dev->shared_resources.cp_queue_bitmap)) 2482 + dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2483 2483 continue; 2484 2484 2485 2485 r = dqm->dev->kfd2kgd->hqd_dump( ··· 2497 2497 2498 2498 for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { 2499 2499 for (queue = 0; 2500 - queue < dqm->dev->device_info.num_sdma_queues_per_engine; 2500 + queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2501 2501 queue++) { 2502 2502 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 2503 2503 dqm->dev->adev, pipe, queue, &dump, &n_regs);
+2 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
··· 207 207 struct queue *q, 208 208 struct qcm_process_device *qpd); 209 209 struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type, 210 - struct kfd_dev *dev); 210 + struct kfd_node *dev); 211 211 }; 212 212 213 213 /** ··· 228 228 229 229 struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX]; 230 230 struct packet_manager packet_mgr; 231 - struct kfd_dev *dev; 231 + struct kfd_node *dev; 232 232 struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */ 233 233 struct list_head queues; 234 234 unsigned int saved_flags;
+2 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
··· 60 60 qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << 61 61 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 62 62 63 - if (dqm->dev->noretry && !dqm->dev->use_iommu_v2) 63 + if (dqm->dev->kfd->noretry && !dqm->dev->kfd->use_iommu_v2) 64 64 qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; 65 65 66 - if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3)) 66 + if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3)) 67 67 qpd->sh_mem_config |= 68 68 (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); 69 69
+7 -7
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
··· 138 138 iounmap(kfd->doorbell_kernel_ptr); 139 139 } 140 140 141 - int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, 141 + int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process, 142 142 struct vm_area_struct *vma) 143 143 { 144 144 phys_addr_t address; ··· 148 148 * For simplicitly we only allow mapping of the entire doorbell 149 149 * allocation of a single device & process. 150 150 */ 151 - if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) 151 + if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev->kfd)) 152 152 return -EINVAL; 153 153 154 154 pdd = kfd_get_process_device_data(dev, process); ··· 170 170 " vm_flags == 0x%04lX\n" 171 171 " size == 0x%04lX\n", 172 172 (unsigned long long) vma->vm_start, address, vma->vm_flags, 173 - kfd_doorbell_process_slice(dev)); 173 + kfd_doorbell_process_slice(dev->kfd)); 174 174 175 175 176 176 return io_remap_pfn_range(vma, 177 177 vma->vm_start, 178 178 address >> PAGE_SHIFT, 179 - kfd_doorbell_process_slice(dev), 179 + kfd_doorbell_process_slice(dev->kfd), 180 180 vma->vm_page_prot); 181 181 } 182 182 ··· 278 278 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) 279 279 { 280 280 if (!pdd->doorbell_index) { 281 - int r = kfd_alloc_process_doorbells(pdd->dev, 281 + int r = kfd_alloc_process_doorbells(pdd->dev->kfd, 282 282 &pdd->doorbell_index); 283 283 if (r < 0) 284 284 return 0; 285 285 } 286 286 287 - return pdd->dev->doorbell_base + 288 - pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev); 287 + return pdd->dev->kfd->doorbell_base + 288 + pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev->kfd); 289 289 } 290 290 291 291 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
+6 -6
drivers/gpu/drm/amd/amdkfd/kfd_events.c
··· 348 348 349 349 int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset) 350 350 { 351 - struct kfd_dev *kfd; 351 + struct kfd_node *kfd; 352 352 struct kfd_process_device *pdd; 353 353 void *mem, *kern_addr; 354 354 uint64_t size; ··· 1125 1125 } 1126 1126 1127 1127 #ifdef KFD_SUPPORT_IOMMU_V2 1128 - void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid, 1128 + void kfd_signal_iommu_event(struct kfd_node *dev, u32 pasid, 1129 1129 unsigned long address, bool is_write_requested, 1130 1130 bool is_execute_requested) 1131 1131 { ··· 1221 1221 kfd_unref_process(p); 1222 1222 } 1223 1223 1224 - void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, 1225 - struct kfd_vm_fault_info *info) 1224 + void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid, 1225 + struct kfd_vm_fault_info *info) 1226 1226 { 1227 1227 struct kfd_event *ev; 1228 1228 uint32_t id; ··· 1269 1269 kfd_unref_process(p); 1270 1270 } 1271 1271 1272 - void kfd_signal_reset_event(struct kfd_dev *dev) 1272 + void kfd_signal_reset_event(struct kfd_node *dev) 1273 1273 { 1274 1274 struct kfd_hsa_hw_exception_data hw_exception_data; 1275 1275 struct kfd_hsa_memory_exception_data memory_exception_data; ··· 1325 1325 srcu_read_unlock(&kfd_processes_srcu, idx); 1326 1326 } 1327 1327 1328 - void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid) 1328 + void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid) 1329 1329 { 1330 1330 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 1331 1331 struct kfd_hsa_memory_exception_data memory_exception_data;
+6 -6
drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
··· 322 322 pdd->lds_base = MAKE_LDS_APP_BASE_VI(); 323 323 pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); 324 324 325 - if (!pdd->dev->use_iommu_v2) { 325 + if (!pdd->dev->kfd->use_iommu_v2) { 326 326 /* dGPUs: SVM aperture starting at 0 327 327 * with small reserved space for kernel. 328 328 * Set them to CANONICAL addresses. 329 329 */ 330 330 pdd->gpuvm_base = SVM_USER_BASE; 331 331 pdd->gpuvm_limit = 332 - pdd->dev->shared_resources.gpuvm_size - 1; 332 + pdd->dev->kfd->shared_resources.gpuvm_size - 1; 333 333 } else { 334 334 /* set them to non CANONICAL addresses, and no SVM is 335 335 * allocated. 336 336 */ 337 337 pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1); 338 338 pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base, 339 - pdd->dev->shared_resources.gpuvm_size); 339 + pdd->dev->kfd->shared_resources.gpuvm_size); 340 340 } 341 341 342 342 pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); ··· 356 356 */ 357 357 pdd->gpuvm_base = SVM_USER_BASE; 358 358 pdd->gpuvm_limit = 359 - pdd->dev->shared_resources.gpuvm_size - 1; 359 + pdd->dev->kfd->shared_resources.gpuvm_size - 1; 360 360 361 361 pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); 362 362 pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); ··· 365 365 int kfd_init_apertures(struct kfd_process *process) 366 366 { 367 367 uint8_t id = 0; 368 - struct kfd_dev *dev; 368 + struct kfd_node *dev; 369 369 struct kfd_process_device *pdd; 370 370 371 371 /*Iterating over all devices*/ ··· 417 417 } 418 418 } 419 419 420 - if (!dev->use_iommu_v2) { 420 + if (!dev->kfd->use_iommu_v2) { 421 421 /* dGPUs: the reserved space for kernel 422 422 * before SVM 423 423 */
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
··· 187 187 REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID)); 188 188 } 189 189 190 - static void event_interrupt_poison_consumption_v11(struct kfd_dev *dev, 190 + static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, 191 191 uint16_t pasid, uint16_t source_id) 192 192 { 193 193 int ret = -EINVAL; ··· 225 225 amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); 226 226 } 227 227 228 - static bool event_interrupt_isr_v11(struct kfd_dev *dev, 228 + static bool event_interrupt_isr_v11(struct kfd_node *dev, 229 229 const uint32_t *ih_ring_entry, 230 230 uint32_t *patched_ihre, 231 231 bool *patched_flag) ··· 274 274 !amdgpu_no_queue_eviction_on_vm_fault); 275 275 } 276 276 277 - static void event_interrupt_wq_v11(struct kfd_dev *dev, 277 + static void event_interrupt_wq_v11(struct kfd_node *dev, 278 278 const uint32_t *ih_ring_entry) 279 279 { 280 280 uint16_t source_id, client_id, ring_id, pasid, vmid;
+5 -5
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
··· 90 90 #define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000 91 91 #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 92 92 93 - static void event_interrupt_poison_consumption_v9(struct kfd_dev *dev, 93 + static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, 94 94 uint16_t pasid, uint16_t client_id) 95 95 { 96 96 int old_poison, ret = -EINVAL; ··· 160 160 } 161 161 } 162 162 163 - static bool event_interrupt_isr_v9(struct kfd_dev *dev, 163 + static bool event_interrupt_isr_v9(struct kfd_node *dev, 164 164 const uint32_t *ih_ring_entry, 165 165 uint32_t *patched_ihre, 166 166 bool *patched_flag) ··· 206 206 207 207 *patched_flag = true; 208 208 memcpy(patched_ihre, ih_ring_entry, 209 - dev->device_info.ih_ring_entry_size); 209 + dev->kfd->device_info.ih_ring_entry_size); 210 210 211 211 pasid = dev->dqm->vmid_pasid[vmid]; 212 212 ··· 235 235 uint32_t context_id = 236 236 SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); 237 237 238 - if (context_id == 0 && context_id_expected(dev)) 238 + if (context_id == 0 && context_id_expected(dev->kfd)) 239 239 return false; 240 240 } 241 241 ··· 253 253 !amdgpu_no_queue_eviction_on_vm_fault); 254 254 } 255 255 256 - static void event_interrupt_wq_v9(struct kfd_dev *dev, 256 + static void event_interrupt_wq_v9(struct kfd_node *dev, 257 257 const uint32_t *ih_ring_entry) 258 258 { 259 259 uint16_t source_id, client_id, pasid, vmid;
+32 -32
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
··· 50 50 51 51 static void interrupt_wq(struct work_struct *); 52 52 53 - int kfd_interrupt_init(struct kfd_dev *kfd) 53 + int kfd_interrupt_init(struct kfd_node *node) 54 54 { 55 55 int r; 56 56 57 - r = kfifo_alloc(&kfd->ih_fifo, 58 - KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size, 57 + r = kfifo_alloc(&node->ih_fifo, 58 + KFD_IH_NUM_ENTRIES * node->kfd->device_info.ih_ring_entry_size, 59 59 GFP_KERNEL); 60 60 if (r) { 61 - dev_err(kfd->adev->dev, "Failed to allocate IH fifo\n"); 61 + dev_err(node->adev->dev, "Failed to allocate IH fifo\n"); 62 62 return r; 63 63 } 64 64 65 - kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); 66 - if (unlikely(!kfd->ih_wq)) { 67 - kfifo_free(&kfd->ih_fifo); 68 - dev_err(kfd->adev->dev, "Failed to allocate KFD IH workqueue\n"); 65 + node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); 66 + if (unlikely(!node->ih_wq)) { 67 + kfifo_free(&node->ih_fifo); 68 + dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); 69 69 return -ENOMEM; 70 70 } 71 - spin_lock_init(&kfd->interrupt_lock); 71 + spin_lock_init(&node->interrupt_lock); 72 72 73 - INIT_WORK(&kfd->interrupt_work, interrupt_wq); 73 + INIT_WORK(&node->interrupt_work, interrupt_wq); 74 74 75 - kfd->interrupts_active = true; 75 + node->interrupts_active = true; 76 76 77 77 /* 78 78 * After this function returns, the interrupt will be enabled. 
This ··· 84 84 return 0; 85 85 } 86 86 87 - void kfd_interrupt_exit(struct kfd_dev *kfd) 87 + void kfd_interrupt_exit(struct kfd_node *node) 88 88 { 89 89 /* 90 90 * Stop the interrupt handler from writing to the ring and scheduling ··· 93 93 */ 94 94 unsigned long flags; 95 95 96 - spin_lock_irqsave(&kfd->interrupt_lock, flags); 97 - kfd->interrupts_active = false; 98 - spin_unlock_irqrestore(&kfd->interrupt_lock, flags); 96 + spin_lock_irqsave(&node->interrupt_lock, flags); 97 + node->interrupts_active = false; 98 + spin_unlock_irqrestore(&node->interrupt_lock, flags); 99 99 100 100 /* 101 101 * flush_work ensures that there are no outstanding 102 102 * work-queue items that will access interrupt_ring. New work items 103 103 * can't be created because we stopped interrupt handling above. 104 104 */ 105 - flush_workqueue(kfd->ih_wq); 105 + flush_workqueue(node->ih_wq); 106 106 107 - kfifo_free(&kfd->ih_fifo); 107 + kfifo_free(&node->ih_fifo); 108 108 } 109 109 110 110 /* 111 111 * Assumption: single reader/writer. This function is not re-entrant 112 112 */ 113 - bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) 113 + bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry) 114 114 { 115 115 int count; 116 116 117 - count = kfifo_in(&kfd->ih_fifo, ih_ring_entry, 118 - kfd->device_info.ih_ring_entry_size); 119 - if (count != kfd->device_info.ih_ring_entry_size) { 120 - dev_dbg_ratelimited(kfd->adev->dev, 117 + count = kfifo_in(&node->ih_fifo, ih_ring_entry, 118 + node->kfd->device_info.ih_ring_entry_size); 119 + if (count != node->kfd->device_info.ih_ring_entry_size) { 120 + dev_dbg_ratelimited(node->adev->dev, 121 121 "Interrupt ring overflow, dropping interrupt %d\n", 122 122 count); 123 123 return false; ··· 129 129 /* 130 130 * Assumption: single reader/writer. 
This function is not re-entrant 131 131 */ 132 - static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) 132 + static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry) 133 133 { 134 134 int count; 135 135 136 - count = kfifo_out(&kfd->ih_fifo, ih_ring_entry, 137 - kfd->device_info.ih_ring_entry_size); 136 + count = kfifo_out(&node->ih_fifo, ih_ring_entry, 137 + node->kfd->device_info.ih_ring_entry_size); 138 138 139 - WARN_ON(count && count != kfd->device_info.ih_ring_entry_size); 139 + WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size); 140 140 141 - return count == kfd->device_info.ih_ring_entry_size; 141 + return count == node->kfd->device_info.ih_ring_entry_size; 142 142 } 143 143 144 144 static void interrupt_wq(struct work_struct *work) 145 145 { 146 - struct kfd_dev *dev = container_of(work, struct kfd_dev, 146 + struct kfd_node *dev = container_of(work, struct kfd_node, 147 147 interrupt_work); 148 148 uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; 149 149 unsigned long start_jiffies = jiffies; 150 150 151 - if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { 151 + if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { 152 152 dev_err_once(dev->adev->dev, "Ring entry too small\n"); 153 153 return; 154 154 } 155 155 156 156 while (dequeue_ih_ring_entry(dev, ih_ring_entry)) { 157 - dev->device_info.event_interrupt_class->interrupt_wq(dev, 157 + dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev, 158 158 ih_ring_entry); 159 159 if (time_is_before_jiffies(start_jiffies + HZ)) { 160 160 /* If we spent more than a second processing signals, ··· 166 166 } 167 167 } 168 168 169 - bool interrupt_is_wanted(struct kfd_dev *dev, 169 + bool interrupt_is_wanted(struct kfd_node *dev, 170 170 const uint32_t *ih_ring_entry, 171 171 uint32_t *patched_ihre, bool *flag) 172 172 { 173 173 /* integer and bitwise OR so there is no boolean short-circuiting */ 174 174 unsigned 
int wanted = 0; 175 175 176 - wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev, 176 + wanted |= dev->kfd->device_info.event_interrupt_class->interrupt_isr(dev, 177 177 ih_ring_entry, patched_ihre, flag); 178 178 179 179 return wanted != 0;
+11 -11
drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
··· 109 109 */ 110 110 int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd) 111 111 { 112 - struct kfd_dev *dev = pdd->dev; 112 + struct kfd_node *dev = pdd->dev; 113 113 struct kfd_process *p = pdd->process; 114 114 int err; 115 115 116 - if (!dev->use_iommu_v2 || pdd->bound == PDD_BOUND) 116 + if (!dev->kfd->use_iommu_v2 || pdd->bound == PDD_BOUND) 117 117 return 0; 118 118 119 119 if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { ··· 146 146 /* Callback for process shutdown invoked by the IOMMU driver */ 147 147 static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid) 148 148 { 149 - struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); 149 + struct kfd_node *dev = kfd_device_by_pci_dev(pdev); 150 150 struct kfd_process *p; 151 151 struct kfd_process_device *pdd; 152 152 ··· 182 182 static int iommu_invalid_ppr_cb(struct pci_dev *pdev, u32 pasid, 183 183 unsigned long address, u16 flags) 184 184 { 185 - struct kfd_dev *dev; 185 + struct kfd_node *dev; 186 186 187 187 dev_warn_ratelimited(kfd_device, 188 188 "Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X", ··· 205 205 * Bind processes do the device that have been temporarily unbound 206 206 * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. 
207 207 */ 208 - static int kfd_bind_processes_to_device(struct kfd_dev *kfd) 208 + static int kfd_bind_processes_to_device(struct kfd_node *knode) 209 209 { 210 210 struct kfd_process_device *pdd; 211 211 struct kfd_process *p; ··· 216 216 217 217 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 218 218 mutex_lock(&p->mutex); 219 - pdd = kfd_get_process_device_data(kfd, p); 219 + pdd = kfd_get_process_device_data(knode, p); 220 220 221 221 if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { 222 222 mutex_unlock(&p->mutex); 223 223 continue; 224 224 } 225 225 226 - err = amd_iommu_bind_pasid(kfd->adev->pdev, p->pasid, 226 + err = amd_iommu_bind_pasid(knode->adev->pdev, p->pasid, 227 227 p->lead_thread); 228 228 if (err < 0) { 229 229 pr_err("Unexpected pasid 0x%x binding failure\n", ··· 246 246 * processes will be restored to PDD_BOUND state in 247 247 * kfd_bind_processes_to_device. 248 248 */ 249 - static void kfd_unbind_processes_from_device(struct kfd_dev *kfd) 249 + static void kfd_unbind_processes_from_device(struct kfd_node *knode) 250 250 { 251 251 struct kfd_process_device *pdd; 252 252 struct kfd_process *p; ··· 256 256 257 257 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 258 258 mutex_lock(&p->mutex); 259 - pdd = kfd_get_process_device_data(kfd, p); 259 + pdd = kfd_get_process_device_data(knode, p); 260 260 261 261 if (WARN_ON(!pdd)) { 262 262 mutex_unlock(&p->mutex); ··· 281 281 if (!kfd->use_iommu_v2) 282 282 return; 283 283 284 - kfd_unbind_processes_from_device(kfd); 284 + kfd_unbind_processes_from_device(kfd->node); 285 285 286 286 amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL); 287 287 amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL); ··· 312 312 amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, 313 313 iommu_invalid_ppr_cb); 314 314 315 - err = kfd_bind_processes_to_device(kfd); 315 + err = kfd_bind_processes_to_device(kfd->node); 316 316 if (err) { 317 317 
amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL); 318 318 amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
+9 -9
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
··· 38 38 /* Initialize a kernel queue, including allocations of GART memory 39 39 * needed for the queue. 40 40 */ 41 - static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, 41 + static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev, 42 42 enum kfd_queue_type type, unsigned int queue_size) 43 43 { 44 44 struct queue_properties prop; ··· 75 75 if (!kq->mqd_mgr) 76 76 return false; 77 77 78 - prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); 78 + prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off); 79 79 80 80 if (!prop.doorbell_ptr) { 81 81 pr_err("Failed to initialize doorbell"); ··· 112 112 kq->rptr_kernel = kq->rptr_mem->cpu_ptr; 113 113 kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; 114 114 115 - retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size, 115 + retval = kfd_gtt_sa_allocate(dev, dev->kfd->device_info.doorbell_size, 116 116 &kq->wptr_mem); 117 117 118 118 if (retval != 0) ··· 189 189 err_eop_allocate_vidmem: 190 190 kfd_gtt_sa_free(dev, kq->pq); 191 191 err_pq_allocate_vidmem: 192 - kfd_release_kernel_doorbell(dev, prop.doorbell_ptr); 192 + kfd_release_kernel_doorbell(dev->kfd, prop.doorbell_ptr); 193 193 err_get_kernel_doorbell: 194 194 return false; 195 195 ··· 220 220 kfd_gtt_sa_free(kq->dev, kq->eop_mem); 221 221 222 222 kfd_gtt_sa_free(kq->dev, kq->pq); 223 - kfd_release_kernel_doorbell(kq->dev, 223 + kfd_release_kernel_doorbell(kq->dev->kfd, 224 224 kq->queue->properties.doorbell_ptr); 225 225 uninit_queue(kq->queue); 226 226 } ··· 298 298 } 299 299 pr_debug("\n"); 300 300 #endif 301 - if (kq->dev->device_info.doorbell_size == 8) { 301 + if (kq->dev->kfd->device_info.doorbell_size == 8) { 302 302 *kq->wptr64_kernel = kq->pending_wptr64; 303 303 write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, 304 304 kq->pending_wptr64); ··· 311 311 312 312 void kq_rollback_packet(struct kernel_queue *kq) 313 313 { 314 - if (kq->dev->device_info.doorbell_size == 8) { 
314 + if (kq->dev->kfd->device_info.doorbell_size == 8) { 315 315 kq->pending_wptr64 = *kq->wptr64_kernel; 316 316 kq->pending_wptr = *kq->wptr_kernel % 317 317 (kq->queue->properties.queue_size / 4); ··· 320 320 } 321 321 } 322 322 323 - struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 323 + struct kernel_queue *kernel_queue_init(struct kfd_node *dev, 324 324 enum kfd_queue_type type) 325 325 { 326 326 struct kernel_queue *kq; ··· 345 345 } 346 346 347 347 /* FIXME: Can this test be removed? */ 348 - static __attribute__((unused)) void test_kq(struct kfd_dev *dev) 348 + static __attribute__((unused)) void test_kq(struct kfd_node *dev) 349 349 { 350 350 struct kernel_queue *kq; 351 351 uint32_t *buffer, i;
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
··· 53 53 54 54 struct kernel_queue { 55 55 /* data */ 56 - struct kfd_dev *dev; 56 + struct kfd_node *dev; 57 57 struct mqd_manager *mqd_mgr; 58 58 struct queue *queue; 59 59 uint64_t pending_wptr64;
+4 -4
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
··· 423 423 424 424 kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid, 425 425 start >> PAGE_SHIFT, end >> PAGE_SHIFT, 426 - 0, adev->kfd.dev->id, prange->prefetch_loc, 426 + 0, adev->kfd.dev->node->id, prange->prefetch_loc, 427 427 prange->preferred_loc, trigger); 428 428 429 429 r = migrate_vma_setup(&migrate); ··· 456 456 457 457 kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid, 458 458 start >> PAGE_SHIFT, end >> PAGE_SHIFT, 459 - 0, adev->kfd.dev->id, trigger); 459 + 0, adev->kfd.dev->node->id, trigger); 460 460 461 461 svm_range_dma_unmap(adev->dev, scratch, 0, npages); 462 462 svm_range_free_dma_mappings(prange); ··· 701 701 702 702 kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid, 703 703 start >> PAGE_SHIFT, end >> PAGE_SHIFT, 704 - adev->kfd.dev->id, 0, prange->prefetch_loc, 704 + adev->kfd.dev->node->id, 0, prange->prefetch_loc, 705 705 prange->preferred_loc, trigger); 706 706 707 707 r = migrate_vma_setup(&migrate); ··· 737 737 738 738 kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid, 739 739 start >> PAGE_SHIFT, end >> PAGE_SHIFT, 740 - adev->kfd.dev->id, 0, trigger); 740 + adev->kfd.dev->node->id, 0, trigger); 741 741 742 742 svm_range_dma_unmap(adev->dev, scratch, 0, npages); 743 743
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
··· 46 46 KFD_PIPE_PRIORITY_CS_HIGH 47 47 }; 48 48 49 - struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev, struct queue_properties *q) 49 + struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev, struct queue_properties *q) 50 50 { 51 51 struct kfd_mem_obj *mqd_mem_obj = NULL; 52 52 ··· 61 61 return mqd_mem_obj; 62 62 } 63 63 64 - struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, 64 + struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev, 65 65 struct queue_properties *q) 66 66 { 67 67 struct kfd_mem_obj *mqd_mem_obj = NULL; ··· 72 72 return NULL; 73 73 74 74 offset = (q->sdma_engine_id * 75 - dev->device_info.num_sdma_queues_per_engine + 75 + dev->kfd->device_info.num_sdma_queues_per_engine + 76 76 q->sdma_queue_id) * 77 77 dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; 78 78
+4 -4
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
··· 68 68 */ 69 69 extern int pipe_priority_map[]; 70 70 struct mqd_manager { 71 - struct kfd_mem_obj* (*allocate_mqd)(struct kfd_dev *kfd, 71 + struct kfd_mem_obj* (*allocate_mqd)(struct kfd_node *kfd, 72 72 struct queue_properties *q); 73 73 74 74 void (*init_mqd)(struct mqd_manager *mm, void **mqd, ··· 121 121 uint32_t (*read_doorbell_id)(void *mqd); 122 122 123 123 struct mutex mqd_mutex; 124 - struct kfd_dev *dev; 124 + struct kfd_node *dev; 125 125 uint32_t mqd_size; 126 126 }; 127 127 128 - struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev, 128 + struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev, 129 129 struct queue_properties *q); 130 130 131 - struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, 131 + struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev, 132 132 struct queue_properties *q); 133 133 void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, 134 134 struct kfd_mem_obj *mqd_mem_obj);
+3 -3
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
··· 74 74 m->cp_hqd_queue_priority = q->priority; 75 75 } 76 76 77 - static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, 77 + static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd, 78 78 struct queue_properties *q) 79 79 { 80 80 struct kfd_mem_obj *mqd_mem_obj; ··· 390 390 391 391 392 392 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, 393 - struct kfd_dev *dev) 393 + struct kfd_node *dev) 394 394 { 395 395 struct mqd_manager *mqd; 396 396 ··· 470 470 } 471 471 472 472 struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, 473 - struct kfd_dev *dev) 473 + struct kfd_node *dev) 474 474 { 475 475 struct mqd_manager *mqd; 476 476
+4 -4
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
··· 74 74 m->cp_hqd_queue_priority = q->priority; 75 75 } 76 76 77 - static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, 77 + static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd, 78 78 struct queue_properties *q) 79 79 { 80 80 struct kfd_mem_obj *mqd_mem_obj; ··· 122 122 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; 123 123 } 124 124 125 - if (mm->dev->cwsr_enabled) { 125 + if (mm->dev->kfd->cwsr_enabled) { 126 126 m->cp_hqd_persistent_state |= 127 127 (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); 128 128 m->cp_hqd_ctx_save_base_addr_lo = ··· 210 210 m->cp_hqd_pq_doorbell_control |= 211 211 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; 212 212 } 213 - if (mm->dev->cwsr_enabled) 213 + if (mm->dev->kfd->cwsr_enabled) 214 214 m->cp_hqd_ctx_save_control = 0; 215 215 216 216 update_cu_mask(mm, mqd, minfo); ··· 405 405 #endif 406 406 407 407 struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, 408 - struct kfd_dev *dev) 408 + struct kfd_node *dev) 409 409 { 410 410 struct mqd_manager *mqd; 411 411
+9 -9
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
··· 81 81 m->cp_hqd_queue_priority = q->priority; 82 82 } 83 83 84 - static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, 84 + static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node, 85 85 struct queue_properties *q) 86 86 { 87 87 struct kfd_mem_obj *mqd_mem_obj; ··· 91 91 * MES write to areas beyond MQD size. So allocate 92 92 * 1 PAGE_SIZE memory for MQD is MES is enabled. 93 93 */ 94 - if (kfd->shared_resources.enable_mes) 94 + if (node->kfd->shared_resources.enable_mes) 95 95 size = PAGE_SIZE; 96 96 else 97 97 size = sizeof(struct v11_compute_mqd); 98 98 99 - if (kfd_gtt_sa_allocate(kfd, size, &mqd_mem_obj)) 99 + if (kfd_gtt_sa_allocate(node, size, &mqd_mem_obj)) 100 100 return NULL; 101 101 102 102 return mqd_mem_obj; ··· 113 113 m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr; 114 114 addr = mqd_mem_obj->gpu_addr; 115 115 116 - if (mm->dev->shared_resources.enable_mes) 116 + if (mm->dev->kfd->shared_resources.enable_mes) 117 117 size = PAGE_SIZE; 118 118 else 119 119 size = sizeof(struct v11_compute_mqd); ··· 155 155 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; 156 156 } 157 157 158 - if (mm->dev->cwsr_enabled) { 158 + if (mm->dev->kfd->cwsr_enabled) { 159 159 m->cp_hqd_persistent_state |= 160 160 (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); 161 161 m->cp_hqd_ctx_save_base_addr_lo = ··· 243 243 m->cp_hqd_pq_doorbell_control |= 244 244 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; 245 245 } 246 - if (mm->dev->cwsr_enabled) 246 + if (mm->dev->kfd->cwsr_enabled) 247 247 m->cp_hqd_ctx_save_control = 0; 248 248 249 249 update_cu_mask(mm, mqd, minfo); ··· 319 319 320 320 m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr; 321 321 322 - if (mm->dev->shared_resources.enable_mes) 322 + if (mm->dev->kfd->shared_resources.enable_mes) 323 323 size = PAGE_SIZE; 324 324 else 325 325 size = sizeof(struct v11_sdma_mqd); ··· 387 387 #endif 388 388 389 389 struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, 390 - struct kfd_dev 
*dev) 390 + struct kfd_node *dev) 391 391 { 392 392 struct mqd_manager *mqd; 393 393 ··· 463 463 * To allocate SDMA MQDs by generic functions 464 464 * when MES is enabled. 465 465 */ 466 - if (dev->shared_resources.enable_mes) { 466 + if (dev->kfd->shared_resources.enable_mes) { 467 467 mqd->allocate_mqd = allocate_mqd; 468 468 mqd->free_mqd = kfd_free_mqd_cp; 469 469 }
+11 -13
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
··· 83 83 m->cp_hqd_queue_priority = q->priority; 84 84 } 85 85 86 - static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, 86 + static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node, 87 87 struct queue_properties *q) 88 88 { 89 89 int retval; ··· 105 105 * pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct 106 106 * amdgpu memory functions to do so. 107 107 */ 108 - if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { 108 + if (node->kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { 109 109 mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); 110 110 if (!mqd_mem_obj) 111 111 return NULL; 112 - retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->adev, 112 + retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev, 113 113 ALIGN(q->ctl_stack_size, PAGE_SIZE) + 114 114 ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), 115 115 &(mqd_mem_obj->gtt_mem), ··· 121 121 return NULL; 122 122 } 123 123 } else { 124 - retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd), 124 + retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd), 125 125 &mqd_mem_obj); 126 126 if (retval) 127 127 return NULL; ··· 136 136 { 137 137 uint64_t addr; 138 138 struct v9_mqd *m; 139 - struct amdgpu_device *adev = (struct amdgpu_device *)mm->dev->adev; 140 139 141 140 m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr; 142 141 addr = mqd_mem_obj->gpu_addr; ··· 168 169 if (q->format == KFD_QUEUE_FORMAT_AQL) { 169 170 m->cp_hqd_aql_control = 170 171 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; 171 - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { 172 + if (KFD_GC_VERSION(mm->dev) == IP_VERSION(9, 4, 3)) { 172 173 /* On GC 9.4.3, DW 41 is re-purposed as 173 174 * compute_tg_chunk_size. 
174 175 * TODO: review this setting when active CUs in the ··· 178 179 } 179 180 } else { 180 181 /* PM4 queue */ 181 - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { 182 + if (KFD_GC_VERSION(mm->dev) == IP_VERSION(9, 4, 3)) { 182 183 m->compute_static_thread_mgmt_se6 = 0; 183 184 /* TODO: program pm4_target_xcc */ 184 185 } ··· 189 190 (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); 190 191 } 191 192 192 - if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { 193 + if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) { 193 194 m->cp_hqd_persistent_state |= 194 195 (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); 195 196 m->cp_hqd_ctx_save_base_addr_lo = ··· 224 225 struct queue_properties *q, 225 226 struct mqd_update_info *minfo) 226 227 { 227 - struct amdgpu_device *adev = (struct amdgpu_device *)mm->dev->adev; 228 228 struct v9_mqd *m; 229 229 230 230 m = get_mqd(mqd); ··· 273 275 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | 274 276 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | 275 277 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; 276 - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) 278 + if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) 277 279 m->cp_hqd_pq_control |= 278 - CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK; 280 + CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK; 279 281 m->cp_hqd_pq_doorbell_control |= 1 << 280 282 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; 281 283 } 282 - if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) 284 + if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) 283 285 m->cp_hqd_ctx_save_control = 0; 284 286 285 287 update_cu_mask(mm, mqd, minfo); ··· 485 487 #endif 486 488 487 489 struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, 488 - struct kfd_dev *dev) 490 + struct kfd_node *dev) 489 491 { 490 492 struct mqd_manager *mqd; 491 493
+5 -5
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
··· 77 77 m->cp_hqd_queue_priority = q->priority; 78 78 } 79 79 80 - static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, 80 + static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd, 81 81 struct queue_properties *q) 82 82 { 83 83 struct kfd_mem_obj *mqd_mem_obj; ··· 136 136 (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); 137 137 } 138 138 139 - if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { 139 + if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) { 140 140 m->cp_hqd_persistent_state |= 141 141 (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); 142 142 m->cp_hqd_ctx_save_base_addr_lo = ··· 227 227 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; 228 228 } 229 229 230 - if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) 230 + if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) 231 231 m->cp_hqd_ctx_save_control = 232 232 atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | 233 233 mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; ··· 446 446 #endif 447 447 448 448 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, 449 - struct kfd_dev *dev) 449 + struct kfd_node *dev) 450 450 { 451 451 struct mqd_manager *mqd; 452 452 ··· 528 528 } 529 529 530 530 struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, 531 - struct kfd_dev *dev) 531 + struct kfd_node *dev) 532 532 { 533 533 struct mqd_manager *mqd; 534 534
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
··· 45 45 unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; 46 46 unsigned int map_queue_size; 47 47 unsigned int max_proc_per_quantum = 1; 48 - struct kfd_dev *dev = pm->dqm->dev; 48 + struct kfd_node *dev = pm->dqm->dev; 49 49 50 50 process_count = pm->dqm->processes_count; 51 51 queue_count = pm->dqm->active_queue_count;
+5 -3
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
··· 119 119 struct pm4_mes_runlist *packet; 120 120 121 121 int concurrent_proc_cnt = 0; 122 - struct kfd_dev *kfd = pm->dqm->dev; 122 + struct kfd_node *kfd = pm->dqm->dev; 123 123 124 124 /* Determine the number of processes to map together to HW: 125 125 * it can not exceed the number of VMIDs available to the ··· 220 220 case KFD_QUEUE_TYPE_SDMA: 221 221 case KFD_QUEUE_TYPE_SDMA_XGMI: 222 222 use_static = false; /* no static queues under SDMA */ 223 - if (q->properties.sdma_engine_id < 2 && !pm_use_ext_eng(q->device)) 223 + if (q->properties.sdma_engine_id < 2 && 224 + !pm_use_ext_eng(q->device->kfd)) 224 225 packet->bitfields2.engine_sel = q->properties.sdma_engine_id + 225 226 engine_sel__mes_map_queues__sdma0_vi; 226 227 else { ··· 264 263 packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, 265 264 sizeof(struct pm4_mes_unmap_queues)); 266 265 267 - packet->bitfields2.extended_engine_sel = pm_use_ext_eng(pm->dqm->dev) ? 266 + packet->bitfields2.extended_engine_sel = 267 + pm_use_ext_eng(pm->dqm->dev->kfd) ? 268 268 extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel : 269 269 extended_engine_sel__mes_unmap_queues__legacy_engine_sel; 270 270
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
··· 77 77 { 78 78 struct pm4_mes_runlist *packet; 79 79 int concurrent_proc_cnt = 0; 80 - struct kfd_dev *kfd = pm->dqm->dev; 80 + struct kfd_node *kfd = pm->dqm->dev; 81 81 82 82 if (WARN_ON(!ib)) 83 83 return -EFAULT;
+94 -74
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 210 210 ((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \ 211 211 (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))) 212 212 213 + struct kfd_node; 214 + 213 215 struct kfd_event_interrupt_class { 214 - bool (*interrupt_isr)(struct kfd_dev *dev, 216 + bool (*interrupt_isr)(struct kfd_node *dev, 215 217 const uint32_t *ih_ring_entry, uint32_t *patched_ihre, 216 218 bool *patched_flag); 217 - void (*interrupt_wq)(struct kfd_dev *dev, 219 + void (*interrupt_wq)(struct kfd_node *dev, 218 220 const uint32_t *ih_ring_entry); 219 221 }; 220 222 ··· 238 236 uint64_t reserved_sdma_queues_bitmap; 239 237 }; 240 238 241 - unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev); 242 - unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev); 239 + unsigned int kfd_get_num_sdma_engines(struct kfd_node *kdev); 240 + unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *kdev); 243 241 244 242 struct kfd_mem_obj { 245 243 uint32_t range_start; ··· 255 253 uint32_t vmid_num_kfd; 256 254 }; 257 255 256 + struct kfd_dev; 257 + 258 + struct kfd_node { 259 + struct amdgpu_device *adev; /* Duplicated here along with keeping 260 + * a copy in kfd_dev to save a hop 261 + */ 262 + const struct kfd2kgd_calls *kfd2kgd; /* Duplicated here along with 263 + * keeping a copy in kfd_dev to 264 + * save a hop 265 + */ 266 + struct kfd_vmid_info vm_info; 267 + unsigned int id; /* topology stub index */ 268 + /* Interrupts */ 269 + struct kfifo ih_fifo; 270 + struct workqueue_struct *ih_wq; 271 + struct work_struct interrupt_work; 272 + spinlock_t interrupt_lock; 273 + 274 + /* 275 + * Interrupts of interest to KFD are copied 276 + * from the HW ring into a SW ring. 
277 + */ 278 + bool interrupts_active; 279 + 280 + /* QCM Device instance */ 281 + struct device_queue_manager *dqm; 282 + 283 + /* Global GWS resource shared between processes */ 284 + void *gws; 285 + bool gws_debug_workaround; 286 + 287 + /* Clients watching SMI events */ 288 + struct list_head smi_clients; 289 + spinlock_t smi_lock; 290 + uint32_t reset_seq_num; 291 + 292 + /* SRAM ECC flag */ 293 + atomic_t sram_ecc_flag; 294 + 295 + /*spm process id */ 296 + unsigned int spm_pasid; 297 + 298 + /* Maximum process number mapped to HW scheduler */ 299 + unsigned int max_proc_per_quantum; 300 + 301 + struct kfd_dev *kfd; 302 + }; 303 + 258 304 struct kfd_dev { 259 305 struct amdgpu_device *adev; 260 306 261 307 struct kfd_device_info device_info; 262 - 263 - unsigned int id; /* topology stub index */ 264 308 265 309 phys_addr_t doorbell_base; /* Start of actual doorbells used by 266 310 * KFD. It is aligned for mapping ··· 322 274 */ 323 275 324 276 struct kgd2kfd_shared_resources shared_resources; 325 - struct kfd_vmid_info vm_info; 326 277 struct kfd_local_mem_info local_mem_info; 327 278 328 279 const struct kfd2kgd_calls *kfd2kgd; ··· 337 290 unsigned int gtt_sa_chunk_size; 338 291 unsigned int gtt_sa_num_of_chunks; 339 292 340 - /* Interrupts */ 341 - struct kfifo ih_fifo; 342 - struct workqueue_struct *ih_wq; 343 - struct work_struct interrupt_work; 344 - spinlock_t interrupt_lock; 345 - 346 - /* QCM Device instance */ 347 - struct device_queue_manager *dqm; 348 - 349 293 bool init_complete; 350 - /* 351 - * Interrupts of interest to KFD are copied 352 - * from the HW ring into a SW ring. 
353 - */ 354 - bool interrupts_active; 355 294 356 295 /* Firmware versions */ 357 296 uint16_t mec_fw_version; 358 297 uint16_t mec2_fw_version; 359 298 uint16_t sdma_fw_version; 360 - 361 - /* Maximum process number mapped to HW scheduler */ 362 - unsigned int max_proc_per_quantum; 363 299 364 300 /* CWSR */ 365 301 bool cwsr_enabled; ··· 357 327 /* Use IOMMU v2 flag */ 358 328 bool use_iommu_v2; 359 329 360 - /* SRAM ECC flag */ 361 - atomic_t sram_ecc_flag; 362 - 363 330 /* Compute Profile ref. count */ 364 331 atomic_t compute_profile; 365 - 366 - /* Global GWS resource shared between processes */ 367 - void *gws; 368 - 369 - /* Clients watching SMI events */ 370 - struct list_head smi_clients; 371 - spinlock_t smi_lock; 372 - 373 - uint32_t reset_seq_num; 374 332 375 333 struct ida doorbell_ida; 376 334 unsigned int max_doorbell_slices; ··· 367 349 368 350 /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ 369 351 struct dev_pagemap pgmap; 352 + 353 + struct kfd_node *node; 370 354 }; 371 355 372 356 enum kfd_mempool { ··· 583 563 unsigned int doorbell_id; 584 564 585 565 struct kfd_process *process; 586 - struct kfd_dev *device; 566 + struct kfd_node *device; 587 567 void *gws; 588 568 589 569 /* procfs */ ··· 717 697 /* Data that is per-process-per device. */ 718 698 struct kfd_process_device { 719 699 /* The device that owns this data. */ 720 - struct kfd_dev *dev; 700 + struct kfd_node *dev; 721 701 722 702 /* The process that owns this kfd_process_device. 
*/ 723 703 struct kfd_process *process; ··· 945 925 unsigned int cmd_drv; 946 926 const char *name; 947 927 }; 948 - bool kfd_dev_is_large_bar(struct kfd_dev *dev); 928 + bool kfd_dev_is_large_bar(struct kfd_node *dev); 949 929 950 930 int kfd_process_create_wq(void); 951 931 void kfd_process_destroy_wq(void); ··· 981 961 982 962 int kfd_process_device_init_vm(struct kfd_process_device *pdd, 983 963 struct file *drm_file); 984 - struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 964 + struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev, 985 965 struct kfd_process *p); 986 - struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 966 + struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev, 987 967 struct kfd_process *p); 988 - struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, 968 + struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, 989 969 struct kfd_process *p); 990 970 991 971 bool kfd_process_xnack_mode(struct kfd_process *p, bool supported); 992 972 993 - int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, 973 + int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process, 994 974 struct vm_area_struct *vma); 995 975 996 976 /* KFD process API for creating and translating handles */ ··· 1014 994 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); 1015 995 int kfd_doorbell_init(struct kfd_dev *kfd); 1016 996 void kfd_doorbell_fini(struct kfd_dev *kfd); 1017 - int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, 997 + int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process, 1018 998 struct vm_area_struct *vma); 1019 999 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, 1020 1000 unsigned int *doorbell_off); ··· 1032 1012 unsigned int doorbell_index); 1033 1013 /* GTT Sub-Allocator */ 1034 1014 1035 - int kfd_gtt_sa_allocate(struct 
kfd_dev *kfd, unsigned int size, 1015 + int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size, 1036 1016 struct kfd_mem_obj **mem_obj); 1037 1017 1038 - int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj); 1018 + int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj); 1039 1019 1040 1020 extern struct device *kfd_device; 1041 1021 ··· 1048 1028 /* Topology */ 1049 1029 int kfd_topology_init(void); 1050 1030 void kfd_topology_shutdown(void); 1051 - int kfd_topology_add_device(struct kfd_dev *gpu); 1052 - int kfd_topology_remove_device(struct kfd_dev *gpu); 1031 + int kfd_topology_add_device(struct kfd_node *gpu); 1032 + int kfd_topology_remove_device(struct kfd_node *gpu); 1053 1033 struct kfd_topology_device *kfd_topology_device_by_proximity_domain( 1054 1034 uint32_t proximity_domain); 1055 1035 struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( 1056 1036 uint32_t proximity_domain); 1057 1037 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); 1058 - struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); 1059 - struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); 1060 - struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev); 1061 - int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); 1038 + struct kfd_node *kfd_device_by_id(uint32_t gpu_id); 1039 + struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev); 1040 + struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev); 1041 + int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev); 1062 1042 int kfd_numa_node_to_apic_id(int numa_node_id); 1063 1043 void kfd_double_confirm_iommu_support(struct kfd_dev *gpu); 1064 1044 1065 1045 /* Interrupts */ 1066 - int kfd_interrupt_init(struct kfd_dev *dev); 1067 - void kfd_interrupt_exit(struct kfd_dev *dev); 1068 - bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); 1069 - bool 
interrupt_is_wanted(struct kfd_dev *dev, 1046 + int kfd_interrupt_init(struct kfd_node *dev); 1047 + void kfd_interrupt_exit(struct kfd_node *dev); 1048 + bool enqueue_ih_ring_entry(struct kfd_node *kfd, const void *ih_ring_entry); 1049 + bool interrupt_is_wanted(struct kfd_node *dev, 1070 1050 const uint32_t *ih_ring_entry, 1071 1051 uint32_t *patched_ihre, bool *flag); 1072 1052 ··· 1194 1174 void print_queue(struct queue *q); 1195 1175 1196 1176 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, 1197 - struct kfd_dev *dev); 1177 + struct kfd_node *dev); 1198 1178 struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, 1199 - struct kfd_dev *dev); 1179 + struct kfd_node *dev); 1200 1180 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, 1201 - struct kfd_dev *dev); 1181 + struct kfd_node *dev); 1202 1182 struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, 1203 - struct kfd_dev *dev); 1183 + struct kfd_node *dev); 1204 1184 struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, 1205 - struct kfd_dev *dev); 1185 + struct kfd_node *dev); 1206 1186 struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, 1207 - struct kfd_dev *dev); 1187 + struct kfd_node *dev); 1208 1188 struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, 1209 - struct kfd_dev *dev); 1210 - struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); 1189 + struct kfd_node *dev); 1190 + struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev); 1211 1191 void device_queue_manager_uninit(struct device_queue_manager *dqm); 1212 - struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 1192 + struct kernel_queue *kernel_queue_init(struct kfd_node *dev, 1213 1193 enum kfd_queue_type type); 1214 1194 void kernel_queue_uninit(struct kernel_queue *kq, bool hanging); 1215 1195 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid); ··· 1226 1206 int pqm_init(struct 
process_queue_manager *pqm, struct kfd_process *p); 1227 1207 void pqm_uninit(struct process_queue_manager *pqm); 1228 1208 int pqm_create_queue(struct process_queue_manager *pqm, 1229 - struct kfd_dev *dev, 1209 + struct kfd_node *dev, 1230 1210 struct file *f, 1231 1211 struct queue_properties *properties, 1232 1212 unsigned int *qid, ··· 1343 1323 uint32_t *wait_result); 1344 1324 void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, 1345 1325 uint32_t valid_id_bits); 1346 - void kfd_signal_iommu_event(struct kfd_dev *dev, 1326 + void kfd_signal_iommu_event(struct kfd_node *dev, 1347 1327 u32 pasid, unsigned long address, 1348 1328 bool is_write_requested, bool is_execute_requested); 1349 1329 void kfd_signal_hw_exception_event(u32 pasid); ··· 1359 1339 int kfd_get_num_events(struct kfd_process *p); 1360 1340 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); 1361 1341 1362 - void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, 1342 + void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid, 1363 1343 struct kfd_vm_fault_info *info); 1364 1344 1365 - void kfd_signal_reset_event(struct kfd_dev *dev); 1345 + void kfd_signal_reset_event(struct kfd_node *dev); 1366 1346 1367 - void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid); 1347 + void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); 1368 1348 1369 1349 void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); 1370 1350 ··· 1379 1359 bool kfd_is_locked(void); 1380 1360 1381 1361 /* Compute profile */ 1382 - void kfd_inc_compute_active(struct kfd_dev *dev); 1383 - void kfd_dec_compute_active(struct kfd_dev *dev); 1362 + void kfd_inc_compute_active(struct kfd_node *dev); 1363 + void kfd_dec_compute_active(struct kfd_node *dev); 1384 1364 1385 1365 /* Cgroup Support */ 1386 1366 /* Check with device cgroup if @kfd device is accessible */ 1387 - static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) 1367 + 
static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd) 1388 1368 { 1389 1369 #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) 1390 1370 struct drm_device *ddev = adev_to_drm(kfd->adev); ··· 1409 1389 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data); 1410 1390 int pm_debugfs_runlist(struct seq_file *m, void *data); 1411 1391 1412 - int kfd_debugfs_hang_hws(struct kfd_dev *dev); 1392 + int kfd_debugfs_hang_hws(struct kfd_node *dev); 1413 1393 int pm_debugfs_hang_hws(struct packet_manager *pm); 1414 1394 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm); 1415 1395
+27 -27
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 269 269 int cu_cnt; 270 270 int wave_cnt; 271 271 int max_waves_per_cu; 272 - struct kfd_dev *dev = NULL; 272 + struct kfd_node *dev = NULL; 273 273 struct kfd_process *proc = NULL; 274 274 struct kfd_process_device *pdd = NULL; 275 275 ··· 691 691 static void kfd_process_free_gpuvm(struct kgd_mem *mem, 692 692 struct kfd_process_device *pdd, void **kptr) 693 693 { 694 - struct kfd_dev *dev = pdd->dev; 694 + struct kfd_node *dev = pdd->dev; 695 695 696 696 if (kptr && *kptr) { 697 697 amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); ··· 713 713 uint64_t gpu_va, uint32_t size, 714 714 uint32_t flags, struct kgd_mem **mem, void **kptr) 715 715 { 716 - struct kfd_dev *kdev = pdd->dev; 716 + struct kfd_node *kdev = pdd->dev; 717 717 int err; 718 718 719 719 err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, ··· 982 982 static void kfd_process_kunmap_signal_bo(struct kfd_process *p) 983 983 { 984 984 struct kfd_process_device *pdd; 985 - struct kfd_dev *kdev; 985 + struct kfd_node *kdev; 986 986 void *mem; 987 987 988 988 kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle)); ··· 1040 1040 bitmap_free(pdd->qpd.doorbell_bitmap); 1041 1041 idr_destroy(&pdd->alloc_idr); 1042 1042 1043 - kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index); 1043 + kfd_free_process_doorbells(pdd->dev->kfd, pdd->doorbell_index); 1044 1044 1045 - if (pdd->dev->shared_resources.enable_mes) 1045 + if (pdd->dev->kfd->shared_resources.enable_mes) 1046 1046 amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, 1047 1047 pdd->proc_ctx_bo); 1048 1048 /* ··· 1259 1259 int i; 1260 1260 1261 1261 for (i = 0; i < p->n_pdds; i++) { 1262 - struct kfd_dev *dev = p->pdds[i]->dev; 1262 + struct kfd_node *dev = p->pdds[i]->dev; 1263 1263 struct qcm_process_device *qpd = &p->pdds[i]->qpd; 1264 1264 1265 - if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) 1265 + if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) 1266 1266 continue; 1267 1267 1268 1268 offset = 
KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id); ··· 1279 1279 return err; 1280 1280 } 1281 1281 1282 - memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); 1282 + memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); 1283 1283 1284 1284 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; 1285 1285 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", ··· 1291 1291 1292 1292 static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) 1293 1293 { 1294 - struct kfd_dev *dev = pdd->dev; 1294 + struct kfd_node *dev = pdd->dev; 1295 1295 struct qcm_process_device *qpd = &pdd->qpd; 1296 1296 uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT 1297 1297 | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE ··· 1300 1300 void *kaddr; 1301 1301 int ret; 1302 1302 1303 - if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) 1303 + if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) 1304 1304 return 0; 1305 1305 1306 1306 /* cwsr_base is only set for dGPU */ ··· 1313 1313 qpd->cwsr_kaddr = kaddr; 1314 1314 qpd->tba_addr = qpd->cwsr_base; 1315 1315 1316 - memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); 1316 + memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); 1317 1317 1318 1318 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; 1319 1319 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", ··· 1324 1324 1325 1325 static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd) 1326 1326 { 1327 - struct kfd_dev *dev = pdd->dev; 1327 + struct kfd_node *dev = pdd->dev; 1328 1328 struct qcm_process_device *qpd = &pdd->qpd; 1329 1329 1330 - if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) 1330 + if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) 1331 1331 return; 1332 1332 1333 1333 kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr); ··· 1371 1371 * support retry. 
1372 1372 */ 1373 1373 for (i = 0; i < p->n_pdds; i++) { 1374 - struct kfd_dev *dev = p->pdds[i]->dev; 1374 + struct kfd_node *dev = p->pdds[i]->dev; 1375 1375 1376 1376 /* Only consider GFXv9 and higher GPUs. Older GPUs don't 1377 1377 * support the SVM APIs and don't need to be considered ··· 1394 1394 if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 1395 1395 return false; 1396 1396 1397 - if (dev->noretry) 1397 + if (dev->kfd->noretry) 1398 1398 return false; 1399 1399 } 1400 1400 ··· 1528 1528 return 0; 1529 1529 } 1530 1530 1531 - struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 1531 + struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev, 1532 1532 struct kfd_process *p) 1533 1533 { 1534 1534 int i; ··· 1540 1540 return NULL; 1541 1541 } 1542 1542 1543 - struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, 1543 + struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, 1544 1544 struct kfd_process *p) 1545 1545 { 1546 1546 struct kfd_process_device *pdd = NULL; ··· 1552 1552 if (!pdd) 1553 1553 return NULL; 1554 1554 1555 - if (init_doorbell_bitmap(&pdd->qpd, dev)) { 1555 + if (init_doorbell_bitmap(&pdd->qpd, dev->kfd)) { 1556 1556 pr_err("Failed to init doorbell for process\n"); 1557 1557 goto err_free_pdd; 1558 1558 } ··· 1573 1573 pdd->user_gpu_id = dev->id; 1574 1574 atomic64_set(&pdd->evict_duration_counter, 0); 1575 1575 1576 - if (dev->shared_resources.enable_mes) { 1576 + if (dev->kfd->shared_resources.enable_mes) { 1577 1577 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, 1578 1578 AMDGPU_MES_PROC_CTX_SIZE, 1579 1579 &pdd->proc_ctx_bo, ··· 1619 1619 struct amdgpu_fpriv *drv_priv; 1620 1620 struct amdgpu_vm *avm; 1621 1621 struct kfd_process *p; 1622 - struct kfd_dev *dev; 1622 + struct kfd_node *dev; 1623 1623 int ret; 1624 1624 1625 1625 if (!drm_file) ··· 1679 1679 * 1680 1680 * Assumes that the process lock is held. 
1681 1681 */ 1682 - struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 1682 + struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev, 1683 1683 struct kfd_process *p) 1684 1684 { 1685 1685 struct kfd_process_device *pdd; ··· 1811 1811 for (i = 0; i < p->n_pdds; i++) { 1812 1812 struct kfd_process_device *pdd = p->pdds[i]; 1813 1813 1814 - kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, 1814 + kfd_smi_event_queue_eviction(pdd->dev->kfd, p->lead_thread->pid, 1815 1815 trigger); 1816 1816 1817 1817 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, ··· 1839 1839 if (n_evicted == 0) 1840 1840 break; 1841 1841 1842 - kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); 1842 + kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid); 1843 1843 1844 1844 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, 1845 1845 &pdd->qpd)) ··· 1860 1860 for (i = 0; i < p->n_pdds; i++) { 1861 1861 struct kfd_process_device *pdd = p->pdds[i]; 1862 1862 1863 - kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); 1863 + kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid); 1864 1864 1865 1865 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, 1866 1866 &pdd->qpd); ··· 2016 2016 return ret; 2017 2017 } 2018 2018 2019 - int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, 2019 + int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process, 2020 2020 struct vm_area_struct *vma) 2021 2021 { 2022 2022 struct kfd_process_device *pdd; ··· 2051 2051 { 2052 2052 struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); 2053 2053 uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); 2054 - struct kfd_dev *dev = pdd->dev; 2054 + struct kfd_node *dev = pdd->dev; 2055 2055 2056 2056 /* 2057 2057 * It can be that we race and lose here, but that is extremely unlikely
+10 -10
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 81 81 82 82 void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) 83 83 { 84 - struct kfd_dev *dev = pdd->dev; 84 + struct kfd_node *dev = pdd->dev; 85 85 86 86 if (pdd->already_dequeued) 87 87 return; ··· 93 93 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, 94 94 void *gws) 95 95 { 96 - struct kfd_dev *dev = NULL; 96 + struct kfd_node *dev = NULL; 97 97 struct process_queue_node *pqn; 98 98 struct kfd_process_device *pdd; 99 99 struct kgd_mem *mem = NULL; ··· 178 178 } 179 179 180 180 static int init_user_queue(struct process_queue_manager *pqm, 181 - struct kfd_dev *dev, struct queue **q, 181 + struct kfd_node *dev, struct queue **q, 182 182 struct queue_properties *q_properties, 183 183 struct file *f, struct amdgpu_bo *wptr_bo, 184 184 unsigned int qid) ··· 199 199 (*q)->device = dev; 200 200 (*q)->process = pqm->process; 201 201 202 - if (dev->shared_resources.enable_mes) { 202 + if (dev->kfd->shared_resources.enable_mes) { 203 203 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, 204 204 AMDGPU_MES_GANG_CTX_SIZE, 205 205 &(*q)->gang_ctx_bo, ··· 224 224 } 225 225 226 226 int pqm_create_queue(struct process_queue_manager *pqm, 227 - struct kfd_dev *dev, 227 + struct kfd_node *dev, 228 228 struct file *f, 229 229 struct queue_properties *properties, 230 230 unsigned int *qid, ··· 258 258 * Hence we also check the type as well 259 259 */ 260 260 if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) 261 - max_queues = dev->device_info.max_no_of_hqd/2; 261 + max_queues = dev->kfd->device_info.max_no_of_hqd/2; 262 262 263 263 if (pdd->qpd.queue_count >= max_queues) 264 264 return -ENOSPC; ··· 354 354 */ 355 355 *p_doorbell_offset_in_process = 356 356 (q->properties.doorbell_off * sizeof(uint32_t)) & 357 - (kfd_doorbell_process_slice(dev) - 1); 357 + (kfd_doorbell_process_slice(dev->kfd) - 1); 358 358 359 359 pr_debug("PQM After DQM create queue\n"); 360 360 ··· 387 387 struct process_queue_node *pqn; 388 388 struct 
kfd_process_device *pdd; 389 389 struct device_queue_manager *dqm; 390 - struct kfd_dev *dev; 390 + struct kfd_node *dev; 391 391 int retval; 392 392 393 393 dqm = NULL; ··· 439 439 pdd->qpd.num_gws = 0; 440 440 } 441 441 442 - if (dev->shared_resources.enable_mes) { 442 + if (dev->kfd->shared_resources.enable_mes) { 443 443 amdgpu_amdkfd_free_gtt_mem(dev->adev, 444 444 pqn->q->gang_ctx_bo); 445 445 if (pqn->q->wptr_bo) ··· 859 859 } 860 860 861 861 if (!pdd->doorbell_index && 862 - kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) { 862 + kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) { 863 863 ret = -ENOMEM; 864 864 goto exit; 865 865 }
+20 -20
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
··· 36 36 wait_queue_head_t wait_queue; 37 37 /* events enabled */ 38 38 uint64_t events; 39 - struct kfd_dev *dev; 39 + struct kfd_node *dev; 40 40 spinlock_t lock; 41 41 struct rcu_head rcu; 42 42 pid_t pid; ··· 149 149 static int kfd_smi_ev_release(struct inode *inode, struct file *filep) 150 150 { 151 151 struct kfd_smi_client *client = filep->private_data; 152 - struct kfd_dev *dev = client->dev; 152 + struct kfd_node *dev = client->dev; 153 153 154 154 spin_lock(&dev->smi_lock); 155 155 list_del_rcu(&client->list); ··· 171 171 return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event); 172 172 } 173 173 174 - static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev, 174 + static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev, 175 175 unsigned int smi_event, char *event_msg, int len) 176 176 { 177 177 struct kfd_smi_client *client; ··· 196 196 } 197 197 198 198 __printf(4, 5) 199 - static void kfd_smi_event_add(pid_t pid, struct kfd_dev *dev, 199 + static void kfd_smi_event_add(pid_t pid, struct kfd_node *dev, 200 200 unsigned int event, char *fmt, ...) 
201 201 { 202 202 char fifo_in[KFD_SMI_EVENT_MSG_SIZE]; ··· 215 215 add_event_to_kfifo(pid, dev, event, fifo_in, len); 216 216 } 217 217 218 - void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) 218 + void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset) 219 219 { 220 220 unsigned int event; 221 221 ··· 228 228 kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num); 229 229 } 230 230 231 - void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, 231 + void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev, 232 232 uint64_t throttle_bitmask) 233 233 { 234 234 kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n", ··· 236 236 amdgpu_dpm_get_thermal_throttling_counter(dev->adev)); 237 237 } 238 238 239 - void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) 239 + void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) 240 240 { 241 241 struct amdgpu_task_info task_info; 242 242 ··· 254 254 unsigned long address, bool write_fault, 255 255 ktime_t ts) 256 256 { 257 - kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START, 257 + kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_PAGE_FAULT_START, 258 258 "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid, 259 - address, dev->id, write_fault ? 'W' : 'R'); 259 + address, dev->node->id, write_fault ? 'W' : 'R'); 260 260 } 261 261 262 262 void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid, 263 263 unsigned long address, bool migration) 264 264 { 265 - kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END, 265 + kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_PAGE_FAULT_END, 266 266 "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(), 267 - pid, address, dev->id, migration ? 'M' : 'U'); 267 + pid, address, dev->node->id, migration ? 
'M' : 'U'); 268 268 } 269 269 270 270 void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid, ··· 273 273 uint32_t prefetch_loc, uint32_t preferred_loc, 274 274 uint32_t trigger) 275 275 { 276 - kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START, 276 + kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_MIGRATE_START, 277 277 "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", 278 278 ktime_get_boottime_ns(), pid, start, end - start, 279 279 from, to, prefetch_loc, preferred_loc, trigger); ··· 283 283 unsigned long start, unsigned long end, 284 284 uint32_t from, uint32_t to, uint32_t trigger) 285 285 { 286 - kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END, 286 + kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_MIGRATE_END, 287 287 "%lld -%d @%lx(%lx) %x->%x %d\n", 288 288 ktime_get_boottime_ns(), pid, start, end - start, 289 289 from, to, trigger); ··· 292 292 void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid, 293 293 uint32_t trigger) 294 294 { 295 - kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_EVICTION, 295 + kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_QUEUE_EVICTION, 296 296 "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid, 297 - dev->id, trigger); 297 + dev->node->id, trigger); 298 298 } 299 299 300 300 void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid) 301 301 { 302 - kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_RESTORE, 302 + kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_QUEUE_RESTORE, 303 303 "%lld -%d %x\n", ktime_get_boottime_ns(), pid, 304 - dev->id); 304 + dev->node->id); 305 305 } 306 306 307 307 void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) ··· 328 328 unsigned long address, unsigned long last, 329 329 uint32_t trigger) 330 330 { 331 - kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_UNMAP_FROM_GPU, 331 + kfd_smi_event_add(pid, dev->node, KFD_SMI_EVENT_UNMAP_FROM_GPU, 332 332 "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(), 333 - pid, address, last - address + 1, dev->id, trigger); 333 
+ pid, address, last - address + 1, dev->node->id, trigger); 334 334 } 335 335 336 - int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) 336 + int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd) 337 337 { 338 338 struct kfd_smi_client *client; 339 339 int ret;
+4 -4
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
··· 24 24 #ifndef KFD_SMI_EVENTS_H_INCLUDED 25 25 #define KFD_SMI_EVENTS_H_INCLUDED 26 26 27 - int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); 28 - void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); 29 - void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, 27 + int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd); 28 + void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid); 29 + void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev, 30 30 uint64_t throttle_bitmask); 31 - void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset); 31 + void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset); 32 32 void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid, 33 33 unsigned long address, bool write_fault, 34 34 ktime_t ts);
+2 -2
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
··· 1266 1266 return -EINVAL; 1267 1267 } 1268 1268 1269 - kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid, 1269 + kfd_smi_event_unmap_from_gpu(pdd->dev->kfd, p->lead_thread->pid, 1270 1270 start, last, trigger); 1271 1271 1272 1272 r = svm_range_unmap_from_gpu(pdd->dev->adev, ··· 3083 3083 spin_lock_init(&svms->deferred_list_lock); 3084 3084 3085 3085 for (i = 0; i < p->n_pdds; i++) 3086 - if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev)) 3086 + if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->kfd)) 3087 3087 bitmap_set(svms->bitmap_supported, i, 1); 3088 3088 3089 3089 return 0;
+28 -28
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
··· 96 96 return ret; 97 97 } 98 98 99 - struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) 99 + struct kfd_node *kfd_device_by_id(uint32_t gpu_id) 100 100 { 101 101 struct kfd_topology_device *top_dev; 102 102 ··· 107 107 return top_dev->gpu; 108 108 } 109 109 110 - struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) 110 + struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev) 111 111 { 112 112 struct kfd_topology_device *top_dev; 113 - struct kfd_dev *device = NULL; 113 + struct kfd_node *device = NULL; 114 114 115 115 down_read(&topology_lock); 116 116 ··· 125 125 return device; 126 126 } 127 127 128 - struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev) 128 + struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev) 129 129 { 130 130 struct kfd_topology_device *top_dev; 131 - struct kfd_dev *device = NULL; 131 + struct kfd_node *device = NULL; 132 132 133 133 down_read(&topology_lock); 134 134 ··· 526 526 527 527 if (dev->gpu) { 528 528 log_max_watch_addr = 529 - __ilog2_u32(dev->gpu->device_info.num_of_watch_points); 529 + __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); 530 530 531 531 if (log_max_watch_addr) { 532 532 dev->node_props.capability |= ··· 548 548 sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL); 549 549 550 550 sysfs_show_32bit_prop(buffer, offs, "fw_version", 551 - dev->gpu->mec_fw_version); 551 + dev->gpu->kfd->mec_fw_version); 552 552 sysfs_show_32bit_prop(buffer, offs, "capability", 553 553 dev->node_props.capability); 554 554 sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", 555 - dev->gpu->sdma_fw_version); 555 + dev->gpu->kfd->sdma_fw_version); 556 556 sysfs_show_64bit_prop(buffer, offs, "unique_id", 557 557 dev->gpu->adev->unique_id); 558 558 ··· 1157 1157 up_write(&topology_lock); 1158 1158 } 1159 1159 1160 - static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) 1160 + static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) 1161 1161 { 1162 1162 
uint32_t hashout; 1163 1163 uint32_t buf[7]; ··· 1167 1167 if (!gpu) 1168 1168 return 0; 1169 1169 1170 - local_mem_size = gpu->local_mem_info.local_mem_size_private + 1171 - gpu->local_mem_info.local_mem_size_public; 1170 + local_mem_size = gpu->kfd->local_mem_info.local_mem_size_private + 1171 + gpu->kfd->local_mem_info.local_mem_size_public; 1172 1172 buf[0] = gpu->adev->pdev->devfn; 1173 1173 buf[1] = gpu->adev->pdev->subsystem_vendor | 1174 1174 (gpu->adev->pdev->subsystem_device << 16); ··· 1188 1188 * list then return NULL. This means a new topology device has to 1189 1189 * be created for this GPU. 1190 1190 */ 1191 - static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) 1191 + static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu) 1192 1192 { 1193 1193 struct kfd_topology_device *dev; 1194 1194 struct kfd_topology_device *out_dev = NULL; ··· 1201 1201 /* Discrete GPUs need their own topology device list 1202 1202 * entries. Don't assign them to CPU/APU nodes. 1203 1203 */ 1204 - if (!gpu->use_iommu_v2 && 1204 + if (!gpu->kfd->use_iommu_v2 && 1205 1205 dev->node_props.cpu_cores_count) 1206 1206 continue; 1207 1207 ··· 1275 1275 CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; 1276 1276 /* set gpu (dev) flags. 
*/ 1277 1277 } else { 1278 - if (!dev->gpu->pci_atomic_requested || 1278 + if (!dev->gpu->kfd->pci_atomic_requested || 1279 1279 dev->gpu->adev->asic_type == CHIP_HAWAII) 1280 1280 link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | 1281 1281 CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; ··· 1569 1569 if (dev == new_dev) 1570 1570 break; 1571 1571 if (!dev->gpu || !dev->gpu->adev || 1572 - (dev->gpu->hive_id && 1573 - dev->gpu->hive_id == new_dev->gpu->hive_id)) 1572 + (dev->gpu->kfd->hive_id && 1573 + dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) 1574 1574 goto next; 1575 1575 1576 1576 /* check if node(s) is/are peer accessible in one direction or bi-direction */ ··· 1589 1589 out: 1590 1590 return ret; 1591 1591 } 1592 - 1593 1592 1594 1593 /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ 1595 1594 static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, ··· 1722 1723 /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info 1723 1724 * tables 1724 1725 */ 1725 - static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev) 1726 + static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) 1726 1727 { 1727 1728 struct kfd_gpu_cache_info *pcache_info = NULL; 1728 1729 int i, j, k; ··· 1804 1805 pr_debug("Added [%d] GPU cache entries\n", num_of_entries); 1805 1806 } 1806 1807 1807 - static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id, 1808 + static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id, 1808 1809 struct kfd_topology_device **dev) 1809 1810 { 1810 1811 int proximity_domain = ++topology_crat_proximity_domain; ··· 1864 1865 return res; 1865 1866 } 1866 1867 1867 - int kfd_topology_add_device(struct kfd_dev *gpu) 1868 + int kfd_topology_add_device(struct kfd_node *gpu) 1868 1869 { 1869 1870 uint32_t gpu_id; 1870 1871 struct kfd_topology_device *dev; ··· 1915 1916 
dev->node_props.simd_arrays_per_engine = 1916 1917 cu_info.num_shader_arrays_per_engine; 1917 1918 1918 - dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version; 1919 + dev->node_props.gfx_target_version = 1920 + gpu->kfd->device_info.gfx_target_version; 1919 1921 dev->node_props.vendor_id = gpu->adev->pdev->vendor; 1920 1922 dev->node_props.device_id = gpu->adev->pdev->device; 1921 1923 dev->node_props.capability |= ··· 1929 1929 dev->node_props.max_engine_clk_ccompute = 1930 1930 cpufreq_quick_get_max(0) / 1000; 1931 1931 dev->node_props.drm_render_minor = 1932 - gpu->shared_resources.drm_render_minor; 1932 + gpu->kfd->shared_resources.drm_render_minor; 1933 1933 1934 - dev->node_props.hive_id = gpu->hive_id; 1934 + dev->node_props.hive_id = gpu->kfd->hive_id; 1935 1935 dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); 1936 1936 dev->node_props.num_sdma_xgmi_engines = 1937 1937 kfd_get_num_xgmi_sdma_engines(gpu); 1938 1938 dev->node_props.num_sdma_queues_per_engine = 1939 - gpu->device_info.num_sdma_queues_per_engine - 1940 - gpu->device_info.num_reserved_sdma_queues_per_engine; 1939 + gpu->kfd->device_info.num_sdma_queues_per_engine - 1940 + gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; 1941 1941 dev->node_props.num_gws = (dev->gpu->gws && 1942 1942 dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? 1943 1943 dev->gpu->adev->gds.gws_size : 0; ··· 1979 1979 * Overwrite ATS capability according to needs_iommu_device to fix 1980 1980 * potential missing corresponding bit in CRAT of BIOS. 
1981 1981 */ 1982 - if (dev->gpu->use_iommu_v2) 1982 + if (dev->gpu->kfd->use_iommu_v2) 1983 1983 dev->node_props.capability |= HSA_CAP_ATS_PRESENT; 1984 1984 else 1985 1985 dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; ··· 2079 2079 } 2080 2080 } 2081 2081 2082 - int kfd_topology_remove_device(struct kfd_dev *gpu) 2082 + int kfd_topology_remove_device(struct kfd_node *gpu) 2083 2083 { 2084 2084 struct kfd_topology_device *dev, *tmp; 2085 2085 uint32_t gpu_id; ··· 2119 2119 * Return - 0: On success (@kdev will be NULL for non GPU nodes) 2120 2120 * -1: If end of list 2121 2121 */ 2122 - int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) 2122 + int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev) 2123 2123 { 2124 2124 2125 2125 struct kfd_topology_device *top_dev;
+4 -4
drivers/gpu/drm/amd/amdkfd/kfd_topology.h
··· 75 75 uint32_t flags; 76 76 uint32_t width; 77 77 uint32_t mem_clk_max; 78 - struct kfd_dev *gpu; 78 + struct kfd_node *gpu; 79 79 struct kobject *kobj; 80 80 struct attribute attr; 81 81 }; ··· 93 93 uint32_t cache_latency; 94 94 uint32_t cache_type; 95 95 uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE]; 96 - struct kfd_dev *gpu; 96 + struct kfd_node *gpu; 97 97 struct kobject *kobj; 98 98 struct attribute attr; 99 99 uint32_t sibling_map_size; ··· 113 113 uint32_t max_bandwidth; 114 114 uint32_t rec_transfer_size; 115 115 uint32_t flags; 116 - struct kfd_dev *gpu; 116 + struct kfd_node *gpu; 117 117 struct kobject *kobj; 118 118 struct attribute attr; 119 119 }; ··· 135 135 struct list_head io_link_props; 136 136 struct list_head p2p_link_props; 137 137 struct list_head perf_props; 138 - struct kfd_dev *gpu; 138 + struct kfd_node *gpu; 139 139 struct kobject *kobj_node; 140 140 struct kobject *kobj_mem; 141 141 struct kobject *kobj_cache;