Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: add gc 9.5.0 support on kfd

Initial support for GC 9.5.0.

v2: squash in pqm_clean_queue_resource() fix from Lijo

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Alex Sierra and committed by Alex Deucher.
71985559 0ca6d975

+57 -34
+1
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
··· 1639 1639 break; 1640 1640 case IP_VERSION(9, 4, 3): 1641 1641 case IP_VERSION(9, 4, 4): 1642 + case IP_VERSION(9, 5, 0): 1642 1643 num_of_cache_types = 1643 1644 kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, 1644 1645 *pcache_info);
+1
drivers/gpu/drm/amd/amdkfd/kfd_debug.h
··· 79 79 return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || 80 80 KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || 81 81 KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || 82 + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) || 82 83 KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)); 83 84 } 84 85
+25 -18
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 85 85 case IP_VERSION(4, 4, 0):/* ALDEBARAN */ 86 86 case IP_VERSION(4, 4, 2): 87 87 case IP_VERSION(4, 4, 5): 88 + case IP_VERSION(4, 4, 4): 88 89 case IP_VERSION(5, 0, 0):/* NAVI10 */ 89 90 case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */ 90 91 case IP_VERSION(5, 0, 2):/* NAVI14 */ ··· 153 152 break; 154 153 case IP_VERSION(9, 4, 3): /* GC 9.4.3 */ 155 154 case IP_VERSION(9, 4, 4): /* GC 9.4.4 */ 155 + case IP_VERSION(9, 5, 0): /* GC 9.5.0 */ 156 156 kfd->device_info.event_interrupt_class = 157 157 &event_interrupt_class_v9_4_3; 158 158 break; ··· 358 356 gfx_target_version = 90402; 359 357 f2g = &gc_9_4_3_kfd2kgd; 360 358 break; 359 + case IP_VERSION(9, 5, 0): 360 + gfx_target_version = 90500; 361 + f2g = &gc_9_4_3_kfd2kgd; 362 + break; 361 363 /* Navi10 */ 362 364 case IP_VERSION(10, 1, 10): 363 365 gfx_target_version = 100100; ··· 521 515 > KFD_CWSR_TMA_OFFSET); 522 516 kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex; 523 517 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex); 518 + } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) { 519 + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE); 520 + kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex; 521 + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex); 524 522 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { 525 523 BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) 526 524 > KFD_CWSR_TMA_OFFSET); ··· 577 567 && kfd->mec2_fw_version >= 0x28) || 578 568 (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) || 579 569 KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) || 570 + (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) || 580 571 (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) 581 572 && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) 582 573 && kfd->mec2_fw_version >= 0x6b) || ··· 744 733 last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; 745 734 vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1; 746 735 747 - /* For GFX9.4.3, we need special handling for VMIDs depending on 748 - * partition mode. 
736 + /* For multi-partition capable GPUs, we need special handling for VMIDs 737 + * depending on partition mode. 749 738 * In CPX mode, the VMID range needs to be shared between XCDs. 750 739 * Additionally, there are 13 VMIDs (3-15) available for KFD. To 751 740 * divide them equally, we change starting VMID to 4 and not use 752 741 * VMID 3. 753 - * If the VMID range changes for GFX9.4.3, then this code MUST be 754 - * revisited. 742 + * If the VMID range changes for multi-partition capable GPUs, then 743 + * this code MUST be revisited. 755 744 */ 756 745 if (kfd->adev->xcp_mgr) { 757 746 partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr, ··· 816 805 kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; 817 806 818 807 /* 819 - * For GFX9.4.3, the KFD abstracts all partitions within a socket as 820 - * xGMI connected in the topology so assign a unique hive id per 821 - * device based on the pci device location if device is in PCIe mode. 808 + * For multi-partition capable GPUs, the KFD abstracts all partitions 809 + * within a socket as xGMI connected in the topology so assign a unique 810 + * hive id per device based on the pci device location if device is in 811 + * PCIe mode. 822 812 */ 823 - if (!kfd->hive_id && 824 - (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || 825 - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) && 826 - kfd->num_nodes > 1) 813 + if (!kfd->hive_id && kfd->num_nodes > 1) 827 814 kfd->hive_id = pci_dev_id(kfd->adev->pdev); 828 815 829 816 kfd->noretry = kfd->adev->gmc.noretry; ··· 859 850 KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20); 860 851 } 861 852 862 - if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || 863 - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) && 864 - partition_mode == AMDGPU_CPX_PARTITION_MODE && 853 + if (partition_mode == AMDGPU_CPX_PARTITION_MODE && 865 854 kfd->num_nodes != 1) { 866 - /* For GFX9.4.3 and CPX mode, first XCD gets VMID range 867 - * 4-9 and second XCD gets VMID range 10-15. 
855 + /* For multi-partition capable GPUs and CPX mode, first 856 + * XCD gets VMID range 4-9 and second XCD gets VMID 857 + * range 10-15. 868 858 */ 869 859 870 860 node->vm_info.first_vmid_kfd = (i%2 == 0) ? ··· 887 879 amdgpu_amdkfd_get_local_mem_info(kfd->adev, 888 880 &node->local_mem_info, node->xcp); 889 881 890 - if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || 891 - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) 882 + if (kfd->adev->xcp_mgr) 892 883 kfd_setup_interrupt_bitmap(node, i); 893 884 894 885 /* Initialize the KFD node */
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
··· 64 64 qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; 65 65 66 66 if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) || 67 - KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4)) 67 + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) || 68 + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) 68 69 qpd->sh_mem_config |= 69 70 (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); 70 71
+9 -4
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
··· 78 78 m->compute_static_thread_mgmt_se2 = se_mask[2]; 79 79 m->compute_static_thread_mgmt_se3 = se_mask[3]; 80 80 if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) && 81 - KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) { 81 + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) && 82 + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) { 82 83 m->compute_static_thread_mgmt_se4 = se_mask[4]; 83 84 m->compute_static_thread_mgmt_se5 = se_mask[5]; 84 85 m->compute_static_thread_mgmt_se6 = se_mask[6]; ··· 302 301 m->cp_hqd_ctx_save_control = 0; 303 302 304 303 if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) && 305 - KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) 304 + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) && 305 + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) 306 306 update_cu_mask(mm, mqd, minfo, 0); 307 307 set_priority(m, q); 308 308 ··· 887 885 mqd->debugfs_show_mqd = debugfs_show_mqd; 888 886 #endif 889 887 if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || 890 - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) { 888 + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || 889 + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) { 891 890 mqd->init_mqd = init_mqd_v9_4_3; 892 891 mqd->load_mqd = load_mqd_v9_4_3; 893 892 mqd->update_mqd = update_mqd_v9_4_3; ··· 912 909 #if defined(CONFIG_DEBUG_FS) 913 910 mqd->debugfs_show_mqd = debugfs_show_mqd; 914 911 #endif 912 + mqd->check_preemption_failed = check_preemption_failed; 915 913 if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || 916 - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) { 914 + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || 915 + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) { 917 916 mqd->init_mqd = init_mqd_hiq_v9_4_3; 918 917 mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3; 919 918 mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
··· 260 260 default: 261 261 if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) || 262 262 KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) || 263 - KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4)) 263 + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) || 264 + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0)) 264 265 pm->pmf = &kfd_aldebaran_pm_funcs; 265 266 else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) 266 267 pm->pmf = &kfd_v9_pm_funcs;
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 207 207 #define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\ 208 208 ((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \ 209 209 (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \ 210 - (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))) 210 + (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \ 211 + (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))) 211 212 212 213 struct kfd_node; 213 214 ··· 1151 1150 uint32_t i; 1152 1151 1153 1152 if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && 1154 - KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4)) 1153 + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && 1154 + KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) 1155 1155 return dev->nodes[0]; 1156 1156 1157 1157 for (i = 0; i < dev->num_nodes; i++)
+3 -2
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 2127 2127 irq_drain_fence[3] = pdd->process->pasid; 2128 2128 2129 2129 /* 2130 - * For GFX 9.4.3, send the NodeId also in IH cookie DW[3] 2130 + * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3] 2131 2131 */ 2132 2132 if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) || 2133 - KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) { 2133 + KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) || 2134 + KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) { 2134 2135 node_id = ffs(pdd->dev->interrupt_bitmap) - 1; 2135 2136 irq_drain_fence[3] |= node_id << 16; 2136 2137 }
+8 -5
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 131 131 if (!gws && pdd->qpd.num_gws == 0) 132 132 return -EINVAL; 133 133 134 - if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && 135 - KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && 134 + if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && 135 + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && 136 + KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) && 136 137 !dev->kfd->shared_resources.enable_mes) { 137 138 if (gws) 138 139 ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, ··· 198 197 if (pqn->q->gws) { 199 198 if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && 200 199 KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) && 200 + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) && 201 201 !dev->kfd->shared_resources.enable_mes) 202 202 amdgpu_amdkfd_remove_gws_from_process( 203 203 pqm->process->kgd_process_info, pqn->q->gws); ··· 322 320 unsigned int max_queues = 127; /* HWS limit */ 323 321 324 322 /* 325 - * On GFX 9.4.3, increase the number of queues that 326 - * can be created to 255. No HWS limit on GFX 9.4.3. 323 + * On GFX 9.4.3/9.5.0, increase the number of queues that 324 + * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0. 327 325 */ 328 326 if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || 329 - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) 327 + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || 328 + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) 330 329 max_queues = 255; 331 330 332 331 q = NULL;
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
··· 1714 1714 pcache->cacheline_size = pcache_info[cache_type].cache_line_size; 1715 1715 1716 1716 if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) || 1717 - KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4)) 1717 + KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) || 1718 + KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0)) 1718 1719 mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); 1719 1720 else 1720 1721 mode = UNKNOWN_MEMORY_PARTITION_MODE;