Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'amd-drm-fixes-6.13-2024-12-11' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.13-2024-12-11:

amdgpu:
- ISP hw init fix
- SR-IOV fixes
- Fix contiguous VRAM mapping for UVD on older GPUs
- Fix some regressions due to drm scheduler changes
- Workload profile fixes
- Cleaner shader fix

amdkfd:
- Fix DMA map direction for migration
- Fix a potential null pointer dereference
- Cacheline size fixes
- Runtime PM fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241211215449.741848-1-alexander.deucher@amd.com

+97 -53
+11 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 1801 1801 if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) 1802 1802 return -EINVAL; 1803 1803 1804 + /* Make sure VRAM is allocated contiguously */ 1804 1805 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 1805 - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); 1806 - for (i = 0; i < (*bo)->placement.num_placement; i++) 1807 - (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; 1808 - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); 1809 - if (r) 1810 - return r; 1806 + if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM && 1807 + !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) { 1808 + 1809 + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); 1810 + for (i = 0; i < (*bo)->placement.num_placement; i++) 1811 + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; 1812 + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); 1813 + if (r) 1814 + return r; 1815 + } 1811 1816 1812 1817 return amdgpu_ttm_alloc_gart(&(*bo)->tbo); 1813 1818 }
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 145 145 "LAST", 146 146 }; 147 147 148 - #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0) 148 + #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0) 149 149 /* 150 150 * Default init level where all blocks are expected to be initialized. This is 151 151 * the level of initialization expected by default and also after a full reset
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
··· 551 551 for (i = 0; i < abo->placement.num_placement; ++i) { 552 552 abo->placements[i].fpfn = 0 >> PAGE_SHIFT; 553 553 abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 554 + if (abo->placements[i].mem_type == TTM_PL_VRAM) 555 + abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; 554 556 } 555 557 } 556 558
+7 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 674 674 pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && 675 675 ring->funcs->emit_wreg; 676 676 677 - if (adev->gfx.enable_cleaner_shader && 678 - ring->funcs->emit_cleaner_shader && 679 - job->enforce_isolation) 680 - ring->funcs->emit_cleaner_shader(ring); 681 - 682 - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) 677 + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && 678 + !(job->enforce_isolation && !job->vmid)) 683 679 return 0; 684 680 685 681 amdgpu_ring_ib_begin(ring); ··· 685 689 686 690 if (need_pipe_sync) 687 691 amdgpu_ring_emit_pipeline_sync(ring); 692 + 693 + if (adev->gfx.enable_cleaner_shader && 694 + ring->funcs->emit_cleaner_shader && 695 + job->enforce_isolation) 696 + ring->funcs->emit_cleaner_shader(ring); 688 697 689 698 if (vm_flush_needed) { 690 699 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
+8 -2
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
··· 45 45 MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin"); 46 46 MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); 47 47 MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); 48 + MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin"); 49 + MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin"); 48 50 49 51 #define GFX9_MEC_HPD_SIZE 4096 50 52 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L ··· 576 574 { 577 575 int err; 578 576 579 - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 580 - "amdgpu/%s_mec.bin", chip_name); 577 + if (amdgpu_sriov_vf(adev)) 578 + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 579 + "amdgpu/%s_sjt_mec.bin", chip_name); 580 + else 581 + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 582 + "amdgpu/%s_mec.bin", chip_name); 581 583 if (err) 582 584 goto out; 583 585 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+1 -1
drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
··· 1288 1288 struct amdgpu_job *job, 1289 1289 struct amdgpu_ib *ib) 1290 1290 { 1291 - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); 1291 + struct amdgpu_ring *ring = amdgpu_job_ring(job); 1292 1292 unsigned i; 1293 1293 1294 1294 /* No patching necessary for the first instance */
+21 -3
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
··· 1423 1423 1424 1424 1425 1425 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, 1426 + bool cache_line_size_missing, 1426 1427 struct kfd_gpu_cache_info *pcache_info) 1427 1428 { 1428 1429 struct amdgpu_device *adev = kdev->adev; ··· 1438 1437 CRAT_CACHE_FLAGS_SIMD_CACHE); 1439 1438 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; 1440 1439 pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; 1440 + if (cache_line_size_missing && !pcache_info[i].cache_line_size) 1441 + pcache_info[i].cache_line_size = 128; 1441 1442 i++; 1442 1443 } 1443 1444 /* Scalar L1 Instruction Cache per SQC */ ··· 1452 1449 CRAT_CACHE_FLAGS_SIMD_CACHE); 1453 1450 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; 1454 1451 pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; 1452 + if (cache_line_size_missing && !pcache_info[i].cache_line_size) 1453 + pcache_info[i].cache_line_size = 128; 1455 1454 i++; 1456 1455 } 1457 1456 /* Scalar L1 Data Cache per SQC */ ··· 1465 1460 CRAT_CACHE_FLAGS_SIMD_CACHE); 1466 1461 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; 1467 1462 pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; 1463 + if (cache_line_size_missing && !pcache_info[i].cache_line_size) 1464 + pcache_info[i].cache_line_size = 64; 1468 1465 i++; 1469 1466 } 1470 1467 /* GL1 Data Cache per SA */ ··· 1479 1472 CRAT_CACHE_FLAGS_DATA_CACHE | 1480 1473 CRAT_CACHE_FLAGS_SIMD_CACHE); 1481 1474 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1482 - pcache_info[i].cache_line_size = 0; 1475 + if (cache_line_size_missing) 1476 + pcache_info[i].cache_line_size = 128; 1483 1477 i++; 1484 1478 } 1485 1479 /* L2 Data Cache per GPU (Total Tex Cache) */ ··· 1492 1484 CRAT_CACHE_FLAGS_SIMD_CACHE); 1493 1485 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1494 1486 pcache_info[i].cache_line_size = 
adev->gfx.config.gc_tcc_cache_line_size; 1487 + if (cache_line_size_missing && !pcache_info[i].cache_line_size) 1488 + pcache_info[i].cache_line_size = 128; 1495 1489 i++; 1496 1490 } 1497 1491 /* L3 Data Cache per GPU */ ··· 1504 1494 CRAT_CACHE_FLAGS_DATA_CACHE | 1505 1495 CRAT_CACHE_FLAGS_SIMD_CACHE); 1506 1496 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1507 - pcache_info[i].cache_line_size = 0; 1497 + pcache_info[i].cache_line_size = 64; 1508 1498 i++; 1509 1499 } 1510 1500 return i; ··· 1579 1569 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) 1580 1570 { 1581 1571 int num_of_cache_types = 0; 1572 + bool cache_line_size_missing = false; 1582 1573 1583 1574 switch (kdev->adev->asic_type) { 1584 1575 case CHIP_KAVERI: ··· 1703 1692 case IP_VERSION(11, 5, 0): 1704 1693 case IP_VERSION(11, 5, 1): 1705 1694 case IP_VERSION(11, 5, 2): 1695 + /* Cacheline size not available in IP discovery for gc11. 1696 + * kfd_fill_gpu_cache_info_from_gfx_config to hard code it 1697 + */ 1698 + cache_line_size_missing = true; 1699 + fallthrough; 1706 1700 case IP_VERSION(12, 0, 0): 1707 1701 case IP_VERSION(12, 0, 1): 1708 1702 num_of_cache_types = 1709 - kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); 1703 + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, 1704 + cache_line_size_missing, 1705 + *pcache_info); 1710 1706 break; 1711 1707 default: 1712 1708 *pcache_info = dummy_cache_info;
+15
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 207 207 if (!down_read_trylock(&adev->reset_domain->sem)) 208 208 return -EIO; 209 209 210 + if (!pdd->proc_ctx_cpu_ptr) { 211 + r = amdgpu_amdkfd_alloc_gtt_mem(adev, 212 + AMDGPU_MES_PROC_CTX_SIZE, 213 + &pdd->proc_ctx_bo, 214 + &pdd->proc_ctx_gpu_addr, 215 + &pdd->proc_ctx_cpu_ptr, 216 + false); 217 + if (r) { 218 + dev_err(adev->dev, 219 + "failed to allocate process context bo\n"); 220 + return r; 221 + } 222 + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); 223 + } 224 + 210 225 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 211 226 queue_input.process_id = qpd->pqm->process->pasid; 212 227 queue_input.page_table_base_addr = qpd->page_table_base;
+2 -2
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
··· 306 306 spage = migrate_pfn_to_page(migrate->src[i]); 307 307 if (spage && !is_zone_device_page(spage)) { 308 308 src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, 309 - DMA_TO_DEVICE); 309 + DMA_BIDIRECTIONAL); 310 310 r = dma_mapping_error(dev, src[i]); 311 311 if (r) { 312 312 dev_err(dev, "%s: fail %d dma_map_page\n", ··· 629 629 goto out_oom; 630 630 } 631 631 632 - dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); 632 + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); 633 633 r = dma_mapping_error(dev, dst[i]); 634 634 if (r) { 635 635 dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
+2 -21
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 1076 1076 1077 1077 kfd_free_process_doorbells(pdd->dev->kfd, pdd); 1078 1078 1079 - if (pdd->dev->kfd->shared_resources.enable_mes) 1079 + if (pdd->dev->kfd->shared_resources.enable_mes && 1080 + pdd->proc_ctx_cpu_ptr) 1080 1081 amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, 1081 1082 &pdd->proc_ctx_bo); 1082 1083 /* ··· 1609 1608 struct kfd_process *p) 1610 1609 { 1611 1610 struct kfd_process_device *pdd = NULL; 1612 - int retval = 0; 1613 1611 1614 1612 if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) 1615 1613 return NULL; ··· 1632 1632 pdd->user_gpu_id = dev->id; 1633 1633 atomic64_set(&pdd->evict_duration_counter, 0); 1634 1634 1635 - if (dev->kfd->shared_resources.enable_mes) { 1636 - retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, 1637 - AMDGPU_MES_PROC_CTX_SIZE, 1638 - &pdd->proc_ctx_bo, 1639 - &pdd->proc_ctx_gpu_addr, 1640 - &pdd->proc_ctx_cpu_ptr, 1641 - false); 1642 - if (retval) { 1643 - dev_err(dev->adev->dev, 1644 - "failed to allocate process context bo\n"); 1645 - goto err_free_pdd; 1646 - } 1647 - memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); 1648 - } 1649 - 1650 1635 p->pdds[p->n_pdds++] = pdd; 1651 1636 if (kfd_dbg_is_per_vmid_supported(pdd->dev)) 1652 1637 pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( ··· 1643 1658 idr_init(&pdd->alloc_idr); 1644 1659 1645 1660 return pdd; 1646 - 1647 - err_free_pdd: 1648 - kfree(pdd); 1649 - return NULL; 1650 1661 } 1651 1662 1652 1663 /**
+8 -4
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 212 212 void pqm_uninit(struct process_queue_manager *pqm) 213 213 { 214 214 struct process_queue_node *pqn, *next; 215 - struct kfd_process_device *pdd; 216 215 217 216 list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { 218 217 if (pqn->q) { 219 - pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); 220 - kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); 221 - kfd_queue_release_buffers(pdd, &pqn->q->properties); 218 + struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device, 219 + pqm->process); 220 + if (pdd) { 221 + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); 222 + kfd_queue_release_buffers(pdd, &pqn->q->properties); 223 + } else { 224 + WARN_ON(!pdd); 225 + } 222 226 pqm_clean_queue_resource(pqm, pqn); 223 227 } 224 228
+1
drivers/gpu/drm/amd/include/kgd_pp_interface.h
··· 164 164 }; 165 165 166 166 enum PP_SMC_POWER_PROFILE { 167 + PP_SMC_POWER_PROFILE_UNKNOWN = -1, 167 168 PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0, 168 169 PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1, 169 170 PP_SMC_POWER_PROFILE_POWERSAVING = 0x2,
+17 -7
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
··· 764 764 smu->smu_baco.platform_support = false; 765 765 smu->smu_baco.maco_support = false; 766 766 smu->user_dpm_profile.fan_mode = -1; 767 + smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN; 767 768 768 769 mutex_init(&smu->message_lock); 769 770 ··· 1249 1248 return smu->workload_map && smu->workload_map[profile].valid_mapping; 1250 1249 } 1251 1250 1251 + static void smu_init_power_profile(struct smu_context *smu) 1252 + { 1253 + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) { 1254 + if (smu->is_apu || 1255 + !smu_is_workload_profile_available( 1256 + smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) 1257 + smu->power_profile_mode = 1258 + PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; 1259 + else 1260 + smu->power_profile_mode = 1261 + PP_SMC_POWER_PROFILE_FULLSCREEN3D; 1262 + } 1263 + smu_power_profile_mode_get(smu, smu->power_profile_mode); 1264 + } 1265 + 1252 1266 static int smu_sw_init(struct amdgpu_ip_block *ip_block) 1253 1267 { 1254 1268 struct amdgpu_device *adev = ip_block->adev; ··· 1285 1269 atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); 1286 1270 atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); 1287 1271 1288 - if (smu->is_apu || 1289 - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) 1290 - smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; 1291 - else 1292 - smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; 1293 - smu_power_profile_mode_get(smu, smu->power_profile_mode); 1294 - 1272 + smu_init_power_profile(smu); 1295 1273 smu->display_config = &adev->pm.pm_display_cfg; 1296 1274 1297 1275 smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
+1
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
··· 2810 2810 smu->workload_map = smu_v13_0_7_workload_map; 2811 2811 smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION; 2812 2812 smu_v13_0_set_smu_mailbox_registers(smu); 2813 + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; 2813 2814 }