Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/pm: add send bad channel info function

Support messaging the SMU to update the HBM bad channel
info in the OOB table.

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Stanley.Yang and committed by
Alex Deucher
d510eccf 6e6faf7a

+77 -2
+12
drivers/gpu/drm/amd/pm/amdgpu_dpm.c
··· 507 507 return ret; 508 508 } 509 509 510 + int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size) 511 + { 512 + struct smu_context *smu = adev->powerplay.pp_handle; 513 + int ret = 0; 514 + 515 + mutex_lock(&adev->pm.mutex); 516 + ret = smu_send_hbm_bad_channel_flag(smu, size); 517 + mutex_unlock(&adev->pm.mutex); 518 + 519 + return ret; 520 + } 521 + 510 522 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, 511 523 enum pp_clock_type type, 512 524 uint32_t *min,
+1
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
··· 412 412 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version); 413 413 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable); 414 414 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size); 415 + int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size); 415 416 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, 416 417 enum pp_clock_type type, 417 418 uint32_t *min,
+10
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
··· 3052 3052 3053 3053 return ret; 3054 3054 } 3055 + 3056 + int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size) 3057 + { 3058 + int ret = 0; 3059 + 3060 + if (smu->ppt_funcs && smu->ppt_funcs->send_hbm_bad_channel_flag) 3061 + ret = smu->ppt_funcs->send_hbm_bad_channel_flag(smu, size); 3062 + 3063 + return ret; 3064 + }
+7
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
··· 1292 1292 * @set_config_table: Apply the input DriverSmuConfig table settings. 1293 1293 */ 1294 1294 int (*set_config_table)(struct smu_context *smu, struct config_table_setting *table); 1295 + 1296 + /** 1297 + * @send_hbm_bad_channel_flag: message SMU to update bad channel info 1298 + * of SMUBUS table. 1299 + */ 1300 + int (*send_hbm_bad_channel_flag)(struct smu_context *smu, uint32_t size); 1295 1301 }; 1296 1302 1297 1303 typedef enum { ··· 1434 1428 int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size); 1435 1429 void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev); 1436 1430 int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size); 1431 + int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size); 1437 1432 #endif 1438 1433 #endif
+2 -1
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h
··· 103 103 #define PPSMC_MSG_GfxDriverResetRecovery 0x42 104 104 #define PPSMC_MSG_BoardPowerCalibration 0x43 105 105 #define PPSMC_MSG_HeavySBR 0x45 106 - #define PPSMC_Message_Count 0x46 106 + #define PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel 0x46 107 + #define PPSMC_Message_Count 0x47 107 108 108 109 109 110 //PPSMC Reset Types
+2 -1
drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
··· 232 232 __SMU_DUMMY_MAP(ForceGfxVid), \ 233 233 __SMU_DUMMY_MAP(Spare0), \ 234 234 __SMU_DUMMY_MAP(UnforceGfxVid), \ 235 - __SMU_DUMMY_MAP(HeavySBR), 235 + __SMU_DUMMY_MAP(HeavySBR), \ 236 + __SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel), 236 237 237 238 #undef __SMU_DUMMY_MAP 238 239 #define __SMU_DUMMY_MAP(type) SMU_MSG_##type
+43
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
··· 82 82 */ 83 83 #define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00 84 84 85 + /* 86 + * SMU supports the BAD CHANNEL info MSG since version 68.51.00; 87 + * use this to check whether the bad channel info message is supported 88 + */ 89 + #define SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION 0x00443300 90 + 85 91 static const struct smu_temperature_range smu13_thermal_policy[] = 86 92 { 87 93 {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, ··· 146 140 MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), 147 141 MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0), 148 142 MSG_MAP(HeavySBR, PPSMC_MSG_HeavySBR, 0), 143 + MSG_MAP(SetBadHBMPagesRetiredFlagsPerChannel, PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel, 0), 149 144 }; 150 145 151 146 static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = { ··· 2004 1997 return ret; 2005 1998 } 2006 1999 2000 + static int aldebaran_check_bad_channel_info_support(struct smu_context *smu) 2001 + { 2002 + uint32_t if_version = 0xff, smu_version = 0xff; 2003 + int ret = 0; 2004 + 2005 + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); 2006 + if (ret) { 2007 + /* report not supported if we failed to get smu_version */ 2008 + ret = -EOPNOTSUPP; 2009 + } 2010 + 2011 + if (smu_version < SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION) 2012 + ret = -EOPNOTSUPP; 2013 + 2014 + return ret; 2015 + } 2016 + 2017 + static int aldebaran_send_hbm_bad_channel_flag(struct smu_context *smu, 2018 + uint32_t size) 2019 + { 2020 + int ret = 0; 2021 + 2022 + ret = aldebaran_check_bad_channel_info_support(smu); 2023 + if (ret) 2024 + return ret; 2025 + 2026 + /* message SMU to update the bad channel info on SMUBUS */ 2027 + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetBadHBMPagesRetiredFlagsPerChannel, size, NULL); 2028 + if (ret) 2029 + dev_err(smu->adev->dev, "[%s] failed to message SMU to update HBM bad channel info\n", 2030 + __func__); 2031 + 2032 + return ret; 2033 + } 2034 + 2007 2035 static const
struct pptable_funcs aldebaran_ppt_funcs = { 2008 2036 /* init dpm */ 2009 2037 .get_allowed_feature_mask = aldebaran_get_allowed_feature_mask, ··· 2104 2062 .i2c_fini = aldebaran_i2c_control_fini, 2105 2063 .send_hbm_bad_pages_num = aldebaran_smu_send_hbm_bad_page_num, 2106 2064 .get_ecc_info = aldebaran_get_ecc_info, 2065 + .send_hbm_bad_channel_flag = aldebaran_send_hbm_bad_channel_flag, 2107 2066 }; 2108 2067 2109 2068 void aldebaran_set_ppt_funcs(struct smu_context *smu)