Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Account for SH/SE count when setting up cu masks.

On systems with multiple SHs per SE, compute_static_thread_mgmt_se#
is split into independent masks, one for each SH, in the upper and
lower 16 bits. We need to detect this and apply CU masking to each
SH. The CU mask bits are assigned first to each SE, then to
alternate SHs, and finally to higher CU IDs. This ensures that
the maximum number of SPIs is engaged as early as possible while
balancing CU assignment across the SHs.

v2: Use max SH/SE rather than max SH in cu_per_sh.

v3: Fix comment blocks, ensure se_mask is initially zero-filled,
and correctly assign se.sh.cu positions to unset bits in cu_mask.

Signed-off-by: Sean Keely <Sean.Keely@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Sean Keely and committed by
Alex Deucher
1ec06c2d d035f84d

+62 -19
+61 -19
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
··· 98 98 uint32_t *se_mask) 99 99 { 100 100 struct kfd_cu_info cu_info; 101 - uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0}; 102 - int i, se, sh, cu = 0; 103 - 101 + uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; 102 + int i, se, sh, cu; 104 103 amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); 105 104 106 105 if (cu_mask_count > cu_info.cu_active_number) 107 106 cu_mask_count = cu_info.cu_active_number; 108 107 108 + /* Exceeding these bounds corrupts the stack and indicates a coding error. 109 + * Returning with no CU's enabled will hang the queue, which should be 110 + * attention grabbing. 111 + */ 112 + if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) { 113 + pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines); 114 + return; 115 + } 116 + if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) { 117 + pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n", 118 + cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines); 119 + return; 120 + } 121 + /* Count active CUs per SH. 122 + * 123 + * Some CUs in an SH may be disabled. HW expects disabled CUs to be 124 + * represented in the high bits of each SH's enable mask (the upper and lower 125 + * 16 bits of se_mask) and will take care of the actual distribution of 126 + * disabled CUs within each SH automatically. 127 + * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1. 128 + * 129 + * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info. 130 + */ 109 131 for (se = 0; se < cu_info.num_shader_engines; se++) 110 132 for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) 111 - cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); 133 + cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); 112 134 113 - /* Symmetrically map cu_mask to all SEs: 114 - * cu_mask[0] bit0 -> se_mask[0] bit0; 115 - * cu_mask[0] bit1 -> se_mask[1] bit0; 116 - * ... 
(if # SE is 4) 117 - * cu_mask[0] bit4 -> se_mask[0] bit1; 135 + /* Symmetrically map cu_mask to all SEs & SHs: 136 + * se_mask programs up to 2 SH in the upper and lower 16 bits. 137 + * 138 + * Examples 139 + * Assuming 1 SH/SE, 4 SEs: 140 + * cu_mask[0] bit0 -> se_mask[0] bit0 141 + * cu_mask[0] bit1 -> se_mask[1] bit0 118 142 * ... 143 + * cu_mask[0] bit4 -> se_mask[0] bit1 144 + * ... 145 + * 146 + * Assuming 2 SH/SE, 4 SEs 147 + * cu_mask[0] bit0 -> se_mask[0] bit0 (SE0,SH0,CU0) 148 + * cu_mask[0] bit1 -> se_mask[1] bit0 (SE1,SH0,CU0) 149 + * ... 150 + * cu_mask[0] bit4 -> se_mask[0] bit16 (SE0,SH1,CU0) 151 + * cu_mask[0] bit5 -> se_mask[1] bit16 (SE1,SH1,CU0) 152 + * ... 153 + * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1) 154 + * ... 155 + * 156 + * First ensure all CUs are disabled, then enable user specified CUs. 119 157 */ 120 - se = 0; 121 - for (i = 0; i < cu_mask_count; i++) { 122 - if (cu_mask[i / 32] & (1 << (i % 32))) 123 - se_mask[se] |= 1 << cu; 158 + for (i = 0; i < cu_info.num_shader_engines; i++) 159 + se_mask[i] = 0; 124 160 125 - do { 126 - se++; 127 - if (se == cu_info.num_shader_engines) { 128 - se = 0; 129 - cu++; 161 + i = 0; 162 + for (cu = 0; cu < 16; cu++) { 163 + for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) { 164 + for (se = 0; se < cu_info.num_shader_engines; se++) { 165 + if (cu_per_sh[se][sh] > cu) { 166 + if (cu_mask[i / 32] & (1 << (i % 32))) 167 + se_mask[se] |= 1 << (cu + sh * 16); 168 + i++; 169 + if (i == cu_mask_count) 170 + return; 171 + } 130 172 } 131 - } while (cu >= cu_per_se[se] && cu < 32); 173 + } 132 174 } 133 175 }
+1
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
··· 27 27 #include "kfd_priv.h" 28 28 29 29 #define KFD_MAX_NUM_SE 8 30 + #define KFD_MAX_NUM_SH_PER_SE 2 30 31 31 32 /** 32 33 * struct mqd_manager