Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Extend CU mask to 8 SEs (v3)

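Follow the bitmap layout logic introduced by
"drm/amdgpu: support get_cu_info for Arcturus": the CU bitmap for shader
engine `se`, shader array `sh` is read from cu_bitmap[se % 4][sh + (se / 4)].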

v2: squash in fixup for gfx_v9_0.c (Alex)
v3: squash in debug print output fix

Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Jay Cornwall and committed by Alex Deucher
5145d57e 857b82d0

+30 -12
+4
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 3341 3341 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3342 3342 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3343 3343 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3344 + mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3345 + mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3346 + mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3347 + mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3344 3348 mqd->compute_misc_reserved = 0x00000003; 3345 3349 3346 3350 mqd->dynamic_cu_mask_addr_lo =
+5 -5
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
··· 98 98 uint32_t *se_mask) 99 99 { 100 100 struct kfd_cu_info cu_info; 101 - uint32_t cu_per_sh[4] = {0}; 102 - int i, se, cu = 0; 101 + uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0}; 102 + int i, se, sh, cu = 0; 103 103 104 104 amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); 105 105 ··· 107 107 cu_mask_count = cu_info.cu_active_number; 108 108 109 109 for (se = 0; se < cu_info.num_shader_engines; se++) 110 - for (i = 0; i < 4; i++) 111 - cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]); 110 + for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) 111 + cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); 112 112 113 113 /* Symmetrically map cu_mask to all SEs: 114 114 * cu_mask[0] bit0 -> se_mask[0] bit0; ··· 128 128 se = 0; 129 129 cu++; 130 130 } 131 - } while (cu >= cu_per_sh[se] && cu < 32); 131 + } while (cu >= cu_per_se[se] && cu < 32); 132 132 } 133 133 }
+2
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
··· 26 26 27 27 #include "kfd_priv.h" 28 28 29 + #define KFD_MAX_NUM_SE 8 30 + 29 31 /** 30 32 * struct mqd_manager 31 33 *
+15 -3
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
··· 46 46 struct queue_properties *q) 47 47 { 48 48 struct v9_mqd *m; 49 - uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ 49 + uint32_t se_mask[KFD_MAX_NUM_SE] = {0}; 50 50 51 51 if (q->cu_mask_count == 0) 52 52 return; ··· 59 59 m->compute_static_thread_mgmt_se1 = se_mask[1]; 60 60 m->compute_static_thread_mgmt_se2 = se_mask[2]; 61 61 m->compute_static_thread_mgmt_se3 = se_mask[3]; 62 + m->compute_static_thread_mgmt_se4 = se_mask[4]; 63 + m->compute_static_thread_mgmt_se5 = se_mask[5]; 64 + m->compute_static_thread_mgmt_se6 = se_mask[6]; 65 + m->compute_static_thread_mgmt_se7 = se_mask[7]; 62 66 63 - pr_debug("update cu mask to %#x %#x %#x %#x\n", 67 + pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", 64 68 m->compute_static_thread_mgmt_se0, 65 69 m->compute_static_thread_mgmt_se1, 66 70 m->compute_static_thread_mgmt_se2, 67 - m->compute_static_thread_mgmt_se3); 71 + m->compute_static_thread_mgmt_se3, 72 + m->compute_static_thread_mgmt_se4, 73 + m->compute_static_thread_mgmt_se5, 74 + m->compute_static_thread_mgmt_se6, 75 + m->compute_static_thread_mgmt_se7); 68 76 } 69 77 70 78 static void set_priority(struct v9_mqd *m, struct queue_properties *q) ··· 133 125 m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; 134 126 m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; 135 127 m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; 128 + m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF; 129 + m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF; 130 + m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF; 131 + m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF; 136 132 137 133 m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 138 134 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+4 -4
drivers/gpu/drm/amd/include/v9_structs.h
··· 196 196 uint32_t compute_wave_restore_addr_lo; 197 197 uint32_t compute_wave_restore_addr_hi; 198 198 uint32_t compute_wave_restore_control; 199 - uint32_t reserved_39; 200 - uint32_t reserved_40; 201 - uint32_t reserved_41; 202 - uint32_t reserved_42; 199 + uint32_t compute_static_thread_mgmt_se4; 200 + uint32_t compute_static_thread_mgmt_se5; 201 + uint32_t compute_static_thread_mgmt_se6; 202 + uint32_t compute_static_thread_mgmt_se7; 203 203 uint32_t reserved_43; 204 204 uint32_t reserved_44; 205 205 uint32_t reserved_45;