Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: refine gprs init shaders to check coverage

Add code to check whether all SIMDs are covered, to make sure that all
GPRs are initialized.

Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Dennis Li and committed by
Alex Deucher
6effe779 3bffd71d

+382 -5
+6 -3
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 4559 4559 if (!ring->sched.ready) 4560 4560 return 0; 4561 4561 4562 - if (adev->asic_type == CHIP_ARCTURUS || 4563 - adev->asic_type == CHIP_ALDEBARAN) { 4562 + if (adev->asic_type == CHIP_ARCTURUS) { 4564 4563 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4565 4564 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4566 4565 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; ··· 4744 4745 } 4745 4746 4746 4747 /* requires IBs so do in late init after IB pool is initialized */ 4747 - r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4748 + if (adev->asic_type == CHIP_ALDEBARAN) 4749 + r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4750 + else 4751 + r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4752 + 4748 4753 if (r) 4749 4754 return r; 4750 4755
+375 -2
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
··· 22 22 */ 23 23 #include "amdgpu.h" 24 24 #include "soc15.h" 25 + #include "soc15d.h" 25 26 26 27 #include "gc/gc_9_4_2_offset.h" 27 28 #include "gc/gc_9_4_2_sh_mask.h" ··· 79 78 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_UTCL1_CNTL1, 0xffffffff, 0x30800400), 80 79 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20), 81 80 }; 81 + 82 + static const u32 vgpr_init_compute_shader_aldebaran[] = { 83 + 0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807, 84 + 0x81070407, 0x8e078207, 0xbe88008f, 0xc0410200, 0x00000007, 0xd3d94000, 85 + 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003, 86 + 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006, 87 + 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009, 88 + 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c, 89 + 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f, 90 + 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012, 91 + 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015, 92 + 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018, 93 + 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b, 94 + 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e, 95 + 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021, 96 + 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024, 97 + 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027, 98 + 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a, 99 + 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d, 100 + 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030, 101 + 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033, 102 + 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036, 103 + 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 
0x18000080, 0xd3d94039, 104 + 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c, 105 + 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f, 106 + 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042, 107 + 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045, 108 + 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048, 109 + 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b, 110 + 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e, 111 + 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051, 112 + 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054, 113 + 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057, 114 + 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a, 115 + 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d, 116 + 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060, 117 + 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063, 118 + 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066, 119 + 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069, 120 + 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c, 121 + 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f, 122 + 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072, 123 + 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075, 124 + 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078, 125 + 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b, 126 + 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e, 127 + 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081, 128 + 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084, 129 + 0x18000080, 
0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087, 130 + 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a, 131 + 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d, 132 + 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090, 133 + 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093, 134 + 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096, 135 + 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099, 136 + 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c, 137 + 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f, 138 + 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2, 139 + 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5, 140 + 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8, 141 + 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab, 142 + 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae, 143 + 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1, 144 + 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4, 145 + 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7, 146 + 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba, 147 + 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd, 148 + 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0, 149 + 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3, 150 + 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6, 151 + 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9, 152 + 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc, 153 + 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf, 154 + 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 
0xd3d940d2, 155 + 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5, 156 + 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8, 157 + 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db, 158 + 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de, 159 + 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1, 160 + 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4, 161 + 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7, 162 + 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea, 163 + 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed, 164 + 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0, 165 + 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3, 166 + 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6, 167 + 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9, 168 + 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc, 169 + 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff, 170 + 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280, 171 + 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280, 172 + 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001, 173 + 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 0xb78b4000, 174 + 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201, 175 + 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8, 176 + 0xbf810000, 177 + }; 178 + 179 + const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = { 180 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 181 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, 182 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 }, 183 + { SOC15_REG_ENTRY(GC, 0, 
regCOMPUTE_NUM_THREAD_Z), 1 }, 184 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf }, 185 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400004 }, /* 64KB LDS */ 186 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ 187 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 188 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 189 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 190 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 191 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 192 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 193 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 194 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 195 + }; 196 + 197 + static const u32 sgpr_init_compute_shader_aldebaran[] = { 198 + 0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807, 199 + 0x81070407, 0x8e078207, 0xbefc0006, 0xbf800000, 0xbf900001, 0xbe88008f, 200 + 0xc0410200, 0x00000007, 0xb07c0000, 0xbe8000ff, 0x0000005f, 0xbee50080, 201 + 0xbe812c65, 0xbe822c65, 0xbe832c65, 0xbe842c65, 0xbe852c65, 0xb77c0005, 202 + 0x80808500, 0xbf84fff8, 0xbe800080, 0xbf810000, 203 + }; 204 + 205 + static const struct soc15_reg_entry sgpr1_init_regs_aldebaran[] = { 206 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 207 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, 208 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 }, 209 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, 210 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS): SGPRS[9:6] VGPRS[5:0] */ 211 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x4 }, /* USER_SGPR[5:1]*/ 212 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ 213 + { 
SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 214 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 215 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 216 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 217 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 218 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 219 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 220 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 221 + }; 222 + 223 + static const struct soc15_reg_entry sgpr2_init_regs_aldebaran[] = { 224 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 225 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, 226 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 }, 227 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, 228 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 229 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x4 }, /* USER_SGPR[5:1]*/ 230 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ 231 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 232 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 233 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 234 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 235 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 236 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 237 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 238 + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 239 + }; 240 + 241 + static int gfx_v9_4_2_check_gprs_init_coverage(struct amdgpu_device *adev, 242 + 
uint32_t *wb) 243 + { 244 + uint32_t se_id, cu_id, simd_id; 245 + uint32_t simd_cnt = 0; 246 + uint32_t se_offset, cu_offset, data; 247 + 248 + for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { 249 + se_offset = se_id * 16 * 4; 250 + for (cu_id = 0; cu_id < 16; cu_id++) { 251 + cu_offset = cu_id * 4; 252 + for (simd_id = 0; simd_id < 4; simd_id++) { 253 + data = wb[se_offset + cu_offset + simd_id]; 254 + if (data == 0xF) 255 + simd_cnt++; 256 + } 257 + } 258 + } 259 + 260 + if (adev->gfx.cu_info.number * 4 == simd_cnt) 261 + return 0; 262 + 263 + dev_warn(adev->dev, "SIMD Count: %d, %d\n", 264 + adev->gfx.cu_info.number * 4, simd_cnt); 265 + 266 + for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { 267 + se_offset = se_id * 16 * 4; 268 + for (cu_id = 0; cu_id < 16; cu_id++) { 269 + cu_offset = cu_id * 4; 270 + for (simd_id = 0; simd_id < 4; simd_id++) { 271 + data = wb[se_offset + cu_offset + simd_id]; 272 + if (data != 0xF) 273 + dev_warn(adev->dev, "SE[%d]CU[%d]SIMD[%d]: isn't inited\n", 274 + se_id, cu_id, simd_id); 275 + } 276 + } 277 + } 278 + 279 + return -EFAULT; 280 + } 281 + 282 + static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, 283 + const uint32_t *shader_ptr, uint32_t shader_size, 284 + const struct soc15_reg_entry *init_regs, uint32_t regs_size, 285 + uint32_t compute_dim_x, u64 wb_gpu_addr) 286 + { 287 + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 288 + struct amdgpu_ib ib; 289 + struct dma_fence *f = NULL; 290 + int r, i; 291 + uint32_t total_size, shader_offset; 292 + u64 gpu_addr; 293 + 294 + total_size = (regs_size * 3 + 4 + 4 + 5 + 2) * 4; 295 + total_size = ALIGN(total_size, 256); 296 + shader_offset = total_size; 297 + total_size += ALIGN(shader_size, 256); 298 + 299 + /* allocate an indirect buffer to put the commands in */ 300 + memset(&ib, 0, sizeof(ib)); 301 + r = amdgpu_ib_get(adev, NULL, total_size, 302 + AMDGPU_IB_POOL_DIRECT, &ib); 303 + if (r) { 304 + DRM_ERROR("amdgpu: 
failed to get ib (%d).\n", r); 305 + return r; 306 + } 307 + 308 + /* load the compute shaders */ 309 + for (i = 0; i < shader_size/sizeof(u32); i++) 310 + ib.ptr[i + (shader_offset / 4)] = shader_ptr[i]; 311 + 312 + /* init the ib length to 0 */ 313 + ib.length_dw = 0; 314 + 315 + /* write the register state for the compute dispatch */ 316 + for (i = 0; i < regs_size; i++) { 317 + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 318 + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i]) 319 + - PACKET3_SET_SH_REG_START; 320 + ib.ptr[ib.length_dw++] = init_regs[i].reg_value; 321 + } 322 + 323 + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 324 + gpu_addr = (ib.gpu_addr + (u64)shader_offset) >> 8; 325 + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 326 + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO) 327 + - PACKET3_SET_SH_REG_START; 328 + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 329 + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 330 + 331 + /* write the wb buffer address */ 332 + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 333 + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0) 334 + - PACKET3_SET_SH_REG_START; 335 + ib.ptr[ib.length_dw++] = lower_32_bits(wb_gpu_addr); 336 + ib.ptr[ib.length_dw++] = upper_32_bits(wb_gpu_addr); 337 + 338 + /* write dispatch packet */ 339 + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 340 + ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ 341 + ib.ptr[ib.length_dw++] = 1; /* y */ 342 + ib.ptr[ib.length_dw++] = 1; /* z */ 343 + ib.ptr[ib.length_dw++] = 344 + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 345 + 346 + /* write CS partial flush packet */ 347 + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 348 + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 349 + 350 + /* shedule the ib on the ring */ 351 + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 
352 + if (r) { 353 + DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 354 + goto fail; 355 + } 356 + 357 + /* wait for the GPU to finish processing the IB */ 358 + r = dma_fence_wait(f, false); 359 + if (r) { 360 + DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 361 + goto fail; 362 + } 363 + fail: 364 + amdgpu_ib_free(adev, &ib, NULL); 365 + dma_fence_put(f); 366 + 367 + return r; 368 + } 369 + 370 + int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev) 371 + { 372 + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 373 + int r; 374 + int compute_dim_x = adev->gfx.config.max_shader_engines * 375 + adev->gfx.config.max_cu_per_sh * 376 + adev->gfx.config.max_sh_per_se; 377 + int sgpr_work_group_size = 5; 378 + /* CU_ID: 0~15, SIMD_ID: 0~3 */ 379 + int wb_size = adev->gfx.config.max_shader_engines * 16 * 4; 380 + struct amdgpu_ib ib; 381 + 382 + /* only support when RAS is enabled */ 383 + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 384 + return 0; 385 + 386 + /* bail if the compute ring is not ready */ 387 + if (!ring->sched.ready) 388 + return 0; 389 + 390 + /* allocate an indirect buffer to put the commands in */ 391 + memset(&ib, 0, sizeof(ib)); 392 + r = amdgpu_ib_get(adev, NULL, wb_size * sizeof(uint32_t), 393 + AMDGPU_IB_POOL_DIRECT, &ib); 394 + if (r) { 395 + DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 396 + return r; 397 + } 398 + 399 + memset(ib.ptr, 0, wb_size * sizeof(uint32_t)); 400 + r = gfx_v9_4_2_run_shader(adev, vgpr_init_compute_shader_aldebaran, 401 + sizeof(vgpr_init_compute_shader_aldebaran), 402 + vgpr_init_regs_aldebaran, 403 + ARRAY_SIZE(vgpr_init_regs_aldebaran), 404 + compute_dim_x * 2, ib.gpu_addr); 405 + if (r) { 406 + dev_err(adev->dev, "Init VGPRS: failed to run shader\n"); 407 + goto failed; 408 + } 409 + 410 + r = gfx_v9_4_2_check_gprs_init_coverage(adev, ib.ptr); 411 + if (r) { 412 + dev_err(adev->dev, "Init VGPRS: failed to cover all SIMDs\n"); 413 + goto failed; 414 + } else { 415 + 
dev_info(adev->dev, "Init VGPRS Successfully\n"); 416 + } 417 + 418 + memset(ib.ptr, 0, wb_size * sizeof(uint32_t)); 419 + r = gfx_v9_4_2_run_shader(adev, sgpr_init_compute_shader_aldebaran, 420 + sizeof(sgpr_init_compute_shader_aldebaran), 421 + sgpr1_init_regs_aldebaran, 422 + ARRAY_SIZE(sgpr1_init_regs_aldebaran), 423 + compute_dim_x / 2 * sgpr_work_group_size, 424 + ib.gpu_addr); 425 + if (r) { 426 + dev_err(adev->dev, "Init SGPRS Part1: failed to run shader\n"); 427 + goto failed; 428 + } 429 + 430 + r = gfx_v9_4_2_run_shader(adev, sgpr_init_compute_shader_aldebaran, 431 + sizeof(sgpr_init_compute_shader_aldebaran), 432 + sgpr2_init_regs_aldebaran, 433 + ARRAY_SIZE(sgpr2_init_regs_aldebaran), 434 + compute_dim_x / 2 * sgpr_work_group_size, 435 + ib.gpu_addr); 436 + if (r) { 437 + dev_err(adev->dev, "Init SGPRS Part2: failed to run shader\n"); 438 + goto failed; 439 + } 440 + 441 + r = gfx_v9_4_2_check_gprs_init_coverage(adev, ib.ptr); 442 + if (r) 443 + dev_err(adev->dev, 444 + "Init SGPRS: failed to cover all SIMDs\n"); 445 + else 446 + dev_info(adev->dev, "Init SGPRS Successfully\n"); 447 + 448 + failed: 449 + amdgpu_ib_free(adev, &ib, NULL); 450 + return r; 451 + } 82 452 83 453 static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev); 84 454 static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev); ··· 1180 808 REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) }, 1181 809 }; 1182 810 1183 - static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = 1184 - { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; 811 + static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = { 812 + SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 813 + }; 1185 814 1186 815 static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev, 1187 816 const struct soc15_reg_entry *reg,
+1
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
··· 29 29 void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, 30 30 uint32_t die_id); 31 31 void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev); 32 + int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev); 32 33 33 34 extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs; 34 35