Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/amdgpu: add sdma ip block for sienna_cichlid (v5)

Sienna_Cichlid has 4 sdma controllers.

v2: add missing license to sdma_common.h (Alex)
v3: rebase (Alex)
v4: squash in policy fix (Alex)
v5: squash in fw_name fix

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Likun Gao and committed by Alex Deucher (157e72e8 06ff634c)

+1803 -17
+2 -1
drivers/gpu/drm/amd/amdgpu/Makefile
··· 129 129 sdma_v2_4.o \ 130 130 sdma_v3_0.o \ 131 131 sdma_v4_0.o \ 132 - sdma_v5_0.o 132 + sdma_v5_0.o \ 133 + sdma_v5_2.o 133 134 134 135 # add MES block 135 136 amdgpu-y += \
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
··· 180 180 /* SDMA:256~335*/ 181 181 AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100, 182 182 AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A, 183 + AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2 = 0x114, 184 + AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3 = 0x11E, 183 185 /* IH: 376~391 */ 184 186 AMDGPU_NAVI10_DOORBELL_IH = 0x178, 185 187 /* MMSCH: 392~407
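The two new entries give SDMA engines 2 and 3 their own 64-bit doorbell slots, mirroring engines 0 and 1. A minimal standalone sketch (plain userspace C, not kernel code; the slot values are copied from the hunk above) of how sdma_v5_2_sw_init() later converts these slots into DWORD doorbell offsets with a left shift by one:

#include <stdio.h>

/*
 * Standalone sketch (not part of the patch): the doorbell indices above are
 * 64-bit doorbell slots; sdma_v5_2_sw_init() turns them into 32-bit DWORD
 * doorbell offsets with "<< 1" (see the sw_init hunk further down).
 */
enum {
	DOORBELL_sDMA_ENGINE0 = 0x100,
	DOORBELL_sDMA_ENGINE1 = 0x10A,
	DOORBELL_sDMA_ENGINE2 = 0x114,	/* new in this patch */
	DOORBELL_sDMA_ENGINE3 = 0x11E,	/* new in this patch */
};

int main(void)
{
	const int slot[4] = {
		DOORBELL_sDMA_ENGINE0, DOORBELL_sDMA_ENGINE1,
		DOORBELL_sDMA_ENGINE2, DOORBELL_sDMA_ENGINE3,
	};
	int i;

	for (i = 0; i < 4; i++)
		printf("sdma%d: 64-bit slot 0x%03x -> dword doorbell 0x%03x\n",
		       i, slot[i], slot[i] << 1);
	return 0;
}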
+7 -1
drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
··· 33 33 #define smnCPM_CONTROL 0x11180460 34 34 #define smnPCIE_CNTL2 0x11180070 35 35 36 + #define mmBIF_SDMA2_DOORBELL_RANGE 0x01d6 37 + #define mmBIF_SDMA2_DOORBELL_RANGE_BASE_IDX 2 38 + #define mmBIF_SDMA3_DOORBELL_RANGE 0x01d7 39 + #define mmBIF_SDMA3_DOORBELL_RANGE_BASE_IDX 2 36 40 37 41 static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) 38 42 { ··· 85 81 int doorbell_size) 86 82 { 87 83 u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : 88 - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); 84 + instance == 1 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) : 85 + instance == 2 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA2_DOORBELL_RANGE) : 86 + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA3_DOORBELL_RANGE); 89 87 90 88 u32 doorbell_range = RREG32(reg); 91 89
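nbio_v2_3_sdma_doorbell_range() now selects one of four BIF_SDMAx_DOORBELL_RANGE registers with a chain of conditional expressions. Purely as an illustrative rewrite (not how the patch is written, and it leans on the kernel's SOC15_REG_OFFSET macro rather than being a standalone program), the same instance-to-register mapping spelled out as a switch:

/* Illustration only: equivalent to the conditional chain in the hunk above. */
static u32 sdma_doorbell_range_reg(int instance)
{
	switch (instance) {
	case 0:
		return SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
	case 1:
		return SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
	case 2:
		return SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA2_DOORBELL_RANGE);
	default: /* instance 3 */
		return SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA3_DOORBELL_RANGE);
	}
}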
+4
drivers/gpu/drm/amd/amdgpu/nv.c
··· 53 53 #include "navi10_ih.h" 54 54 #include "gfx_v10_0.h" 55 55 #include "sdma_v5_0.h" 56 + #include "sdma_v5_2.h" 56 57 #include "vcn_v2_0.h" 57 58 #include "jpeg_v2_0.h" 58 59 #include "dce_virtual.h" ··· 489 488 amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); 490 489 amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); 491 490 amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); 491 + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); 492 492 break; 493 493 default: 494 494 return -EINVAL; ··· 568 566 adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1; 569 567 adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0; 570 568 adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1; 569 + adev->doorbell_index.sdma_engine[2] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2; 570 + adev->doorbell_index.sdma_engine[3] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3; 571 571 adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH; 572 572 adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1; 573 573 adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+42
drivers/gpu/drm/amd/amdgpu/sdma_common.h
··· 1 + /* 2 + * Copyright 2019 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + 24 + #ifndef __SDMA_COMMON_H__ 25 + #define __SDMA_COMMON_H__ 26 + 27 + enum sdma_utcl2_cache_read_policy { 28 + CACHE_READ_POLICY_L2__LRU = 0x00000000, 29 + CACHE_READ_POLICY_L2__STREAM = 0x00000001, 30 + CACHE_READ_POLICY_L2__NOA = 0x00000002, 31 + CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA, 32 + }; 33 + 34 + enum sdma_utcl2_cache_write_policy { 35 + CACHE_WRITE_POLICY_L2__LRU = 0x00000000, 36 + CACHE_WRITE_POLICY_L2__STREAM = 0x00000001, 37 + CACHE_WRITE_POLICY_L2__NOA = 0x00000002, 38 + CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003, 39 + CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS, 40 + }; 41 + 42 + #endif /* __SDMA_COMMON_H__ */
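These shared UTCL2 policy enums replace the v5.0-specific copies removed from sdma_v5_0.h below. A minimal standalone sketch (userspace C with a dummy register readback; the shift positions 12/14, the 0xFF0FFF mask and the extra 0x01000000 bit are taken from the sdma_v5_2_gfx_resume() hunk further down) of how the defaults end up in SDMA0_UTCL1_PAGE:

#include <stdio.h>

/* Default policies from sdma_common.h above: read = NOA, write = BYPASS. */
enum {
	CACHE_READ_POLICY_L2__DEFAULT  = 0x2,	/* NOA */
	CACHE_WRITE_POLICY_L2__DEFAULT = 0x3,	/* BYPASS */
};

int main(void)
{
	unsigned int temp = 0xFFFFFFFFu;	/* pretend register readback */

	temp &= 0xFF0FFF;			/* clean read/write policy bits */
	temp |= (CACHE_READ_POLICY_L2__DEFAULT << 12) |
		(CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
		0x01000000;
	printf("SDMA0_UTCL1_PAGE = 0x%08x\n", temp);
	return 0;
}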
+1
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
··· 40 40 #include "soc15.h" 41 41 #include "navi10_sdma_pkt_open.h" 42 42 #include "nbio_v2_3.h" 43 + #include "sdma_common.h" 43 44 #include "sdma_v5_0.h" 44 45 45 46 MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
-15
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
··· 24 24 #ifndef __SDMA_V5_0_H__ 25 25 #define __SDMA_V5_0_H__ 26 26 27 - enum sdma_v5_0_utcl2_cache_read_policy { 28 - CACHE_READ_POLICY_L2__LRU = 0x00000000, 29 - CACHE_READ_POLICY_L2__STREAM = 0x00000001, 30 - CACHE_READ_POLICY_L2__NOA = 0x00000002, 31 - CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA, 32 - }; 33 - 34 - enum sdma_v5_0_utcl2_cache_write_policy { 35 - CACHE_WRITE_POLICY_L2__LRU = 0x00000000, 36 - CACHE_WRITE_POLICY_L2__STREAM = 0x00000001, 37 - CACHE_WRITE_POLICY_L2__NOA = 0x00000002, 38 - CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003, 39 - CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS, 40 - }; 41 - 42 27 extern const struct amd_ip_funcs sdma_v5_0_ip_funcs; 43 28 extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block; 44 29
+1715
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
··· 1 + /* 2 + * Copyright 2019 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + 24 + #include <linux/delay.h> 25 + #include <linux/firmware.h> 26 + #include <linux/module.h> 27 + #include <linux/pci.h> 28 + 29 + #include "amdgpu.h" 30 + #include "amdgpu_ucode.h" 31 + #include "amdgpu_trace.h" 32 + 33 + #include "gc/gc_10_3_0_offset.h" 34 + #include "gc/gc_10_3_0_sh_mask.h" 35 + #include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h" 36 + #include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h" 37 + #include "ivsrcid/sdma2/irqsrcs_sdma2_5_0.h" 38 + #include "ivsrcid/sdma3/irqsrcs_sdma3_5_0.h" 39 + 40 + #include "soc15_common.h" 41 + #include "soc15.h" 42 + #include "navi10_sdma_pkt_open.h" 43 + #include "nbio_v2_3.h" 44 + #include "sdma_common.h" 45 + #include "sdma_v5_2.h" 46 + 47 + MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin"); 48 + MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma1.bin"); 49 + MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma2.bin"); 50 + MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma3.bin"); 51 + 52 + #define SDMA1_REG_OFFSET 0x600 53 + #define SDMA3_REG_OFFSET 0x400 54 + #define SDMA0_HYP_DEC_REG_START 0x5880 55 + #define SDMA0_HYP_DEC_REG_END 0x5893 56 + #define SDMA1_HYP_DEC_REG_OFFSET 0x20 57 + 58 + static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); 59 + static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); 60 + static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); 61 + static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev); 62 + 63 + static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) 64 + { 65 + u32 base; 66 + 67 + if (internal_offset >= SDMA0_HYP_DEC_REG_START && 68 + internal_offset <= SDMA0_HYP_DEC_REG_END) { 69 + base = adev->reg_offset[GC_HWIP][0][1]; 70 + if (instance != 0) 71 + internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance; 72 + } else { 73 + if (instance < 2) { 74 + base = adev->reg_offset[GC_HWIP][0][0]; 75 + if (instance == 1) 76 + internal_offset += SDMA1_REG_OFFSET; 77 + } else { 78 + base = adev->reg_offset[GC_HWIP][0][2]; 79 + if (instance == 3) 80 + internal_offset += SDMA3_REG_OFFSET; 81 + } 82 + } 83 + 84 + return base + internal_offset; 85 + } 86 + 87 + static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev) 88 + { 89 + switch (adev->asic_type) { 90 + case CHIP_SIENNA_CICHLID: 91 + break; 92 + default: 93 + break; 94 + } 95 + } 96 + 97 
+ /** 98 + * sdma_v5_2_init_microcode - load ucode images from disk 99 + * 100 + * @adev: amdgpu_device pointer 101 + * 102 + * Use the firmware interface to load the ucode images into 103 + * the driver (not loaded into hw). 104 + * Returns 0 on success, error on failure. 105 + */ 106 + 107 + // emulation only, won't work on real chip 108 + // navi10 real chip need to use PSP to load firmware 109 + static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) 110 + { 111 + const char *chip_name; 112 + char fw_name[40]; 113 + int err = 0, i; 114 + struct amdgpu_firmware_info *info = NULL; 115 + const struct common_firmware_header *header = NULL; 116 + const struct sdma_firmware_header_v1_0 *hdr; 117 + 118 + DRM_DEBUG("\n"); 119 + 120 + switch (adev->asic_type) { 121 + case CHIP_SIENNA_CICHLID: 122 + chip_name = "sienna_cichlid"; 123 + break; 124 + default: 125 + BUG(); 126 + } 127 + 128 + for (i = 0; i < adev->sdma.num_instances; i++) { 129 + if (i == 0) 130 + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); 131 + else 132 + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); 133 + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); 134 + if (err) 135 + goto out; 136 + err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); 137 + if (err) 138 + goto out; 139 + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; 140 + adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); 141 + adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); 142 + if (adev->sdma.instance[i].feature_version >= 20) 143 + adev->sdma.instance[i].burst_nop = true; 144 + DRM_DEBUG("psp_load == '%s'\n", 145 + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); 146 + 147 + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 148 + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; 149 + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; 150 + info->fw = adev->sdma.instance[i].fw; 151 + header = (const struct common_firmware_header *)info->fw->data; 152 + adev->firmware.fw_size += 153 + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 154 + } 155 + } 156 + out: 157 + if (err) { 158 + DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name); 159 + for (i = 0; i < adev->sdma.num_instances; i++) { 160 + release_firmware(adev->sdma.instance[i].fw); 161 + adev->sdma.instance[i].fw = NULL; 162 + } 163 + } 164 + return err; 165 + } 166 + 167 + static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring) 168 + { 169 + unsigned ret; 170 + 171 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); 172 + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 173 + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 174 + amdgpu_ring_write(ring, 1); 175 + ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ 176 + amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ 177 + 178 + return ret; 179 + } 180 + 181 + static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring, 182 + unsigned offset) 183 + { 184 + unsigned cur; 185 + 186 + BUG_ON(offset > ring->buf_mask); 187 + BUG_ON(ring->ring[offset] != 0x55aa55aa); 188 + 189 + cur = (ring->wptr - 1) & ring->buf_mask; 190 + if (cur > offset) 191 + ring->ring[offset] = cur - offset; 192 + else 193 + ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; 194 + } 195 + 196 + /** 197 + * sdma_v5_2_ring_get_rptr - get the current read pointer 198 + * 199 + * @ring: amdgpu ring pointer 200 + * 201 + * Get the current rptr from the hardware (NAVI10+). 202 + */ 203 + static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring) 204 + { 205 + u64 *rptr; 206 + 207 + /* XXX check if swapping is necessary on BE */ 208 + rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]); 209 + 210 + DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); 211 + return ((*rptr) >> 2); 212 + } 213 + 214 + /** 215 + * sdma_v5_2_ring_get_wptr - get the current write pointer 216 + * 217 + * @ring: amdgpu ring pointer 218 + * 219 + * Get the current wptr from the hardware (NAVI10+). 
220 + */ 221 + static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring) 222 + { 223 + struct amdgpu_device *adev = ring->adev; 224 + u64 *wptr = NULL; 225 + uint64_t local_wptr = 0; 226 + 227 + if (ring->use_doorbell) { 228 + /* XXX check if swapping is necessary on BE */ 229 + wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); 230 + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); 231 + *wptr = (*wptr) >> 2; 232 + DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); 233 + } else { 234 + u32 lowbit, highbit; 235 + 236 + wptr = &local_wptr; 237 + lowbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; 238 + highbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; 239 + 240 + DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", 241 + ring->me, highbit, lowbit); 242 + *wptr = highbit; 243 + *wptr = (*wptr) << 32; 244 + *wptr |= lowbit; 245 + } 246 + 247 + return *wptr; 248 + } 249 + 250 + /** 251 + * sdma_v5_2_ring_set_wptr - commit the write pointer 252 + * 253 + * @ring: amdgpu ring pointer 254 + * 255 + * Write the wptr back to the hardware (NAVI10+). 256 + */ 257 + static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) 258 + { 259 + struct amdgpu_device *adev = ring->adev; 260 + 261 + DRM_DEBUG("Setting write pointer\n"); 262 + if (ring->use_doorbell) { 263 + DRM_DEBUG("Using doorbell -- " 264 + "wptr_offs == 0x%08x " 265 + "lower_32_bits(ring->wptr) << 2 == 0x%08x " 266 + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", 267 + ring->wptr_offs, 268 + lower_32_bits(ring->wptr << 2), 269 + upper_32_bits(ring->wptr << 2)); 270 + /* XXX check if swapping is necessary on BE */ 271 + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); 272 + adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); 273 + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", 274 + ring->doorbell_index, ring->wptr << 2); 275 + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); 276 + } else { 277 + DRM_DEBUG("Not using doorbell -- " 278 + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " 279 + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", 280 + ring->me, 281 + lower_32_bits(ring->wptr << 2), 282 + ring->me, 283 + upper_32_bits(ring->wptr << 2)); 284 + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), 285 + lower_32_bits(ring->wptr << 2)); 286 + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), 287 + upper_32_bits(ring->wptr << 2)); 288 + } 289 + } 290 + 291 + static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 292 + { 293 + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); 294 + int i; 295 + 296 + for (i = 0; i < count; i++) 297 + if (sdma && sdma->burst_nop && (i == 0)) 298 + amdgpu_ring_write(ring, ring->funcs->nop | 299 + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); 300 + else 301 + amdgpu_ring_write(ring, ring->funcs->nop); 302 + } 303 + 304 + /** 305 + * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine 306 + * 307 + * @ring: amdgpu ring pointer 308 + * @ib: IB object to schedule 309 + * 310 + * Schedule an IB in the DMA ring. 311 + */ 312 + static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring, 313 + struct amdgpu_job *job, 314 + struct amdgpu_ib *ib, 315 + uint32_t flags) 316 + { 317 + unsigned vmid = AMDGPU_JOB_GET_VMID(job); 318 + uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); 319 + 320 + /* An IB packet must end on a 8 DW boundary--the next dword 321 + * must be on a 8-dword boundary. 
Our IB packet below is 6 322 + * dwords long, thus add x number of NOPs, such that, in 323 + * modular arithmetic, 324 + * wptr + 6 + x = 8k, k >= 0, which in C is, 325 + * (wptr + 6 + x) % 8 = 0. 326 + * The expression below, is a solution of x. 327 + */ 328 + sdma_v5_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); 329 + 330 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 331 + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); 332 + /* base must be 32 byte aligned */ 333 + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); 334 + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 335 + amdgpu_ring_write(ring, ib->length_dw); 336 + amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr)); 337 + amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr)); 338 + } 339 + 340 + /** 341 + * sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring 342 + * 343 + * @ring: amdgpu ring pointer 344 + * 345 + * Emit an hdp flush packet on the requested DMA ring. 346 + */ 347 + static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) 348 + { 349 + struct amdgpu_device *adev = ring->adev; 350 + u32 ref_and_mask = 0; 351 + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 352 + 353 + if (ring->me == 0) 354 + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; 355 + else 356 + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; 357 + 358 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 359 + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | 360 + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ 361 + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); 362 + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); 363 + amdgpu_ring_write(ring, ref_and_mask); /* reference */ 364 + amdgpu_ring_write(ring, ref_and_mask); /* mask */ 365 + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 366 + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 367 + } 368 + 369 + /** 370 + * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring 371 + * 372 + * @ring: amdgpu ring pointer 373 + * @fence: amdgpu fence object 374 + * 375 + * Add a DMA fence packet to the ring to write 376 + * the fence seq number and DMA trap packet to generate 377 + * an interrupt if needed. 378 + */ 379 + static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 380 + unsigned flags) 381 + { 382 + struct amdgpu_device *adev = ring->adev; 383 + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 384 + /* write the fence */ 385 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | 386 + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ 387 + /* zero in first two bits */ 388 + BUG_ON(addr & 0x3); 389 + amdgpu_ring_write(ring, lower_32_bits(addr)); 390 + amdgpu_ring_write(ring, upper_32_bits(addr)); 391 + amdgpu_ring_write(ring, lower_32_bits(seq)); 392 + 393 + /* optionally write high bits as well */ 394 + if (write64bit) { 395 + addr += 4; 396 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | 397 + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); 398 + /* zero in first two bits */ 399 + BUG_ON(addr & 0x3); 400 + amdgpu_ring_write(ring, lower_32_bits(addr)); 401 + amdgpu_ring_write(ring, upper_32_bits(addr)); 402 + amdgpu_ring_write(ring, upper_32_bits(seq)); 403 + } 404 + 405 + /* Interrupt not work fine on GFX10.1 model yet. 
Use fallback instead */ 406 + if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) { 407 + /* generate an interrupt */ 408 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); 409 + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); 410 + } 411 + } 412 + 413 + 414 + /** 415 + * sdma_v5_2_gfx_stop - stop the gfx async dma engines 416 + * 417 + * @adev: amdgpu_device pointer 418 + * 419 + * Stop the gfx async dma ring buffers. 420 + */ 421 + static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) 422 + { 423 + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; 424 + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; 425 + struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring; 426 + struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring; 427 + u32 rb_cntl, ib_cntl; 428 + int i; 429 + 430 + if ((adev->mman.buffer_funcs_ring == sdma0) || 431 + (adev->mman.buffer_funcs_ring == sdma1) || 432 + (adev->mman.buffer_funcs_ring == sdma2) || 433 + (adev->mman.buffer_funcs_ring == sdma3)) 434 + amdgpu_ttm_set_buffer_funcs_status(adev, false); 435 + 436 + for (i = 0; i < adev->sdma.num_instances; i++) { 437 + rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); 438 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); 439 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 440 + ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); 441 + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); 442 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); 443 + } 444 + 445 + sdma0->sched.ready = false; 446 + sdma1->sched.ready = false; 447 + sdma2->sched.ready = false; 448 + sdma3->sched.ready = false; 449 + } 450 + 451 + /** 452 + * sdma_v5_2_rlc_stop - stop the compute async dma engines 453 + * 454 + * @adev: amdgpu_device pointer 455 + * 456 + * Stop the compute async dma queues. 457 + */ 458 + static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev) 459 + { 460 + /* XXX todo */ 461 + } 462 + 463 + /** 464 + * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch 465 + * 466 + * @adev: amdgpu_device pointer 467 + * @enable: enable/disable the DMA MEs context switch. 468 + * 469 + * Halt or unhalt the async dma engines context switch. 470 + */ 471 + static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) 472 + { 473 + u32 f32_cntl, phase_quantum = 0; 474 + int i; 475 + 476 + if (amdgpu_sdma_phase_quantum) { 477 + unsigned value = amdgpu_sdma_phase_quantum; 478 + unsigned unit = 0; 479 + 480 + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> 481 + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { 482 + value = (value + 1) >> 1; 483 + unit++; 484 + } 485 + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> 486 + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { 487 + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> 488 + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); 489 + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> 490 + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); 491 + WARN_ONCE(1, 492 + "clamping sdma_phase_quantum to %uK clock cycles\n", 493 + value << unit); 494 + } 495 + phase_quantum = 496 + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | 497 + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; 498 + } 499 + 500 + for (i = 0; i < adev->sdma.num_instances; i++) { 501 + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); 502 + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, 503 + AUTO_CTXSW_ENABLE, enable ? 
1 : 0); 504 + if (enable && amdgpu_sdma_phase_quantum) { 505 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), 506 + phase_quantum); 507 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), 508 + phase_quantum); 509 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), 510 + phase_quantum); 511 + } 512 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); 513 + } 514 + 515 + } 516 + 517 + /** 518 + * sdma_v5_2_enable - stop the async dma engines 519 + * 520 + * @adev: amdgpu_device pointer 521 + * @enable: enable/disable the DMA MEs. 522 + * 523 + * Halt or unhalt the async dma engines. 524 + */ 525 + static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) 526 + { 527 + u32 f32_cntl; 528 + int i; 529 + 530 + if (enable == false) { 531 + sdma_v5_2_gfx_stop(adev); 532 + sdma_v5_2_rlc_stop(adev); 533 + } 534 + 535 + for (i = 0; i < adev->sdma.num_instances; i++) { 536 + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); 537 + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); 538 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); 539 + } 540 + } 541 + 542 + /** 543 + * sdma_v5_2_gfx_resume - setup and start the async dma engines 544 + * 545 + * @adev: amdgpu_device pointer 546 + * 547 + * Set up the gfx DMA ring buffers and enable them. 548 + * Returns 0 for success, error for failure. 549 + */ 550 + static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) 551 + { 552 + struct amdgpu_ring *ring; 553 + u32 rb_cntl, ib_cntl; 554 + u32 rb_bufsz; 555 + u32 wb_offset; 556 + u32 doorbell; 557 + u32 doorbell_offset; 558 + u32 temp; 559 + u32 wptr_poll_cntl; 560 + u64 wptr_gpu_addr; 561 + int i, r; 562 + 563 + for (i = 0; i < adev->sdma.num_instances; i++) { 564 + ring = &adev->sdma.instance[i].ring; 565 + wb_offset = (ring->rptr_offs * 4); 566 + 567 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); 568 + 569 + /* Set ring buffer size in dwords */ 570 + rb_bufsz = order_base_2(ring->ring_size / 4); 571 + rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); 572 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); 573 + #ifdef __BIG_ENDIAN 574 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); 575 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, 576 + RPTR_WRITEBACK_SWAP_ENABLE, 1); 577 + #endif 578 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 579 + 580 + /* Initialize the ring buffer's read and write pointers */ 581 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); 582 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); 583 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); 584 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); 585 + 586 + /* setup the wptr shadow polling */ 587 + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 588 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), 589 + lower_32_bits(wptr_gpu_addr)); 590 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), 591 + upper_32_bits(wptr_gpu_addr)); 592 + wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, 593 + mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); 594 + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 595 + SDMA0_GFX_RB_WPTR_POLL_CNTL, 596 + F32_POLL_ENABLE, 1); 597 + WREG32(sdma_v5_2_get_reg_offset(adev, i, 
mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 598 + wptr_poll_cntl); 599 + 600 + /* set the wb address whether it's enabled or not */ 601 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), 602 + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); 603 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), 604 + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); 605 + 606 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); 607 + 608 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); 609 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); 610 + 611 + ring->wptr = 0; 612 + 613 + /* before programing wptr to a less value, need set minor_ptr_update first */ 614 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); 615 + 616 + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ 617 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); 618 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); 619 + } 620 + 621 + doorbell = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); 622 + doorbell_offset = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); 623 + 624 + if (ring->use_doorbell) { 625 + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); 626 + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, 627 + OFFSET, ring->doorbell_index); 628 + } else { 629 + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); 630 + } 631 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); 632 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); 633 + 634 + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, 635 + ring->doorbell_index, 20); 636 + 637 + if (amdgpu_sriov_vf(adev)) 638 + sdma_v5_2_ring_set_wptr(ring); 639 + 640 + /* set minor_ptr_update to 0 after wptr programed */ 641 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); 642 + 643 + /* set utc l1 enable flag always to 1 */ 644 + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); 645 + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); 646 + 647 + /* enable MCBP */ 648 + temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); 649 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); 650 + 651 + /* Set up RESP_MODE to non-copy addresses */ 652 + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); 653 + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); 654 + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); 655 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); 656 + 657 + /* program default cache read and write policy */ 658 + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); 659 + /* clean read policy and write policy bits */ 660 + temp &= 0xFF0FFF; 661 + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | 662 + (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | 663 + 0x01000000); 664 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); 665 + 666 + if (!amdgpu_sriov_vf(adev)) { 667 + /* unhalt engine */ 668 + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); 669 + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); 670 + 
WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); 671 + } 672 + 673 + /* enable DMA RB */ 674 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); 675 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 676 + 677 + ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); 678 + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); 679 + #ifdef __BIG_ENDIAN 680 + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); 681 + #endif 682 + /* enable DMA IBs */ 683 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); 684 + 685 + ring->sched.ready = true; 686 + 687 + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ 688 + sdma_v5_2_ctx_switch_enable(adev, true); 689 + sdma_v5_2_enable(adev, true); 690 + } 691 + 692 + r = amdgpu_ring_test_ring(ring); 693 + if (r) { 694 + ring->sched.ready = false; 695 + return r; 696 + } 697 + 698 + if (adev->mman.buffer_funcs_ring == ring) 699 + amdgpu_ttm_set_buffer_funcs_status(adev, true); 700 + } 701 + 702 + return 0; 703 + } 704 + 705 + /** 706 + * sdma_v5_2_rlc_resume - setup and start the async dma engines 707 + * 708 + * @adev: amdgpu_device pointer 709 + * 710 + * Set up the compute DMA queues and enable them. 711 + * Returns 0 for success, error for failure. 712 + */ 713 + static int sdma_v5_2_rlc_resume(struct amdgpu_device *adev) 714 + { 715 + return 0; 716 + } 717 + 718 + /** 719 + * sdma_v5_2_load_microcode - load the sDMA ME ucode 720 + * 721 + * @adev: amdgpu_device pointer 722 + * 723 + * Loads the sDMA0/1/2/3 ucode. 724 + * Returns 0 for success, -EINVAL if the ucode is not available. 725 + */ 726 + static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) 727 + { 728 + const struct sdma_firmware_header_v1_0 *hdr; 729 + const __le32 *fw_data; 730 + u32 fw_size; 731 + int i, j; 732 + 733 + /* halt the MEs */ 734 + sdma_v5_2_enable(adev, false); 735 + 736 + for (i = 0; i < adev->sdma.num_instances; i++) { 737 + if (!adev->sdma.instance[i].fw) 738 + return -EINVAL; 739 + 740 + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; 741 + amdgpu_ucode_print_sdma_hdr(&hdr->header); 742 + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 743 + 744 + fw_data = (const __le32 *) 745 + (adev->sdma.instance[i].fw->data + 746 + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 747 + 748 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); 749 + 750 + for (j = 0; j < fw_size; j++) { 751 + if (amdgpu_emu_mode == 1 && j % 500 == 0) 752 + msleep(1); 753 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); 754 + } 755 + 756 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); 757 + } 758 + 759 + return 0; 760 + } 761 + 762 + /** 763 + * sdma_v5_2_start - setup and start the async dma engines 764 + * 765 + * @adev: amdgpu_device pointer 766 + * 767 + * Set up the DMA engines and enable them. 768 + * Returns 0 for success, error for failure. 
769 + */ 770 + static int sdma_v5_2_start(struct amdgpu_device *adev) 771 + { 772 + int r = 0; 773 + 774 + if (amdgpu_sriov_vf(adev)) { 775 + sdma_v5_2_ctx_switch_enable(adev, false); 776 + sdma_v5_2_enable(adev, false); 777 + 778 + /* set RB registers */ 779 + r = sdma_v5_2_gfx_resume(adev); 780 + return r; 781 + } 782 + 783 + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 784 + r = sdma_v5_2_load_microcode(adev); 785 + if (r) 786 + return r; 787 + 788 + /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */ 789 + if (amdgpu_emu_mode == 1) 790 + msleep(1000); 791 + } 792 + 793 + /* unhalt the MEs */ 794 + sdma_v5_2_enable(adev, true); 795 + /* enable sdma ring preemption */ 796 + sdma_v5_2_ctx_switch_enable(adev, true); 797 + 798 + /* start the gfx rings and rlc compute queues */ 799 + r = sdma_v5_2_gfx_resume(adev); 800 + if (r) 801 + return r; 802 + r = sdma_v5_2_rlc_resume(adev); 803 + 804 + return r; 805 + } 806 + 807 + /** 808 + * sdma_v5_2_ring_test_ring - simple async dma engine test 809 + * 810 + * @ring: amdgpu_ring structure holding ring information 811 + * 812 + * Test the DMA engine by writing using it to write an 813 + * value to memory. 814 + * Returns 0 for success, error for failure. 815 + */ 816 + static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) 817 + { 818 + struct amdgpu_device *adev = ring->adev; 819 + unsigned i; 820 + unsigned index; 821 + int r; 822 + u32 tmp; 823 + u64 gpu_addr; 824 + 825 + r = amdgpu_device_wb_get(adev, &index); 826 + if (r) { 827 + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); 828 + return r; 829 + } 830 + 831 + gpu_addr = adev->wb.gpu_addr + (index * 4); 832 + tmp = 0xCAFEDEAD; 833 + adev->wb.wb[index] = cpu_to_le32(tmp); 834 + 835 + r = amdgpu_ring_alloc(ring, 5); 836 + if (r) { 837 + DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 838 + amdgpu_device_wb_free(adev, index); 839 + return r; 840 + } 841 + 842 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 843 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); 844 + amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 845 + amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 846 + amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)); 847 + amdgpu_ring_write(ring, 0xDEADBEEF); 848 + amdgpu_ring_commit(ring); 849 + 850 + for (i = 0; i < adev->usec_timeout; i++) { 851 + tmp = le32_to_cpu(adev->wb.wb[index]); 852 + if (tmp == 0xDEADBEEF) 853 + break; 854 + if (amdgpu_emu_mode == 1) 855 + msleep(1); 856 + else 857 + udelay(1); 858 + } 859 + 860 + if (i >= adev->usec_timeout) 861 + r = -ETIMEDOUT; 862 + 863 + amdgpu_device_wb_free(adev, index); 864 + 865 + return r; 866 + } 867 + 868 + /** 869 + * sdma_v5_2_ring_test_ib - test an IB on the DMA engine 870 + * 871 + * @ring: amdgpu_ring structure holding ring information 872 + * 873 + * Test a simple IB in the DMA ring. 874 + * Returns 0 on success, error on failure. 
875 + */ 876 + static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) 877 + { 878 + struct amdgpu_device *adev = ring->adev; 879 + struct amdgpu_ib ib; 880 + struct dma_fence *f = NULL; 881 + unsigned index; 882 + long r; 883 + u32 tmp = 0; 884 + u64 gpu_addr; 885 + 886 + r = amdgpu_device_wb_get(adev, &index); 887 + if (r) { 888 + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); 889 + return r; 890 + } 891 + 892 + gpu_addr = adev->wb.gpu_addr + (index * 4); 893 + tmp = 0xCAFEDEAD; 894 + adev->wb.wb[index] = cpu_to_le32(tmp); 895 + memset(&ib, 0, sizeof(ib)); 896 + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); 897 + if (r) { 898 + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 899 + goto err0; 900 + } 901 + 902 + ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 903 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 904 + ib.ptr[1] = lower_32_bits(gpu_addr); 905 + ib.ptr[2] = upper_32_bits(gpu_addr); 906 + ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); 907 + ib.ptr[4] = 0xDEADBEEF; 908 + ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 909 + ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 910 + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 911 + ib.length_dw = 8; 912 + 913 + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 914 + if (r) 915 + goto err1; 916 + 917 + r = dma_fence_wait_timeout(f, false, timeout); 918 + if (r == 0) { 919 + DRM_ERROR("amdgpu: IB test timed out\n"); 920 + r = -ETIMEDOUT; 921 + goto err1; 922 + } else if (r < 0) { 923 + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 924 + goto err1; 925 + } 926 + tmp = le32_to_cpu(adev->wb.wb[index]); 927 + if (tmp == 0xDEADBEEF) 928 + r = 0; 929 + else 930 + r = -EINVAL; 931 + 932 + err1: 933 + amdgpu_ib_free(adev, &ib, NULL); 934 + dma_fence_put(f); 935 + err0: 936 + amdgpu_device_wb_free(adev, index); 937 + return r; 938 + } 939 + 940 + 941 + /** 942 + * sdma_v5_2_vm_copy_pte - update PTEs by copying them from the GART 943 + * 944 + * @ib: indirect buffer to fill with commands 945 + * @pe: addr of the page entry 946 + * @src: src addr to copy from 947 + * @count: number of page entries to update 948 + * 949 + * Update PTEs by copying them from the GART using sDMA. 950 + */ 951 + static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib, 952 + uint64_t pe, uint64_t src, 953 + unsigned count) 954 + { 955 + unsigned bytes = count * 8; 956 + 957 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 958 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 959 + ib->ptr[ib->length_dw++] = bytes - 1; 960 + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 961 + ib->ptr[ib->length_dw++] = lower_32_bits(src); 962 + ib->ptr[ib->length_dw++] = upper_32_bits(src); 963 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); 964 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 965 + 966 + } 967 + 968 + /** 969 + * sdma_v5_2_vm_write_pte - update PTEs by writing them manually 970 + * 971 + * @ib: indirect buffer to fill with commands 972 + * @pe: addr of the page entry 973 + * @addr: dst addr to write into pe 974 + * @count: number of page entries to update 975 + * @incr: increase next addr by incr bytes 976 + * @flags: access flags 977 + * 978 + * Update PTEs by writing them manually using sDMA. 
979 + */ 980 + static void sdma_v5_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 981 + uint64_t value, unsigned count, 982 + uint32_t incr) 983 + { 984 + unsigned ndw = count * 2; 985 + 986 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 987 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 988 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); 989 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 990 + ib->ptr[ib->length_dw++] = ndw - 1; 991 + for (; ndw > 0; ndw -= 2) { 992 + ib->ptr[ib->length_dw++] = lower_32_bits(value); 993 + ib->ptr[ib->length_dw++] = upper_32_bits(value); 994 + value += incr; 995 + } 996 + } 997 + 998 + /** 999 + * sdma_v5_2_vm_set_pte_pde - update the page tables using sDMA 1000 + * 1001 + * @ib: indirect buffer to fill with commands 1002 + * @pe: addr of the page entry 1003 + * @addr: dst addr to write into pe 1004 + * @count: number of page entries to update 1005 + * @incr: increase next addr by incr bytes 1006 + * @flags: access flags 1007 + * 1008 + * Update the page tables using sDMA. 1009 + */ 1010 + static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib, 1011 + uint64_t pe, 1012 + uint64_t addr, unsigned count, 1013 + uint32_t incr, uint64_t flags) 1014 + { 1015 + /* for physically contiguous pages (vram) */ 1016 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE); 1017 + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ 1018 + ib->ptr[ib->length_dw++] = upper_32_bits(pe); 1019 + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ 1020 + ib->ptr[ib->length_dw++] = upper_32_bits(flags); 1021 + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ 1022 + ib->ptr[ib->length_dw++] = upper_32_bits(addr); 1023 + ib->ptr[ib->length_dw++] = incr; /* increment size */ 1024 + ib->ptr[ib->length_dw++] = 0; 1025 + ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ 1026 + } 1027 + 1028 + /** 1029 + * sdma_v5_2_ring_pad_ib - pad the IB 1030 + * 1031 + * @ib: indirect buffer to fill with padding 1032 + * 1033 + * Pad the IB with NOPs to a boundary multiple of 8. 1034 + */ 1035 + static void sdma_v5_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 1036 + { 1037 + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); 1038 + u32 pad_count; 1039 + int i; 1040 + 1041 + pad_count = (-ib->length_dw) & 0x7; 1042 + for (i = 0; i < pad_count; i++) 1043 + if (sdma && sdma->burst_nop && (i == 0)) 1044 + ib->ptr[ib->length_dw++] = 1045 + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | 1046 + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); 1047 + else 1048 + ib->ptr[ib->length_dw++] = 1049 + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); 1050 + } 1051 + 1052 + 1053 + /** 1054 + * sdma_v5_2_ring_emit_pipeline_sync - sync the pipeline 1055 + * 1056 + * @ring: amdgpu_ring pointer 1057 + * 1058 + * Make sure all previous operations are completed (CIK). 
1059 + */ 1060 + static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 1061 + { 1062 + uint32_t seq = ring->fence_drv.sync_seq; 1063 + uint64_t addr = ring->fence_drv.gpu_addr; 1064 + 1065 + /* wait for idle */ 1066 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1067 + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1068 + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ 1069 + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); 1070 + amdgpu_ring_write(ring, addr & 0xfffffffc); 1071 + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 1072 + amdgpu_ring_write(ring, seq); /* reference */ 1073 + amdgpu_ring_write(ring, 0xffffffff); /* mask */ 1074 + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1075 + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ 1076 + } 1077 + 1078 + 1079 + /** 1080 + * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA 1081 + * 1082 + * @ring: amdgpu_ring pointer 1083 + * @vm: amdgpu_vm pointer 1084 + * 1085 + * Update the page table base and flush the VM TLB 1086 + * using sDMA. 1087 + */ 1088 + static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring, 1089 + unsigned vmid, uint64_t pd_addr) 1090 + { 1091 + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 1092 + } 1093 + 1094 + static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring, 1095 + uint32_t reg, uint32_t val) 1096 + { 1097 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1098 + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1099 + amdgpu_ring_write(ring, reg); 1100 + amdgpu_ring_write(ring, val); 1101 + } 1102 + 1103 + static void sdma_v5_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 1104 + uint32_t val, uint32_t mask) 1105 + { 1106 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1107 + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1108 + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ 1109 + amdgpu_ring_write(ring, reg << 2); 1110 + amdgpu_ring_write(ring, 0); 1111 + amdgpu_ring_write(ring, val); /* reference */ 1112 + amdgpu_ring_write(ring, mask); /* mask */ 1113 + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1114 + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); 1115 + } 1116 + 1117 + static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 1118 + uint32_t reg0, uint32_t reg1, 1119 + uint32_t ref, uint32_t mask) 1120 + { 1121 + amdgpu_ring_emit_wreg(ring, reg0, ref); 1122 + /* wait for a cycle to reset vm_inv_eng*_ack */ 1123 + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); 1124 + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); 1125 + } 1126 + 1127 + static int sdma_v5_2_early_init(void *handle) 1128 + { 1129 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1130 + 1131 + adev->sdma.num_instances = 4; 1132 + 1133 + sdma_v5_2_set_ring_funcs(adev); 1134 + sdma_v5_2_set_buffer_funcs(adev); 1135 + sdma_v5_2_set_vm_pte_funcs(adev); 1136 + sdma_v5_2_set_irq_funcs(adev); 1137 + 1138 + return 0; 1139 + } 1140 + 1141 + static int sdma_v5_2_sw_init(void *handle) 1142 + { 1143 + struct amdgpu_ring *ring; 1144 + int r, i; 1145 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1146 + 1147 + /* SDMA trap event */ 1148 + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 1149 + SDMA0_5_0__SRCID__SDMA_TRAP, 1150 + &adev->sdma.trap_irq); 1151 + if (r) 1152 + return r; 1153 + 1154 + /* SDMA trap event */ 1155 + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 1156 + SDMA1_5_0__SRCID__SDMA_TRAP, 1157 + 
&adev->sdma.trap_irq); 1158 + if (r) 1159 + return r; 1160 + 1161 + /* SDMA trap event */ 1162 + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA2, 1163 + SDMA2_5_0__SRCID__SDMA_TRAP, 1164 + &adev->sdma.trap_irq); 1165 + if (r) 1166 + return r; 1167 + 1168 + /* SDMA trap event */ 1169 + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA3, 1170 + SDMA3_5_0__SRCID__SDMA_TRAP, 1171 + &adev->sdma.trap_irq); 1172 + if (r) 1173 + return r; 1174 + 1175 + r = sdma_v5_2_init_microcode(adev); 1176 + if (r) { 1177 + DRM_ERROR("Failed to load sdma firmware!\n"); 1178 + return r; 1179 + } 1180 + 1181 + for (i = 0; i < adev->sdma.num_instances; i++) { 1182 + ring = &adev->sdma.instance[i].ring; 1183 + ring->ring_obj = NULL; 1184 + ring->use_doorbell = true; 1185 + 1186 + DRM_INFO("use_doorbell being set to: [%s]\n", 1187 + ring->use_doorbell?"true":"false"); 1188 + 1189 + ring->doorbell_index = 1190 + (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset 1191 + 1192 + sprintf(ring->name, "sdma%d", i); 1193 + r = amdgpu_ring_init(adev, ring, 1024, 1194 + &adev->sdma.trap_irq, 1195 + AMDGPU_SDMA_IRQ_INSTANCE0 + i, 1196 + AMDGPU_RING_PRIO_DEFAULT); 1197 + if (r) 1198 + return r; 1199 + } 1200 + 1201 + return r; 1202 + } 1203 + 1204 + static int sdma_v5_2_sw_fini(void *handle) 1205 + { 1206 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1207 + int i; 1208 + 1209 + for (i = 0; i < adev->sdma.num_instances; i++) 1210 + amdgpu_ring_fini(&adev->sdma.instance[i].ring); 1211 + 1212 + return 0; 1213 + } 1214 + 1215 + static int sdma_v5_2_hw_init(void *handle) 1216 + { 1217 + int r; 1218 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1219 + 1220 + sdma_v5_2_init_golden_registers(adev); 1221 + 1222 + r = sdma_v5_2_start(adev); 1223 + 1224 + return r; 1225 + } 1226 + 1227 + static int sdma_v5_2_hw_fini(void *handle) 1228 + { 1229 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1230 + 1231 + if (amdgpu_sriov_vf(adev)) 1232 + return 0; 1233 + 1234 + sdma_v5_2_ctx_switch_enable(adev, false); 1235 + sdma_v5_2_enable(adev, false); 1236 + 1237 + return 0; 1238 + } 1239 + 1240 + static int sdma_v5_2_suspend(void *handle) 1241 + { 1242 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1243 + 1244 + return sdma_v5_2_hw_fini(adev); 1245 + } 1246 + 1247 + static int sdma_v5_2_resume(void *handle) 1248 + { 1249 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1250 + 1251 + return sdma_v5_2_hw_init(adev); 1252 + } 1253 + 1254 + static bool sdma_v5_2_is_idle(void *handle) 1255 + { 1256 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1257 + u32 i; 1258 + 1259 + for (i = 0; i < adev->sdma.num_instances; i++) { 1260 + u32 tmp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); 1261 + 1262 + if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) 1263 + return false; 1264 + } 1265 + 1266 + return true; 1267 + } 1268 + 1269 + static int sdma_v5_2_wait_for_idle(void *handle) 1270 + { 1271 + unsigned i; 1272 + u32 sdma0, sdma1, sdma2, sdma3; 1273 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1274 + 1275 + for (i = 0; i < adev->usec_timeout; i++) { 1276 + sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); 1277 + sdma1 = RREG32(sdma_v5_2_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); 1278 + sdma2 = RREG32(sdma_v5_2_get_reg_offset(adev, 2, mmSDMA0_STATUS_REG)); 1279 + sdma3 = RREG32(sdma_v5_2_get_reg_offset(adev, 3, mmSDMA0_STATUS_REG)); 1280 + 1281 + if (sdma0 & sdma1 & sdma2 & sdma3 & 
SDMA0_STATUS_REG__IDLE_MASK) 1282 + return 0; 1283 + udelay(1); 1284 + } 1285 + return -ETIMEDOUT; 1286 + } 1287 + 1288 + static int sdma_v5_2_soft_reset(void *handle) 1289 + { 1290 + /* todo */ 1291 + 1292 + return 0; 1293 + } 1294 + 1295 + static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring) 1296 + { 1297 + int i, r = 0; 1298 + struct amdgpu_device *adev = ring->adev; 1299 + u32 index = 0; 1300 + u64 sdma_gfx_preempt; 1301 + 1302 + amdgpu_sdma_get_index_from_ring(ring, &index); 1303 + sdma_gfx_preempt = 1304 + sdma_v5_2_get_reg_offset(adev, index, mmSDMA0_GFX_PREEMPT); 1305 + 1306 + /* assert preemption condition */ 1307 + amdgpu_ring_set_preempt_cond_exec(ring, false); 1308 + 1309 + /* emit the trailing fence */ 1310 + ring->trail_seq += 1; 1311 + amdgpu_ring_alloc(ring, 10); 1312 + sdma_v5_2_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 1313 + ring->trail_seq, 0); 1314 + amdgpu_ring_commit(ring); 1315 + 1316 + /* assert IB preemption */ 1317 + WREG32(sdma_gfx_preempt, 1); 1318 + 1319 + /* poll the trailing fence */ 1320 + for (i = 0; i < adev->usec_timeout; i++) { 1321 + if (ring->trail_seq == 1322 + le32_to_cpu(*(ring->trail_fence_cpu_addr))) 1323 + break; 1324 + udelay(1); 1325 + } 1326 + 1327 + if (i >= adev->usec_timeout) { 1328 + r = -EINVAL; 1329 + DRM_ERROR("ring %d failed to be preempted\n", ring->idx); 1330 + } 1331 + 1332 + /* deassert IB preemption */ 1333 + WREG32(sdma_gfx_preempt, 0); 1334 + 1335 + /* deassert the preemption condition */ 1336 + amdgpu_ring_set_preempt_cond_exec(ring, true); 1337 + return r; 1338 + } 1339 + 1340 + static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev, 1341 + struct amdgpu_irq_src *source, 1342 + unsigned type, 1343 + enum amdgpu_interrupt_state state) 1344 + { 1345 + u32 sdma_cntl; 1346 + 1347 + u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL); 1348 + 1349 + sdma_cntl = RREG32(reg_offset); 1350 + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1351 + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 1352 + WREG32(reg_offset, sdma_cntl); 1353 + 1354 + return 0; 1355 + } 1356 + 1357 + static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev, 1358 + struct amdgpu_irq_src *source, 1359 + struct amdgpu_iv_entry *entry) 1360 + { 1361 + DRM_DEBUG("IH: SDMA trap\n"); 1362 + switch (entry->client_id) { 1363 + case SOC15_IH_CLIENTID_SDMA0: 1364 + switch (entry->ring_id) { 1365 + case 0: 1366 + amdgpu_fence_process(&adev->sdma.instance[0].ring); 1367 + break; 1368 + case 1: 1369 + /* XXX compute */ 1370 + break; 1371 + case 2: 1372 + /* XXX compute */ 1373 + break; 1374 + case 3: 1375 + /* XXX page queue*/ 1376 + break; 1377 + } 1378 + break; 1379 + case SOC15_IH_CLIENTID_SDMA1: 1380 + switch (entry->ring_id) { 1381 + case 0: 1382 + amdgpu_fence_process(&adev->sdma.instance[1].ring); 1383 + break; 1384 + case 1: 1385 + /* XXX compute */ 1386 + break; 1387 + case 2: 1388 + /* XXX compute */ 1389 + break; 1390 + case 3: 1391 + /* XXX page queue*/ 1392 + break; 1393 + } 1394 + break; 1395 + case SOC15_IH_CLIENTID_SDMA2: 1396 + switch (entry->ring_id) { 1397 + case 0: 1398 + amdgpu_fence_process(&adev->sdma.instance[2].ring); 1399 + break; 1400 + case 1: 1401 + /* XXX compute */ 1402 + break; 1403 + case 2: 1404 + /* XXX compute */ 1405 + break; 1406 + case 3: 1407 + /* XXX page queue*/ 1408 + break; 1409 + } 1410 + break; 1411 + case SOC15_IH_CLIENTID_SDMA3: 1412 + switch (entry->ring_id) { 1413 + case 0: 1414 + amdgpu_fence_process(&adev->sdma.instance[3].ring); 1415 + break; 1416 + case 1: 1417 + /* XXX compute */ 1418 + break; 1419 + case 2: 1420 + /* XXX compute */ 1421 + break; 1422 + case 3: 1423 + /* XXX page queue*/ 1424 + break; 1425 + } 1426 + break; 1427 + } 1428 + return 0; 1429 + } 1430 + 1431 + static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev, 1432 + struct amdgpu_irq_src *source, 1433 + struct amdgpu_iv_entry *entry) 1434 + { 1435 + return 0; 1436 + } 1437 + 1438 + static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, 1439 + bool enable) 1440 + { 1441 + uint32_t data, def; 1442 + int i; 1443 + 1444 + for (i = 0; i < adev->sdma.num_instances; i++) { 1445 + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { 1446 + /* Enable sdma clock gating */ 1447 + def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); 1448 + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | 1449 + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | 1450 + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1451 + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1452 + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK | 1453 + SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK); 1454 + if (def != data) 1455 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data); 1456 + } else { 1457 + /* Disable sdma clock gating */ 1458 + def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); 1459 + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | 1460 + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | 1461 + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1462 + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1463 + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK | 1464 + SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK); 1465 + if (def != data) 1466 + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data); 1467 + } 1468 + } 1469 + } 1470 + 1471 + static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev, 1472 + bool enable) 1473 + { 1474 + uint32_t data, def; 1475 + int i; 1476 + 1477 + for (i = 0; i < adev->sdma.num_instances; i++) { 1478 + if (enable && (adev->cg_flags & 
AMD_CG_SUPPORT_SDMA_LS)) {
1479 + 			/* Enable sdma mem light sleep */
1480 + 			def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
1481 + 			data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1482 + 			if (def != data)
1483 + 				WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
1484 +
1485 + 		} else {
1486 + 			/* Disable sdma mem light sleep */
1487 + 			def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
1488 + 			data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1489 + 			if (def != data)
1490 + 				WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
1491 +
1492 + 		}
1493 + 	}
1494 + }
1495 +
1496 + static int sdma_v5_2_set_clockgating_state(void *handle,
1497 + 					   enum amd_clockgating_state state)
1498 + {
1499 + 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1500 +
1501 + 	if (amdgpu_sriov_vf(adev))
1502 + 		return 0;
1503 +
1504 + 	switch (adev->asic_type) {
1505 + 	case CHIP_SIENNA_CICHLID:
1506 + 		sdma_v5_2_update_medium_grain_clock_gating(adev,
1507 + 				state == AMD_CG_STATE_GATE ? true : false);
1508 + 		sdma_v5_2_update_medium_grain_light_sleep(adev,
1509 + 				state == AMD_CG_STATE_GATE ? true : false);
1510 + 		break;
1511 + 	default:
1512 + 		break;
1513 + 	}
1514 +
1515 + 	return 0;
1516 + }
1517 +
1518 + static int sdma_v5_2_set_powergating_state(void *handle,
1519 + 					   enum amd_powergating_state state)
1520 + {
1521 + 	return 0;
1522 + }
1523 +
1524 + static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
1525 + {
1526 + 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1527 + 	int data;
1528 +
1529 + 	if (amdgpu_sriov_vf(adev))
1530 + 		*flags = 0;
1531 +
1532 + 	/* AMD_CG_SUPPORT_SDMA_LS */
1533 + 	data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
1534 + 	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
1535 + 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
1536 + }
1537 +
1538 + const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
1539 + 	.name = "sdma_v5_2",
1540 + 	.early_init = sdma_v5_2_early_init,
1541 + 	.late_init = NULL,
1542 + 	.sw_init = sdma_v5_2_sw_init,
1543 + 	.sw_fini = sdma_v5_2_sw_fini,
1544 + 	.hw_init = sdma_v5_2_hw_init,
1545 + 	.hw_fini = sdma_v5_2_hw_fini,
1546 + 	.suspend = sdma_v5_2_suspend,
1547 + 	.resume = sdma_v5_2_resume,
1548 + 	.is_idle = sdma_v5_2_is_idle,
1549 + 	.wait_for_idle = sdma_v5_2_wait_for_idle,
1550 + 	.soft_reset = sdma_v5_2_soft_reset,
1551 + 	.set_clockgating_state = sdma_v5_2_set_clockgating_state,
1552 + 	.set_powergating_state = sdma_v5_2_set_powergating_state,
1553 + 	.get_clockgating_state = sdma_v5_2_get_clockgating_state,
1554 + };
1555 +
1556 + static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
1557 + 	.type = AMDGPU_RING_TYPE_SDMA,
1558 + 	.align_mask = 0xf,
1559 + 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
1560 + 	.support_64bit_ptrs = true,
1561 + 	.vmhub = AMDGPU_GFXHUB_0,
1562 + 	.get_rptr = sdma_v5_2_ring_get_rptr,
1563 + 	.get_wptr = sdma_v5_2_ring_get_wptr,
1564 + 	.set_wptr = sdma_v5_2_ring_set_wptr,
1565 + 	.emit_frame_size =
1566 + 		5 + /* sdma_v5_2_ring_init_cond_exec */
1567 + 		6 + /* sdma_v5_2_ring_emit_hdp_flush */
1568 + 		3 + /* hdp_invalidate */
1569 + 		6 + /* sdma_v5_2_ring_emit_pipeline_sync */
1570 + 		/* sdma_v5_2_ring_emit_vm_flush */
1571 + 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1572 + 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
1573 + 		10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */
1574 + 	.emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */
1575 + 	.emit_ib = sdma_v5_2_ring_emit_ib,
1576 + 	.emit_fence = sdma_v5_2_ring_emit_fence,
1577 + 	.emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync,
1578 + 	.emit_vm_flush = sdma_v5_2_ring_emit_vm_flush,
1579 + 	.emit_hdp_flush = sdma_v5_2_ring_emit_hdp_flush,
1580 + 	.test_ring = sdma_v5_2_ring_test_ring,
1581 + 	.test_ib = sdma_v5_2_ring_test_ib,
1582 + 	.insert_nop = sdma_v5_2_ring_insert_nop,
1583 + 	.pad_ib = sdma_v5_2_ring_pad_ib,
1584 + 	.emit_wreg = sdma_v5_2_ring_emit_wreg,
1585 + 	.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
1586 + 	.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
1587 + 	.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
1588 + 	.patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
1589 + 	.preempt_ib = sdma_v5_2_ring_preempt_ib,
1590 + };
1591 +
1592 + static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
1593 + {
1594 + 	int i;
1595 +
1596 + 	for (i = 0; i < adev->sdma.num_instances; i++) {
1597 + 		adev->sdma.instance[i].ring.funcs = &sdma_v5_2_ring_funcs;
1598 + 		adev->sdma.instance[i].ring.me = i;
1599 + 	}
1600 + }
1601 +
1602 + static const struct amdgpu_irq_src_funcs sdma_v5_2_trap_irq_funcs = {
1603 + 	.set = sdma_v5_2_set_trap_irq_state,
1604 + 	.process = sdma_v5_2_process_trap_irq,
1605 + };
1606 +
1607 + static const struct amdgpu_irq_src_funcs sdma_v5_2_illegal_inst_irq_funcs = {
1608 + 	.process = sdma_v5_2_process_illegal_inst_irq,
1609 + };
1610 +
1611 + static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
1612 + {
1613 + 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
1614 + 					adev->sdma.num_instances;
1615 + 	adev->sdma.trap_irq.funcs = &sdma_v5_2_trap_irq_funcs;
1616 + 	adev->sdma.illegal_inst_irq.funcs = &sdma_v5_2_illegal_inst_irq_funcs;
1617 + }
1618 +
1619 + /**
1620 +  * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine
1621 +  *
1622 +  * @ib: indirect buffer to copy to
1623 +  * @src_offset: src GPU address
1624 +  * @dst_offset: dst GPU address
1625 +  * @byte_count: number of bytes to xfer
1626 +  *
1627 +  * Copy GPU buffers using the DMA engine.
1628 +  * Used by the amdgpu ttm implementation to move pages if
1629 +  * registered as the asic copy callback.
1630 +  */
1631 + static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
1632 + 				       uint64_t src_offset,
1633 + 				       uint64_t dst_offset,
1634 + 				       uint32_t byte_count,
1635 + 				       bool tmz)
1636 + {
1637 + 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1638 + 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
1639 + 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
1640 + 	ib->ptr[ib->length_dw++] = byte_count - 1;
1641 + 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1642 + 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1643 + 	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1644 + 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1645 + 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1646 + }
1647 +
1648 + /**
1649 +  * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine
1650 +  *
1651 +  * @ib: indirect buffer to fill
1652 +  * @src_data: value to write to buffer
1653 +  * @dst_offset: dst GPU address
1654 +  * @byte_count: number of bytes to xfer
1655 +  *
1656 +  * Fill GPU buffers using the DMA engine.
1657 +  */
1658 + static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib,
1659 + 				       uint32_t src_data,
1660 + 				       uint64_t dst_offset,
1661 + 				       uint32_t byte_count)
1662 + {
1663 + 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
1664 + 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1665 + 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1666 + 	ib->ptr[ib->length_dw++] = src_data;
1667 + 	ib->ptr[ib->length_dw++] = byte_count - 1;
1668 + }
1669 +
1670 + static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = {
1671 + 	.copy_max_bytes = 0x400000,
1672 + 	.copy_num_dw = 7,
1673 + 	.emit_copy_buffer = sdma_v5_2_emit_copy_buffer,
1674 +
1675 + 	.fill_max_bytes = 0x400000,
1676 + 	.fill_num_dw = 5,
1677 + 	.emit_fill_buffer = sdma_v5_2_emit_fill_buffer,
1678 + };
1679 +
1680 + static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev)
1681 + {
1682 + 	if (adev->mman.buffer_funcs == NULL) {
1683 + 		adev->mman.buffer_funcs = &sdma_v5_2_buffer_funcs;
1684 + 		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1685 + 	}
1686 + }
1687 +
1688 + static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = {
1689 + 	.copy_pte_num_dw = 7,
1690 + 	.copy_pte = sdma_v5_2_vm_copy_pte,
1691 + 	.write_pte = sdma_v5_2_vm_write_pte,
1692 + 	.set_pte_pde = sdma_v5_2_vm_set_pte_pde,
1693 + };
1694 +
1695 + static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev)
1696 + {
1697 + 	unsigned i;
1698 +
1699 + 	if (adev->vm_manager.vm_pte_funcs == NULL) {
1700 + 		adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs;
1701 + 		for (i = 0; i < adev->sdma.num_instances; i++) {
1702 + 			adev->vm_manager.vm_pte_scheds[i] =
1703 + 				&adev->sdma.instance[i].ring.sched;
1704 + 		}
1705 + 		adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
1706 + 	}
1707 + }
1708 +
1709 + const struct amdgpu_ip_block_version sdma_v5_2_ip_block = {
1710 + 	.type = AMD_IP_BLOCK_TYPE_SDMA,
1711 + 	.major = 5,
1712 + 	.minor = 2,
1713 + 	.rev = 0,
1714 + 	.funcs = &sdma_v5_2_ip_funcs,
1715 + };
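Note on the buffer callbacks above: copy_num_dw = 7 and fill_num_dw = 5 match the dword counts emitted by sdma_v5_2_emit_copy_buffer and sdma_v5_2_emit_fill_buffer, each packet covers at most 0x400000 bytes (copy_max_bytes / fill_max_bytes), and the length field is encoded as byte_count - 1. As a minimal sketch only, assuming just those limits (the helper below is hypothetical and not part of this patch), a caller could bound the worst-case IB size for a large copy like this:

	#include <linux/kernel.h>
	#include <linux/types.h>

	/* Illustration only (hypothetical helper): worst-case dword budget for
	 * copying byte_count bytes, assuming the sdma_v5_2 limits shown above:
	 * at most 0x400000 bytes and 7 dwords per COPY_LINEAR packet.
	 */
	static u64 sdma_v5_2_copy_ib_dw(u64 byte_count)
	{
		u64 max_bytes = 0x400000;			/* copy_max_bytes */
		u64 num_pkts = DIV_ROUND_UP(byte_count, max_bytes);

		return num_pkts * 7;				/* copy_num_dw per packet */
	}

This roughly mirrors how the shared amdgpu buffer-copy path splits a transfer into maximum-size packets before reserving IB space; the actual reservation logic lives in the common TTM/buffer code rather than in this file.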
+30
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
··· 1 + /*
2 +  * Copyright 2019 Advanced Micro Devices, Inc.
3 +  *
4 +  * Permission is hereby granted, free of charge, to any person obtaining a
5 +  * copy of this software and associated documentation files (the "Software"),
6 +  * to deal in the Software without restriction, including without limitation
7 +  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 +  * and/or sell copies of the Software, and to permit persons to whom the
9 +  * Software is furnished to do so, subject to the following conditions:
10 +  *
11 +  * The above copyright notice and this permission notice shall be included in
12 +  * all copies or substantial portions of the Software.
13 +  *
14 +  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 +  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 +  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 +  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 +  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 +  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 +  * OTHER DEALINGS IN THE SOFTWARE.
21 +  *
22 +  */
23 +
24 + #ifndef __SDMA_V5_2_H__
25 + #define __SDMA_V5_2_H__
26 +
27 + extern const struct amd_ip_funcs sdma_v5_2_ip_funcs;
28 + extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block;
29 +
30 + #endif /* __SDMA_V5_2_H__ */
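For completeness, here is a short sketch of how the clock-gating state exported by this block could be read back through the amd_ip_funcs table declared above. It is illustrative only and not part of the patch (the helper name is made up); it relies only on the get_clockgating_state hook and the AMD_CG_SUPPORT_SDMA_LS flag set by sdma_v5_2_get_clockgating_state.

	#include "amdgpu.h"
	#include "sdma_v5_2.h"

	/* Illustration only (hypothetical helper): report whether SDMA memory
	 * light sleep is currently enabled. The get_clockgating_state hook reads
	 * SDMA0_POWER_CNTL and sets AMD_CG_SUPPORT_SDMA_LS when the
	 * MEM_POWER_OVERRIDE bit is set.
	 */
	static bool sdma_v5_2_mem_ls_enabled(struct amdgpu_device *adev)
	{
		u32 flags = 0;

		sdma_v5_2_ip_funcs.get_clockgating_state(adev, &flags);

		return !!(flags & AMD_CG_SUPPORT_SDMA_LS);
	}

In the driver itself this query would normally go through the common IP-block dispatch rather than calling the funcs table directly; the direct call is used here only to keep the sketch self-contained.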