Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/amdgpu: Add sdma v7_0 ip block support (v7)

v1: Add sdma v7_0 ip block support. (Likun)
v2: Move vmhub from ring_funcs to ring. (Hawking)
v3: Switch to AMDGPU_GFXHUB(0). (Hawking)
v4: Move microcode init into early_init. (Likun)
v5: Fix warnings (Alex)
v6: Squash in various fixes (Alex)
v7: Rebase (Alex)
v8: Rebase (Alex)

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
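The new IP block exported here (sdma_v7_0_ip_block) gets picked up during IP discovery like the other SDMA generations. A minimal sketch of that hookup, assuming the usual amdgpu_discovery.c switch on the discovered SDMA IP version (the hookup itself is not part of this diff):

	/* Sketch only -- the real selection lives in amdgpu_discovery.c,
	 * not in this patch. */
	case IP_VERSION(7, 0, 0):
	case IP_VERSION(7, 0, 1):
		amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block);
		break;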

Authored by Likun Gao and committed by Alex Deucher
b412351e 7c0ac603

+1668 -1
+2 -1
drivers/gpu/drm/amd/amdgpu/Makefile
···
 	sdma_v4_4_2.o \
 	sdma_v5_0.o \
 	sdma_v5_2.o \
-	sdma_v6_0.o
+	sdma_v6_0.o \
+	sdma_v7_0.o
 
 # add MES block
 amdgpu-y += \
+6
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
···
 	const struct common_firmware_header *header = NULL;
 	const struct sdma_firmware_header_v1_0 *hdr;
 	const struct sdma_firmware_header_v2_0 *hdr_v2;
+	const struct sdma_firmware_header_v3_0 *hdr_v3;
 
 	header = (const struct common_firmware_header *)
 		sdma_inst->fw->data;
···
 		hdr_v2 = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data;
 		sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version);
 		sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version);
+		break;
+	case 3:
+		hdr_v3 = (const struct sdma_firmware_header_v3_0 *)sdma_inst->fw->data;
+		sdma_inst->fw_version = le32_to_cpu(hdr_v3->header.ucode_version);
+		sdma_inst->feature_version = le32_to_cpu(hdr_v3->ucode_feature_version);
 		break;
 	default:
 		return -EINVAL;
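The new case relies on struct sdma_firmware_header_v3_0; this series only touches four fields (ucode_version via the common header, the feature version, and the payload offset/size read by sdma_v7_0_load_microcode() below). A plausible layout inferred from those accesses; the authoritative definition lives in amdgpu_ucode.h:

	/* Inferred sketch; see amdgpu_ucode.h for the real definition. */
	struct sdma_firmware_header_v3_0 {
		struct common_firmware_header header;
		uint32_t ucode_feature_version;
		uint32_t ucode_offset_bytes;	/* payload offset from start of fw blob */
		uint32_t ucode_size_bytes;	/* payload size in bytes */
	};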
+1630
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"

#include "gc/gc_12_0_0_offset.h"
#include "gc/gc_12_0_0_sh_mask.h"
#include "hdp/hdp_6_0_0_offset.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15_common.h"
#include "soc15.h"
#include "sdma_v6_0_0_pkt_open.h"
#include "nbio_v4_3.h"
#include "sdma_common.h"
#include "sdma_v7_0.h"
#include "v12_structs.h"

MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin");
MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin");

#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880
#define SDMA0_HYP_DEC_REG_END 0x589a
#define SDMA1_HYP_DEC_REG_OFFSET 0x20

static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v7_0_start(struct amdgpu_device *adev);

static u32 sdma_v7_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
	u32 base;

	if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
	    internal_offset <= SDMA0_HYP_DEC_REG_END) {
		base = adev->reg_offset[GC_HWIP][0][1];
		if (instance != 0)
			internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
	} else {
		base = adev->reg_offset[GC_HWIP][0][0];
		if (instance == 1)
			internal_offset += SDMA1_REG_OFFSET;
	}

	return base + internal_offset;
}

static unsigned sdma_v7_0_ring_init_cond_exec(struct amdgpu_ring *ring,
					      uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, 1);
	/* this is the offset we need to patch later */
	ret = ring->wptr & ring->buf_mask;
	/* insert dummy here and patch it later */
	amdgpu_ring_write(ring, 0);

	return ret;
}

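To make the offset helper above concrete, a worked example using this file's constants (assuming regSDMA0_QUEUE0_RB_CNTL lies outside the 0x5880-0x589a HYP_DEC window):

	/* Illustrative only: resolving a queue register for SDMA instance 1. */
	u32 off = sdma_v7_0_get_reg_offset(adev, 1, regSDMA0_QUEUE0_RB_CNTL);
	/* off == adev->reg_offset[GC_HWIP][0][0] + regSDMA0_QUEUE0_RB_CNTL
	 *        + SDMA1_REG_OFFSET (the fixed 0x600 instance-1 stride) */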
/**
 * sdma_v7_0_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware.
 */
static uint64_t sdma_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	u64 *rptr;

	/* XXX check if swapping is necessary on BE */
	rptr = (u64 *)ring->rptr_cpu_addr;

	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
	return ((*rptr) >> 2);
}

/**
 * sdma_v7_0_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware.
 */
static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	u64 wptr = 0;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
	}

	return wptr >> 2;
}

/**
 * sdma_v7_0_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware.
 */
static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *wptr_saved;
	uint32_t *is_queue_unmap;
	uint64_t aggregated_db_index;
	uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;

	DRM_DEBUG("Setting write pointer\n");

	if (ring->is_mes_queue) {
		wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
		is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
					      sizeof(uint32_t));
		aggregated_db_index =
			amdgpu_mes_get_aggregated_doorbell_index(adev,
								 ring->hw_prio);

		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr << 2);
		*wptr_saved = ring->wptr << 2;
		if (*is_queue_unmap) {
			WDOORBELL64(aggregated_db_index, ring->wptr << 2);
			DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
				  ring->doorbell_index, ring->wptr << 2);
			WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
		} else {
			DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
				  ring->doorbell_index, ring->wptr << 2);
			WDOORBELL64(ring->doorbell_index, ring->wptr << 2);

			if (*is_queue_unmap)
				WDOORBELL64(aggregated_db_index,
					    ring->wptr << 2);
		}
	} else {
		if (ring->use_doorbell) {
			DRM_DEBUG("Using doorbell -- "
				  "wptr_offs == 0x%08x "
				  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
				  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
				  ring->wptr_offs,
				  lower_32_bits(ring->wptr << 2),
				  upper_32_bits(ring->wptr << 2));
			/* XXX check if swapping is necessary on BE */
			atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
				     ring->wptr << 2);
			DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
				  ring->doorbell_index, ring->wptr << 2);
			WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
		} else {
			DRM_DEBUG("Not using doorbell -- "
				  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
				  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
				  ring->me,
				  lower_32_bits(ring->wptr << 2),
				  ring->me,
				  upper_32_bits(ring->wptr << 2));
			WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
							ring->me,
							regSDMA0_QUEUE0_RB_WPTR),
					lower_32_bits(ring->wptr << 2));
			WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
							ring->me,
							regSDMA0_QUEUE0_RB_WPTR_HI),
					upper_32_bits(ring->wptr << 2));
		}
	}
}

static void sdma_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->funcs->nop |
					  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * sdma_v7_0_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring.
 */
static void sdma_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on an 8-DW boundary--the next dword
	 * must be on an 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs such that, in
	 * modular arithmetic,
	 * wptr + 6 + x = 8k, k >= 0, which in C is,
	 * (wptr + 6 + x) % 8 = 0.
	 * The expression below is a solution for x.
	 */
	sdma_v7_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}

/**
 * sdma_v7_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
 *
 * @ring: amdgpu ring pointer
 *
 * Flush the IB by graphics cache rinse.
 */
static void sdma_v7_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
	uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
			    SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
			    SDMA_GCR_GLI_INV(1);

	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
			  SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
			  SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
			  SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
}

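A worked instance of the NOP-padding arithmetic in sdma_v7_0_ring_emit_ib() above (illustrative numbers):

	/* wptr % 8 == 5 on entry:
	 *   pad = (2 - 5) & 7 = 5 NOPs  ->  wptr becomes 10 (== 2 mod 8);
	 *   the 6-dword INDIRECT packet then occupies dwords 10..15,
	 *   so the next packet starts at 16, an 8-dword boundary.
	 */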
/**
 * sdma_v7_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 ref_and_mask = 0;
	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;

	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
	amdgpu_ring_write(ring, ref_and_mask); /* reference */
	amdgpu_ring_write(ring, ref_and_mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

/**
 * sdma_v7_0_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address the fence seq number is written to
 * @seq: sequence number to write
 * @flags: fence flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed.
 */
static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Uncached (UC) */
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		uint32_t ctx = ring->is_mes_queue ?
			(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
		/* generate an interrupt */
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
	}
}

/**
 * sdma_v7_0_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers.
 */
static void sdma_v7_0_gfx_stop(struct amdgpu_device *adev)
{
	u32 rb_cntl, ib_cntl;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
		ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
	}
}

/**
 * sdma_v7_0_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues.
 */
static void sdma_v7_0_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo */
}

/**
 * sdma_v7_0_ctx_switch_enable - stop the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 *
 * Halt or unhalt the async dma engines context switch.
 */
static void sdma_v7_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
}

/**
 * sdma_v7_0_enable - stop the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines.
 */
static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable)
{
	u32 mcu_cntl;
	int i;

	if (!enable) {
		sdma_v7_0_gfx_stop(adev);
		sdma_v7_0_rlc_stop(adev);
	}

	if (amdgpu_sriov_vf(adev))
		return;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
		mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_MCU_CNTL, HALT, enable ? 0 : 1);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), mcu_cntl);
	}
}

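All of the register updates above use the driver's generic bitfield helper; roughly, paraphrasing the REG_SET_FIELD macro from amdgpu.h (each field has generated __SHIFT/_MASK constants):

	/* Paraphrase of the existing macro, not a new definition:
	 * REG_SET_FIELD(v, SDMA0_MCU_CNTL, HALT, 1) ==
	 *   (v & ~SDMA0_MCU_CNTL__HALT_MASK) |
	 *   ((1 << SDMA0_MCU_CNTL__HALT__SHIFT) & SDMA0_MCU_CNTL__HALT_MASK)
	 * i.e. a read-modify-write of just the HALT field.
	 */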
/**
 * sdma_v7_0_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 doorbell;
	u32 doorbell_offset;
	u32 tmp;
	u64 wptr_gpu_addr;
	int i, r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		//if (!amdgpu_sriov_vf(adev))
		//	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
					RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);

		/* setup the wptr shadow polling */
		wptr_gpu_addr = ring->wptr_gpu_addr;
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
				lower_32_bits(wptr_gpu_addr));
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
				upper_32_bits(wptr_gpu_addr));

		/* set the wb address whether it's enabled or not */
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
				upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
				lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);

		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
		if (amdgpu_sriov_vf(adev))
			rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
		else
			rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);

		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);

		ring->wptr = 0;

		/* before programming wptr to a smaller value, need to set minor_ptr_update first */
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);

		if (!amdgpu_sriov_vf(adev)) { /* only bare metal uses register writes for the wptr */
			WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
			WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
		}

		doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
		doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));

		if (ring->use_doorbell) {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
							OFFSET, ring->doorbell_index);
		} else {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
		}
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);

		if (i == 0)
			adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
							      ring->doorbell_index,
							      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);

		if (amdgpu_sriov_vf(adev))
			sdma_v7_0_ring_set_wptr(ring);

		/* set minor_ptr_update to 0 after wptr programmed */
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);

		/* Set up RESP_MODE to non-copy addresses */
		tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
		tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), tmp);

		/* program default cache read and write policy */
		tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
		/* clean read policy and write policy bits */
		tmp &= 0xFF0FFF;
		tmp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
			(CACHE_WRITE_POLICY_L2__DEFAULT << 14));
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), tmp);

		if (!amdgpu_sriov_vf(adev)) {
			/* unhalt engine */
			tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
			tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, HALT, 0);
			tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, RESET, 0);
			WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp);
		}

		/* enable DMA RB */
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);

		ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
		/* enable DMA IBs */
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);

		ring->sched.ready = true;

		if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */
			sdma_v7_0_ctx_switch_enable(adev, true);
			sdma_v7_0_enable(adev, true);
		}

		r = amdgpu_ring_test_helper(ring);
		if (r) {
			ring->sched.ready = false;
			return r;
		}

	}

	return 0;
}

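For scale, the RB_SIZE field programmed in sdma_v7_0_gfx_resume() is a log2 dword count; an illustrative example:

	/* A 64 KiB ring: ring->ring_size = 65536 bytes = 16384 dwords,
	 * so rb_bufsz = order_base_2(16384) = 14 goes into RB_SIZE. */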
/**
 * sdma_v7_0_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_0_rlc_resume(struct amdgpu_device *adev)
{
	return 0;
}

static void sdma_v12_0_free_ucode_buffer(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj,
				      &adev->sdma.instance[i].sdma_fw_gpu_addr,
				      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
	}
}

/**
 * sdma_v7_0_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v7_0_load_microcode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v3_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	uint32_t tmp, sdma_status, ic_op_cntl;
	int i, r, j;

	/* halt the MEs */
	sdma_v7_0_enable(adev, false);

	if (!adev->sdma.instance[0].fw)
		return -EINVAL;

	hdr = (const struct sdma_firmware_header_v3_0 *)
		adev->sdma.instance[0].fw->data;
	amdgpu_ucode_print_sdma_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
				   le32_to_cpu(hdr->ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->ucode_size_bytes);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		r = amdgpu_bo_create_reserved(adev, fw_size,
					      PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->sdma.instance[i].sdma_fw_obj,
					      &adev->sdma.instance[i].sdma_fw_gpu_addr,
					      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
			return r;
		}

		memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);

		amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
		amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);

		tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_IC_CNTL, GPA, 0);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL), tmp);

		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_LO),
				lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_HI),
				upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));

		tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL), tmp);

		/* Wait for sdma ucode init complete */
		for (j = 0; j < adev->usec_timeout; j++) {
			ic_op_cntl = RREG32_SOC15_IP(GC,
					sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL));
			sdma_status = RREG32_SOC15_IP(GC,
					sdma_v7_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
			if ((REG_GET_FIELD(ic_op_cntl, SDMA0_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
			    (REG_GET_FIELD(sdma_status, SDMA0_STATUS_REG, UCODE_INIT_DONE) == 1))
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to init sdma ucode\n");
			return -EINVAL;
		}
	}

	return 0;
}

static int sdma_v7_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp;
	int i;

	sdma_v7_0_gfx_stop(adev);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		//tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
		//tmp |= SDMA0_FREEZE__FREEZE_MASK;
		//WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
		tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
		tmp |= SDMA0_MCU_CNTL__HALT_MASK;
		tmp |= SDMA0_MCU_CNTL__RESET_MASK;
		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp);

		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);

		udelay(100);

		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);

		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);
	}

	return sdma_v7_0_start(adev);
}

static bool sdma_v7_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, r;
	long tmo = msecs_to_jiffies(1000);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		r = amdgpu_ring_test_ib(ring, tmo);
		if (r)
			return true;
	}

	return false;
}

/**
 * sdma_v7_0_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_0_start(struct amdgpu_device *adev)
{
	int r = 0;

	if (amdgpu_sriov_vf(adev)) {
		sdma_v7_0_ctx_switch_enable(adev, false);
		sdma_v7_0_enable(adev, false);

		/* set RB registers */
		r = sdma_v7_0_gfx_resume(adev);
		return r;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = sdma_v7_0_load_microcode(adev);
		if (r) {
			sdma_v12_0_free_ucode_buffer(adev);
			return r;
		}

		if (amdgpu_emu_mode == 1)
			msleep(1000);
	}

	/* unhalt the MEs */
	sdma_v7_0_enable(adev, true);
	/* enable sdma ring preemption */
	sdma_v7_0_ctx_switch_enable(adev, true);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v7_0_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v7_0_rlc_resume(adev);

	return r;
}

static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd,
			      struct amdgpu_mqd_prop *prop)
{
	struct v12_sdma_mqd *m = mqd;
	uint64_t wb_gpu_addr;

	m->sdmax_rlcx_rb_cntl =
		order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
		1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
		1 << SDMA0_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);

	wb_gpu_addr = prop->wptr_gpu_addr;
	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);

	wb_gpu_addr = prop->rptr_gpu_addr;
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);

	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, 0,
							regSDMA0_QUEUE0_IB_CNTL));

	m->sdmax_rlcx_doorbell_offset =
		prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);

	m->sdmax_rlcx_doorbell_log = 0;
	m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
	m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;

	return 0;
}

static void sdma_v7_0_set_mqd_funcs(struct amdgpu_device *adev)
{
	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_0_mqd_init;
}

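Note how the MQD here ties back to sdma_v7_0_ring_set_wptr() above: for MES-managed queues the driver keeps two extra u32s right after the MQD proper. The implied buffer layout (inferred from this file, not a formal struct):

	/* ring->mqd_ptr layout for a MES SDMA queue (implied):
	 *   [0, mqd_size)      struct v12_sdma_mqd
	 *   [mqd_size]         u32 wptr_saved
	 *   [mqd_size + 4]     u32 is_queue_unmap
	 * set_wptr() mirrors the wptr into wptr_saved and rings the
	 * aggregated doorbell when the queue has been unmapped.
	 */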
/**
 * sdma_v7_0_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;
	volatile uint32_t *cpu_ptr = NULL;

	tmp = 0xCAFEDEAD;

	if (ring->is_mes_queue) {
		uint32_t offset = 0;

		offset = amdgpu_mes_ctx_get_offs(ring,
						 AMDGPU_MES_CTX_PADDING_OFFS);
		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
		*cpu_ptr = tmp;
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
			return r;
		}

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(tmp);
	}

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->is_mes_queue)
			tmp = le32_to_cpu(*cpu_ptr);
		else
			tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);

	return r;
}

/**
 * sdma_v7_0_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies
 *
 * Test a simple IB in the DMA ring.
 * Returns 0 on success, error on failure.
 */
static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;
	volatile uint32_t *cpu_ptr = NULL;

	tmp = 0xCAFEDEAD;
	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		uint32_t offset = 0;

		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		offset = amdgpu_mes_ctx_get_offs(ring,
						 AMDGPU_MES_CTX_PADDING_OFFS);
		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
		*cpu_ptr = tmp;
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r) {
			dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
			return r;
		}

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(tmp);

		r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
		if (r) {
			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
			goto err0;
		}
	}

	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}

	if (ring->is_mes_queue)
		tmp = le32_to_cpu(*cpu_ptr);
	else
		tmp = le32_to_cpu(adev->wb.wb[index]);

	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * sdma_v7_0_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA.
 */
static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = bytes - 1;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(src);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}

/**
 * sdma_v7_0_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: value to write into the first pe
 * @count: number of page entries to update
 * @incr: increase next value by incr bytes
 *
 * Update PTEs by writing them manually using sDMA.
 */
static void sdma_v7_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				   uint64_t value, unsigned count,
				   uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = ndw - 1;
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * sdma_v7_0_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA.
 */
static void sdma_v7_0_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint64_t flags)
{
	/* for physically contiguous pages (vram) */
	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = incr; /* increment size */
	ib->ptr[ib->length_dw++] = 0;
	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
}

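A small worked example of the WRITE_LINEAR encoding used by sdma_v7_0_vm_write_pte() above (illustrative values):

	/* count = 3, value = 0x1000, incr = 0x1000:
	 *   ndw = 6, so dword 3 of the packet carries ndw - 1 = 5;
	 *   payload = { 0x1000,0x0, 0x2000,0x0, 0x3000,0x0 },
	 * i.e. each PTE goes out as a lo/hi dword pair.
	 */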
/**
 * sdma_v7_0_ring_pad_ib - pad the IB
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a boundary multiple of 8.
 */
static void sdma_v7_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	u32 pad_count;
	int i;

	pad_count = (-ib->length_dw) & 0x7;
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
}

/**
 * sdma_v7_0_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (CIK).
 */
static void sdma_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}

/**
 * sdma_v7_0_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: page table base address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA.
 */
static void sdma_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}

static void sdma_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	/* The SRBM WRITE command is not supported on sdma v7.
	 * Use the Register WRITE command instead, which shares
	 * the same opcode as SRBM WRITE.
	 */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static void sdma_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}

static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	/* wait for a cycle to reset vm_inv_eng*_ack */
	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}

static int sdma_v7_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_sdma_init_microcode(adev, 0, true);
	if (r) {
		DRM_ERROR("Failed to init sdma firmware!\n");
		return r;
	}

	sdma_v7_0_set_ring_funcs(adev);
	sdma_v7_0_set_buffer_funcs(adev);
	sdma_v7_0_set_vm_pte_funcs(adev);
	sdma_v7_0_set_irq_funcs(adev);
	sdma_v7_0_set_mqd_funcs(adev);

	return 0;
}

static int sdma_v7_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
			      GFX_11_0_0__SRCID__SDMA_TRAP,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->me = i;

		DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
			  ring->use_doorbell ? "true" : "false");

		ring->doorbell_index =
			(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset

		ring->vm_hub = AMDGPU_GFXHUB(0);
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	return r;
}

static int sdma_v7_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	amdgpu_sdma_destroy_inst_ctx(adev, true);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
		sdma_v12_0_free_ucode_buffer(adev);

	return 0;
}

static int sdma_v7_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v7_0_start(adev);
}

static int sdma_v7_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	sdma_v7_0_ctx_switch_enable(adev, false);
	sdma_v7_0_enable(adev, false);

	return 0;
}

static int sdma_v7_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v7_0_hw_fini(adev);
}

static int sdma_v7_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v7_0_hw_init(adev);
}

static bool sdma_v7_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		u32 tmp = RREG32(sdma_v7_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));

		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
			return false;
	}

	return true;
}

static int sdma_v7_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 sdma0, sdma1;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		sdma0 = RREG32(sdma_v7_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
		sdma1 = RREG32(sdma_v7_0_get_reg_offset(adev, 1, regSDMA0_STATUS_REG));

		if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int sdma_v7_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	u32 index = 0;
	u64 sdma_gfx_preempt;

	amdgpu_sdma_get_index_from_ring(ring, &index);
	sdma_gfx_preempt =
		sdma_v7_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	amdgpu_ring_alloc(ring, 10);
	sdma_v7_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				  ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(sdma_gfx_preempt, 1);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(sdma_gfx_preempt, 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}

static int sdma_v7_0_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;
	u32 reg_offset = sdma_v7_0_get_reg_offset(adev, type, regSDMA0_CNTL);

	sdma_cntl = RREG32(reg_offset);
	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
				  state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
	WREG32(reg_offset, sdma_cntl);

	return 0;
}

static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	int instances, queue;
	uint32_t mes_queue_id = entry->src_data[0];

	DRM_DEBUG("IH: SDMA trap\n");

	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
		struct amdgpu_mes_queue *queue;

		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;

		spin_lock(&adev->mes.queue_id_lock);
		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
		if (queue) {
			DRM_DEBUG("process sdma queue id = %d\n", mes_queue_id);
			amdgpu_fence_process(queue->ring);
		}
		spin_unlock(&adev->mes.queue_id_lock);
		return 0;
	}

	queue = entry->ring_id & 0xf;
	instances = (entry->ring_id & 0xf0) >> 4;
	if (instances > 1) {
		DRM_ERROR("IH: wrong ring_id detected, bad sdma instance\n");
		return -EINVAL;
	}

	switch (entry->client_id) {
	case SOC21_IH_CLIENTID_GFX:
		switch (queue) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[instances].ring);
			break;
		default:
			break;
		}
		break;
	}
	return 0;
}

static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	return 0;
}

static int sdma_v7_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	return 0;
}

static int sdma_v7_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}

static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags)
{
}

const struct amd_ip_funcs sdma_v7_0_ip_funcs = {
	.name = "sdma_v7_0",
	.early_init = sdma_v7_0_early_init,
	.late_init = NULL,
	.sw_init = sdma_v7_0_sw_init,
	.sw_fini = sdma_v7_0_sw_fini,
	.hw_init = sdma_v7_0_hw_init,
	.hw_fini = sdma_v7_0_hw_fini,
	.suspend = sdma_v7_0_suspend,
	.resume = sdma_v7_0_resume,
	.is_idle = sdma_v7_0_is_idle,
	.wait_for_idle = sdma_v7_0_wait_for_idle,
	.soft_reset = sdma_v7_0_soft_reset,
	.check_soft_reset = sdma_v7_0_check_soft_reset,
	.set_clockgating_state = sdma_v7_0_set_clockgating_state,
	.set_powergating_state = sdma_v7_0_set_powergating_state,
	.get_clockgating_state = sdma_v7_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = sdma_v7_0_ring_get_rptr,
	.get_wptr = sdma_v7_0_ring_get_wptr,
	.set_wptr = sdma_v7_0_ring_set_wptr,
	.emit_frame_size =
		5 + /* sdma_v7_0_ring_init_cond_exec */
		6 + /* sdma_v7_0_ring_emit_hdp_flush */
		6 + /* sdma_v7_0_ring_emit_pipeline_sync */
		/* sdma_v7_0_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
		10 + 10 + 10, /* sdma_v7_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 5 + 7 + 6, /* sdma_v7_0_ring_emit_ib */
	.emit_ib = sdma_v7_0_ring_emit_ib,
	.emit_mem_sync = sdma_v7_0_ring_emit_mem_sync,
	.emit_fence = sdma_v7_0_ring_emit_fence,
	.emit_pipeline_sync = sdma_v7_0_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v7_0_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v7_0_ring_emit_hdp_flush,
	.test_ring = sdma_v7_0_ring_test_ring,
	.test_ib = sdma_v7_0_ring_test_ib,
	.insert_nop = sdma_v7_0_ring_insert_nop,
	.pad_ib = sdma_v7_0_ring_pad_ib,
	.emit_wreg = sdma_v7_0_ring_emit_wreg,
	.emit_reg_wait = sdma_v7_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait,
	.init_cond_exec = sdma_v7_0_ring_init_cond_exec,
	.preempt_ib = sdma_v7_0_ring_preempt_ib,
};

static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->sdma.instance[i].ring.funcs = &sdma_v7_0_ring_funcs;
		adev->sdma.instance[i].ring.me = i;
	}
}

static const struct amdgpu_irq_src_funcs sdma_v7_0_trap_irq_funcs = {
	.set = sdma_v7_0_set_trap_irq_state,
	.process = sdma_v7_0_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v7_0_illegal_inst_irq_funcs = {
	.process = sdma_v7_0_process_illegal_inst_irq,
};

static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
					adev->sdma.num_instances;
	adev->sdma.trap_irq.funcs = &sdma_v7_0_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v7_0_illegal_inst_irq_funcs;
}

/**
 * sdma_v7_0_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @copy_flags: flags for the copy
 *
 * Copy GPU buffers using the DMA engine.
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count,
				       uint32_t copy_flags)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
		SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
	ib->ptr[ib->length_dw++] = byte_count - 1;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

/**
 * sdma_v7_0_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine.
 */
static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count - 1;
}

static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = {
	.copy_max_bytes = 0x400000,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v7_0_emit_copy_buffer,

	.fill_max_bytes = 0x400000,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v7_0_emit_fill_buffer,
};

static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &sdma_v7_0_buffer_funcs;
	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}

static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = {
	.copy_pte_num_dw = 7,
	.copy_pte = sdma_v7_0_vm_copy_pte,
	.write_pte = sdma_v7_0_vm_write_pte,
	.set_pte_pde = sdma_v7_0_vm_set_pte_pde,
};

static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &sdma_v7_0_vm_pte_funcs;
	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->vm_manager.vm_pte_scheds[i] =
			&adev->sdma.instance[i].ring.sched;
	}
	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}

const struct amdgpu_ip_block_version sdma_v7_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 7,
	.minor = 0,
	.rev = 0,
	.funcs = &sdma_v7_0_ip_funcs,
};
+30
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __SDMA_V7_0_H__
#define __SDMA_V7_0_H__

extern const struct amd_ip_funcs sdma_v7_0_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v7_0_ip_block;

#endif /* __SDMA_V7_0_H__ */