Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v6.17-rc1 860 lines 24 kB view raw
1/* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Alex Deucher 23 */ 24 25#include "amdgpu.h" 26#include "amdgpu_trace.h" 27#include "si.h" 28#include "sid.h" 29 30#include "oss/oss_1_0_d.h" 31#include "oss/oss_1_0_sh_mask.h" 32const u32 sdma_offsets[SDMA_MAX_INSTANCE] = 33{ 34 DMA0_REGISTER_OFFSET, 35 DMA1_REGISTER_OFFSET 36}; 37 38static void si_dma_set_ring_funcs(struct amdgpu_device *adev); 39static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); 40static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); 41static void si_dma_set_irq_funcs(struct amdgpu_device *adev); 42 43/** 44 * si_dma_ring_get_rptr - get the current read pointer 45 * 46 * @ring: amdgpu ring pointer 47 * 48 * Get the current rptr from the hardware (SI). 49 */ 50static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) 51{ 52 return *ring->rptr_cpu_addr; 53} 54 55/** 56 * si_dma_ring_get_wptr - get the current write pointer 57 * 58 * @ring: amdgpu ring pointer 59 * 60 * Get the current wptr from the hardware (SI). 61 */ 62static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) 63{ 64 struct amdgpu_device *adev = ring->adev; 65 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 66 67 return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; 68} 69 70static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) 71{ 72 struct amdgpu_device *adev = ring->adev; 73 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 74 75 WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); 76} 77 78static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, 79 struct amdgpu_job *job, 80 struct amdgpu_ib *ib, 81 uint32_t flags) 82{ 83 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 84 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. 85 * Pad as necessary with NOPs. 86 */ 87 while ((lower_32_bits(ring->wptr) & 7) != 5) 88 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); 89 amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0)); 90 amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); 91 amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); 92 93} 94 95/** 96 * si_dma_ring_emit_fence - emit a fence on the DMA ring 97 * 98 * @ring: amdgpu ring pointer 99 * @addr: address 100 * @seq: sequence number 101 * @flags: fence related flags 102 * 103 * Add a DMA fence packet to the ring to write 104 * the fence seq number and DMA trap packet to generate 105 * an interrupt if needed (VI). 106 */ 107static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 108 unsigned flags) 109{ 110 111 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 112 /* write the fence */ 113 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); 114 amdgpu_ring_write(ring, addr & 0xfffffffc); 115 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); 116 amdgpu_ring_write(ring, seq); 117 /* optionally write high bits as well */ 118 if (write64bit) { 119 addr += 4; 120 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); 121 amdgpu_ring_write(ring, addr & 0xfffffffc); 122 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); 123 amdgpu_ring_write(ring, upper_32_bits(seq)); 124 } 125 /* generate an interrupt */ 126 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0)); 127} 128 129static void si_dma_stop(struct amdgpu_device *adev) 130{ 131 u32 rb_cntl; 132 unsigned i; 133 134 for (i = 0; i < adev->sdma.num_instances; i++) { 135 /* dma0 */ 136 rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]); 137 rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK; 138 WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 139 } 140} 141 142static int si_dma_start(struct amdgpu_device *adev) 143{ 144 struct amdgpu_ring *ring; 145 u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz; 146 int i, r; 147 uint64_t rptr_addr; 148 149 for (i = 0; i < adev->sdma.num_instances; i++) { 150 ring = &adev->sdma.instance[i].ring; 151 152 WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); 153 WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 154 155 /* Set ring buffer size in dwords */ 156 rb_bufsz = order_base_2(ring->ring_size / 4); 157 rb_cntl = rb_bufsz << 1; 158#ifdef __BIG_ENDIAN 159 rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK; 160#endif 161 WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 162 163 /* Initialize the ring buffer's read and write pointers */ 164 WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0); 165 WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0); 166 167 rptr_addr = ring->rptr_gpu_addr; 168 169 WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); 170 WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); 171 172 rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK; 173 174 WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); 175 176 /* enable DMA IBs */ 177 ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK; 178#ifdef __BIG_ENDIAN 179 ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK; 180#endif 181 WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 182 183 dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]); 184 dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK; 185 WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl); 186 187 ring->wptr = 0; 188 WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); 189 WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK); 190 191 r = amdgpu_ring_test_helper(ring); 192 if (r) 193 return r; 194 } 195 196 return 0; 197} 198 199/** 200 * si_dma_ring_test_ring - simple async dma engine test 201 * 202 * @ring: amdgpu_ring structure holding ring information 203 * 204 * Test the DMA engine by writing using it to write an 205 * value to memory. (VI). 206 * Returns 0 for success, error for failure. 207 */ 208static int si_dma_ring_test_ring(struct amdgpu_ring *ring) 209{ 210 struct amdgpu_device *adev = ring->adev; 211 unsigned i; 212 unsigned index; 213 int r; 214 u32 tmp; 215 u64 gpu_addr; 216 217 r = amdgpu_device_wb_get(adev, &index); 218 if (r) 219 return r; 220 221 gpu_addr = adev->wb.gpu_addr + (index * 4); 222 tmp = 0xCAFEDEAD; 223 adev->wb.wb[index] = cpu_to_le32(tmp); 224 225 r = amdgpu_ring_alloc(ring, 4); 226 if (r) 227 goto error_free_wb; 228 229 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); 230 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 231 amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff); 232 amdgpu_ring_write(ring, 0xDEADBEEF); 233 amdgpu_ring_commit(ring); 234 235 for (i = 0; i < adev->usec_timeout; i++) { 236 tmp = le32_to_cpu(adev->wb.wb[index]); 237 if (tmp == 0xDEADBEEF) 238 break; 239 udelay(1); 240 } 241 242 if (i >= adev->usec_timeout) 243 r = -ETIMEDOUT; 244 245error_free_wb: 246 amdgpu_device_wb_free(adev, index); 247 return r; 248} 249 250/** 251 * si_dma_ring_test_ib - test an IB on the DMA engine 252 * 253 * @ring: amdgpu_ring structure holding ring information 254 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT 255 * 256 * Test a simple IB in the DMA ring (VI). 257 * Returns 0 on success, error on failure. 258 */ 259static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) 260{ 261 struct amdgpu_device *adev = ring->adev; 262 struct amdgpu_ib ib; 263 struct dma_fence *f = NULL; 264 unsigned index; 265 u32 tmp = 0; 266 u64 gpu_addr; 267 long r; 268 269 r = amdgpu_device_wb_get(adev, &index); 270 if (r) 271 return r; 272 273 gpu_addr = adev->wb.gpu_addr + (index * 4); 274 tmp = 0xCAFEDEAD; 275 adev->wb.wb[index] = cpu_to_le32(tmp); 276 memset(&ib, 0, sizeof(ib)); 277 r = amdgpu_ib_get(adev, NULL, 256, 278 AMDGPU_IB_POOL_DIRECT, &ib); 279 if (r) 280 goto err0; 281 282 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); 283 ib.ptr[1] = lower_32_bits(gpu_addr); 284 ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff; 285 ib.ptr[3] = 0xDEADBEEF; 286 ib.length_dw = 4; 287 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 288 if (r) 289 goto err1; 290 291 r = dma_fence_wait_timeout(f, false, timeout); 292 if (r == 0) { 293 r = -ETIMEDOUT; 294 goto err1; 295 } else if (r < 0) { 296 goto err1; 297 } 298 tmp = le32_to_cpu(adev->wb.wb[index]); 299 if (tmp == 0xDEADBEEF) 300 r = 0; 301 else 302 r = -EINVAL; 303 304err1: 305 amdgpu_ib_free(&ib, NULL); 306 dma_fence_put(f); 307err0: 308 amdgpu_device_wb_free(adev, index); 309 return r; 310} 311 312/** 313 * si_dma_vm_copy_pte - update PTEs by copying them from the GART 314 * 315 * @ib: indirect buffer to fill with commands 316 * @pe: addr of the page entry 317 * @src: src addr to copy from 318 * @count: number of page entries to update 319 * 320 * Update PTEs by copying them from the GART using DMA (SI). 321 */ 322static void si_dma_vm_copy_pte(struct amdgpu_ib *ib, 323 uint64_t pe, uint64_t src, 324 unsigned count) 325{ 326 unsigned bytes = count * 8; 327 328 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 329 1, 0, 0, bytes); 330 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 331 ib->ptr[ib->length_dw++] = lower_32_bits(src); 332 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 333 ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 334} 335 336/** 337 * si_dma_vm_write_pte - update PTEs by writing them manually 338 * 339 * @ib: indirect buffer to fill with commands 340 * @pe: addr of the page entry 341 * @value: dst addr to write into pe 342 * @count: number of page entries to update 343 * @incr: increase next addr by incr bytes 344 * 345 * Update PTEs by writing them manually using DMA (SI). 346 */ 347static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 348 uint64_t value, unsigned count, 349 uint32_t incr) 350{ 351 unsigned ndw = count * 2; 352 353 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); 354 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 355 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 356 for (; ndw > 0; ndw -= 2) { 357 ib->ptr[ib->length_dw++] = lower_32_bits(value); 358 ib->ptr[ib->length_dw++] = upper_32_bits(value); 359 value += incr; 360 } 361} 362 363/** 364 * si_dma_vm_set_pte_pde - update the page tables using sDMA 365 * 366 * @ib: indirect buffer to fill with commands 367 * @pe: addr of the page entry 368 * @addr: dst addr to write into pe 369 * @count: number of page entries to update 370 * @incr: increase next addr by incr bytes 371 * @flags: access flags 372 * 373 * Update the page tables using sDMA (CIK). 374 */ 375static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, 376 uint64_t pe, 377 uint64_t addr, unsigned count, 378 uint32_t incr, uint64_t flags) 379{ 380 uint64_t value; 381 unsigned ndw; 382 383 while (count) { 384 ndw = count * 2; 385 if (ndw > 0xFFFFE) 386 ndw = 0xFFFFE; 387 388 if (flags & AMDGPU_PTE_VALID) 389 value = addr; 390 else 391 value = 0; 392 393 /* for physically contiguous pages (vram) */ 394 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 395 ib->ptr[ib->length_dw++] = pe; /* dst addr */ 396 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 397 ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ 398 ib->ptr[ib->length_dw++] = upper_32_bits(flags); 399 ib->ptr[ib->length_dw++] = value; /* value */ 400 ib->ptr[ib->length_dw++] = upper_32_bits(value); 401 ib->ptr[ib->length_dw++] = incr; /* increment size */ 402 ib->ptr[ib->length_dw++] = 0; 403 pe += ndw * 4; 404 addr += (ndw / 2) * incr; 405 count -= ndw / 2; 406 } 407} 408 409/** 410 * si_dma_ring_pad_ib - pad the IB to the required number of dw 411 * 412 * @ring: amdgpu_ring pointer 413 * @ib: indirect buffer to fill with padding 414 * 415 */ 416static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 417{ 418 while (ib->length_dw & 0x7) 419 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); 420} 421 422/** 423 * si_dma_ring_emit_pipeline_sync - sync the pipeline 424 * 425 * @ring: amdgpu_ring pointer 426 * 427 * Make sure all previous operations are completed (CIK). 428 */ 429static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 430{ 431 uint32_t seq = ring->fence_drv.sync_seq; 432 uint64_t addr = ring->fence_drv.gpu_addr; 433 434 /* wait for idle */ 435 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) | 436 (1 << 27)); /* Poll memory */ 437 amdgpu_ring_write(ring, lower_32_bits(addr)); 438 amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */ 439 amdgpu_ring_write(ring, 0xffffffff); /* mask */ 440 amdgpu_ring_write(ring, seq); /* value */ 441 amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */ 442} 443 444/** 445 * si_dma_ring_emit_vm_flush - cik vm flush using sDMA 446 * 447 * @ring: amdgpu_ring pointer 448 * @vmid: vmid number to use 449 * @pd_addr: address 450 * 451 * Update the page table base and flush the VM TLB 452 * using sDMA (VI). 453 */ 454static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, 455 unsigned vmid, uint64_t pd_addr) 456{ 457 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 458 459 /* wait for invalidate to complete */ 460 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); 461 amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); 462 amdgpu_ring_write(ring, 0xff << 16); /* retry */ 463 amdgpu_ring_write(ring, 1 << vmid); /* mask */ 464 amdgpu_ring_write(ring, 0); /* value */ 465 amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ 466} 467 468static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring, 469 uint32_t reg, uint32_t val) 470{ 471 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); 472 amdgpu_ring_write(ring, (0xf << 16) | reg); 473 amdgpu_ring_write(ring, val); 474} 475 476static int si_dma_early_init(struct amdgpu_ip_block *ip_block) 477{ 478 struct amdgpu_device *adev = ip_block->adev; 479 480 adev->sdma.num_instances = SDMA_MAX_INSTANCE; 481 482 si_dma_set_ring_funcs(adev); 483 si_dma_set_buffer_funcs(adev); 484 si_dma_set_vm_pte_funcs(adev); 485 si_dma_set_irq_funcs(adev); 486 487 return 0; 488} 489 490static int si_dma_sw_init(struct amdgpu_ip_block *ip_block) 491{ 492 struct amdgpu_ring *ring; 493 int r, i; 494 struct amdgpu_device *adev = ip_block->adev; 495 496 /* DMA0 trap event */ 497 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, 498 &adev->sdma.trap_irq); 499 if (r) 500 return r; 501 502 /* DMA1 trap event */ 503 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244, 504 &adev->sdma.trap_irq); 505 if (r) 506 return r; 507 508 for (i = 0; i < adev->sdma.num_instances; i++) { 509 ring = &adev->sdma.instance[i].ring; 510 ring->ring_obj = NULL; 511 ring->use_doorbell = false; 512 sprintf(ring->name, "sdma%d", i); 513 r = amdgpu_ring_init(adev, ring, 1024, 514 &adev->sdma.trap_irq, 515 (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : 516 AMDGPU_SDMA_IRQ_INSTANCE1, 517 AMDGPU_RING_PRIO_DEFAULT, NULL); 518 if (r) 519 return r; 520 } 521 522 return r; 523} 524 525static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block) 526{ 527 struct amdgpu_device *adev = ip_block->adev; 528 int i; 529 530 for (i = 0; i < adev->sdma.num_instances; i++) 531 amdgpu_ring_fini(&adev->sdma.instance[i].ring); 532 533 return 0; 534} 535 536static int si_dma_hw_init(struct amdgpu_ip_block *ip_block) 537{ 538 struct amdgpu_device *adev = ip_block->adev; 539 540 return si_dma_start(adev); 541} 542 543static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block) 544{ 545 si_dma_stop(ip_block->adev); 546 547 return 0; 548} 549 550static int si_dma_suspend(struct amdgpu_ip_block *ip_block) 551{ 552 return si_dma_hw_fini(ip_block); 553} 554 555static int si_dma_resume(struct amdgpu_ip_block *ip_block) 556{ 557 return si_dma_hw_init(ip_block); 558} 559 560static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block) 561{ 562 struct amdgpu_device *adev = ip_block->adev; 563 564 u32 tmp = RREG32(mmSRBM_STATUS2); 565 566 if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK)) 567 return false; 568 569 return true; 570} 571 572static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block) 573{ 574 unsigned i; 575 struct amdgpu_device *adev = ip_block->adev; 576 577 for (i = 0; i < adev->usec_timeout; i++) { 578 if (si_dma_is_idle(ip_block)) 579 return 0; 580 udelay(1); 581 } 582 return -ETIMEDOUT; 583} 584 585static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block) 586{ 587 DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n"); 588 return 0; 589} 590 591static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, 592 struct amdgpu_irq_src *src, 593 unsigned type, 594 enum amdgpu_interrupt_state state) 595{ 596 u32 sdma_cntl; 597 598 switch (type) { 599 case AMDGPU_SDMA_IRQ_INSTANCE0: 600 switch (state) { 601 case AMDGPU_IRQ_STATE_DISABLE: 602 sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); 603 sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; 604 WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); 605 break; 606 case AMDGPU_IRQ_STATE_ENABLE: 607 sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); 608 sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; 609 WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); 610 break; 611 default: 612 break; 613 } 614 break; 615 case AMDGPU_SDMA_IRQ_INSTANCE1: 616 switch (state) { 617 case AMDGPU_IRQ_STATE_DISABLE: 618 sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); 619 sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; 620 WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); 621 break; 622 case AMDGPU_IRQ_STATE_ENABLE: 623 sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); 624 sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; 625 WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); 626 break; 627 default: 628 break; 629 } 630 break; 631 default: 632 break; 633 } 634 return 0; 635} 636 637static int si_dma_process_trap_irq(struct amdgpu_device *adev, 638 struct amdgpu_irq_src *source, 639 struct amdgpu_iv_entry *entry) 640{ 641 if (entry->src_id == 224) 642 amdgpu_fence_process(&adev->sdma.instance[0].ring); 643 else 644 amdgpu_fence_process(&adev->sdma.instance[1].ring); 645 return 0; 646} 647 648static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, 649 enum amd_clockgating_state state) 650{ 651 u32 orig, data, offset; 652 int i; 653 bool enable; 654 struct amdgpu_device *adev = ip_block->adev; 655 656 enable = (state == AMD_CG_STATE_GATE); 657 658 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { 659 for (i = 0; i < adev->sdma.num_instances; i++) { 660 if (i == 0) 661 offset = DMA0_REGISTER_OFFSET; 662 else 663 offset = DMA1_REGISTER_OFFSET; 664 orig = data = RREG32(mmDMA_POWER_CNTL + offset); 665 data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 666 if (data != orig) 667 WREG32(mmDMA_POWER_CNTL + offset, data); 668 WREG32(mmDMA_CLK_CTRL + offset, 0x00000100); 669 } 670 } else { 671 for (i = 0; i < adev->sdma.num_instances; i++) { 672 if (i == 0) 673 offset = DMA0_REGISTER_OFFSET; 674 else 675 offset = DMA1_REGISTER_OFFSET; 676 orig = data = RREG32(mmDMA_POWER_CNTL + offset); 677 data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 678 if (data != orig) 679 WREG32(mmDMA_POWER_CNTL + offset, data); 680 681 orig = data = RREG32(mmDMA_CLK_CTRL + offset); 682 data = 0xff000000; 683 if (data != orig) 684 WREG32(mmDMA_CLK_CTRL + offset, data); 685 } 686 } 687 688 return 0; 689} 690 691static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, 692 enum amd_powergating_state state) 693{ 694 u32 tmp; 695 696 struct amdgpu_device *adev = ip_block->adev; 697 698 WREG32(mmDMA_PGFSM_WRITE, 0x00002000); 699 WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff); 700 701 for (tmp = 0; tmp < 5; tmp++) 702 WREG32(mmDMA_PGFSM_WRITE, 0); 703 704 return 0; 705} 706 707static const struct amd_ip_funcs si_dma_ip_funcs = { 708 .name = "si_dma", 709 .early_init = si_dma_early_init, 710 .sw_init = si_dma_sw_init, 711 .sw_fini = si_dma_sw_fini, 712 .hw_init = si_dma_hw_init, 713 .hw_fini = si_dma_hw_fini, 714 .suspend = si_dma_suspend, 715 .resume = si_dma_resume, 716 .is_idle = si_dma_is_idle, 717 .wait_for_idle = si_dma_wait_for_idle, 718 .soft_reset = si_dma_soft_reset, 719 .set_clockgating_state = si_dma_set_clockgating_state, 720 .set_powergating_state = si_dma_set_powergating_state, 721}; 722 723static const struct amdgpu_ring_funcs si_dma_ring_funcs = { 724 .type = AMDGPU_RING_TYPE_SDMA, 725 .align_mask = 0xf, 726 .nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0), 727 .support_64bit_ptrs = false, 728 .get_rptr = si_dma_ring_get_rptr, 729 .get_wptr = si_dma_ring_get_wptr, 730 .set_wptr = si_dma_ring_set_wptr, 731 .emit_frame_size = 732 3 + 3 + /* hdp flush / invalidate */ 733 6 + /* si_dma_ring_emit_pipeline_sync */ 734 SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */ 735 9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */ 736 .emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */ 737 .emit_ib = si_dma_ring_emit_ib, 738 .emit_fence = si_dma_ring_emit_fence, 739 .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync, 740 .emit_vm_flush = si_dma_ring_emit_vm_flush, 741 .test_ring = si_dma_ring_test_ring, 742 .test_ib = si_dma_ring_test_ib, 743 .insert_nop = amdgpu_ring_insert_nop, 744 .pad_ib = si_dma_ring_pad_ib, 745 .emit_wreg = si_dma_ring_emit_wreg, 746}; 747 748static void si_dma_set_ring_funcs(struct amdgpu_device *adev) 749{ 750 int i; 751 752 for (i = 0; i < adev->sdma.num_instances; i++) 753 adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs; 754} 755 756static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = { 757 .set = si_dma_set_trap_irq_state, 758 .process = si_dma_process_trap_irq, 759}; 760 761static void si_dma_set_irq_funcs(struct amdgpu_device *adev) 762{ 763 adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 764 adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; 765} 766 767/** 768 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine 769 * 770 * @ib: indirect buffer to copy to 771 * @src_offset: src GPU address 772 * @dst_offset: dst GPU address 773 * @byte_count: number of bytes to xfer 774 * @copy_flags: unused 775 * 776 * Copy GPU buffers using the DMA engine (VI). 777 * Used by the amdgpu ttm implementation to move pages if 778 * registered as the asic copy callback. 779 */ 780static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, 781 uint64_t src_offset, 782 uint64_t dst_offset, 783 uint32_t byte_count, 784 uint32_t copy_flags) 785{ 786 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 787 1, 0, 0, byte_count); 788 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 789 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 790 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff; 791 ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff; 792} 793 794/** 795 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine 796 * 797 * @ib: indirect buffer to copy to 798 * @src_data: value to write to buffer 799 * @dst_offset: dst GPU address 800 * @byte_count: number of bytes to xfer 801 * 802 * Fill GPU buffers using the DMA engine (VI). 803 */ 804static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib, 805 uint32_t src_data, 806 uint64_t dst_offset, 807 uint32_t byte_count) 808{ 809 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL, 810 0, 0, 0, byte_count / 4); 811 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 812 ib->ptr[ib->length_dw++] = src_data; 813 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16; 814} 815 816 817static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = { 818 .copy_max_bytes = 0xffff8, 819 .copy_num_dw = 5, 820 .emit_copy_buffer = si_dma_emit_copy_buffer, 821 822 .fill_max_bytes = 0xffff8, 823 .fill_num_dw = 4, 824 .emit_fill_buffer = si_dma_emit_fill_buffer, 825}; 826 827static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) 828{ 829 adev->mman.buffer_funcs = &si_dma_buffer_funcs; 830 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; 831} 832 833static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { 834 .copy_pte_num_dw = 5, 835 .copy_pte = si_dma_vm_copy_pte, 836 837 .write_pte = si_dma_vm_write_pte, 838 .set_pte_pde = si_dma_vm_set_pte_pde, 839}; 840 841static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) 842{ 843 unsigned i; 844 845 adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; 846 for (i = 0; i < adev->sdma.num_instances; i++) { 847 adev->vm_manager.vm_pte_scheds[i] = 848 &adev->sdma.instance[i].ring.sched; 849 } 850 adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; 851} 852 853const struct amdgpu_ip_block_version si_dma_ip_block = 854{ 855 .type = AMD_IP_BLOCK_TYPE_SDMA, 856 .major = 1, 857 .minor = 0, 858 .rev = 0, 859 .funcs = &si_dma_ip_funcs, 860};