Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu/jpeg: add jpeg support for VCN4_0_3

Add jpeg support for VCN4_0_3.

v2: squash in delayed work typo fix (Alex)

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

James Zhu and committed by
Alex Deucher
e684e654 76e5e4c7

+813 -1
+2 -1
drivers/gpu/drm/amd/amdgpu/Makefile
··· 188 188 jpeg_v2_0.o \ 189 189 jpeg_v2_5.o \ 190 190 jpeg_v3_0.o \ 191 - jpeg_v4_0.o 191 + jpeg_v4_0.o \ 192 + jpeg_v4_0_3.o 192 193 193 194 # add ATHUB block 194 195 amdgpu-y += \
+762
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
··· 1 + /* 2 + * Copyright 2022 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 
21 + * 22 + */ 23 + 24 + #include "amdgpu.h" 25 + #include "amdgpu_jpeg.h" 26 + #include "soc15.h" 27 + #include "soc15d.h" 28 + #include "jpeg_v4_0_3.h" 29 + 30 + #include "vcn/vcn_4_0_3_offset.h" 31 + #include "vcn/vcn_4_0_3_sh_mask.h" 32 + #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" 33 + 34 + enum jpeg_engin_status { 35 + UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, 36 + UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, 37 + }; 38 + 39 + static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev); 40 + static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); 41 + static int jpeg_v4_0_3_set_powergating_state(void *handle, 42 + enum amd_powergating_state state); 43 + 44 + /** 45 + * jpeg_v4_0_3_early_init - set function pointers 46 + * 47 + * @handle: amdgpu_device pointer 48 + * 49 + * Set ring and irq function pointers 50 + */ 51 + static int jpeg_v4_0_3_early_init(void *handle) 52 + { 53 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 54 + 55 + jpeg_v4_0_3_set_dec_ring_funcs(adev); 56 + jpeg_v4_0_3_set_irq_funcs(adev); 57 + 58 + return 0; 59 + } 60 + 61 + /** 62 + * jpeg_v4_0_3_sw_init - sw init for JPEG block 63 + * 64 + * @handle: amdgpu_device pointer 65 + * 66 + * Load firmware and sw initialization 67 + */ 68 + static int jpeg_v4_0_3_sw_init(void *handle) 69 + { 70 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 71 + struct amdgpu_ring *ring; 72 + int r; 73 + 74 + /* JPEG TRAP */ 75 + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 76 + VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); 77 + if (r) 78 + return r; 79 + 80 + r = amdgpu_jpeg_sw_init(adev); 81 + if (r) 82 + return r; 83 + 84 + r = amdgpu_jpeg_resume(adev); 85 + if (r) 86 + return r; 87 + 88 + ring = &adev->jpeg.inst->ring_dec; 89 + ring->use_doorbell = false; 90 + ring->vm_hub = AMDGPU_MMHUB0(0); 91 + sprintf(ring->name, "jpeg_dec"); 92 + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, 93 + AMDGPU_RING_PRIO_DEFAULT, NULL); 94 + if (r) 95 + return r; 
96 + 97 + adev->jpeg.internal.jpeg_pitch = regUVD_JPEG_PITCH_INTERNAL_OFFSET; 98 + adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); 99 + 100 + return 0; 101 + } 102 + 103 + /** 104 + * jpeg_v4_0_3_sw_fini - sw fini for JPEG block 105 + * 106 + * @handle: amdgpu_device pointer 107 + * 108 + * JPEG suspend and free up sw allocation 109 + */ 110 + static int jpeg_v4_0_3_sw_fini(void *handle) 111 + { 112 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 113 + int r; 114 + 115 + r = amdgpu_jpeg_suspend(adev); 116 + if (r) 117 + return r; 118 + 119 + r = amdgpu_jpeg_sw_fini(adev); 120 + 121 + return r; 122 + } 123 + 124 + /** 125 + * jpeg_v4_0_3_hw_init - start and test JPEG block 126 + * 127 + * @handle: amdgpu_device pointer 128 + * 129 + */ 130 + static int jpeg_v4_0_3_hw_init(void *handle) 131 + { 132 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 133 + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; 134 + int r; 135 + 136 + r = amdgpu_ring_test_helper(ring); 137 + if (!r) 138 + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); 139 + 140 + return r; 141 + } 142 + 143 + /** 144 + * jpeg_v4_0_3_hw_fini - stop the hardware block 145 + * 146 + * @handle: amdgpu_device pointer 147 + * 148 + * Stop the JPEG block, mark ring as not ready any more 149 + */ 150 + static int jpeg_v4_0_3_hw_fini(void *handle) 151 + { 152 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 153 + 154 + cancel_delayed_work_sync(&adev->jpeg.idle_work); 155 + 156 + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) 157 + jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); 158 + 159 + return 0; 160 + } 161 + 162 + /** 163 + * jpeg_v4_0_3_suspend - suspend JPEG block 164 + * 165 + * @handle: amdgpu_device pointer 166 + * 167 + * HW fini and suspend JPEG block 168 + */ 169 + static int jpeg_v4_0_3_suspend(void *handle) 170 + { 171 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 172 + 
int r; 173 + 174 + r = jpeg_v4_0_3_hw_fini(adev); 175 + if (r) 176 + return r; 177 + 178 + r = amdgpu_jpeg_suspend(adev); 179 + 180 + return r; 181 + } 182 + 183 + /** 184 + * jpeg_v4_0_3_resume - resume JPEG block 185 + * 186 + * @handle: amdgpu_device pointer 187 + * 188 + * Resume firmware and hw init JPEG block 189 + */ 190 + static int jpeg_v4_0_3_resume(void *handle) 191 + { 192 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 193 + int r; 194 + 195 + r = amdgpu_jpeg_resume(adev); 196 + if (r) 197 + return r; 198 + 199 + r = jpeg_v4_0_3_hw_init(adev); 200 + 201 + return r; 202 + } 203 + 204 + static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev) 205 + { 206 + uint32_t data; 207 + 208 + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); 209 + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) 210 + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 211 + else 212 + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 213 + 214 + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; 215 + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; 216 + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); 217 + 218 + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); 219 + data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK 220 + | JPEG_CGC_GATE__JPEG2_DEC_MASK 221 + | JPEG_CGC_GATE__JMCIF_MASK 222 + | JPEG_CGC_GATE__JRBBM_MASK); 223 + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); 224 + } 225 + 226 + static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev) 227 + { 228 + uint32_t data; 229 + 230 + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); 231 + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) 232 + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 233 + else 234 + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 235 + 236 + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; 237 + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; 238 + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); 239 + 240 + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); 241 + 
data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK 242 + |JPEG_CGC_GATE__JPEG2_DEC_MASK 243 + |JPEG_CGC_GATE__JMCIF_MASK 244 + |JPEG_CGC_GATE__JRBBM_MASK); 245 + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); 246 + } 247 + 248 + /** 249 + * jpeg_v4_0_3_start - start JPEG block 250 + * 251 + * @adev: amdgpu_device pointer 252 + * 253 + * Setup and start the JPEG block 254 + */ 255 + static int jpeg_v4_0_3_start(struct amdgpu_device *adev) 256 + { 257 + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; 258 + 259 + WREG32_SOC15(JPEG, 0, regUVD_PGFSM_CONFIG, 260 + 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); 261 + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS, 262 + UVD_PGFSM_STATUS__UVDJ_PWR_ON << 263 + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, 264 + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); 265 + 266 + /* disable anti hang mechanism */ 267 + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, 268 + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); 269 + 270 + /* JPEG disable CGC */ 271 + jpeg_v4_0_3_disable_clock_gating(adev); 272 + 273 + /* MJPEG global tiling registers */ 274 + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX8_ADDR_CONFIG, 275 + adev->gfx.config.gb_addr_config); 276 + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, 277 + adev->gfx.config.gb_addr_config); 278 + 279 + /* enable JMI channel */ 280 + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, 281 + ~UVD_JMI_CNTL__SOFT_RESET_MASK); 282 + 283 + /* enable System Interrupt for JRBC */ 284 + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), 285 + JPEG_SYS_INT_EN__DJRBC0_MASK, 286 + ~JPEG_SYS_INT_EN__DJRBC0_MASK); 287 + 288 + WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, 0); 289 + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); 290 + WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, 291 + lower_32_bits(ring->gpu_addr)); 292 + WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, 293 + upper_32_bits(ring->gpu_addr)); 
294 + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR, 0); 295 + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, 0); 296 + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, 0x00000002L); 297 + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE, ring->ring_size / 4); 298 + ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR); 299 + 300 + return 0; 301 + } 302 + 303 + /** 304 + * jpeg_v4_0_3_stop - stop JPEG block 305 + * 306 + * @adev: amdgpu_device pointer 307 + * 308 + * stop the JPEG block 309 + */ 310 + static int jpeg_v4_0_3_stop(struct amdgpu_device *adev) 311 + { 312 + /* reset JMI */ 313 + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 314 + UVD_JMI_CNTL__SOFT_RESET_MASK, 315 + ~UVD_JMI_CNTL__SOFT_RESET_MASK); 316 + 317 + jpeg_v4_0_3_enable_clock_gating(adev); 318 + 319 + /* enable anti hang mechanism */ 320 + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 321 + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, 322 + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); 323 + 324 + WREG32_SOC15(JPEG, 0, regUVD_PGFSM_CONFIG, 325 + 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); 326 + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS, 327 + UVD_PGFSM_STATUS__UVDJ_PWR_OFF << 328 + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, 329 + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); 330 + 331 + return 0; 332 + } 333 + 334 + /** 335 + * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer 336 + * 337 + * @ring: amdgpu_ring pointer 338 + * 339 + * Returns the current hardware read pointer 340 + */ 341 + static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring) 342 + { 343 + struct amdgpu_device *adev = ring->adev; 344 + 345 + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_RPTR); 346 + } 347 + 348 + /** 349 + * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer 350 + * 351 + * @ring: amdgpu_ring pointer 352 + * 353 + * Returns the current hardware write pointer 354 + */ 355 + static uint64_t 
jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) 356 + { 357 + struct amdgpu_device *adev = ring->adev; 358 + 359 + if (ring->use_doorbell) 360 + return adev->wb.wb[ring->wptr_offs]; 361 + else 362 + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR); 363 + } 364 + 365 + /** 366 + * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer 367 + * 368 + * @ring: amdgpu_ring pointer 369 + * 370 + * Commits the write pointer to the hardware 371 + */ 372 + static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) 373 + { 374 + struct amdgpu_device *adev = ring->adev; 375 + 376 + if (ring->use_doorbell) { 377 + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 378 + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 379 + } else { 380 + WREG32_SOC15(JPEG, ring->me, 381 + regUVD_JRBC0_UVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); 382 + } 383 + } 384 + 385 + /** 386 + * jpeg_v4_0_3_dec_ring_insert_start - insert a start command 387 + * 388 + * @ring: amdgpu_ring pointer 389 + * 390 + * Write a start command to the ring. 391 + */ 392 + static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) 393 + { 394 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 395 + 0, 0, PACKETJ_TYPE0)); 396 + amdgpu_ring_write(ring, 0x62a04);/* TODO: PCTL0_MMHUB_DEEPSLEEP_IB */ 397 + 398 + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 399 + 0, 0, PACKETJ_TYPE0)); 400 + amdgpu_ring_write(ring, 0x80004000); 401 + } 402 + 403 + /** 404 + * jpeg_v4_0_3_dec_ring_insert_end - insert a end command 405 + * 406 + * @ring: amdgpu_ring pointer 407 + * 408 + * Write a end command to the ring. 
409 + */ 410 + static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) 411 + { 412 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 413 + 0, 0, PACKETJ_TYPE0)); 414 + amdgpu_ring_write(ring, 0x62a04); 415 + 416 + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 417 + 0, 0, PACKETJ_TYPE0)); 418 + amdgpu_ring_write(ring, 0x00004000); 419 + } 420 + 421 + /** 422 + * jpeg_v4_0_3_dec_ring_emit_fence - emit an fence & trap command 423 + * 424 + * @ring: amdgpu_ring pointer 425 + * @addr: address 426 + * @seq: sequence number 427 + * @flags: fence related flags 428 + * 429 + * Write a fence and a trap command to the ring. 430 + */ 431 + static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 432 + unsigned int flags) 433 + { 434 + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 435 + 436 + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, 437 + 0, 0, PACKETJ_TYPE0)); 438 + amdgpu_ring_write(ring, seq); 439 + 440 + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, 441 + 0, 0, PACKETJ_TYPE0)); 442 + amdgpu_ring_write(ring, seq); 443 + 444 + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, 445 + 0, 0, PACKETJ_TYPE0)); 446 + amdgpu_ring_write(ring, lower_32_bits(addr)); 447 + 448 + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, 449 + 0, 0, PACKETJ_TYPE0)); 450 + amdgpu_ring_write(ring, upper_32_bits(addr)); 451 + 452 + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, 453 + 0, 0, PACKETJ_TYPE0)); 454 + amdgpu_ring_write(ring, 0x8); 455 + 456 + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, 457 + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); 458 + amdgpu_ring_write(ring, 0); 459 + 460 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 461 + 0, 0, PACKETJ_TYPE0)); 462 + amdgpu_ring_write(ring, 
0x3fbc); 463 + 464 + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 465 + 0, 0, PACKETJ_TYPE0)); 466 + amdgpu_ring_write(ring, 0x1); 467 + 468 + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); 469 + amdgpu_ring_write(ring, 0); 470 + } 471 + 472 + /** 473 + * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer 474 + * 475 + * @ring: amdgpu_ring pointer 476 + * @job: job to retrieve vmid from 477 + * @ib: indirect buffer to execute 478 + * @flags: unused 479 + * 480 + * Write ring commands to execute the indirect buffer. 481 + */ 482 + static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, 483 + struct amdgpu_job *job, 484 + struct amdgpu_ib *ib, 485 + uint32_t flags) 486 + { 487 + unsigned int vmid = AMDGPU_JOB_GET_VMID(job); 488 + 489 + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 490 + 0, 0, PACKETJ_TYPE0)); 491 + amdgpu_ring_write(ring, (vmid | (vmid << 4))); 492 + 493 + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 494 + 0, 0, PACKETJ_TYPE0)); 495 + amdgpu_ring_write(ring, (vmid | (vmid << 4))); 496 + 497 + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 498 + 0, 0, PACKETJ_TYPE0)); 499 + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); 500 + 501 + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 502 + 0, 0, PACKETJ_TYPE0)); 503 + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 504 + 505 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, 506 + 0, 0, PACKETJ_TYPE0)); 507 + amdgpu_ring_write(ring, ib->length_dw); 508 + 509 + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, 510 + 0, 0, PACKETJ_TYPE0)); 511 + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); 512 + 513 + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, 514 + 0, 0, PACKETJ_TYPE0)); 515 + amdgpu_ring_write(ring, 
upper_32_bits(ring->gpu_addr)); 516 + 517 + amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); 518 + amdgpu_ring_write(ring, 0); 519 + 520 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 521 + 0, 0, PACKETJ_TYPE0)); 522 + amdgpu_ring_write(ring, 0x01400200); 523 + 524 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, 525 + 0, 0, PACKETJ_TYPE0)); 526 + amdgpu_ring_write(ring, 0x2); 527 + 528 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET, 529 + 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); 530 + amdgpu_ring_write(ring, 0x2); 531 + } 532 + 533 + static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 534 + uint32_t val, uint32_t mask) 535 + { 536 + uint32_t reg_offset = (reg << 2); 537 + 538 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 539 + 0, 0, PACKETJ_TYPE0)); 540 + amdgpu_ring_write(ring, 0x01400200); 541 + 542 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, 543 + 0, 0, PACKETJ_TYPE0)); 544 + amdgpu_ring_write(ring, val); 545 + 546 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 547 + 0, 0, PACKETJ_TYPE0)); 548 + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { 549 + amdgpu_ring_write(ring, 0); 550 + amdgpu_ring_write(ring, 551 + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); 552 + } else { 553 + amdgpu_ring_write(ring, reg_offset); 554 + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 555 + 0, 0, PACKETJ_TYPE3)); 556 + } 557 + amdgpu_ring_write(ring, mask); 558 + } 559 + 560 + static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, 561 + unsigned int vmid, uint64_t pd_addr) 562 + { 563 + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; 564 + uint32_t data0, data1, mask; 565 + 566 + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 567 + 568 + /* wait for register write 
*/ 569 + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; 570 + data1 = lower_32_bits(pd_addr); 571 + mask = 0xffffffff; 572 + jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask); 573 + } 574 + 575 + static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) 576 + { 577 + uint32_t reg_offset = (reg << 2); 578 + 579 + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 580 + 0, 0, PACKETJ_TYPE0)); 581 + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { 582 + amdgpu_ring_write(ring, 0); 583 + amdgpu_ring_write(ring, 584 + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); 585 + } else { 586 + amdgpu_ring_write(ring, reg_offset); 587 + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 588 + 0, 0, PACKETJ_TYPE0)); 589 + } 590 + amdgpu_ring_write(ring, val); 591 + } 592 + 593 + static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) 594 + { 595 + int i; 596 + 597 + WARN_ON(ring->wptr % 2 || count % 2); 598 + 599 + for (i = 0; i < count / 2; i++) { 600 + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); 601 + amdgpu_ring_write(ring, 0); 602 + } 603 + } 604 + 605 + static bool jpeg_v4_0_3_is_idle(void *handle) 606 + { 607 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 608 + 609 + return ((RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS) & 610 + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == 611 + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); 612 + } 613 + 614 + static int jpeg_v4_0_3_wait_for_idle(void *handle) 615 + { 616 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 617 + int ret; 618 + 619 + ret = SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS, 620 + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, 621 + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); 622 + return ret; 623 + } 624 + 625 + static int jpeg_v4_0_3_set_clockgating_state(void *handle, 626 + enum amd_clockgating_state state) 627 + { 
628 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 629 + bool enable = (state == AMD_CG_STATE_GATE); 630 + 631 + if (enable) { 632 + if (!jpeg_v4_0_3_is_idle(handle)) 633 + return -EBUSY; 634 + jpeg_v4_0_3_enable_clock_gating(adev); 635 + } else { 636 + jpeg_v4_0_3_disable_clock_gating(adev); 637 + } 638 + 639 + return 0; 640 + } 641 + 642 + static int jpeg_v4_0_3_set_powergating_state(void *handle, 643 + enum amd_powergating_state state) 644 + { 645 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 646 + int ret; 647 + 648 + if (state == adev->jpeg.cur_state) 649 + return 0; 650 + 651 + if (state == AMD_PG_STATE_GATE) 652 + ret = jpeg_v4_0_3_stop(adev); 653 + else 654 + ret = jpeg_v4_0_3_start(adev); 655 + 656 + if (!ret) 657 + adev->jpeg.cur_state = state; 658 + 659 + return ret; 660 + } 661 + 662 + static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev, 663 + struct amdgpu_irq_src *source, 664 + unsigned int type, 665 + enum amdgpu_interrupt_state state) 666 + { 667 + return 0; 668 + } 669 + 670 + static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, 671 + struct amdgpu_irq_src *source, 672 + struct amdgpu_iv_entry *entry) 673 + { 674 + DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); 675 + 676 + switch (entry->src_id) { 677 + case VCN_2_0__SRCID__JPEG_DECODE: 678 + amdgpu_fence_process(&adev->jpeg.inst->ring_dec); 679 + break; 680 + default: 681 + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", 682 + entry->src_id, entry->src_data[0]); 683 + break; 684 + } 685 + 686 + return 0; 687 + } 688 + 689 + static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { 690 + .name = "jpeg_v4_0_3", 691 + .early_init = jpeg_v4_0_3_early_init, 692 + .late_init = NULL, 693 + .sw_init = jpeg_v4_0_3_sw_init, 694 + .sw_fini = jpeg_v4_0_3_sw_fini, 695 + .hw_init = jpeg_v4_0_3_hw_init, 696 + .hw_fini = jpeg_v4_0_3_hw_fini, 697 + .suspend = jpeg_v4_0_3_suspend, 698 + .resume = jpeg_v4_0_3_resume, 699 + .is_idle = 
jpeg_v4_0_3_is_idle, 700 + .wait_for_idle = jpeg_v4_0_3_wait_for_idle, 701 + .check_soft_reset = NULL, 702 + .pre_soft_reset = NULL, 703 + .soft_reset = NULL, 704 + .post_soft_reset = NULL, 705 + .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, 706 + .set_powergating_state = jpeg_v4_0_3_set_powergating_state, 707 + }; 708 + 709 + static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { 710 + .type = AMDGPU_RING_TYPE_VCN_JPEG, 711 + .align_mask = 0xf, 712 + .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, 713 + .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, 714 + .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, 715 + .emit_frame_size = 716 + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 717 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 718 + 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */ 719 + 18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */ 720 + 8 + 16, 721 + .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */ 722 + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, 723 + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, 724 + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, 725 + .test_ring = amdgpu_jpeg_dec_ring_test_ring, 726 + .test_ib = amdgpu_jpeg_dec_ring_test_ib, 727 + .insert_nop = jpeg_v4_0_3_dec_ring_nop, 728 + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, 729 + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, 730 + .pad_ib = amdgpu_ring_generic_pad_ib, 731 + .begin_use = amdgpu_jpeg_ring_begin_use, 732 + .end_use = amdgpu_jpeg_ring_end_use, 733 + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, 734 + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, 735 + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, 736 + }; 737 + 738 + static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) 739 + { 740 + adev->jpeg.inst->ring_dec.funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; 741 + adev->jpeg.inst->ring_dec.me = 0; 742 + DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); 743 + } 744 + 745 + static const struct 
amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = { 746 + .set = jpeg_v4_0_3_set_interrupt_state, 747 + .process = jpeg_v4_0_3_process_interrupt, 748 + }; 749 + 750 + static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) 751 + { 752 + adev->jpeg.inst->irq.num_types = 1; 753 + adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs; 754 + } 755 + 756 + const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = { 757 + .type = AMD_IP_BLOCK_TYPE_JPEG, 758 + .major = 4, 759 + .minor = 0, 760 + .rev = 3, 761 + .funcs = &jpeg_v4_0_3_ip_funcs, 762 + };
+49
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __JPEG_V4_0_3_H__
#define __JPEG_V4_0_3_H__

/*
 * "Internal" register offsets used as the first argument of PACKETJ ring
 * packets in jpeg_v4_0_3.c (command emission), as opposed to the MMIO
 * offsets from vcn_4_0_3_offset.h used for direct register access.
 */
#define regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET		0x1bfff
#define regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET			0x404d
#define regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET			0x404e
#define regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET			0x404f
#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET	0x40ab
#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET	0x40ac
#define regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET			0x40a4
#define regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET			0x40a6
#define regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET	0x40b6
#define regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET	0x40b7
#define regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET			0x4082
#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET	0x42d4
#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET	0x42d5
#define regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET		0x4085
#define regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET			0x4084
#define regUVD_JRBC_STATUS_INTERNAL_OFFSET			0x4089
#define regUVD_JPEG_PITCH_INTERNAL_OFFSET			0x4043

/* External-register write address used by PACKETJ indirect register access. */
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR			0x18000

/* IP block descriptor registered with the amdgpu IP discovery code. */
extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;

#endif /* __JPEG_V4_0_3_H__ */