Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/amdgpu: add gmc v10 ip block for navi10 (v6)

GMC is the GPU memory controller.

v1: add place holder and initial basic implementation (Ray)
v2: retire unused amdgpu_gart_set_defaults (Hawking)
v3: re-work get_vm_pde function (Hawking)
v4: replace legacy amdgpu_vram/gtt_location with
amdgpu_gmc_vram/gtt_location (Hawking)
v5: squash in updates (Alex)
v6: use get_vbios_fb_size (Alex)

Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Hawking Zhang and committed by Alex Deucher
f9df67e9 adc43c1b

+947 -1
+1 -1
drivers/gpu/drm/amd/amdgpu/Makefile
 	gmc_v7_0.o \
 	gmc_v8_0.o \
 	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \
-	gfxhub_v2_0.o mmhub_v2_0.o
+	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o
 
 # add IH block
 amdgpu-y += \
+916
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v10_0.h"

#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
#include "dcn/dcn_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "navi10_enum.h"

#include "soc15.h"
#include "soc15_common.h"

#include "nbio_v2_3.h"

#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
#include "athub_v2_0.h"

/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS	8

#if 0
static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
{
	/* TODO add golden setting for hdp */
};
#endif

static int
gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *src, unsigned type,
				   enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i;

	bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		/* MM HUB */
		hub = &adev->vmhub[AMDGPU_MMHUB];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp &= ~bits[AMDGPU_MMHUB];
			WREG32(reg, tmp);
		}

		/* GFX HUB */
		hub = &adev->vmhub[AMDGPU_GFXHUB];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp &= ~bits[AMDGPU_GFXHUB];
			WREG32(reg, tmp);
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		/* MM HUB */
		hub = &adev->vmhub[AMDGPU_MMHUB];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp |= bits[AMDGPU_MMHUB];
			WREG32(reg, tmp);
		}

		/* GFX HUB */
		hub = &adev->vmhub[AMDGPU_GFXHUB];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp |= bits[AMDGPU_GFXHUB];
			WREG32(reg, tmp);
		}
		break;
	default:
		break;
	}

	return 0;
}

static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
				       struct amdgpu_irq_src *source,
				       struct amdgpu_iv_entry *entry)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
	uint32_t status = 0;
	u64 addr;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (!amdgpu_sriov_vf(adev)) {
		status = RREG32(hub->vm_l2_pro_fault_status);
		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
	}

	if (printk_ratelimit()) {
		dev_err(adev->dev,
			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
			entry->vmid_src ? "mmhub" : "gfxhub",
			entry->src_id, entry->ring_id, entry->vmid,
			entry->pasid);
		dev_err(adev->dev, " at page 0x%016llx from %d\n",
			addr, entry->client_id);
		if (!amdgpu_sriov_vf(adev))
			dev_err(adev->dev,
				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
				status);
	}

	return 0;
}

static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = {
	.set = gmc_v10_0_vm_fault_interrupt_state,
	.process = gmc_v10_0_process_interrupt,
};

static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs;
}

static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
					     uint32_t flush_type)
{
	u32 req = 0;

	/* invalidate using legacy mode on vmid*/
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

	return req;
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
				   unsigned int vmhub, uint32_t flush_type)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
	/* Use register 17 for GART */
	const unsigned eng = 17;
	unsigned int i;

	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);

	/* Wait for ACK with a delay.*/
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
		tmp &= 1 << vmid;
		if (tmp)
			break;

		udelay(1);
	}

	if (i < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 *
 * Flush the TLB for the requested page table.
 */
static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
				    uint32_t vmid, uint32_t flush_type)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct dma_fence *fence;
	struct amdgpu_job *job;

	int r;

	/* flush hdp cache */
	adev->nbio_funcs->hdp_flush(adev, NULL);

	mutex_lock(&adev->mman.gtt_window_lock);

	gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0);
	if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
	    adev->asic_type != CHIP_NAVI10) {
		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0);
		mutex_unlock(&adev->mman.gtt_window_lock);
		return;
	}

	/* The SDMA on Navi has a bug which can theoretically result in memory
	 * corruption if an invalidation happens at the same time as a VA
	 * translation. Avoid this by doing the invalidation from the SDMA
	 * itself.
	 */
	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
	if (r)
		goto error_alloc;

	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
	job->vm_needs_flush = true;
	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_submit;

	mutex_unlock(&adev->mman.gtt_window_lock);

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return;

error_submit:
	amdgpu_job_free(job);

error_alloc:
	mutex_unlock(&adev->mman.gtt_window_lock);
	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
}

static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					     unsigned vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);

	/* wait for the invalidate to complete */
	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
				  1 << vmid, 1 << vmid);

	return pd_addr;
}

static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					 unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format on NAVI 10:
 * 63:59 reserved
 * 58:57 reserved
 * 56 F
 * 55 L
 * 54 reserved
 * 53:52 SW
 * 51 T
 * 50:48 mtype
 * 47:12 4k physical page base address
 * 11:7 fragment
 * 6 write
 * 5 read
 * 4 exe
 * 3 Z
 * 2 snooped
 * 1 system
 * 0 valid
 *
 * PDE format on NAVI 10:
 * 63:59 block fragment size
 * 58:55 reserved
 * 54 P
 * 53:48 reserved
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */
static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev,
					   uint32_t flags)
{
	uint64_t pte_flag = 0;

	if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
		pte_flag |= AMDGPU_PTE_EXECUTABLE;
	if (flags & AMDGPU_VM_PAGE_READABLE)
		pte_flag |= AMDGPU_PTE_READABLE;
	if (flags & AMDGPU_VM_PAGE_WRITEABLE)
		pte_flag |= AMDGPU_PTE_WRITEABLE;

	switch (flags & AMDGPU_VM_MTYPE_MASK) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
		break;
	case AMDGPU_VM_MTYPE_NC:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
		break;
	case AMDGPU_VM_MTYPE_WC:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
		break;
	case AMDGPU_VM_MTYPE_CC:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
		break;
	case AMDGPU_VM_MTYPE_UC:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
		break;
	default:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
		break;
	}

	if (flags & AMDGPU_VM_PAGE_PRT)
		pte_flag |= AMDGPU_PTE_PRT;

	return pte_flag;
}

static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
				 uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}

static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
	.get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags,
	.get_vm_pde = gmc_v10_0_get_vm_pde
};

static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	if (adev->gmc.gmc_funcs == NULL)
		adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs;
}

static int gmc_v10_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v10_0_set_gmc_funcs(adev);
	gmc_v10_0_set_irq_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;

	return 0;
}

static int gmc_v10_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
	unsigned i;

	for(i = 0; i < adev->num_rings; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];
		unsigned vmhub = ring->funcs->vmhub;

		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
		dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
			 ring->idx, ring->name, ring->vm_inv_eng,
			 ring->funcs->vmhub);
	}

	/* Engine 17 is used for GART flushes */
	for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
		BUG_ON(vm_inv_eng[i] > 17);

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}

static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
					struct amdgpu_gmc *mc)
{
	u64 base = 0;

	if (!amdgpu_sriov_vf(adev))
		base = gfxhub_v2_0_get_fb_location(adev);

	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
	amdgpu_gmc_gart_location(adev, mc);

	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
}

/**
 * gmc_v10_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
{
	int chansize, numchan;

	if (!amdgpu_emu_mode)
		adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
	else {
		/* hard code vram_width for emulation */
		chansize = 128;
		numchan = 1;
		adev->gmc.vram_width = numchan * chansize;
	}

	/* Could aper size report 0 ? */
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

	/* size in MB on si */
	adev->gmc.mc_vram_size =
		adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
	adev->gmc.visible_vram_size = adev->gmc.aper_size;

	/* In case the PCI BAR is larger than the actual amount of vram */
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_NAVI10:
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		}
	} else
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;

	gmc_v10_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}

static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "NAVI10 PCIE GART already initialized\n");
		return 0;
	}

	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;

	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
				    AMDGPU_PTE_EXECUTABLE;

	return amdgpu_gart_table_vram_alloc(adev);
}

static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	unsigned size;

	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
	} else {
		u32 viewport;
		u32 pitch;

		viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
		pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH);
		size = (REG_GET_FIELD(viewport,
				      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
			REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
			4);
	}
	/* return 0 if the pre-OS buffer uses up most of vram */
	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) {
		DRM_ERROR("Warning: pre-OS buffer uses most of vram, \
				be aware of gart table overwrite\n");
		return 0;
	}

	return size;
}


static int gmc_v10_0_sw_init(void *handle)
{
	int r;
	int dma_bits;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfxhub_v2_0_init(adev);
	mmhub_v2_0_init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Navi10,
		 * block size 512 (9bit)
		 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* This interrupt is VMC page fault.*/
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
			      VMC_1_0__SRCID__VM_FAULT,
			      &adev->gmc.vm_fault);
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
			      UTCL2_1_0__SRCID__FAULT,
			      &adev->gmc.vm_fault);
	if (r)
		return r;

	/*
	 * Set the internal MC address mask. This is the max address of the GPU's
	 * internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

	/*
	 * Reserve 8M stolen memory for navi10 like vega10
	 * TODO: will check if it's really needed on asic.
	 */
	if (amdgpu_emu_mode == 1)
		adev->gmc.stolen_size = 0;
	else
		adev->gmc.stolen_size = 9 * 1024 * 1024;

	/*
	 * Set DMA mask + need_dma32 flags.
	 * PCIE - can handle 44-bits.
	 * IGP - can handle 44-bits
	 * PCI - dma32 for legacy pci gart, 44 bits on navi10
	 */
	adev->need_dma32 = false;
	dma_bits = adev->need_dma32 ? 32 : 44;

	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
	if (r) {
		adev->need_dma32 = true;
		dma_bits = 32;
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
	}

	r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
	if (r) {
		pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
		printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
	}

	r = gmc_v10_0_mc_init(adev);
	if (r)
		return r;

	adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v10_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * number of VMs
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1-7
	 * amdkfd will use VMIDs 8-15
	 */
	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;

	amdgpu_vm_manager_init(adev);

	return 0;
}

/**
 * gmc_v10_0_gart_fini - vm fini callback
 *
 * @adev: amdgpu_device pointer
 *
 * Tears down the driver GART/VM setup.
 */
static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
{
	amdgpu_gart_table_vram_free(adev);
	amdgpu_gart_fini(adev);
}

static int gmc_v10_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_vm_manager_fini(adev);
	gmc_v10_0_gart_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_bo_fini(adev);

	return 0;
}

static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		break;
	default:
		break;
	}
}

/**
 * gmc_v10_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
{
	int r;
	bool value;
	u32 tmp;

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}

	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = gfxhub_v2_0_gart_enable(adev);
	if (r)
		return r;

	r = mmhub_v2_0_gart_enable(adev);
	if (r)
		return r;

	tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
	tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
	WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	/* Flush HDP after it is initialized */
	adev->nbio_funcs->hdp_flush(adev, NULL);

	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
		false : true;

	gfxhub_v2_0_set_fault_enable_default(adev, value);
	mmhub_v2_0_set_fault_enable_default(adev, value);
	gmc_v10_0_flush_gpu_tlb(adev, 0, 0);

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));

	adev->gart.ready = true;

	return 0;
}

static int gmc_v10_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* The sequence of these two function calls matters.*/
	gmc_v10_0_init_golden_registers(adev);

	r = gmc_v10_0_gart_enable(adev);
	if (r)
		return r;

	return 0;
}

/**
 * gmc_v10_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page table.
 */
static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
	gfxhub_v2_0_gart_disable(adev);
	mmhub_v2_0_gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v10_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v10_0_gart_disable(adev);

	return 0;
}

static int gmc_v10_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v10_0_hw_fini(adev);

	return 0;
}

static int gmc_v10_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v10_0_hw_init(adev);
	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}

static bool gmc_v10_0_is_idle(void *handle)
{
	/* MC is always ready in GMC v10.*/
	return true;
}

static int gmc_v10_0_wait_for_idle(void *handle)
{
	/* There is no need to wait for MC idle in GMC v10.*/
	return 0;
}

static int gmc_v10_0_soft_reset(void *handle)
{
	return 0;
}

static int gmc_v10_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = mmhub_v2_0_set_clockgating(adev, state);
	if (r)
		return r;

	return athub_v2_0_set_clockgating(adev, state);
}

static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	mmhub_v2_0_get_clockgating(adev, flags);

	athub_v2_0_get_clockgating(adev, flags);
}

static int gmc_v10_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}

const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
	.name = "gmc_v10_0",
	.early_init = gmc_v10_0_early_init,
	.late_init = gmc_v10_0_late_init,
	.sw_init = gmc_v10_0_sw_init,
	.sw_fini = gmc_v10_0_sw_fini,
	.hw_init = gmc_v10_0_hw_init,
	.hw_fini = gmc_v10_0_hw_fini,
	.suspend = gmc_v10_0_suspend,
	.resume = gmc_v10_0_resume,
	.is_idle = gmc_v10_0_is_idle,
	.wait_for_idle = gmc_v10_0_wait_for_idle,
	.soft_reset = gmc_v10_0_soft_reset,
	.set_clockgating_state = gmc_v10_0_set_clockgating_state,
	.set_powergating_state = gmc_v10_0_set_powergating_state,
	.get_clockgating_state = gmc_v10_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v10_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.major = 10,
	.minor = 0,
	.rev = 0,
	.funcs = &gmc_v10_0_ip_funcs,
};
+30
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h
/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __GMC_V10_0_H__
#define __GMC_V10_0_H__

extern const struct amd_ip_funcs gmc_v10_0_ip_funcs;
extern const struct amdgpu_ip_block_version gmc_v10_0_ip_block;

#endif
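
Note that this patch only exports gmc_v10_0_ip_block; nothing in the diff above registers it. For context, a minimal sketch of how a navi10 SoC setup path would typically hook the block up, assuming the existing amdgpu_device_ip_block_add() helper (the function name below is hypothetical and the actual wiring lands in the SoC init code in a separate change):

/* Hypothetical excerpt from navi10 SoC init (not part of this patch):
 * registering the GMC v10 IP block makes the amdgpu common code invoke
 * its amd_ip_funcs callbacks (early_init, sw_init, hw_init, ...) in the
 * usual IP block order.
 */
#include "amdgpu.h"
#include "gmc_v10_0.h"

static int nv_example_set_ip_blocks(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		/* common, IH, PSP, ... blocks would also be added here */
		amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}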