Merge tag 'drm-fixes-for-v4.12-rc1' of git://people.freedesktop.org/~airlied/linux

Pull drm fixes from Dave Airlie:
"AMD, nouveau, one i915, and one EDID fix for v4.12-rc1

Some fixes that it would be good to have in rc1. It contains the i915
warning-quieting fix that you reported.

It also has an amdgpu fixes pull, with lots of ongoing work on Vega10,
which is new in this kernel and still preliminary support, so it may see
a fair bit of movement.

Otherwise a few non-Vega10 AMD fixes, one EDID fix and some nouveau
regression fixers"

* tag 'drm-fixes-for-v4.12-rc1' of git://people.freedesktop.org/~airlied/linux: (144 commits)
drm/i915: Make vblank evade warnings optional
drm/nouveau/therm: remove ineffective workarounds for alarm bugs
drm/nouveau/tmr: avoid processing completed alarms when adding a new one
drm/nouveau/tmr: fix corruption of the pending list when rescheduling an alarm
drm/nouveau/tmr: handle races with hw when updating the next alarm time
drm/nouveau/tmr: ack interrupt before processing alarms
drm/nouveau/core: fix static checker warning
drm/nouveau/fb/ram/gf100-: remove 0x10f200 read
drm/nouveau/kms/nv50: skip core channel cursor update on position-only changes
drm/nouveau/kms/nv50: fix source-rect-only plane updates
drm/nouveau/kms/nv50: remove pointless argument to window atomic_check_acquire()
drm/amd/powerplay: refine pwm1_enable callback functions for CI.
drm/amd/powerplay: refine pwm1_enable callback functions for vi.
drm/amd/powerplay: refine pwm1_enable callback functions for Vega10.
drm/amdgpu: refine amdgpu pwm1_enable sysfs interface.
drm/amdgpu: add amd fan ctrl mode enums.
drm/amd/powerplay: add more smu message on Vega10.
drm/amdgpu: fix dependency issue
drm/amd: fix init order of sched job
drm/amdgpu: add some additional vega10 pci ids
...

+2435 -2124
+9 -7
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 110 110 extern int amdgpu_cntl_sb_buf_per_se; 111 111 extern int amdgpu_param_buf_per_se; 112 112 113 + #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ 113 114 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 114 115 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ 115 116 #define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) ··· 967 966 unsigned mc_arb_ramcfg; 968 967 unsigned gb_addr_config; 969 968 unsigned num_rbs; 969 + unsigned gs_vgt_table_depth; 970 + unsigned gs_prim_buffer_depth; 970 971 971 972 uint32_t tile_mode_array[32]; 972 973 uint32_t macrotile_mode_array[16]; ··· 983 980 struct amdgpu_cu_info { 984 981 uint32_t number; /* total active CU number */ 985 982 uint32_t ao_cu_mask; 983 + uint32_t wave_front_size; 986 984 uint32_t bitmap[4][4]; 987 985 }; 988 986 ··· 1004 1000 }; 1005 1001 1006 1002 enum { 1007 - PRIM = 0, 1008 - POS, 1009 - CNTL, 1010 - PARAM, 1003 + NGG_PRIM = 0, 1004 + NGG_POS, 1005 + NGG_CNTL, 1006 + NGG_PARAM, 1011 1007 NGG_BUF_MAX 1012 1008 }; 1013 1009 ··· 1129 1125 void *owner; 1130 1126 uint64_t fence_ctx; /* the fence_context this job uses */ 1131 1127 bool vm_needs_flush; 1128 + bool need_pipeline_sync; 1132 1129 unsigned vm_id; 1133 1130 uint64_t vm_pd_addr; 1134 1131 uint32_t gds_base, gds_size; ··· 1708 1703 1709 1704 #define WREG32_FIELD_OFFSET(reg, offset, field, val) \ 1710 1705 WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) 1711 - 1712 - #define WREG32_FIELD15(ip, idx, reg, field, val) \ 1713 - WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) 1714 1706 1715 1707 /* 1716 1708 * BIOS helpers.
+6
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
··· 1727 1727 { 1728 1728 int i; 1729 1729 1730 + /* 1731 + * VBIOS will check ASIC_INIT_COMPLETE bit to decide if 1732 + * execute ASIC_Init posting via driver 1733 + */ 1734 + adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK; 1735 + 1730 1736 for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) 1731 1737 WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]); 1732 1738 }
+20
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
··· 26 26 #include "atomfirmware.h" 27 27 #include "amdgpu_atomfirmware.h" 28 28 #include "atom.h" 29 + #include "atombios.h" 29 30 30 31 #define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) 31 32 ··· 78 77 { 79 78 int i; 80 79 80 + /* 81 + * VBIOS will check ASIC_INIT_COMPLETE bit to decide if 82 + * execute ASIC_Init posting via driver 83 + */ 84 + adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK; 85 + 81 86 for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) 82 87 WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); 88 + } 89 + 90 + void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev, 91 + bool hung) 92 + { 93 + u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3); 94 + 95 + if (hung) 96 + tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG; 97 + else 98 + tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG; 99 + 100 + WREG32(adev->bios_scratch_reg_offset + 3, tmp); 83 101 } 84 102 85 103 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
··· 28 28 void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); 29 29 void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev); 30 30 void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev); 31 + void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev, 32 + bool hung); 31 33 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); 32 34 33 35 #endif
+7 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
··· 117 117 } 118 118 119 119 out_cleanup: 120 + /* Check error value now. The value can be overwritten when clean up.*/ 121 + if (r) { 122 + DRM_ERROR("Error while benchmarking BO move.\n"); 123 + } 124 + 120 125 if (sobj) { 121 - r = amdgpu_bo_reserve(sobj, false); 126 + r = amdgpu_bo_reserve(sobj, true); 122 127 if (likely(r == 0)) { 123 128 amdgpu_bo_unpin(sobj); 124 129 amdgpu_bo_unreserve(sobj); ··· 131 126 amdgpu_bo_unref(&sobj); 132 127 } 133 128 if (dobj) { 134 - r = amdgpu_bo_reserve(dobj, false); 129 + r = amdgpu_bo_reserve(dobj, true); 135 130 if (likely(r == 0)) { 136 131 amdgpu_bo_unpin(dobj); 137 132 amdgpu_bo_unreserve(dobj); 138 133 } 139 134 amdgpu_bo_unref(&dobj); 140 - } 141 - 142 - if (r) { 143 - DRM_ERROR("Error while benchmarking BO move.\n"); 144 135 } 145 136 } 146 137
+5 -203
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
··· 42 42 struct amdgpu_device *adev = \ 43 43 ((struct amdgpu_cgs_device *)cgs_device)->adev 44 44 45 - static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, 46 - uint64_t *mc_start, uint64_t *mc_size, 47 - uint64_t *mem_size) 48 - { 49 - CGS_FUNC_ADEV; 50 - switch(type) { 51 - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: 52 - case CGS_GPU_MEM_TYPE__VISIBLE_FB: 53 - *mc_start = 0; 54 - *mc_size = adev->mc.visible_vram_size; 55 - *mem_size = adev->mc.visible_vram_size - adev->vram_pin_size; 56 - break; 57 - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: 58 - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: 59 - *mc_start = adev->mc.visible_vram_size; 60 - *mc_size = adev->mc.real_vram_size - adev->mc.visible_vram_size; 61 - *mem_size = *mc_size; 62 - break; 63 - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: 64 - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: 65 - *mc_start = adev->mc.gtt_start; 66 - *mc_size = adev->mc.gtt_size; 67 - *mem_size = adev->mc.gtt_size - adev->gart_pin_size; 68 - break; 69 - default: 70 - return -EINVAL; 71 - } 72 - 73 - return 0; 74 - } 75 - 76 - static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem, 77 - uint64_t size, 78 - uint64_t min_offset, uint64_t max_offset, 79 - cgs_handle_t *kmem_handle, uint64_t *mcaddr) 80 - { 81 - CGS_FUNC_ADEV; 82 - int ret; 83 - struct amdgpu_bo *bo; 84 - struct page *kmem_page = vmalloc_to_page(kmem); 85 - int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; 86 - 87 - struct sg_table *sg = drm_prime_pages_to_sg(&kmem_page, npages); 88 - ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false, 89 - AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, &bo); 90 - if (ret) 91 - return ret; 92 - ret = amdgpu_bo_reserve(bo, false); 93 - if (unlikely(ret != 0)) 94 - return ret; 95 - 96 - /* pin buffer into GTT */ 97 - ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT, 98 - min_offset, max_offset, mcaddr); 99 - amdgpu_bo_unreserve(bo); 100 - 101 - *kmem_handle = (cgs_handle_t)bo; 102 - return ret; 103 - } 104 - 105 - static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle) 106 - { 107 - struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; 108 - 109 - if (obj) { 110 - int r = amdgpu_bo_reserve(obj, false); 111 - if (likely(r == 0)) { 112 - amdgpu_bo_unpin(obj); 113 - amdgpu_bo_unreserve(obj); 114 - } 115 - amdgpu_bo_unref(&obj); 116 - 117 - } 118 - return 0; 119 - } 120 - 121 45 static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, 122 46 enum cgs_gpu_mem_type type, 123 47 uint64_t size, uint64_t align, ··· 139 215 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; 140 216 141 217 if (obj) { 142 - int r = amdgpu_bo_reserve(obj, false); 218 + int r = amdgpu_bo_reserve(obj, true); 143 219 if (likely(r == 0)) { 144 220 amdgpu_bo_kunmap(obj); 145 221 amdgpu_bo_unpin(obj); ··· 163 239 min_offset = obj->placements[0].fpfn << PAGE_SHIFT; 164 240 max_offset = obj->placements[0].lpfn << PAGE_SHIFT; 165 241 166 - r = amdgpu_bo_reserve(obj, false); 242 + r = amdgpu_bo_reserve(obj, true); 167 243 if (unlikely(r != 0)) 168 244 return r; 169 245 r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains, ··· 176 252 { 177 253 int r; 178 254 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; 179 - r = amdgpu_bo_reserve(obj, false); 255 + r = amdgpu_bo_reserve(obj, true); 180 256 if (unlikely(r != 0)) 181 257 return r; 182 258 r = amdgpu_bo_unpin(obj); ··· 189 265 { 190 266 int r; 191 267 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; 192 - r = amdgpu_bo_reserve(obj, false); 268 + r = 
amdgpu_bo_reserve(obj, true); 193 269 if (unlikely(r != 0)) 194 270 return r; 195 271 r = amdgpu_bo_kmap(obj, map); ··· 201 277 { 202 278 int r; 203 279 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; 204 - r = amdgpu_bo_reserve(obj, false); 280 + r = amdgpu_bo_reserve(obj, true); 205 281 if (unlikely(r != 0)) 206 282 return r; 207 283 amdgpu_bo_kunmap(obj); ··· 273 349 WARN(1, "Invalid indirect register space"); 274 350 } 275 351 276 - static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr) 277 - { 278 - CGS_FUNC_ADEV; 279 - uint8_t val; 280 - int ret = pci_read_config_byte(adev->pdev, addr, &val); 281 - if (WARN(ret, "pci_read_config_byte error")) 282 - return 0; 283 - return val; 284 - } 285 - 286 - static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr) 287 - { 288 - CGS_FUNC_ADEV; 289 - uint16_t val; 290 - int ret = pci_read_config_word(adev->pdev, addr, &val); 291 - if (WARN(ret, "pci_read_config_word error")) 292 - return 0; 293 - return val; 294 - } 295 - 296 - static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device, 297 - unsigned addr) 298 - { 299 - CGS_FUNC_ADEV; 300 - uint32_t val; 301 - int ret = pci_read_config_dword(adev->pdev, addr, &val); 302 - if (WARN(ret, "pci_read_config_dword error")) 303 - return 0; 304 - return val; 305 - } 306 - 307 - static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, unsigned addr, 308 - uint8_t value) 309 - { 310 - CGS_FUNC_ADEV; 311 - int ret = pci_write_config_byte(adev->pdev, addr, value); 312 - WARN(ret, "pci_write_config_byte error"); 313 - } 314 - 315 - static void amdgpu_cgs_write_pci_config_word(struct cgs_device *cgs_device, unsigned addr, 316 - uint16_t value) 317 - { 318 - CGS_FUNC_ADEV; 319 - int ret = pci_write_config_word(adev->pdev, addr, value); 320 - WARN(ret, "pci_write_config_word error"); 321 - } 322 - 323 - static void amdgpu_cgs_write_pci_config_dword(struct cgs_device *cgs_device, unsigned addr, 324 - uint32_t value) 325 - { 326 - CGS_FUNC_ADEV; 327 - int ret = pci_write_config_dword(adev->pdev, addr, value); 328 - WARN(ret, "pci_write_config_dword error"); 329 - } 330 - 331 - 332 352 static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, 333 353 enum cgs_resource_type resource_type, 334 354 uint64_t size, ··· 343 475 344 476 return amdgpu_atom_execute_table( 345 477 adev->mode_info.atom_context, table, args); 346 - } 347 - 348 - static int amdgpu_cgs_create_pm_request(struct cgs_device *cgs_device, cgs_handle_t *request) 349 - { 350 - /* TODO */ 351 - return 0; 352 - } 353 - 354 - static int amdgpu_cgs_destroy_pm_request(struct cgs_device *cgs_device, cgs_handle_t request) 355 - { 356 - /* TODO */ 357 - return 0; 358 - } 359 - 360 - static int amdgpu_cgs_set_pm_request(struct cgs_device *cgs_device, cgs_handle_t request, 361 - int active) 362 - { 363 - /* TODO */ 364 - return 0; 365 - } 366 - 367 - static int amdgpu_cgs_pm_request_clock(struct cgs_device *cgs_device, cgs_handle_t request, 368 - enum cgs_clock clock, unsigned freq) 369 - { 370 - /* TODO */ 371 - return 0; 372 - } 373 - 374 - static int amdgpu_cgs_pm_request_engine(struct cgs_device *cgs_device, cgs_handle_t request, 375 - enum cgs_engine engine, int powered) 376 - { 377 - /* TODO */ 378 - return 0; 379 - } 380 - 381 - 382 - 383 - static int amdgpu_cgs_pm_query_clock_limits(struct cgs_device *cgs_device, 384 - enum cgs_clock clock, 385 - struct cgs_clock_limits *limits) 386 - { 387 - /* TODO */ 388 - return 0; 389 - } 
390 - 391 - static int amdgpu_cgs_set_camera_voltages(struct cgs_device *cgs_device, uint32_t mask, 392 - const uint32_t *voltages) 393 - { 394 - DRM_ERROR("not implemented"); 395 - return -EPERM; 396 478 } 397 479 398 480 struct cgs_irq_params { ··· 1087 1269 } 1088 1270 1089 1271 static const struct cgs_ops amdgpu_cgs_ops = { 1090 - .gpu_mem_info = amdgpu_cgs_gpu_mem_info, 1091 - .gmap_kmem = amdgpu_cgs_gmap_kmem, 1092 - .gunmap_kmem = amdgpu_cgs_gunmap_kmem, 1093 1272 .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem, 1094 1273 .free_gpu_mem = amdgpu_cgs_free_gpu_mem, 1095 1274 .gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem, ··· 1097 1282 .write_register = amdgpu_cgs_write_register, 1098 1283 .read_ind_register = amdgpu_cgs_read_ind_register, 1099 1284 .write_ind_register = amdgpu_cgs_write_ind_register, 1100 - .read_pci_config_byte = amdgpu_cgs_read_pci_config_byte, 1101 - .read_pci_config_word = amdgpu_cgs_read_pci_config_word, 1102 - .read_pci_config_dword = amdgpu_cgs_read_pci_config_dword, 1103 - .write_pci_config_byte = amdgpu_cgs_write_pci_config_byte, 1104 - .write_pci_config_word = amdgpu_cgs_write_pci_config_word, 1105 - .write_pci_config_dword = amdgpu_cgs_write_pci_config_dword, 1106 1285 .get_pci_resource = amdgpu_cgs_get_pci_resource, 1107 1286 .atom_get_data_table = amdgpu_cgs_atom_get_data_table, 1108 1287 .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs, 1109 1288 .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table, 1110 - .create_pm_request = amdgpu_cgs_create_pm_request, 1111 - .destroy_pm_request = amdgpu_cgs_destroy_pm_request, 1112 - .set_pm_request = amdgpu_cgs_set_pm_request, 1113 - .pm_request_clock = amdgpu_cgs_pm_request_clock, 1114 - .pm_request_engine = amdgpu_cgs_pm_request_engine, 1115 - .pm_query_clock_limits = amdgpu_cgs_pm_query_clock_limits, 1116 - .set_camera_voltages = amdgpu_cgs_set_camera_voltages, 1117 1289 .get_firmware_info = amdgpu_cgs_get_firmware_info, 1118 1290 .rel_firmware = amdgpu_cgs_rel_firmware, 1119 1291 .set_powergating_state = amdgpu_cgs_set_powergating_state,
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 1074 1074 cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); 1075 1075 job->uf_sequence = cs->out.handle; 1076 1076 amdgpu_job_free_resources(job); 1077 + amdgpu_cs_parser_fini(p, 0, true); 1077 1078 1078 1079 trace_amdgpu_cs_ioctl(job); 1079 1080 amd_sched_entity_push_job(&job->base); ··· 1130 1129 goto out; 1131 1130 1132 1131 r = amdgpu_cs_submit(&parser, cs); 1132 + if (r) 1133 + goto out; 1133 1134 1135 + return 0; 1134 1136 out: 1135 1137 amdgpu_cs_parser_fini(&parser, r, reserved_buffers); 1136 1138 return r;
+3
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
··· 273 273 274 274 spin_lock(&ctx->ring_lock); 275 275 276 + if (seq == ~0ull) 277 + seq = ctx->rings[ring->idx].sequence - 1; 278 + 276 279 if (seq >= cring->sequence) { 277 280 spin_unlock(&ctx->ring_lock); 278 281 return ERR_PTR(-EINVAL);
+56 -50
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 53 53 #include "bif/bif_4_1_d.h" 54 54 #include <linux/pci.h> 55 55 #include <linux/firmware.h> 56 - #include "amdgpu_pm.h" 57 56 58 57 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); 59 58 static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); ··· 349 350 if (adev->vram_scratch.robj == NULL) { 350 351 return; 351 352 } 352 - r = amdgpu_bo_reserve(adev->vram_scratch.robj, false); 353 + r = amdgpu_bo_reserve(adev->vram_scratch.robj, true); 353 354 if (likely(r == 0)) { 354 355 amdgpu_bo_kunmap(adev->vram_scratch.robj); 355 356 amdgpu_bo_unpin(adev->vram_scratch.robj); ··· 421 422 if (adev->doorbell.num_doorbells == 0) 422 423 return -EINVAL; 423 424 424 - adev->doorbell.ptr = ioremap(adev->doorbell.base, adev->doorbell.num_doorbells * sizeof(u32)); 425 - if (adev->doorbell.ptr == NULL) { 425 + adev->doorbell.ptr = ioremap(adev->doorbell.base, 426 + adev->doorbell.num_doorbells * 427 + sizeof(u32)); 428 + if (adev->doorbell.ptr == NULL) 426 429 return -ENOMEM; 427 - } 428 - DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)adev->doorbell.base); 429 - DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size); 430 430 431 431 return 0; 432 432 } ··· 1582 1584 } 1583 1585 } 1584 1586 1585 - amdgpu_dpm_enable_uvd(adev, false); 1586 - amdgpu_dpm_enable_vce(adev, false); 1587 - 1588 1587 return 0; 1589 1588 } 1590 1589 ··· 1849 1854 1850 1855 /* mutex initialization are all done here so we 1851 1856 * can recall function without having locking issues */ 1852 - mutex_init(&adev->vm_manager.lock); 1853 1857 atomic_set(&adev->irq.ih.lock, 0); 1854 1858 mutex_init(&adev->firmware.mutex); 1855 1859 mutex_init(&adev->pm.mutex); ··· 2065 2071 2066 2072 DRM_INFO("amdgpu: finishing device.\n"); 2067 2073 adev->shutdown = true; 2068 - drm_crtc_force_disable_all(adev->ddev); 2074 + if (adev->mode_info.mode_config_initialized) 2075 + drm_crtc_force_disable_all(adev->ddev); 2069 2076 /* evict vram memory */ 2070 2077 amdgpu_bo_evict_vram(adev); 2071 2078 amdgpu_ib_pool_fini(adev); ··· 2141 2146 2142 2147 if (amdgpu_crtc->cursor_bo) { 2143 2148 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2144 - r = amdgpu_bo_reserve(aobj, false); 2149 + r = amdgpu_bo_reserve(aobj, true); 2145 2150 if (r == 0) { 2146 2151 amdgpu_bo_unpin(aobj); 2147 2152 amdgpu_bo_unreserve(aobj); ··· 2154 2159 robj = gem_to_amdgpu_bo(rfb->obj); 2155 2160 /* don't unpin kernel fb objects */ 2156 2161 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 2157 - r = amdgpu_bo_reserve(robj, false); 2162 + r = amdgpu_bo_reserve(robj, true); 2158 2163 if (r == 0) { 2159 2164 amdgpu_bo_unpin(robj); 2160 2165 amdgpu_bo_unreserve(robj); ··· 2211 2216 struct drm_connector *connector; 2212 2217 struct amdgpu_device *adev = dev->dev_private; 2213 2218 struct drm_crtc *crtc; 2214 - int r; 2219 + int r = 0; 2215 2220 2216 2221 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2217 2222 return 0; ··· 2223 2228 pci_set_power_state(dev->pdev, PCI_D0); 2224 2229 pci_restore_state(dev->pdev); 2225 2230 r = pci_enable_device(dev->pdev); 2226 - if (r) { 2227 - if (fbcon) 2228 - console_unlock(); 2229 - return r; 2230 - } 2231 + if (r) 2232 + goto unlock; 2231 2233 } 2232 2234 if (adev->is_atom_fw) 2233 2235 amdgpu_atomfirmware_scratch_regs_restore(adev); ··· 2241 2249 r = amdgpu_resume(adev); 2242 2250 if (r) { 2243 2251 DRM_ERROR("amdgpu_resume failed (%d).\n", r); 2244 - return r; 2252 + goto unlock; 2245 2253 } 2246 2254 amdgpu_fence_driver_resume(adev); 2247 2255 ··· 2252 2260 } 2253 2261 2254 2262 r 
= amdgpu_late_init(adev); 2255 - if (r) { 2256 - if (fbcon) 2257 - console_unlock(); 2258 - return r; 2259 - } 2263 + if (r) 2264 + goto unlock; 2260 2265 2261 2266 /* pin cursors */ 2262 2267 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { ··· 2261 2272 2262 2273 if (amdgpu_crtc->cursor_bo) { 2263 2274 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2264 - r = amdgpu_bo_reserve(aobj, false); 2275 + r = amdgpu_bo_reserve(aobj, true); 2265 2276 if (r == 0) { 2266 2277 r = amdgpu_bo_pin(aobj, 2267 2278 AMDGPU_GEM_DOMAIN_VRAM, ··· 2303 2314 dev->dev->power.disable_depth--; 2304 2315 #endif 2305 2316 2306 - if (fbcon) { 2317 + if (fbcon) 2307 2318 amdgpu_fbdev_set_suspend(adev, 0); 2308 - console_unlock(); 2309 - } 2310 2319 2311 - return 0; 2320 + unlock: 2321 + if (fbcon) 2322 + console_unlock(); 2323 + 2324 + return r; 2312 2325 } 2313 2326 2314 2327 static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) ··· 2421 2430 uint32_t domain; 2422 2431 int r; 2423 2432 2424 - if (!bo->shadow) 2425 - return 0; 2433 + if (!bo->shadow) 2434 + return 0; 2426 2435 2427 - r = amdgpu_bo_reserve(bo, false); 2428 - if (r) 2429 - return r; 2430 - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); 2431 - /* if bo has been evicted, then no need to recover */ 2432 - if (domain == AMDGPU_GEM_DOMAIN_VRAM) { 2433 - r = amdgpu_bo_restore_from_shadow(adev, ring, bo, 2436 + r = amdgpu_bo_reserve(bo, true); 2437 + if (r) 2438 + return r; 2439 + domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); 2440 + /* if bo has been evicted, then no need to recover */ 2441 + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { 2442 + r = amdgpu_bo_validate(bo->shadow); 2443 + if (r) { 2444 + DRM_ERROR("bo validate failed!\n"); 2445 + goto err; 2446 + } 2447 + 2448 + r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem); 2449 + if (r) { 2450 + DRM_ERROR("%p bind failed\n", bo->shadow); 2451 + goto err; 2452 + } 2453 + 2454 + r = amdgpu_bo_restore_from_shadow(adev, ring, bo, 2434 2455 NULL, fence, true); 2435 - if (r) { 2436 - DRM_ERROR("recover page table failed!\n"); 2437 - goto err; 2438 - } 2439 - } 2456 + if (r) { 2457 + DRM_ERROR("recover page table failed!\n"); 2458 + goto err; 2459 + } 2460 + } 2440 2461 err: 2441 - amdgpu_bo_unreserve(bo); 2442 - return r; 2462 + amdgpu_bo_unreserve(bo); 2463 + return r; 2443 2464 } 2444 2465 2445 2466 /** ··· 2523 2520 ring = adev->mman.buffer_funcs_ring; 2524 2521 mutex_lock(&adev->shadow_list_lock); 2525 2522 list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { 2523 + next = NULL; 2526 2524 amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); 2527 2525 if (fence) { 2528 2526 r = dma_fence_wait(fence, false); ··· 2597 2593 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 2598 2594 struct amdgpu_ring *ring = adev->rings[i]; 2599 2595 2600 - if (!ring) 2596 + if (!ring || !ring->sched.thread) 2601 2597 continue; 2602 2598 kthread_park(ring->sched.thread); 2603 2599 amd_sched_hw_job_reset(&ring->sched); ··· 2670 2666 DRM_INFO("recover vram bo from shadow\n"); 2671 2667 mutex_lock(&adev->shadow_list_lock); 2672 2668 list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { 2669 + next = NULL; 2673 2670 amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); 2674 2671 if (fence) { 2675 2672 r = dma_fence_wait(fence, false); ··· 2693 2688 } 2694 2689 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 2695 2690 struct amdgpu_ring *ring = adev->rings[i]; 2696 - if (!ring) 2691 + 2692 + if (!ring || !ring->sched.thread) 2697 2693 
continue; 2698 2694 2699 2695 amd_sched_job_recovery(&ring->sched); ··· 2703 2697 } else { 2704 2698 dev_err(adev->dev, "asic resume failed (%d).\n", r); 2705 2699 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 2706 - if (adev->rings[i]) { 2700 + if (adev->rings[i] && adev->rings[i]->sched.thread) { 2707 2701 kthread_unpark(adev->rings[i]->sched.thread); 2708 2702 } 2709 2703 }
+32 -112
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
··· 123 123 int r; 124 124 125 125 /* unpin of the old buffer */ 126 - r = amdgpu_bo_reserve(work->old_abo, false); 126 + r = amdgpu_bo_reserve(work->old_abo, true); 127 127 if (likely(r == 0)) { 128 128 r = amdgpu_bo_unpin(work->old_abo); 129 129 if (unlikely(r != 0)) { ··· 138 138 kfree(work); 139 139 } 140 140 141 - 142 - static void amdgpu_flip_work_cleanup(struct amdgpu_flip_work *work) 143 - { 144 - int i; 145 - 146 - amdgpu_bo_unref(&work->old_abo); 147 - dma_fence_put(work->excl); 148 - for (i = 0; i < work->shared_count; ++i) 149 - dma_fence_put(work->shared[i]); 150 - kfree(work->shared); 151 - kfree(work); 152 - } 153 - 154 - static void amdgpu_flip_cleanup_unreserve(struct amdgpu_flip_work *work, 155 - struct amdgpu_bo *new_abo) 156 - { 157 - amdgpu_bo_unreserve(new_abo); 158 - amdgpu_flip_work_cleanup(work); 159 - } 160 - 161 - static void amdgpu_flip_cleanup_unpin(struct amdgpu_flip_work *work, 162 - struct amdgpu_bo *new_abo) 163 - { 164 - if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) 165 - DRM_ERROR("failed to unpin new abo in error path\n"); 166 - amdgpu_flip_cleanup_unreserve(work, new_abo); 167 - } 168 - 169 - void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work, 170 - struct amdgpu_bo *new_abo) 171 - { 172 - if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) { 173 - DRM_ERROR("failed to reserve new abo in error path\n"); 174 - amdgpu_flip_work_cleanup(work); 175 - return; 176 - } 177 - amdgpu_flip_cleanup_unpin(work, new_abo); 178 - } 179 - 180 - int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc, 181 - struct drm_framebuffer *fb, 182 - struct drm_pending_vblank_event *event, 183 - uint32_t page_flip_flags, 184 - uint32_t target, 185 - struct amdgpu_flip_work **work_p, 186 - struct amdgpu_bo **new_abo_p) 141 + int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, 142 + struct drm_framebuffer *fb, 143 + struct drm_pending_vblank_event *event, 144 + uint32_t page_flip_flags, uint32_t target, 145 + struct drm_modeset_acquire_ctx *ctx) 187 146 { 188 147 struct drm_device *dev = crtc->dev; 189 148 struct amdgpu_device *adev = dev->dev_private; ··· 155 196 unsigned long flags; 156 197 u64 tiling_flags; 157 198 u64 base; 158 - int r; 199 + int i, r; 159 200 160 201 work = kzalloc(sizeof *work, GFP_KERNEL); 161 202 if (work == NULL) ··· 216 257 spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 217 258 r = -EBUSY; 218 259 goto pflip_cleanup; 219 - 220 260 } 221 - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 222 261 223 - *work_p = work; 224 - *new_abo_p = new_abo; 225 - 226 - return 0; 227 - 228 - pflip_cleanup: 229 - amdgpu_crtc_cleanup_flip_ctx(work, new_abo); 230 - return r; 231 - 232 - unpin: 233 - amdgpu_flip_cleanup_unpin(work, new_abo); 234 - return r; 235 - 236 - unreserve: 237 - amdgpu_flip_cleanup_unreserve(work, new_abo); 238 - return r; 239 - 240 - cleanup: 241 - amdgpu_flip_work_cleanup(work); 242 - return r; 243 - 244 - } 245 - 246 - void amdgpu_crtc_submit_flip(struct drm_crtc *crtc, 247 - struct drm_framebuffer *fb, 248 - struct amdgpu_flip_work *work, 249 - struct amdgpu_bo *new_abo) 250 - { 251 - unsigned long flags; 252 - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 253 - 254 - spin_lock_irqsave(&crtc->dev->event_lock, flags); 255 262 amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING; 256 263 amdgpu_crtc->pflip_works = work; 257 264 265 + 266 + DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", 267 + amdgpu_crtc->crtc_id, amdgpu_crtc, work); 258 268 /* update crtc fb */ 259 269 crtc->primary->fb 
= fb; 260 270 spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 261 - 262 - DRM_DEBUG_DRIVER( 263 - "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", 264 - amdgpu_crtc->crtc_id, amdgpu_crtc, work); 265 - 266 271 amdgpu_flip_work_func(&work->flip_work.work); 267 - } 268 - 269 - int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, 270 - struct drm_framebuffer *fb, 271 - struct drm_pending_vblank_event *event, 272 - uint32_t page_flip_flags, 273 - uint32_t target, 274 - struct drm_modeset_acquire_ctx *ctx) 275 - { 276 - struct amdgpu_bo *new_abo; 277 - struct amdgpu_flip_work *work; 278 - int r; 279 - 280 - r = amdgpu_crtc_prepare_flip(crtc, 281 - fb, 282 - event, 283 - page_flip_flags, 284 - target, 285 - &work, 286 - &new_abo); 287 - if (r) 288 - return r; 289 - 290 - amdgpu_crtc_submit_flip(crtc, fb, work, new_abo); 291 - 292 272 return 0; 273 + 274 + pflip_cleanup: 275 + if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) { 276 + DRM_ERROR("failed to reserve new abo in error path\n"); 277 + goto cleanup; 278 + } 279 + unpin: 280 + if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) { 281 + DRM_ERROR("failed to unpin new abo in error path\n"); 282 + } 283 + unreserve: 284 + amdgpu_bo_unreserve(new_abo); 285 + 286 + cleanup: 287 + amdgpu_bo_unref(&work->old_abo); 288 + dma_fence_put(work->excl); 289 + for (i = 0; i < work->shared_count; ++i) 290 + dma_fence_put(work->shared[i]); 291 + kfree(work->shared); 292 + kfree(work); 293 + 294 + return r; 293 295 } 294 296 295 297 int amdgpu_crtc_set_config(struct drm_mode_set *set,
+5 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 63 63 * - 3.11.0 - Add support for sensor query info (clocks, temp, etc). 64 64 * - 3.12.0 - Add query for double offchip LDS buffers 65 65 * - 3.13.0 - Add PRT support 66 + * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality 67 + * - 3.15.0 - Export more gpu info for gfx9 66 68 */ 67 69 #define KMS_DRIVER_MAJOR 3 68 - #define KMS_DRIVER_MINOR 13 70 + #define KMS_DRIVER_MINOR 15 69 71 #define KMS_DRIVER_PATCHLEVEL 0 70 72 71 73 int amdgpu_vram_limit = 0; ··· 455 453 {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 456 454 {0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 457 455 {0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 456 + {0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 458 457 {0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 458 + {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 459 459 {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 460 460 {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 461 461 {0, 0, 0}
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
··· 112 112 struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); 113 113 int ret; 114 114 115 - ret = amdgpu_bo_reserve(abo, false); 115 + ret = amdgpu_bo_reserve(abo, true); 116 116 if (likely(ret == 0)) { 117 117 amdgpu_bo_kunmap(abo); 118 118 amdgpu_bo_unpin(abo);
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
··· 186 186 if (adev->gart.robj == NULL) { 187 187 return; 188 188 } 189 - r = amdgpu_bo_reserve(adev->gart.robj, false); 189 + r = amdgpu_bo_reserve(adev->gart.robj, true); 190 190 if (likely(r == 0)) { 191 191 amdgpu_bo_kunmap(adev->gart.robj); 192 192 amdgpu_bo_unpin(adev->gart.robj);
+37 -31
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
··· 139 139 return 0; 140 140 } 141 141 142 + static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo) 143 + { 144 + /* if anything is swapped out don't swap it in here, 145 + just abort and wait for the next CS */ 146 + if (!amdgpu_bo_gpu_accessible(bo)) 147 + return -ERESTARTSYS; 148 + 149 + if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow)) 150 + return -ERESTARTSYS; 151 + 152 + return 0; 153 + } 154 + 155 + static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev, 156 + struct amdgpu_vm *vm, 157 + struct list_head *list) 158 + { 159 + struct ttm_validate_buffer *entry; 160 + 161 + list_for_each_entry(entry, list, head) { 162 + struct amdgpu_bo *bo = 163 + container_of(entry->bo, struct amdgpu_bo, tbo); 164 + if (amdgpu_gem_vm_check(NULL, bo)) 165 + return false; 166 + } 167 + 168 + return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL); 169 + } 170 + 142 171 void amdgpu_gem_object_close(struct drm_gem_object *obj, 143 172 struct drm_file *file_priv) 144 173 { ··· 177 148 struct amdgpu_vm *vm = &fpriv->vm; 178 149 179 150 struct amdgpu_bo_list_entry vm_pd; 180 - struct list_head list, duplicates; 151 + struct list_head list; 181 152 struct ttm_validate_buffer tv; 182 153 struct ww_acquire_ctx ticket; 183 154 struct amdgpu_bo_va *bo_va; 184 - struct dma_fence *fence = NULL; 185 155 int r; 186 156 187 157 INIT_LIST_HEAD(&list); 188 - INIT_LIST_HEAD(&duplicates); 189 158 190 159 tv.bo = &bo->tbo; 191 160 tv.shared = true; ··· 191 164 192 165 amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); 193 166 194 - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); 167 + r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); 195 168 if (r) { 196 169 dev_err(adev->dev, "leaking bo va because " 197 170 "we fail to reserve bo (%d)\n", r); 198 171 return; 199 172 } 200 173 bo_va = amdgpu_vm_bo_find(vm, bo); 201 - if (bo_va) { 202 - if (--bo_va->ref_count == 0) { 203 - amdgpu_vm_bo_rmv(adev, bo_va); 174 + if (bo_va && --bo_va->ref_count == 0) { 175 + amdgpu_vm_bo_rmv(adev, bo_va); 176 + 177 + if (amdgpu_gem_vm_ready(adev, vm, &list)) { 178 + struct dma_fence *fence = NULL; 204 179 205 180 r = amdgpu_vm_clear_freed(adev, vm, &fence); 206 181 if (unlikely(r)) { ··· 531 502 return r; 532 503 } 533 504 534 - static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo) 535 - { 536 - /* if anything is swapped out don't swap it in here, 537 - just abort and wait for the next CS */ 538 - if (!amdgpu_bo_gpu_accessible(bo)) 539 - return -ERESTARTSYS; 540 - 541 - if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow)) 542 - return -ERESTARTSYS; 543 - 544 - return 0; 545 - } 546 - 547 505 /** 548 506 * amdgpu_gem_va_update_vm -update the bo_va in its VM 549 507 * ··· 549 533 struct list_head *list, 550 534 uint32_t operation) 551 535 { 552 - struct ttm_validate_buffer *entry; 553 536 int r = -ERESTARTSYS; 554 537 555 - list_for_each_entry(entry, list, head) { 556 - struct amdgpu_bo *bo = 557 - container_of(entry->bo, struct amdgpu_bo, tbo); 558 - if (amdgpu_gem_va_check(NULL, bo)) 559 - goto error; 560 - } 561 - 562 - r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check, 563 - NULL); 564 - if (r) 538 + if (!amdgpu_gem_vm_ready(adev, vm, list)) 565 539 goto error; 566 540 567 541 r = amdgpu_vm_update_directories(adev, vm);
+9
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
··· 134 134 return r; 135 135 } 136 136 137 + void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man) 138 + { 139 + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); 140 + struct amdgpu_gtt_mgr *mgr = man->priv; 141 + 142 + seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", 143 + man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20); 144 + 145 + } 137 146 /** 138 147 * amdgpu_gtt_mgr_new - allocate a new node 139 148 *
+4 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
··· 160 160 dev_err(adev->dev, "scheduling IB failed (%d).\n", r); 161 161 return r; 162 162 } 163 + if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) 164 + amdgpu_ring_emit_pipeline_sync(ring); 163 165 164 166 if (vm) { 165 167 r = amdgpu_vm_flush(ring, job); ··· 219 217 if (r) { 220 218 dev_err(adev->dev, "failed to emit fence (%d)\n", r); 221 219 if (job && job->vm_id) 222 - amdgpu_vm_reset_id(adev, job->vm_id); 220 + amdgpu_vm_reset_id(adev, ring->funcs->vmhub, 221 + job->vm_id); 223 222 amdgpu_ring_undo(ring); 224 223 return r; 225 224 }
+5 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
··· 57 57 (*job)->vm = vm; 58 58 (*job)->ibs = (void *)&(*job)[1]; 59 59 (*job)->num_ibs = num_ibs; 60 + (*job)->need_pipeline_sync = false; 60 61 61 62 amdgpu_sync_create(&(*job)->sync); 62 63 ··· 140 139 141 140 struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); 142 141 143 - if (fence == NULL && vm && !job->vm_id) { 142 + while (fence == NULL && vm && !job->vm_id) { 144 143 struct amdgpu_ring *ring = job->ring; 145 144 int r; 146 145 ··· 152 151 153 152 fence = amdgpu_sync_get_fence(&job->sync); 154 153 } 154 + 155 + if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) 156 + job->need_pipeline_sync = true; 155 157 156 158 return fence; 157 159 }
+16 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
··· 545 545 adev->gfx.config.double_offchip_lds_buf; 546 546 547 547 if (amdgpu_ngg) { 548 - dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; 549 - dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; 550 - dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; 551 - dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; 548 + dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr; 549 + dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size; 550 + dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr; 551 + dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size; 552 + dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr; 553 + dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size; 554 + dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr; 555 + dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size; 552 556 } 557 + dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; 558 + dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; 559 + dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; 560 + dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; 561 + dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; 562 + dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; 563 + dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; 553 564 554 565 return copy_to_user(out, &dev_info, 555 566 min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; ··· 821 810 822 811 if (amdgpu_sriov_vf(adev)) { 823 812 /* TODO: how to handle reserve failure */ 824 - BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false)); 813 + BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); 825 814 amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va); 826 815 fpriv->vm.csa_bo_va = NULL; 827 816 amdgpu_bo_unreserve(adev->virt.csa_obj);
-15
drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
··· 597 597 struct drm_pending_vblank_event *event, 598 598 uint32_t page_flip_flags, uint32_t target, 599 599 struct drm_modeset_acquire_ctx *ctx); 600 - void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work, 601 - struct amdgpu_bo *new_abo); 602 - int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc, 603 - struct drm_framebuffer *fb, 604 - struct drm_pending_vblank_event *event, 605 - uint32_t page_flip_flags, 606 - uint32_t target, 607 - struct amdgpu_flip_work **work, 608 - struct amdgpu_bo **new_abo); 609 - 610 - void amdgpu_crtc_submit_flip(struct drm_crtc *crtc, 611 - struct drm_framebuffer *fb, 612 - struct amdgpu_flip_work *work, 613 - struct amdgpu_bo *new_abo); 614 - 615 600 extern const struct drm_mode_config_funcs amdgpu_mode_funcs; 616 601 617 602 #endif
+22 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
··· 295 295 if (*bo == NULL) 296 296 return; 297 297 298 - if (likely(amdgpu_bo_reserve(*bo, false) == 0)) { 298 + if (likely(amdgpu_bo_reserve(*bo, true) == 0)) { 299 299 if (cpu_addr) 300 300 amdgpu_bo_kunmap(*bo); 301 301 ··· 540 540 amdgpu_bo_fence(bo, *fence, true); 541 541 542 542 err: 543 + return r; 544 + } 545 + 546 + int amdgpu_bo_validate(struct amdgpu_bo *bo) 547 + { 548 + uint32_t domain; 549 + int r; 550 + 551 + if (bo->pin_count) 552 + return 0; 553 + 554 + domain = bo->prefered_domains; 555 + 556 + retry: 557 + amdgpu_ttm_placement_from_domain(bo, domain); 558 + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); 559 + if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { 560 + domain = bo->allowed_domains; 561 + goto retry; 562 + } 563 + 543 564 return r; 544 565 } 545 566
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
··· 175 175 struct amdgpu_bo *bo, 176 176 struct reservation_object *resv, 177 177 struct dma_fence **fence, bool direct); 178 + int amdgpu_bo_validate(struct amdgpu_bo *bo); 178 179 int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, 179 180 struct amdgpu_ring *ring, 180 181 struct amdgpu_bo *bo,
+2 -10
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
··· 867 867 868 868 pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); 869 869 870 - /* never 0 (full-speed), fuse or smc-controlled always */ 871 - return sprintf(buf, "%i\n", pwm_mode == FDO_PWM_MODE_STATIC ? 1 : 2); 870 + return sprintf(buf, "%i\n", pwm_mode); 872 871 } 873 872 874 873 static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, ··· 886 887 if (err) 887 888 return err; 888 889 889 - switch (value) { 890 - case 1: /* manual, percent-based */ 891 - amdgpu_dpm_set_fan_control_mode(adev, FDO_PWM_MODE_STATIC); 892 - break; 893 - default: /* disable */ 894 - amdgpu_dpm_set_fan_control_mode(adev, 0); 895 - break; 896 - } 890 + amdgpu_dpm_set_fan_control_mode(adev, value); 897 891 898 892 return count; 899 893 }
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
··· 113 113 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 114 114 int ret = 0; 115 115 116 - ret = amdgpu_bo_reserve(bo, false); 116 + ret = amdgpu_bo_reserve(bo, true); 117 117 if (unlikely(ret != 0)) 118 118 return; 119 119
+150 -87
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
··· 55 55 psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos; 56 56 psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; 57 57 psp->ring_init = psp_v3_1_ring_init; 58 + psp->ring_create = psp_v3_1_ring_create; 59 + psp->ring_destroy = psp_v3_1_ring_destroy; 58 60 psp->cmd_submit = psp_v3_1_cmd_submit; 59 61 psp->compare_sram_data = psp_v3_1_compare_sram_data; 60 62 psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; ··· 154 152 static int psp_tmr_init(struct psp_context *psp) 155 153 { 156 154 int ret; 157 - struct psp_gfx_cmd_resp *cmd; 158 - 159 - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); 160 - if (!cmd) 161 - return -ENOMEM; 162 155 163 156 /* 164 157 * Allocate 3M memory aligned to 1M from Frame Buffer (local ··· 165 168 ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000, 166 169 AMDGPU_GEM_DOMAIN_VRAM, 167 170 &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); 168 - if (ret) 169 - goto failed; 171 + 172 + return ret; 173 + } 174 + 175 + static int psp_tmr_load(struct psp_context *psp) 176 + { 177 + int ret; 178 + struct psp_gfx_cmd_resp *cmd; 179 + 180 + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); 181 + if (!cmd) 182 + return -ENOMEM; 170 183 171 184 psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000); 172 185 173 186 ret = psp_cmd_submit_buf(psp, NULL, cmd, 174 187 psp->fence_buf_mc_addr, 1); 175 188 if (ret) 176 - goto failed_mem; 189 + goto failed; 177 190 178 191 kfree(cmd); 179 192 180 193 return 0; 181 194 182 - failed_mem: 183 - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); 184 195 failed: 185 196 kfree(cmd); 186 197 return ret; ··· 208 203 cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; 209 204 } 210 205 211 - static int psp_asd_load(struct psp_context *psp) 206 + static int psp_asd_init(struct psp_context *psp) 212 207 { 213 208 int ret; 214 - struct amdgpu_bo *asd_bo, *asd_shared_bo; 215 - uint64_t asd_mc_addr, asd_shared_mc_addr; 216 - void *asd_buf, *asd_shared_buf; 217 - struct psp_gfx_cmd_resp *cmd; 218 - 219 - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); 220 - if (!cmd) 221 - return -ENOMEM; 222 209 223 210 /* 224 211 * Allocate 16k memory aligned to 4k from Frame Buffer (local 225 212 * physical) for shared ASD <-> Driver 226 213 */ 227 - ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE, 228 - AMDGPU_GEM_DOMAIN_VRAM, 229 - &asd_shared_bo, &asd_shared_mc_addr, &asd_buf); 230 - if (ret) 231 - goto failed; 214 + ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, 215 + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 216 + &psp->asd_shared_bo, 217 + &psp->asd_shared_mc_addr, 218 + &psp->asd_shared_buf); 232 219 233 - /* 234 - * Allocate 256k memory aligned to 4k from Frame Buffer (local 235 - * physical) for ASD firmware 236 - */ 237 - ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_BIN_SIZE, PAGE_SIZE, 238 - AMDGPU_GEM_DOMAIN_VRAM, 239 - &asd_bo, &asd_mc_addr, &asd_buf); 240 - if (ret) 241 - goto failed_mem; 242 - 243 - memcpy(asd_buf, psp->asd_start_addr, psp->asd_ucode_size); 244 - 245 - psp_prep_asd_cmd_buf(cmd, asd_mc_addr, asd_shared_mc_addr, 246 - psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); 247 - 248 - ret = psp_cmd_submit_buf(psp, NULL, cmd, 249 - psp->fence_buf_mc_addr, 2); 250 - if (ret) 251 - goto failed_mem1; 252 - 253 - amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf); 254 - amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf); 255 - kfree(cmd); 256 - 257 - return 0; 258 - 259 - failed_mem1: 260 - 
amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf); 261 - failed_mem: 262 - amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf); 263 - failed: 264 - kfree(cmd); 265 220 return ret; 266 221 } 267 222 268 - static int psp_load_fw(struct amdgpu_device *adev) 223 + static int psp_asd_load(struct psp_context *psp) 269 224 { 270 225 int ret; 271 226 struct psp_gfx_cmd_resp *cmd; 272 - int i; 273 - struct amdgpu_firmware_info *ucode; 274 - struct psp_context *psp = &adev->psp; 275 227 276 228 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); 277 229 if (!cmd) 278 230 return -ENOMEM; 279 231 232 + memset(psp->fw_pri_buf, 0, PSP_1_MEG); 233 + memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size); 234 + 235 + psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr, 236 + psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); 237 + 238 + ret = psp_cmd_submit_buf(psp, NULL, cmd, 239 + psp->fence_buf_mc_addr, 2); 240 + 241 + kfree(cmd); 242 + 243 + return ret; 244 + } 245 + 246 + static int psp_hw_start(struct psp_context *psp) 247 + { 248 + int ret; 249 + 280 250 ret = psp_bootloader_load_sysdrv(psp); 281 251 if (ret) 282 - goto failed; 252 + return ret; 283 253 284 254 ret = psp_bootloader_load_sos(psp); 285 255 if (ret) 286 - goto failed; 256 + return ret; 287 257 288 - ret = psp_ring_init(psp, PSP_RING_TYPE__KM); 258 + ret = psp_ring_create(psp, PSP_RING_TYPE__KM); 289 259 if (ret) 290 - goto failed; 260 + return ret; 291 261 292 - ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, 293 - AMDGPU_GEM_DOMAIN_VRAM, 294 - &psp->fence_buf_bo, 295 - &psp->fence_buf_mc_addr, 296 - &psp->fence_buf); 262 + ret = psp_tmr_load(psp); 297 263 if (ret) 298 - goto failed; 299 - 300 - memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); 301 - 302 - ret = psp_tmr_init(psp); 303 - if (ret) 304 - goto failed_mem; 264 + return ret; 305 265 306 266 ret = psp_asd_load(psp); 307 267 if (ret) 308 - goto failed_mem; 268 + return ret; 269 + 270 + return 0; 271 + } 272 + 273 + static int psp_np_fw_load(struct psp_context *psp) 274 + { 275 + int i, ret; 276 + struct amdgpu_firmware_info *ucode; 277 + struct amdgpu_device* adev = psp->adev; 309 278 310 279 for (i = 0; i < adev->firmware.max_ucodes; i++) { 311 280 ucode = &adev->firmware.ucode[i]; ··· 289 310 if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && 290 311 psp_smu_reload_quirk(psp)) 291 312 continue; 313 + if (amdgpu_sriov_vf(adev) && 314 + (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 315 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 316 + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) 317 + /*skip ucode loading in SRIOV VF */ 318 + continue; 292 319 293 - ret = psp_prep_cmd_buf(ucode, cmd); 320 + ret = psp_prep_cmd_buf(ucode, psp->cmd); 294 321 if (ret) 295 - goto failed_mem; 322 + return ret; 296 323 297 - ret = psp_cmd_submit_buf(psp, ucode, cmd, 324 + ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, 298 325 psp->fence_buf_mc_addr, i + 3); 299 326 if (ret) 300 - goto failed_mem; 327 + return ret; 301 328 302 329 #if 0 303 330 /* check if firmware loaded sucessfully */ ··· 312 327 #endif 313 328 } 314 329 315 - amdgpu_bo_free_kernel(&psp->fence_buf_bo, 316 - &psp->fence_buf_mc_addr, &psp->fence_buf); 330 + return 0; 331 + } 332 + 333 + static int psp_load_fw(struct amdgpu_device *adev) 334 + { 335 + int ret; 336 + struct psp_context *psp = &adev->psp; 337 + struct psp_gfx_cmd_resp *cmd; 338 + 339 + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); 340 + if (!cmd) 341 + return -ENOMEM; 342 + 343 
+ psp->cmd = cmd; 344 + 345 + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, 346 + AMDGPU_GEM_DOMAIN_GTT, 347 + &psp->fw_pri_bo, 348 + &psp->fw_pri_mc_addr, 349 + &psp->fw_pri_buf); 350 + if (ret) 351 + goto failed; 352 + 353 + ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, 354 + AMDGPU_GEM_DOMAIN_VRAM, 355 + &psp->fence_buf_bo, 356 + &psp->fence_buf_mc_addr, 357 + &psp->fence_buf); 358 + if (ret) 359 + goto failed_mem1; 360 + 361 + memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); 362 + 363 + ret = psp_ring_init(psp, PSP_RING_TYPE__KM); 364 + if (ret) 365 + goto failed_mem1; 366 + 367 + ret = psp_tmr_init(psp); 368 + if (ret) 369 + goto failed_mem; 370 + 371 + ret = psp_asd_init(psp); 372 + if (ret) 373 + goto failed_mem; 374 + 375 + ret = psp_hw_start(psp); 376 + if (ret) 377 + goto failed_mem; 378 + 379 + ret = psp_np_fw_load(psp); 380 + if (ret) 381 + goto failed_mem; 382 + 317 383 kfree(cmd); 318 384 319 385 return 0; ··· 372 336 failed_mem: 373 337 amdgpu_bo_free_kernel(&psp->fence_buf_bo, 374 338 &psp->fence_buf_mc_addr, &psp->fence_buf); 339 + failed_mem1: 340 + amdgpu_bo_free_kernel(&psp->fw_pri_bo, 341 + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); 375 342 failed: 376 343 kfree(cmd); 377 344 return ret; ··· 418 379 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 419 380 struct psp_context *psp = &adev->psp; 420 381 421 - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) 422 - amdgpu_ucode_fini_bo(adev); 382 + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 383 + return 0; 384 + 385 + amdgpu_ucode_fini_bo(adev); 386 + 387 + psp_ring_destroy(psp, PSP_RING_TYPE__KM); 423 388 424 389 if (psp->tmr_buf) 425 390 amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); 391 + 392 + if (psp->fw_pri_buf) 393 + amdgpu_bo_free_kernel(&psp->fw_pri_bo, 394 + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); 395 + 396 + if (psp->fence_buf_bo) 397 + amdgpu_bo_free_kernel(&psp->fence_buf_bo, 398 + &psp->fence_buf_mc_addr, &psp->fence_buf); 426 399 427 400 return 0; 428 401 } ··· 448 397 { 449 398 int ret; 450 399 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 400 + struct psp_context *psp = &adev->psp; 451 401 452 402 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 453 403 return 0; 454 404 405 + DRM_INFO("PSP is resuming...\n"); 406 + 455 407 mutex_lock(&adev->firmware.mutex); 456 408 457 - ret = psp_load_fw(adev); 409 + ret = psp_hw_start(psp); 458 410 if (ret) 459 - DRM_ERROR("PSP resume failed\n"); 411 + goto failed; 412 + 413 + ret = psp_np_fw_load(psp); 414 + if (ret) 415 + goto failed; 460 416 461 417 mutex_unlock(&adev->firmware.mutex); 462 418 419 + return 0; 420 + 421 + failed: 422 + DRM_ERROR("PSP resume failed\n"); 423 + mutex_unlock(&adev->firmware.mutex); 463 424 return ret; 464 425 } 465 426
+16 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
··· 30 30 31 31 #define PSP_FENCE_BUFFER_SIZE 0x1000 32 32 #define PSP_CMD_BUFFER_SIZE 0x1000 33 - #define PSP_ASD_BIN_SIZE 0x40000 34 33 #define PSP_ASD_SHARED_MEM_SIZE 0x4000 34 + #define PSP_1_MEG 0x100000 35 35 36 36 enum psp_ring_type 37 37 { ··· 57 57 { 58 58 struct amdgpu_device *adev; 59 59 struct psp_ring km_ring; 60 + struct psp_gfx_cmd_resp *cmd; 60 61 61 62 int (*init_microcode)(struct psp_context *psp); 62 63 int (*bootloader_load_sysdrv)(struct psp_context *psp); ··· 65 64 int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode, 66 65 struct psp_gfx_cmd_resp *cmd); 67 66 int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); 67 + int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); 68 + int (*ring_destroy)(struct psp_context *psp, 69 + enum psp_ring_type ring_type); 68 70 int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, 69 71 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index); 70 72 bool (*compare_sram_data)(struct psp_context *psp, 71 73 struct amdgpu_firmware_info *ucode, 72 74 enum AMDGPU_UCODE_ID ucode_type); 73 75 bool (*smu_reload_quirk)(struct psp_context *psp); 76 + 77 + /* fence buffer */ 78 + struct amdgpu_bo *fw_pri_bo; 79 + uint64_t fw_pri_mc_addr; 80 + void *fw_pri_buf; 74 81 75 82 /* sos firmware */ 76 83 const struct firmware *sos_fw; ··· 94 85 uint64_t tmr_mc_addr; 95 86 void *tmr_buf; 96 87 97 - /* asd firmware */ 88 + /* asd firmware and buffer */ 98 89 const struct firmware *asd_fw; 99 90 uint32_t asd_fw_version; 100 91 uint32_t asd_feature_version; 101 92 uint32_t asd_ucode_size; 102 93 uint8_t *asd_start_addr; 94 + struct amdgpu_bo *asd_shared_bo; 95 + uint64_t asd_shared_mc_addr; 96 + void *asd_shared_buf; 103 97 104 98 /* fence buffer */ 105 99 struct amdgpu_bo *fence_buf_bo; ··· 117 105 118 106 #define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) 119 107 #define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) 108 + #define psp_ring_create(psp, type) (psp)->ring_create((psp), (type)) 109 + #define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type))) 120 110 #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ 121 111 (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) 122 112 #define psp_compare_sram_data(psp, ucode, type) \
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
··· 99 99 uint32_t align_mask; 100 100 u32 nop; 101 101 bool support_64bit_ptrs; 102 + unsigned vmhub; 102 103 103 104 /* ring read/write ptr handling */ 104 105 u64 (*get_rptr)(struct amdgpu_ring *ring); ··· 179 178 unsigned cond_exe_offs; 180 179 u64 cond_exe_gpu_addr; 181 180 volatile u32 *cond_exe_cpu_addr; 181 + unsigned vm_inv_eng; 182 182 #if defined(CONFIG_DEBUG_FS) 183 183 struct dentry *ent; 184 184 #endif
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
··· 130 130 return -EINVAL; 131 131 } 132 132 133 - r = amdgpu_bo_reserve(sa_manager->bo, false); 133 + r = amdgpu_bo_reserve(sa_manager->bo, true); 134 134 if (!r) { 135 135 amdgpu_bo_kunmap(sa_manager->bo); 136 136 amdgpu_bo_unpin(sa_manager->bo);
+22 -15
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
··· 190 190 191 191 192 192 TRACE_EVENT(amdgpu_vm_grab_id, 193 - TP_PROTO(struct amdgpu_vm *vm, int ring, struct amdgpu_job *job), 193 + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_ring *ring, 194 + struct amdgpu_job *job), 194 195 TP_ARGS(vm, ring, job), 195 196 TP_STRUCT__entry( 196 197 __field(struct amdgpu_vm *, vm) 197 198 __field(u32, ring) 198 - __field(u32, vmid) 199 + __field(u32, vm_id) 200 + __field(u32, vm_hub) 199 201 __field(u64, pd_addr) 200 202 __field(u32, needs_flush) 201 203 ), 202 204 203 205 TP_fast_assign( 204 206 __entry->vm = vm; 205 - __entry->ring = ring; 206 - __entry->vmid = job->vm_id; 207 + __entry->ring = ring->idx; 208 + __entry->vm_id = job->vm_id; 209 + __entry->vm_hub = ring->funcs->vmhub, 207 210 __entry->pd_addr = job->vm_pd_addr; 208 211 __entry->needs_flush = job->vm_needs_flush; 209 212 ), 210 - TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx needs_flush=%u", 211 - __entry->vm, __entry->ring, __entry->vmid, 212 - __entry->pd_addr, __entry->needs_flush) 213 + TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", 214 + __entry->vm, __entry->ring, __entry->vm_id, 215 + __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) 213 216 ); 214 217 215 218 TRACE_EVENT(amdgpu_vm_bo_map, ··· 334 331 ); 335 332 336 333 TRACE_EVENT(amdgpu_vm_flush, 337 - TP_PROTO(uint64_t pd_addr, unsigned ring, unsigned id), 338 - TP_ARGS(pd_addr, ring, id), 334 + TP_PROTO(struct amdgpu_ring *ring, unsigned vm_id, 335 + uint64_t pd_addr), 336 + TP_ARGS(ring, vm_id, pd_addr), 339 337 TP_STRUCT__entry( 340 - __field(u64, pd_addr) 341 338 __field(u32, ring) 342 - __field(u32, id) 339 + __field(u32, vm_id) 340 + __field(u32, vm_hub) 341 + __field(u64, pd_addr) 343 342 ), 344 343 345 344 TP_fast_assign( 345 + __entry->ring = ring->idx; 346 + __entry->vm_id = vm_id; 347 + __entry->vm_hub = ring->funcs->vmhub; 346 348 __entry->pd_addr = pd_addr; 347 - __entry->ring = ring; 348 - __entry->id = id; 349 349 ), 350 - TP_printk("ring=%u, id=%u, pd_addr=%010Lx", 351 - __entry->ring, __entry->id, __entry->pd_addr) 350 + TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx", 351 + __entry->ring, __entry->vm_id, 352 + __entry->vm_hub,__entry->pd_addr) 352 353 ); 353 354 354 355 TRACE_EVENT(amdgpu_bo_list_set,
+27 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
··· 203 203 abo = container_of(bo, struct amdgpu_bo, tbo); 204 204 switch (bo->mem.mem_type) { 205 205 case TTM_PL_VRAM: 206 - if (adev->mman.buffer_funcs_ring->ready == false) { 206 + if (adev->mman.buffer_funcs && 207 + adev->mman.buffer_funcs_ring && 208 + adev->mman.buffer_funcs_ring->ready == false) { 207 209 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); 208 210 } else { 209 211 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); ··· 765 763 { 766 764 struct amdgpu_ttm_tt *gtt, *tmp; 767 765 struct ttm_mem_reg bo_mem; 768 - uint32_t flags; 766 + uint64_t flags; 769 767 int r; 770 768 771 769 bo_mem.mem_type = TTM_PL_TT; ··· 1040 1038 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, 1041 1039 const struct ttm_place *place) 1042 1040 { 1043 - if (bo->mem.mem_type == TTM_PL_VRAM && 1044 - bo->mem.start == AMDGPU_BO_INVALID_OFFSET) { 1045 - unsigned long num_pages = bo->mem.num_pages; 1046 - struct drm_mm_node *node = bo->mem.mm_node; 1041 + unsigned long num_pages = bo->mem.num_pages; 1042 + struct drm_mm_node *node = bo->mem.mm_node; 1047 1043 1044 + if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET) 1045 + return ttm_bo_eviction_valuable(bo, place); 1046 + 1047 + switch (bo->mem.mem_type) { 1048 + case TTM_PL_TT: 1049 + return true; 1050 + 1051 + case TTM_PL_VRAM: 1048 1052 /* Check each drm MM node individually */ 1049 1053 while (num_pages) { 1050 1054 if (place->fpfn < (node->start + node->size) && ··· 1060 1052 num_pages -= node->size; 1061 1053 ++node; 1062 1054 } 1055 + break; 1063 1056 1064 - return false; 1057 + default: 1058 + break; 1065 1059 } 1066 1060 1067 1061 return ttm_bo_eviction_valuable(bo, place); ··· 1198 1188 return; 1199 1189 amdgpu_ttm_debugfs_fini(adev); 1200 1190 if (adev->stollen_vga_memory) { 1201 - r = amdgpu_bo_reserve(adev->stollen_vga_memory, false); 1191 + r = amdgpu_bo_reserve(adev->stollen_vga_memory, true); 1202 1192 if (r == 0) { 1203 1193 amdgpu_bo_unpin(adev->stollen_vga_memory); 1204 1194 amdgpu_bo_unreserve(adev->stollen_vga_memory); ··· 1411 1401 1412 1402 #if defined(CONFIG_DEBUG_FS) 1413 1403 1404 + extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager 1405 + *man); 1414 1406 static int amdgpu_mm_dump_table(struct seq_file *m, void *data) 1415 1407 { 1416 1408 struct drm_info_node *node = (struct drm_info_node *)m->private; ··· 1426 1414 spin_lock(&glob->lru_lock); 1427 1415 drm_mm_print(mm, &p); 1428 1416 spin_unlock(&glob->lru_lock); 1429 - if (ttm_pl == TTM_PL_VRAM) 1417 + switch (ttm_pl) { 1418 + case TTM_PL_VRAM: 1430 1419 seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", 1431 1420 adev->mman.bdev.man[ttm_pl].size, 1432 1421 (u64)atomic64_read(&adev->vram_usage) >> 20, 1433 1422 (u64)atomic64_read(&adev->vram_vis_usage) >> 20); 1423 + break; 1424 + case TTM_PL_TT: 1425 + amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]); 1426 + break; 1427 + } 1434 1428 return 0; 1435 1429 } 1436 1430
+7 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
··· 382 382 * if SMU loaded firmware, it needn't add SMC, UVD, and VCE 383 383 * ucode info here 384 384 */ 385 - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 386 - adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4; 387 - else 385 + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 386 + if (amdgpu_sriov_vf(adev)) 387 + adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 3; 388 + else 389 + adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4; 390 + } else { 388 391 adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM; 392 + } 389 393 390 394 for (i = 0; i < adev->firmware.max_ucodes; i++) { 391 395 ucode = &adev->firmware.ucode[i];
+5 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
··· 955 955 struct amdgpu_device *adev = ring->adev; 956 956 uint32_t rptr = amdgpu_ring_get_rptr(ring); 957 957 unsigned i; 958 - int r; 958 + int r, timeout = adev->usec_timeout; 959 959 960 - /* TODO: remove it if VCE can work for sriov */ 960 + /* workaround VCE ring test slow issue for sriov*/ 961 961 if (amdgpu_sriov_vf(adev)) 962 - return 0; 962 + timeout *= 10; 963 963 964 964 r = amdgpu_ring_alloc(ring, 16); 965 965 if (r) { ··· 970 970 amdgpu_ring_write(ring, VCE_CMD_END); 971 971 amdgpu_ring_commit(ring); 972 972 973 - for (i = 0; i < adev->usec_timeout; i++) { 973 + for (i = 0; i < timeout; i++) { 974 974 if (amdgpu_ring_get_rptr(ring) != rptr) 975 975 break; 976 976 DRM_UDELAY(1); 977 977 } 978 978 979 - if (i < adev->usec_timeout) { 979 + if (i < timeout) { 980 980 DRM_INFO("ring test on %d succeeded in %d usecs\n", 981 981 ring->idx, i); 982 982 } else { ··· 998 998 { 999 999 struct dma_fence *fence = NULL; 1000 1000 long r; 1001 - 1002 - /* TODO: remove it if VCE can work for sriov */ 1003 - if (amdgpu_sriov_vf(ring->adev)) 1004 - return 0; 1005 1001 1006 1002 /* skip vce ring1/2 ib test for now, since it's not reliable */ 1007 1003 if (ring != &ring->adev->vce.ring[0])
+46
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
··· 225 225 226 226 return 0; 227 227 } 228 + 229 + /** 230 + * amdgpu_virt_alloc_mm_table() - alloc memory for mm table 231 + * @amdgpu: amdgpu device. 232 + * MM table is used by UVD and VCE for its initialization 233 + * Return: Zero if allocate success. 234 + */ 235 + int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev) 236 + { 237 + int r; 238 + 239 + if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr) 240 + return 0; 241 + 242 + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, 243 + AMDGPU_GEM_DOMAIN_VRAM, 244 + &adev->virt.mm_table.bo, 245 + &adev->virt.mm_table.gpu_addr, 246 + (void *)&adev->virt.mm_table.cpu_addr); 247 + if (r) { 248 + DRM_ERROR("failed to alloc mm table and error = %d.\n", r); 249 + return r; 250 + } 251 + 252 + memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE); 253 + DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n", 254 + adev->virt.mm_table.gpu_addr, 255 + adev->virt.mm_table.cpu_addr); 256 + return 0; 257 + } 258 + 259 + /** 260 + * amdgpu_virt_free_mm_table() - free mm table memory 261 + * @amdgpu: amdgpu device. 262 + * Free MM table memory 263 + */ 264 + void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) 265 + { 266 + if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr) 267 + return; 268 + 269 + amdgpu_bo_free_kernel(&adev->virt.mm_table.bo, 270 + &adev->virt.mm_table.gpu_addr, 271 + (void *)&adev->virt.mm_table.cpu_addr); 272 + adev->virt.mm_table.gpu_addr = 0; 273 + }
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
··· 98 98 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); 99 99 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); 100 100 int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); 101 + int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); 102 + void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); 101 103 102 104 #endif
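amdgpu_virt_alloc_mm_table()/amdgpu_virt_free_mm_table() centralize the per-VF MM table that UVD and VCE hand to the hypervisor; both helpers bail out early on bare metal because of the amdgpu_sriov_vf() check. A sketch of how a multimedia IP block would be expected to pair them in its sw_init/sw_fini paths; the *_sketch function names are hypothetical, only the two helpers and adev->virt.mm_table come from the hunks above.

static int mm_block_sw_init_sketch(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_virt_alloc_mm_table(adev);	/* returns 0 immediately on bare metal */
	if (r)
		return r;

	/* ... normal firmware/ring setup ... */
	return 0;
}

static void mm_block_sw_fini_sketch(struct amdgpu_device *adev)
{
	/* ... normal teardown ... */
	amdgpu_virt_free_mm_table(adev);	/* also a no-op on bare metal */
}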
+88 -69
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 406 406 struct amdgpu_job *job) 407 407 { 408 408 struct amdgpu_device *adev = ring->adev; 409 + unsigned vmhub = ring->funcs->vmhub; 410 + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 409 411 uint64_t fence_context = adev->fence_context + ring->idx; 410 412 struct dma_fence *updates = sync->last_vm_update; 411 413 struct amdgpu_vm_id *id, *idle; ··· 415 413 unsigned i; 416 414 int r = 0; 417 415 418 - fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids, 419 - GFP_KERNEL); 416 + fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); 420 417 if (!fences) 421 418 return -ENOMEM; 422 419 423 - mutex_lock(&adev->vm_manager.lock); 420 + mutex_lock(&id_mgr->lock); 424 421 425 422 /* Check if we have an idle VMID */ 426 423 i = 0; 427 - list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) { 424 + list_for_each_entry(idle, &id_mgr->ids_lru, list) { 428 425 fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); 429 426 if (!fences[i]) 430 427 break; ··· 431 430 } 432 431 433 432 /* If we can't find a idle VMID to use, wait till one becomes available */ 434 - if (&idle->list == &adev->vm_manager.ids_lru) { 433 + if (&idle->list == &id_mgr->ids_lru) { 435 434 u64 fence_context = adev->vm_manager.fence_context + ring->idx; 436 435 unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; 437 436 struct dma_fence_array *array; ··· 456 455 if (r) 457 456 goto error; 458 457 459 - mutex_unlock(&adev->vm_manager.lock); 458 + mutex_unlock(&id_mgr->lock); 460 459 return 0; 461 460 462 461 } 463 462 kfree(fences); 464 463 465 - job->vm_needs_flush = true; 464 + job->vm_needs_flush = false; 466 465 /* Check if we can use a VMID already assigned to this VM */ 467 - i = ring->idx; 468 - do { 466 + list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { 469 467 struct dma_fence *flushed; 470 - 471 - id = vm->ids[i++]; 472 - if (i == AMDGPU_MAX_RINGS) 473 - i = 0; 468 + bool needs_flush = false; 474 469 475 470 /* Check all the prerequisites to using this VMID */ 476 - if (!id) 477 - continue; 478 471 if (amdgpu_vm_had_gpu_reset(adev, id)) 479 472 continue; 480 473 ··· 478 483 if (job->vm_pd_addr != id->pd_gpu_addr) 479 484 continue; 480 485 481 - if (!id->last_flush) 482 - continue; 483 - 484 - if (id->last_flush->context != fence_context && 485 - !dma_fence_is_signaled(id->last_flush)) 486 - continue; 486 + if (!id->last_flush || 487 + (id->last_flush->context != fence_context && 488 + !dma_fence_is_signaled(id->last_flush))) 489 + needs_flush = true; 487 490 488 491 flushed = id->flushed_updates; 489 - if (updates && 490 - (!flushed || dma_fence_is_later(updates, flushed))) 492 + if (updates && (!flushed || dma_fence_is_later(updates, flushed))) 493 + needs_flush = true; 494 + 495 + /* Concurrent flushes are only possible starting with Vega10 */ 496 + if (adev->asic_type < CHIP_VEGA10 && needs_flush) 491 497 continue; 492 498 493 499 /* Good we can use this VMID. 
Remember this submission as ··· 498 502 if (r) 499 503 goto error; 500 504 501 - list_move_tail(&id->list, &adev->vm_manager.ids_lru); 502 - vm->ids[ring->idx] = id; 505 + if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { 506 + dma_fence_put(id->flushed_updates); 507 + id->flushed_updates = dma_fence_get(updates); 508 + } 503 509 504 - job->vm_id = id - adev->vm_manager.ids; 505 - job->vm_needs_flush = false; 506 - trace_amdgpu_vm_grab_id(vm, ring->idx, job); 510 + if (needs_flush) 511 + goto needs_flush; 512 + else 513 + goto no_flush_needed; 507 514 508 - mutex_unlock(&adev->vm_manager.lock); 509 - return 0; 510 - 511 - } while (i != ring->idx); 515 + }; 512 516 513 517 /* Still no ID to use? Then use the idle one found earlier */ 514 518 id = idle; ··· 518 522 if (r) 519 523 goto error; 520 524 525 + id->pd_gpu_addr = job->vm_pd_addr; 526 + dma_fence_put(id->flushed_updates); 527 + id->flushed_updates = dma_fence_get(updates); 528 + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); 529 + atomic64_set(&id->owner, vm->client_id); 530 + 531 + needs_flush: 532 + job->vm_needs_flush = true; 521 533 dma_fence_put(id->last_flush); 522 534 id->last_flush = NULL; 523 535 524 - dma_fence_put(id->flushed_updates); 525 - id->flushed_updates = dma_fence_get(updates); 536 + no_flush_needed: 537 + list_move_tail(&id->list, &id_mgr->ids_lru); 526 538 527 - id->pd_gpu_addr = job->vm_pd_addr; 528 - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); 529 - list_move_tail(&id->list, &adev->vm_manager.ids_lru); 530 - atomic64_set(&id->owner, vm->client_id); 531 - vm->ids[ring->idx] = id; 532 - 533 - job->vm_id = id - adev->vm_manager.ids; 534 - trace_amdgpu_vm_grab_id(vm, ring->idx, job); 539 + job->vm_id = id - id_mgr->ids; 540 + trace_amdgpu_vm_grab_id(vm, ring, job); 535 541 536 542 error: 537 - mutex_unlock(&adev->vm_manager.lock); 543 + mutex_unlock(&id_mgr->lock); 538 544 return r; 539 545 } 540 546 ··· 588 590 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) 589 591 { 590 592 struct amdgpu_device *adev = ring->adev; 591 - struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id]; 593 + unsigned vmhub = ring->funcs->vmhub; 594 + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 595 + struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id]; 592 596 bool gds_switch_needed = ring->funcs->emit_gds_switch && ( 593 597 id->gds_base != job->gds_base || 594 598 id->gds_size != job->gds_size || ··· 614 614 if (ring->funcs->init_cond_exec) 615 615 patch_offset = amdgpu_ring_init_cond_exec(ring); 616 616 617 - if (ring->funcs->emit_pipeline_sync) 617 + if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) 618 618 amdgpu_ring_emit_pipeline_sync(ring); 619 619 620 620 if (ring->funcs->emit_vm_flush && vm_flush_needed) { 621 621 u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); 622 622 struct dma_fence *fence; 623 623 624 - trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id); 624 + trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); 625 625 amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); 626 626 627 627 r = amdgpu_fence_emit(ring, &fence); 628 628 if (r) 629 629 return r; 630 630 631 - mutex_lock(&adev->vm_manager.lock); 631 + mutex_lock(&id_mgr->lock); 632 632 dma_fence_put(id->last_flush); 633 633 id->last_flush = fence; 634 - mutex_unlock(&adev->vm_manager.lock); 634 + mutex_unlock(&id_mgr->lock); 635 635 } 636 636 637 637 if (gds_switch_needed) { ··· 666 666 * 667 667 * Reset saved GDW, 
GWS and OA to force switch on next flush. 668 668 */ 669 - void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id) 669 + void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, 670 + unsigned vmid) 670 671 { 671 - struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; 672 + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 673 + struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; 672 674 673 675 id->gds_base = 0; 674 676 id->gds_size = 0; ··· 1338 1336 flags &= ~AMDGPU_PTE_MTYPE_MASK; 1339 1337 flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK); 1340 1338 1339 + if ((mapping->flags & AMDGPU_PTE_PRT) && 1340 + (adev->asic_type >= CHIP_VEGA10)) { 1341 + flags |= AMDGPU_PTE_PRT; 1342 + flags &= ~AMDGPU_PTE_VALID; 1343 + } 1344 + 1341 1345 trace_amdgpu_vm_bo_update(mapping); 1342 1346 1343 1347 pfn = mapping->offset >> PAGE_SHIFT; ··· 1637 1629 struct amdgpu_bo_va_mapping, list); 1638 1630 list_del(&mapping->list); 1639 1631 1640 - r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping, 1641 - 0, 0, &f); 1632 + r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, 1633 + mapping->start, mapping->last, 1634 + 0, 0, &f); 1642 1635 amdgpu_vm_free_mapping(adev, vm, mapping, f); 1643 1636 if (r) { 1644 1637 dma_fence_put(f); ··· 2126 2117 unsigned ring_instance; 2127 2118 struct amdgpu_ring *ring; 2128 2119 struct amd_sched_rq *rq; 2129 - int i, r; 2120 + int r; 2130 2121 2131 - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 2132 - vm->ids[i] = NULL; 2133 2122 vm->va = RB_ROOT; 2134 2123 vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); 2135 2124 spin_lock_init(&vm->status_lock); ··· 2248 2241 */ 2249 2242 void amdgpu_vm_manager_init(struct amdgpu_device *adev) 2250 2243 { 2251 - unsigned i; 2244 + unsigned i, j; 2252 2245 2253 - INIT_LIST_HEAD(&adev->vm_manager.ids_lru); 2246 + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 2247 + struct amdgpu_vm_id_manager *id_mgr = 2248 + &adev->vm_manager.id_mgr[i]; 2254 2249 2255 - /* skip over VMID 0, since it is the system VM */ 2256 - for (i = 1; i < adev->vm_manager.num_ids; ++i) { 2257 - amdgpu_vm_reset_id(adev, i); 2258 - amdgpu_sync_create(&adev->vm_manager.ids[i].active); 2259 - list_add_tail(&adev->vm_manager.ids[i].list, 2260 - &adev->vm_manager.ids_lru); 2250 + mutex_init(&id_mgr->lock); 2251 + INIT_LIST_HEAD(&id_mgr->ids_lru); 2252 + 2253 + /* skip over VMID 0, since it is the system VM */ 2254 + for (j = 1; j < id_mgr->num_ids; ++j) { 2255 + amdgpu_vm_reset_id(adev, i, j); 2256 + amdgpu_sync_create(&id_mgr->ids[i].active); 2257 + list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); 2258 + } 2261 2259 } 2262 2260 2263 2261 adev->vm_manager.fence_context = 2264 2262 dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2265 2263 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 2266 2264 adev->vm_manager.seqno[i] = 0; 2265 + 2267 2266 2268 2267 atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); 2269 2268 atomic64_set(&adev->vm_manager.client_counter, 0); ··· 2286 2273 */ 2287 2274 void amdgpu_vm_manager_fini(struct amdgpu_device *adev) 2288 2275 { 2289 - unsigned i; 2276 + unsigned i, j; 2290 2277 2291 - for (i = 0; i < AMDGPU_NUM_VM; ++i) { 2292 - struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; 2278 + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 2279 + struct amdgpu_vm_id_manager *id_mgr = 2280 + &adev->vm_manager.id_mgr[i]; 2293 2281 2294 - amdgpu_sync_free(&adev->vm_manager.ids[i].active); 2295 - dma_fence_put(id->flushed_updates); 2296 - dma_fence_put(id->last_flush); 2282 + mutex_destroy(&id_mgr->lock); 
2283 + for (j = 0; j < AMDGPU_NUM_VM; ++j) { 2284 + struct amdgpu_vm_id *id = &id_mgr->ids[j]; 2285 + 2286 + amdgpu_sync_free(&id->active); 2287 + dma_fence_put(id->flushed_updates); 2288 + dma_fence_put(id->last_flush); 2289 + } 2297 2290 } 2298 2291 }
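The net effect of the amdgpu_vm.c rework is that VMID allocation, flushing and reset are now scoped to a VM hub: every ring names its hub via funcs->vmhub, and the former global lock, LRU and ID array live in adev->vm_manager.id_mgr[vmhub]. The lookup pattern, pulled out of the amdgpu_vm_flush() change above into a standalone sketch (the helper name is hypothetical):

static struct amdgpu_vm_id *vm_id_for_job_sketch(struct amdgpu_device *adev,
						 struct amdgpu_ring *ring,
						 struct amdgpu_job *job)
{
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];

	/* id_mgr->lock now guards only this hub's LRU list and ID array */
	return &id_mgr->ids[job->vm_id];
}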
+12 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
··· 65 65 66 66 #define AMDGPU_PTE_FRAG(x) ((x & 0x1fULL) << 7) 67 67 68 - #define AMDGPU_PTE_PRT (1ULL << 63) 68 + /* TILED for VEGA10, reserved for older ASICs */ 69 + #define AMDGPU_PTE_PRT (1ULL << 51) 69 70 70 71 /* VEGA10 only */ 71 72 #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) ··· 115 114 struct dma_fence *last_dir_update; 116 115 uint64_t last_eviction_counter; 117 116 118 - /* for id and flush management per ring */ 119 - struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS]; 120 - 121 117 /* protecting freed */ 122 118 spinlock_t freed_lock; 123 119 ··· 147 149 uint32_t oa_size; 148 150 }; 149 151 152 + struct amdgpu_vm_id_manager { 153 + struct mutex lock; 154 + unsigned num_ids; 155 + struct list_head ids_lru; 156 + struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; 157 + }; 158 + 150 159 struct amdgpu_vm_manager { 151 160 /* Handling of VMIDs */ 152 - struct mutex lock; 153 - unsigned num_ids; 154 - struct list_head ids_lru; 155 - struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; 161 + struct amdgpu_vm_id_manager id_mgr[AMDGPU_MAX_VMHUBS]; 156 162 157 163 /* Handling of VM fences */ 158 164 u64 fence_context; ··· 202 200 struct amdgpu_sync *sync, struct dma_fence *fence, 203 201 struct amdgpu_job *job); 204 202 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); 205 - void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); 203 + void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, 204 + unsigned vmid); 206 205 int amdgpu_vm_update_directories(struct amdgpu_device *adev, 207 206 struct amdgpu_vm *vm); 208 207 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+16 -14
drivers/gpu/drm/amd/amdgpu/ci_dpm.c
··· 1267 1267 1268 1268 static void ci_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode) 1269 1269 { 1270 - if (mode) { 1271 - /* stop auto-manage */ 1270 + switch (mode) { 1271 + case AMD_FAN_CTRL_NONE: 1272 1272 if (adev->pm.dpm.fan.ucode_fan_control) 1273 1273 ci_fan_ctrl_stop_smc_fan_control(adev); 1274 - ci_fan_ctrl_set_static_mode(adev, mode); 1275 - } else { 1276 - /* restart auto-manage */ 1274 + ci_dpm_set_fan_speed_percent(adev, 100); 1275 + break; 1276 + case AMD_FAN_CTRL_MANUAL: 1277 + if (adev->pm.dpm.fan.ucode_fan_control) 1278 + ci_fan_ctrl_stop_smc_fan_control(adev); 1279 + break; 1280 + case AMD_FAN_CTRL_AUTO: 1277 1281 if (adev->pm.dpm.fan.ucode_fan_control) 1278 1282 ci_thermal_start_smc_fan_control(adev); 1279 - else 1280 - ci_fan_ctrl_set_default_mode(adev); 1283 + break; 1284 + default: 1285 + break; 1281 1286 } 1282 1287 } 1283 1288 1284 1289 static u32 ci_dpm_get_fan_control_mode(struct amdgpu_device *adev) 1285 1290 { 1286 1291 struct ci_power_info *pi = ci_get_pi(adev); 1287 - u32 tmp; 1288 1292 1289 1293 if (pi->fan_is_controlled_by_smc) 1290 - return 0; 1291 - 1292 - tmp = RREG32_SMC(ixCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK; 1293 - return (tmp >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT); 1294 + return AMD_FAN_CTRL_AUTO; 1295 + else 1296 + return AMD_FAN_CTRL_MANUAL; 1294 1297 } 1295 1298 1296 1299 #if 0 ··· 3039 3036 memory_clock, 3040 3037 &memory_level->MinVddcPhases); 3041 3038 3039 + memory_level->EnabledForActivity = 1; 3042 3040 memory_level->EnabledForThrottle = 1; 3043 3041 memory_level->UpH = 0; 3044 3042 memory_level->DownH = 100; ··· 3471 3467 if (ret) 3472 3468 return ret; 3473 3469 } 3474 - 3475 - pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; 3476 3470 3477 3471 if ((dpm_table->mclk_table.count >= 2) && 3478 3472 ((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) {
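The ci_dpm_set_fan_control_mode() rewrite above switches from a boolean "manual vs. auto" view to the new AMD_FAN_CTRL_NONE/MANUAL/AUTO enum, and get_fan_control_mode() now reports one of those values instead of a raw FDO register field. A hedged sketch of how the refined pwm1_enable sysfs path is expected to translate the standard hwmon values onto the enum; the 0/1/2 numbering is assumed from the usual hwmon convention, not taken from this hunk.

static u32 pwm1_enable_to_fan_mode_sketch(long value)
{
	switch (value) {
	case 0:		/* hwmon: no control, fan at full speed */
		return AMD_FAN_CTRL_NONE;
	case 1:		/* hwmon: manual, userspace writes pwm1 */
		return AMD_FAN_CTRL_MANUAL;
	default:	/* hwmon: automatic control by the SMC/firmware */
		return AMD_FAN_CTRL_AUTO;
	}
}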
+3 -3
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
··· 2230 2230 if (!atomic && fb && fb != crtc->primary->fb) { 2231 2231 amdgpu_fb = to_amdgpu_framebuffer(fb); 2232 2232 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 2233 - r = amdgpu_bo_reserve(abo, false); 2233 + r = amdgpu_bo_reserve(abo, true); 2234 2234 if (unlikely(r != 0)) 2235 2235 return r; 2236 2236 amdgpu_bo_unpin(abo); ··· 2589 2589 unpin: 2590 2590 if (amdgpu_crtc->cursor_bo) { 2591 2591 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2592 - ret = amdgpu_bo_reserve(aobj, false); 2592 + ret = amdgpu_bo_reserve(aobj, true); 2593 2593 if (likely(ret == 0)) { 2594 2594 amdgpu_bo_unpin(aobj); 2595 2595 amdgpu_bo_unreserve(aobj); ··· 2720 2720 2721 2721 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 2722 2722 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 2723 - r = amdgpu_bo_reserve(abo, false); 2723 + r = amdgpu_bo_reserve(abo, true); 2724 2724 if (unlikely(r)) 2725 2725 DRM_ERROR("failed to reserve abo before unpin\n"); 2726 2726 else {
+3 -3
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
··· 2214 2214 if (!atomic && fb && fb != crtc->primary->fb) { 2215 2215 amdgpu_fb = to_amdgpu_framebuffer(fb); 2216 2216 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 2217 - r = amdgpu_bo_reserve(abo, false); 2217 + r = amdgpu_bo_reserve(abo, true); 2218 2218 if (unlikely(r != 0)) 2219 2219 return r; 2220 2220 amdgpu_bo_unpin(abo); ··· 2609 2609 unpin: 2610 2610 if (amdgpu_crtc->cursor_bo) { 2611 2611 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2612 - ret = amdgpu_bo_reserve(aobj, false); 2612 + ret = amdgpu_bo_reserve(aobj, true); 2613 2613 if (likely(ret == 0)) { 2614 2614 amdgpu_bo_unpin(aobj); 2615 2615 amdgpu_bo_unreserve(aobj); ··· 2740 2740 2741 2741 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 2742 2742 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 2743 - r = amdgpu_bo_reserve(abo, false); 2743 + r = amdgpu_bo_reserve(abo, true); 2744 2744 if (unlikely(r)) 2745 2745 DRM_ERROR("failed to reserve abo before unpin\n"); 2746 2746 else {
+9 -4
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
··· 979 979 u32 priority_a_mark = 0, priority_b_mark = 0; 980 980 u32 priority_a_cnt = PRIORITY_OFF; 981 981 u32 priority_b_cnt = PRIORITY_OFF; 982 - u32 tmp, arb_control3; 982 + u32 tmp, arb_control3, lb_vblank_lead_lines = 0; 983 983 fixed20_12 a, b, c; 984 984 985 985 if (amdgpu_crtc->base.enabled && num_heads && mode) { ··· 1091 1091 c.full = dfixed_div(c, a); 1092 1092 priority_b_mark = dfixed_trunc(c); 1093 1093 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK; 1094 + 1095 + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); 1094 1096 } 1095 1097 1096 1098 /* select wm A */ ··· 1122 1120 /* save values for DPM */ 1123 1121 amdgpu_crtc->line_time = line_time; 1124 1122 amdgpu_crtc->wm_high = latency_watermark_a; 1123 + 1124 + /* Save number of lines the linebuffer leads before the scanout */ 1125 + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; 1125 1126 } 1126 1127 1127 1128 /* watermark setup */ ··· 1645 1640 if (!atomic && fb && fb != crtc->primary->fb) { 1646 1641 amdgpu_fb = to_amdgpu_framebuffer(fb); 1647 1642 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 1648 - r = amdgpu_bo_reserve(abo, false); 1643 + r = amdgpu_bo_reserve(abo, true); 1649 1644 if (unlikely(r != 0)) 1650 1645 return r; 1651 1646 amdgpu_bo_unpin(abo); ··· 1962 1957 unpin: 1963 1958 if (amdgpu_crtc->cursor_bo) { 1964 1959 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 1965 - ret = amdgpu_bo_reserve(aobj, false); 1960 + ret = amdgpu_bo_reserve(aobj, true); 1966 1961 if (likely(ret == 0)) { 1967 1962 amdgpu_bo_unpin(aobj); 1968 1963 amdgpu_bo_unreserve(aobj); ··· 2088 2083 2089 2084 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 2090 2085 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 2091 - r = amdgpu_bo_reserve(abo, false); 2086 + r = amdgpu_bo_reserve(abo, true); 2092 2087 if (unlikely(r)) 2093 2088 DRM_ERROR("failed to reserve abo before unpin\n"); 2094 2089 else {
+3 -3
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
··· 2089 2089 if (!atomic && fb && fb != crtc->primary->fb) { 2090 2090 amdgpu_fb = to_amdgpu_framebuffer(fb); 2091 2091 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 2092 - r = amdgpu_bo_reserve(abo, false); 2092 + r = amdgpu_bo_reserve(abo, true); 2093 2093 if (unlikely(r != 0)) 2094 2094 return r; 2095 2095 amdgpu_bo_unpin(abo); ··· 2440 2440 unpin: 2441 2441 if (amdgpu_crtc->cursor_bo) { 2442 2442 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2443 - ret = amdgpu_bo_reserve(aobj, false); 2443 + ret = amdgpu_bo_reserve(aobj, true); 2444 2444 if (likely(ret == 0)) { 2445 2445 amdgpu_bo_unpin(aobj); 2446 2446 amdgpu_bo_unreserve(aobj); ··· 2571 2571 2572 2572 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 2573 2573 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 2574 - r = amdgpu_bo_reserve(abo, false); 2574 + r = amdgpu_bo_reserve(abo, true); 2575 2575 if (unlikely(r)) 2576 2576 DRM_ERROR("failed to reserve abo before unpin\n"); 2577 2577 else {
+1 -1
drivers/gpu/drm/amd/amdgpu/dce_virtual.c
··· 248 248 249 249 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 250 250 abo = gem_to_amdgpu_bo(amdgpu_fb->obj); 251 - r = amdgpu_bo_reserve(abo, false); 251 + r = amdgpu_bo_reserve(abo, true); 252 252 if (unlikely(r)) 253 253 DRM_ERROR("failed to reserve abo before unpin\n"); 254 254 else {
+5 -5
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
··· 1579 1579 1580 1580 static void gfx_v6_0_config_init(struct amdgpu_device *adev) 1581 1581 { 1582 - adev->gfx.config.double_offchip_lds_buf = 1; 1582 + adev->gfx.config.double_offchip_lds_buf = 0; 1583 1583 } 1584 1584 1585 1585 static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) ··· 2437 2437 int r; 2438 2438 2439 2439 if (adev->gfx.rlc.save_restore_obj) { 2440 - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false); 2440 + r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true); 2441 2441 if (unlikely(r != 0)) 2442 2442 dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r); 2443 2443 amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj); ··· 2448 2448 } 2449 2449 2450 2450 if (adev->gfx.rlc.clear_state_obj) { 2451 - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 2451 + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 2452 2452 if (unlikely(r != 0)) 2453 2453 dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); 2454 2454 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); ··· 2459 2459 } 2460 2460 2461 2461 if (adev->gfx.rlc.cp_table_obj) { 2462 - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); 2462 + r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); 2463 2463 if (unlikely(r != 0)) 2464 2464 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); 2465 2465 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); ··· 3292 3292 ring->me = 1; 3293 3293 ring->pipe = i; 3294 3294 ring->queue = i; 3295 - sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); 3295 + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 3296 3296 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; 3297 3297 r = amdgpu_ring_init(adev, ring, 1024, 3298 3298 &adev->gfx.eop_irq, irq_type);
+6 -6
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
··· 1935 1935 INDEX_STRIDE, 3); 1936 1936 1937 1937 mutex_lock(&adev->srbm_mutex); 1938 - for (i = 0; i < adev->vm_manager.num_ids; i++) { 1938 + for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 1939 1939 if (i == 0) 1940 1940 sh_mem_base = 0; 1941 1941 else ··· 2792 2792 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 2793 2793 2794 2794 if (ring->mqd_obj) { 2795 - r = amdgpu_bo_reserve(ring->mqd_obj, false); 2795 + r = amdgpu_bo_reserve(ring->mqd_obj, true); 2796 2796 if (unlikely(r != 0)) 2797 2797 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 2798 2798 ··· 2810 2810 int r; 2811 2811 2812 2812 if (adev->gfx.mec.hpd_eop_obj) { 2813 - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 2813 + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); 2814 2814 if (unlikely(r != 0)) 2815 2815 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); 2816 2816 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); ··· 3359 3359 3360 3360 /* save restore block */ 3361 3361 if (adev->gfx.rlc.save_restore_obj) { 3362 - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false); 3362 + r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true); 3363 3363 if (unlikely(r != 0)) 3364 3364 dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r); 3365 3365 amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj); ··· 3371 3371 3372 3372 /* clear state block */ 3373 3373 if (adev->gfx.rlc.clear_state_obj) { 3374 - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 3374 + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 3375 3375 if (unlikely(r != 0)) 3376 3376 dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); 3377 3377 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); ··· 3383 3383 3384 3384 /* clear state block */ 3385 3385 if (adev->gfx.rlc.cp_table_obj) { 3386 - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); 3386 + r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); 3387 3387 if (unlikely(r != 0)) 3388 3388 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); 3389 3389 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
+17 -5
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
··· 1239 1239 1240 1240 /* clear state block */ 1241 1241 if (adev->gfx.rlc.clear_state_obj) { 1242 - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1242 + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 1243 1243 if (unlikely(r != 0)) 1244 1244 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r); 1245 1245 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); ··· 1250 1250 1251 1251 /* jump table block */ 1252 1252 if (adev->gfx.rlc.cp_table_obj) { 1253 - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); 1253 + r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); 1254 1254 if (unlikely(r != 0)) 1255 1255 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); 1256 1256 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); ··· 1363 1363 int r; 1364 1364 1365 1365 if (adev->gfx.mec.hpd_eop_obj) { 1366 - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 1366 + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); 1367 1367 if (unlikely(r != 0)) 1368 1368 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); 1369 1369 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); ··· 1490 1490 1491 1491 memset(hpd, 0, MEC_HPD_SIZE); 1492 1492 1493 - r = amdgpu_bo_reserve(kiq->eop_obj, false); 1493 + r = amdgpu_bo_reserve(kiq->eop_obj, true); 1494 1494 if (unlikely(r != 0)) 1495 1495 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); 1496 1496 amdgpu_bo_kunmap(kiq->eop_obj); ··· 1932 1932 case 0xca: 1933 1933 case 0xce: 1934 1934 case 0x88: 1935 + case 0xe6: 1935 1936 /* B6 */ 1936 1937 adev->gfx.config.max_cu_per_sh = 6; 1937 1938 break; ··· 1965 1964 adev->gfx.config.max_backends_per_se = 1; 1966 1965 1967 1966 switch (adev->pdev->revision) { 1967 + case 0x80: 1968 + case 0x81: 1968 1969 case 0xc0: 1969 1970 case 0xc1: 1970 1971 case 0xc2: 1971 1972 case 0xc4: 1972 1973 case 0xc8: 1973 1974 case 0xc9: 1975 + case 0xd6: 1976 + case 0xda: 1977 + case 0xe9: 1978 + case 0xea: 1974 1979 adev->gfx.config.max_cu_per_sh = 3; 1975 1980 break; 1981 + case 0x83: 1976 1982 case 0xd0: 1977 1983 case 0xd1: 1978 1984 case 0xd2: 1985 + case 0xd4: 1986 + case 0xdb: 1987 + case 0xe1: 1988 + case 0xe2: 1979 1989 default: 1980 1990 adev->gfx.config.max_cu_per_sh = 2; 1981 1991 break; ··· 3902 3890 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3903 3891 INDEX_STRIDE, 3); 3904 3892 mutex_lock(&adev->srbm_mutex); 3905 - for (i = 0; i < adev->vm_manager.num_ids; i++) { 3893 + for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3906 3894 vi_srbm_select(adev, 0, 0, 0, i); 3907 3895 /* CP and shaders */ 3908 3896 if (i == 0) {
+260 -257
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 39 39 40 40 #define GFX9_NUM_GFX_RINGS 1 41 41 #define GFX9_NUM_COMPUTE_RINGS 8 42 - #define GFX9_NUM_SE 4 43 42 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 44 43 45 44 MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); ··· 452 453 int r; 453 454 454 455 if (adev->gfx.mec.hpd_eop_obj) { 455 - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 456 + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); 456 457 if (unlikely(r != 0)) 457 458 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); 458 459 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); ··· 462 463 adev->gfx.mec.hpd_eop_obj = NULL; 463 464 } 464 465 if (adev->gfx.mec.mec_fw_obj) { 465 - r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false); 466 + r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, true); 466 467 if (unlikely(r != 0)) 467 468 dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r); 468 469 amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj); ··· 598 599 599 600 memset(hpd, 0, MEC_HPD_SIZE); 600 601 601 - r = amdgpu_bo_reserve(kiq->eop_obj, false); 602 + r = amdgpu_bo_reserve(kiq->eop_obj, true); 602 603 if (unlikely(r != 0)) 603 604 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); 604 605 amdgpu_bo_kunmap(kiq->eop_obj); ··· 630 631 ring->pipe = 1; 631 632 } 632 633 633 - irq->data = ring; 634 634 ring->queue = 0; 635 635 ring->eop_gpu_addr = kiq->eop_gpu_addr; 636 636 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); ··· 645 647 { 646 648 amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); 647 649 amdgpu_ring_fini(ring); 648 - irq->data = NULL; 649 650 } 650 651 651 652 /* create MQD for each compute queue */ ··· 702 705 703 706 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 704 707 { 705 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX), 708 + WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 706 709 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 707 710 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 708 711 (address << SQ_IND_INDEX__INDEX__SHIFT) | 709 712 (SQ_IND_INDEX__FORCE_READ_MASK)); 710 - return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA)); 713 + return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 711 714 } 712 715 713 716 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 714 717 uint32_t wave, uint32_t thread, 715 718 uint32_t regno, uint32_t num, uint32_t *out) 716 719 { 717 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX), 720 + WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 718 721 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 719 722 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 720 723 (regno << SQ_IND_INDEX__INDEX__SHIFT) | ··· 722 725 (SQ_IND_INDEX__FORCE_READ_MASK) | 723 726 (SQ_IND_INDEX__AUTO_INCR_MASK)); 724 727 while (num--) 725 - *(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA)); 728 + *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 726 729 } 727 730 728 731 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) ··· 771 774 switch (adev->asic_type) { 772 775 case CHIP_VEGA10: 773 776 adev->gfx.config.max_shader_engines = 4; 774 - adev->gfx.config.max_tile_pipes = 8; //?? 
775 777 adev->gfx.config.max_cu_per_sh = 16; 776 778 adev->gfx.config.max_sh_per_se = 1; 777 779 adev->gfx.config.max_backends_per_se = 4; ··· 783 787 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 784 788 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 785 789 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 790 + adev->gfx.config.gs_vgt_table_depth = 32; 791 + adev->gfx.config.gs_prim_buffer_depth = 1792; 786 792 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 787 793 break; 788 794 default: ··· 799 801 adev->gfx.config.gb_addr_config, 800 802 GB_ADDR_CONFIG, 801 803 NUM_PIPES); 804 + 805 + adev->gfx.config.max_tile_pipes = 806 + adev->gfx.config.gb_addr_config_fields.num_pipes; 807 + 802 808 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 803 809 REG_GET_FIELD( 804 810 adev->gfx.config.gb_addr_config, ··· 843 841 } 844 842 size_se = size_se ? size_se : default_size_se; 845 843 846 - ngg_buf->size = size_se * GFX9_NUM_SE; 844 + ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 847 845 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 848 846 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 849 847 &ngg_buf->bo, ··· 890 888 adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size; 891 889 892 890 /* Primitive Buffer */ 893 - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM], 891 + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 894 892 amdgpu_prim_buf_per_se, 895 893 64 * 1024); 896 894 if (r) { ··· 899 897 } 900 898 901 899 /* Position Buffer */ 902 - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS], 900 + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 903 901 amdgpu_pos_buf_per_se, 904 902 256 * 1024); 905 903 if (r) { ··· 908 906 } 909 907 910 908 /* Control Sideband */ 911 - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL], 909 + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 912 910 amdgpu_cntl_sb_buf_per_se, 913 911 256); 914 912 if (r) { ··· 920 918 if (amdgpu_param_buf_per_se <= 0) 921 919 goto out; 922 920 923 - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM], 921 + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 924 922 amdgpu_param_buf_per_se, 925 923 512 * 1024); 926 924 if (r) { ··· 949 947 950 948 /* Program buffer size */ 951 949 data = 0; 952 - size = adev->gfx.ngg.buf[PRIM].size / 256; 950 + size = adev->gfx.ngg.buf[NGG_PRIM].size / 256; 953 951 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size); 954 952 955 - size = adev->gfx.ngg.buf[POS].size / 256; 953 + size = adev->gfx.ngg.buf[NGG_POS].size / 256; 956 954 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size); 957 955 958 - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data); 956 + WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 959 957 960 958 data = 0; 961 - size = adev->gfx.ngg.buf[CNTL].size / 256; 959 + size = adev->gfx.ngg.buf[NGG_CNTL].size / 256; 962 960 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size); 963 961 964 - size = adev->gfx.ngg.buf[PARAM].size / 1024; 962 + size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024; 965 963 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size); 966 964 967 - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data); 965 + WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 968 966 969 967 /* Program buffer base address */ 970 - base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); 968 + base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 971 969 data = REG_SET_FIELD(0, 
WD_INDEX_BUF_BASE, BASE, base); 972 - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data); 970 + WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 973 971 974 - base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); 972 + base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 975 973 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 976 - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data); 974 + WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 977 975 978 - base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); 976 + base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 979 977 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 980 - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data); 978 + WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 981 979 982 - base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); 980 + base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 983 981 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 984 - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data); 982 + WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 985 983 986 - base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); 984 + base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 987 985 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 988 - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data); 986 + WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 989 987 990 - base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); 988 + base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 991 989 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 992 - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data); 990 + WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 993 991 994 992 /* Clear GDS reserved memory */ 995 993 r = amdgpu_ring_alloc(ring, 17); ··· 1098 1096 ring->pipe = i / 8; 1099 1097 ring->queue = i % 8; 1100 1098 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 1101 - sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); 1099 + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1102 1100 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; 1103 1101 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1104 1102 r = amdgpu_ring_init(adev, ring, 1024, ··· 1205 1203 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1206 1204 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1207 1205 } 1208 - WREG32( SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); 1206 + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); 1209 1207 } 1210 1208 1211 1209 static u32 gfx_v9_0_create_bitmask(u32 bit_width) ··· 1217 1215 { 1218 1216 u32 data, mask; 1219 1217 1220 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE)); 1221 - data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE)); 1218 + data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1219 + data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1222 1220 1223 1221 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1224 1222 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; ··· 1278 1276 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1279 1277 soc15_grbm_select(adev, 0, 0, 0, i); 1280 1278 /* CP and shaders */ 1281 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); 1282 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); 1279 + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, 
sh_mem_config); 1280 + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 1283 1281 } 1284 1282 soc15_grbm_select(adev, 0, 0, 0, 0); 1285 1283 mutex_unlock(&adev->srbm_mutex); ··· 1306 1304 tmp = 0; 1307 1305 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 1308 1306 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1309 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp); 1310 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0); 1307 + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); 1308 + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); 1311 1309 } 1312 1310 soc15_grbm_select(adev, 0, 0, 0, 0); 1313 1311 ··· 1322 1320 */ 1323 1321 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1324 1322 1325 - WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE), 1323 + WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, 1326 1324 (adev->gfx.config.sc_prim_fifo_size_frontend << 1327 1325 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 1328 1326 (adev->gfx.config.sc_prim_fifo_size_backend << ··· 1345 1343 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1346 1344 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1347 1345 for (k = 0; k < adev->usec_timeout; k++) { 1348 - if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0) 1346 + if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 1349 1347 break; 1350 1348 udelay(1); 1351 1349 } ··· 1359 1357 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 1360 1358 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 1361 1359 for (k = 0; k < adev->usec_timeout; k++) { 1362 - if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0) 1360 + if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 1363 1361 break; 1364 1362 udelay(1); 1365 1363 } ··· 1368 1366 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 1369 1367 bool enable) 1370 1368 { 1371 - u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); 1369 + u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 1372 1370 1373 1371 if (enable) 1374 1372 return; ··· 1378 1376 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 1379 1377 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 1380 1378 1381 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp); 1379 + WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 1382 1380 } 1383 1381 1384 1382 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 1385 1383 { 1386 - u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); 1384 + u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); 1387 1385 1388 1386 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 1389 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp); 1387 + WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp); 1390 1388 1391 1389 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 1392 1390 ··· 1417 1415 1418 1416 #ifdef AMDGPU_RLC_DEBUG_RETRY 1419 1417 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 1420 - rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6)); 1418 + rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 1421 1419 if(rlc_ucode_ver == 0x108) { 1422 1420 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 1423 1421 rlc_ucode_ver, adev->gfx.rlc_fw_version); 1424 1422 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 1425 1423 * default is 0x9C4 to create a 100us interval */ 1426 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4); 1424 + WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 1427 1425 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 1428 1426 * to disable the page fault retry interrupts, default is 1429 1427 * 0x100 (256) */ 1430 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100); 1428 + WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 1431 1429 } 1432 1430 #endif 1433 1431 } ··· 1448 1446 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1449 1447 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 1450 1448 1451 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), 1449 + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 1452 1450 RLCG_UCODE_LOADING_START_ADDRESS); 1453 1451 for (i = 0; i < fw_size; i++) 1454 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++)); 1455 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version); 1452 + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 1453 + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 1456 1454 1457 1455 return 0; 1458 1456 } ··· 1467 1465 gfx_v9_0_rlc_stop(adev); 1468 1466 1469 1467 /* disable CG */ 1470 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0); 1468 + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 1471 1469 1472 1470 /* disable PG */ 1473 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0); 1471 + WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); 1474 1472 1475 1473 gfx_v9_0_rlc_reset(adev); 1476 1474 ··· 1489 1487 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 1490 1488 { 1491 1489 int i; 1492 - u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)); 1490 + u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 1493 1491 1494 1492 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 1495 1493 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 
0 : 1); ··· 1498 1496 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1499 1497 adev->gfx.gfx_ring[i].ready = false; 1500 1498 } 1501 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp); 1499 + WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); 1502 1500 udelay(50); 1503 1501 } 1504 1502 ··· 1531 1529 (adev->gfx.pfp_fw->data + 1532 1530 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 1533 1531 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 1534 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0); 1532 + WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 1535 1533 for (i = 0; i < fw_size; i++) 1536 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++)); 1537 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version); 1534 + WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 1535 + WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 1538 1536 1539 1537 /* CE */ 1540 1538 fw_data = (const __le32 *) 1541 1539 (adev->gfx.ce_fw->data + 1542 1540 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 1543 1541 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 1544 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0); 1542 + WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 1545 1543 for (i = 0; i < fw_size; i++) 1546 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++)); 1547 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version); 1544 + WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 1545 + WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 1548 1546 1549 1547 /* ME */ 1550 1548 fw_data = (const __le32 *) 1551 1549 (adev->gfx.me_fw->data + 1552 1550 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 1553 1551 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 1554 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0); 1552 + WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 1555 1553 for (i = 0; i < fw_size; i++) 1556 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++)); 1557 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version); 1554 + WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 1555 + WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 1558 1556 1559 1557 return 0; 1560 1558 } ··· 1596 1594 int r, i; 1597 1595 1598 1596 /* init the CP */ 1599 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1); 1600 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1); 1597 + WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 1598 + WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 1601 1599 1602 1600 gfx_v9_0_cp_gfx_enable(adev, true); 1603 1601 ··· 1652 1650 u64 rb_addr, rptr_addr, wptr_gpu_addr; 1653 1651 1654 1652 /* Set the write pointer delay */ 1655 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0); 1653 + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 1656 1654 1657 1655 /* set the RB to use vmid 0 */ 1658 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0); 1656 + WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 1659 1657 1660 1658 /* Set ring buffer size */ 1661 1659 ring = &adev->gfx.gfx_ring[0]; ··· 1665 1663 #ifdef __BIG_ENDIAN 1666 1664 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 1667 1665 #endif 1668 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp); 1666 + WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 1669 1667 1670 1668 /* Initialize the ring buffer's write pointers */ 1671 1669 
ring->wptr = 0; 1672 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr)); 1673 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr)); 1670 + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 1671 + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 1674 1672 1675 1673 /* set the wb address wether it's enabled or not */ 1676 1674 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 1677 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr)); 1678 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 1675 + WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 1676 + WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 1679 1677 1680 1678 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 1681 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr)); 1682 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr)); 1679 + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 1680 + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 1683 1681 1684 1682 mdelay(1); 1685 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp); 1683 + WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 1686 1684 1687 1685 rb_addr = ring->gpu_addr >> 8; 1688 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr); 1689 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr)); 1686 + WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 1687 + WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 1690 1688 1691 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL)); 1689 + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 1692 1690 if (ring->use_doorbell) { 1693 1691 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 1694 1692 DOORBELL_OFFSET, ring->doorbell_index); ··· 1697 1695 } else { 1698 1696 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 1699 1697 } 1700 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp); 1698 + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 1701 1699 1702 1700 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 1703 1701 DOORBELL_RANGE_LOWER, ring->doorbell_index); 1704 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp); 1702 + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 1705 1703 1706 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER), 1704 + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 1707 1705 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 1708 1706 1709 1707 ··· 1719 1717 int i; 1720 1718 1721 1719 if (enable) { 1722 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0); 1720 + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); 1723 1721 } else { 1724 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 1722 + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 1725 1723 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 1726 1724 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1727 1725 adev->gfx.compute_ring[i].ready = false; ··· 1758 1756 tmp = 0; 1759 1757 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 1760 1758 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 1761 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp); 1759 + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 1762 1760 1763 
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO), 1761 + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 1764 1762 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 1765 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI), 1763 + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 1766 1764 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 1767 1765 1768 1766 /* MEC1 */ 1769 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR), 1767 + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 1770 1768 mec_hdr->jt_offset); 1771 1769 for (i = 0; i < mec_hdr->jt_size; i++) 1772 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA), 1770 + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 1773 1771 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 1774 1772 1775 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR), 1773 + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 1776 1774 adev->gfx.mec_fw_version); 1777 1775 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 1778 1776 ··· 1787 1785 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 1788 1786 1789 1787 if (ring->mqd_obj) { 1790 - r = amdgpu_bo_reserve(ring->mqd_obj, false); 1788 + r = amdgpu_bo_reserve(ring->mqd_obj, true); 1791 1789 if (unlikely(r != 0)) 1792 1790 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 1793 1791 ··· 1825 1823 struct amdgpu_device *adev = ring->adev; 1826 1824 1827 1825 /* tell RLC which is KIQ queue */ 1828 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); 1826 + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 1829 1827 tmp &= 0xffffff00; 1830 1828 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1831 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp); 1829 + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 1832 1830 tmp |= 0x80; 1833 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp); 1831 + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 1834 1832 } 1835 1833 1836 1834 static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring) ··· 1900 1898 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 1901 1899 1902 1900 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 1903 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL)); 1901 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 1904 1902 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 1905 1903 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 1906 1904 1907 1905 mqd->cp_hqd_eop_control = tmp; 1908 1906 1909 1907 /* enable doorbell? 
*/ 1910 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); 1908 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 1911 1909 1912 1910 if (ring->use_doorbell) { 1913 1911 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, ··· 1937 1935 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 1938 1936 1939 1937 /* set MQD vmid to 0 */ 1940 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL)); 1938 + tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 1941 1939 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 1942 1940 mqd->cp_mqd_control = tmp; 1943 1941 ··· 1947 1945 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 1948 1946 1949 1947 /* set up the HQD, this is similar to CP_RB0_CNTL */ 1950 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL)); 1948 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 1951 1949 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 1952 1950 (order_base_2(ring->ring_size / 4) - 1)); 1953 1951 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, ··· 1975 1973 tmp = 0; 1976 1974 /* enable the doorbell if requested */ 1977 1975 if (ring->use_doorbell) { 1978 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); 1976 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 1979 1977 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1980 1978 DOORBELL_OFFSET, ring->doorbell_index); 1981 1979 ··· 1991 1989 1992 1990 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 1993 1991 ring->wptr = 0; 1994 - mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 1992 + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 1995 1993 1996 1994 /* set the vmid for the queue */ 1997 1995 mqd->cp_hqd_vmid = 0; 1998 1996 1999 - tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 1997 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2000 1998 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2001 1999 mqd->cp_hqd_persistent_state = tmp; 2000 + 2001 + /* set MIN_IB_AVAIL_SIZE */ 2002 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2003 + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2004 + mqd->cp_hqd_ib_control = tmp; 2002 2005 2003 2006 /* activate the queue */ 2004 2007 mqd->cp_hqd_active = 1; ··· 2020 2013 /* disable wptr polling */ 2021 2014 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2022 2015 2023 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), 2016 + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2024 2017 mqd->cp_hqd_eop_base_addr_lo); 2025 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), 2018 + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2026 2019 mqd->cp_hqd_eop_base_addr_hi); 2027 2020 2028 2021 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2029 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), 2022 + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, 2030 2023 mqd->cp_hqd_eop_control); 2031 2024 2032 2025 /* enable doorbell? 
*/ 2033 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), 2026 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2034 2027 mqd->cp_hqd_pq_doorbell_control); 2035 2028 2036 2029 /* disable the queue if it's active */ 2037 - if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) { 2038 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1); 2030 + if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 2031 + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 2039 2032 for (j = 0; j < adev->usec_timeout; j++) { 2040 - if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1)) 2033 + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 2041 2034 break; 2042 2035 udelay(1); 2043 2036 } 2044 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 2037 + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 2045 2038 mqd->cp_hqd_dequeue_request); 2046 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), 2039 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 2047 2040 mqd->cp_hqd_pq_rptr); 2048 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), 2041 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 2049 2042 mqd->cp_hqd_pq_wptr_lo); 2050 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), 2043 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 2051 2044 mqd->cp_hqd_pq_wptr_hi); 2052 2045 } 2053 2046 2054 2047 /* set the pointer to the MQD */ 2055 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), 2048 + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, 2056 2049 mqd->cp_mqd_base_addr_lo); 2057 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), 2050 + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, 2058 2051 mqd->cp_mqd_base_addr_hi); 2059 2052 2060 2053 /* set MQD vmid to 0 */ 2061 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), 2054 + WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 2062 2055 mqd->cp_mqd_control); 2063 2056 2064 2057 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2065 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), 2058 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, 2066 2059 mqd->cp_hqd_pq_base_lo); 2067 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), 2060 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, 2068 2061 mqd->cp_hqd_pq_base_hi); 2069 2062 2070 2063 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2071 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), 2064 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, 2072 2065 mqd->cp_hqd_pq_control); 2073 2066 2074 2067 /* set the wb address whether it's enabled or not */ 2075 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR), 2068 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 2076 2069 mqd->cp_hqd_pq_rptr_report_addr_lo); 2077 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI), 2070 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 2078 2071 mqd->cp_hqd_pq_rptr_report_addr_hi); 2079 2072 2080 2073 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2081 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), 2074 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 2082 2075 mqd->cp_hqd_pq_wptr_poll_addr_lo); 2083 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 2076 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 2084 2077 mqd->cp_hqd_pq_wptr_poll_addr_hi); 2085 2078 2086 2079 /* enable the doorbell if requested */ 2087 2080 if (ring->use_doorbell) { 2088 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER), 2081 + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 2089 2082 (AMDGPU_DOORBELL64_KIQ *2) << 2); 2090 - 
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER), 2083 + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 2091 2084 (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2); 2092 2085 } 2093 2086 2094 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), 2087 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2095 2088 mqd->cp_hqd_pq_doorbell_control); 2096 2089 2097 2090 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2098 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), 2091 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 2099 2092 mqd->cp_hqd_pq_wptr_lo); 2100 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), 2093 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 2101 2094 mqd->cp_hqd_pq_wptr_hi); 2102 2095 2103 2096 /* set the vmid for the queue */ 2104 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid); 2097 + WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 2105 2098 2106 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), 2099 + WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 2107 2100 mqd->cp_hqd_persistent_state); 2108 2101 2109 2102 /* activate the queue */ 2110 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), 2103 + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 2111 2104 mqd->cp_hqd_active); 2112 2105 2113 2106 if (ring->use_doorbell) ··· 2330 2323 { 2331 2324 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2332 2325 2333 - if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)), 2326 + if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 2334 2327 GRBM_STATUS, GUI_ACTIVE)) 2335 2328 return false; 2336 2329 else ··· 2345 2338 2346 2339 for (i = 0; i < adev->usec_timeout; i++) { 2347 2340 /* read MC_STATUS */ 2348 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) & 2341 + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) & 2349 2342 GRBM_STATUS__GUI_ACTIVE_MASK; 2350 2343 2351 2344 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) ··· 2362 2355 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2363 2356 2364 2357 /* GRBM_STATUS */ 2365 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)); 2358 + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 2366 2359 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 2367 2360 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 2368 2361 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | ··· 2381 2374 } 2382 2375 2383 2376 /* GRBM_STATUS2 */ 2384 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)); 2377 + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 2385 2378 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 2386 2379 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2387 2380 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); ··· 2398 2391 gfx_v9_0_cp_compute_enable(adev, false); 2399 2392 2400 2393 if (grbm_soft_reset) { 2401 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); 2394 + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 2402 2395 tmp |= grbm_soft_reset; 2403 2396 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 2404 - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); 2405 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); 2397 + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 2398 + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 2406 2399 2407 2400 udelay(50); 2408 2401 2409 2402 tmp &= ~grbm_soft_reset; 2410 - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); 2411 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); 2403 + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 2404 + tmp = RREG32_SOC15(GC, 0, 
mmGRBM_SOFT_RESET); 2412 2405 } 2413 2406 2414 2407 /* Wait a little for things to settle down */ ··· 2422 2415 uint64_t clock; 2423 2416 2424 2417 mutex_lock(&adev->gfx.gpu_clock_mutex); 2425 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1); 2426 - clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) | 2427 - ((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL); 2418 + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 2419 + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 2420 + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 2428 2421 mutex_unlock(&adev->gfx.gpu_clock_mutex); 2429 2422 return clock; 2430 2423 } ··· 2504 2497 return; 2505 2498 2506 2499 /* if RLC is not enabled, do nothing */ 2507 - rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); 2500 + rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 2508 2501 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 2509 2502 return; 2510 2503 ··· 2513 2506 AMD_CG_SUPPORT_GFX_3D_CGCG)) { 2514 2507 data = RLC_SAFE_MODE__CMD_MASK; 2515 2508 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 2516 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data); 2509 + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 2517 2510 2518 2511 /* wait for RLC_SAFE_MODE */ 2519 2512 for (i = 0; i < adev->usec_timeout; i++) { ··· 2533 2526 return; 2534 2527 2535 2528 /* if RLC is not enabled, do nothing */ 2536 - rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); 2529 + rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 2537 2530 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 2538 2531 return; 2539 2532 ··· 2544 2537 * mode. 2545 2538 */ 2546 2539 data = RLC_SAFE_MODE__CMD_MASK; 2547 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data); 2540 + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 2548 2541 adev->gfx.rlc.in_safe_mode = false; 2549 2542 } 2550 2543 } ··· 2557 2550 /* It is disabled by HW by default */ 2558 2551 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 2559 2552 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 2560 - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 2553 + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2561 2554 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | 2562 2555 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 2563 2556 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | ··· 2567 2560 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 2568 2561 2569 2562 if (def != data) 2570 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); 2563 + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 2571 2564 2572 2565 /* MGLS is a global flag to control all MGLS in GFX */ 2573 2566 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 2574 2567 /* 2 - RLC memory Light sleep */ 2575 2568 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 2576 - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 2569 + def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 2577 2570 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 2578 2571 if (def != data) 2579 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data); 2572 + WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 2580 2573 } 2581 2574 /* 3 - CP memory Light sleep */ 2582 2575 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 2583 - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 2576 + def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 2584 2577 data |= 
CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 2585 2578 if (def != data) 2586 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data); 2579 + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 2587 2580 } 2588 2581 } 2589 2582 } else { 2590 2583 /* 1 - MGCG_OVERRIDE */ 2591 - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 2584 + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2592 2585 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | 2593 2586 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 2594 2587 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 2595 2588 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 2596 2589 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 2597 2590 if (def != data) 2598 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); 2591 + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 2599 2592 2600 2593 /* 2 - disable MGLS in RLC */ 2601 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 2594 + data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 2602 2595 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 2603 2596 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 2604 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data); 2597 + WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 2605 2598 } 2606 2599 2607 2600 /* 3 - disable MGLS in CP */ 2608 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 2601 + data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 2609 2602 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 2610 2603 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 2611 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data); 2604 + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 2612 2605 } 2613 2606 } 2614 2607 } ··· 2623 2616 /* Enable 3D CGCG/CGLS */ 2624 2617 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 2625 2618 /* write cmd to clear cgcg/cgls ov */ 2626 - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 2619 + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2627 2620 /* unset CGCG override */ 2628 2621 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 2629 2622 /* update CGCG and CGLS override bits */ 2630 2623 if (def != data) 2631 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); 2624 + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 2632 2625 /* enable 3Dcgcg FSM(0x0020003f) */ 2633 - def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 2626 + def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 2634 2627 data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 2635 2628 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 2636 2629 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 2637 2630 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 2638 2631 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 2639 2632 if (def != data) 2640 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data); 2633 + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 2641 2634 2642 2635 /* set IDLE_POLL_COUNT(0x00900100) */ 2643 - def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2636 + def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 2644 2637 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 2645 2638 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2646 2639 if (def != data) 2647 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2640 + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 2648 2641 } else { 2649 2642 /* Disable 
CGCG/CGLS */ 2650 - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 2643 + def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 2651 2644 /* disable cgcg, cgls should be disabled */ 2652 2645 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 2653 2646 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 2654 2647 /* disable cgcg and cgls in FSM */ 2655 2648 if (def != data) 2656 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data); 2649 + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 2657 2650 } 2658 2651 2659 2652 adev->gfx.rlc.funcs->exit_safe_mode(adev); ··· 2667 2660 adev->gfx.rlc.funcs->enter_safe_mode(adev); 2668 2661 2669 2662 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 2670 - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 2663 + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2671 2664 /* unset CGCG override */ 2672 2665 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 2673 2666 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) ··· 2676 2669 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 2677 2670 /* update CGCG and CGLS override bits */ 2678 2671 if (def != data) 2679 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); 2672 + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 2680 2673 2681 2674 /* enable cgcg FSM(0x0020003F) */ 2682 - def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 2675 + def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 2683 2676 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 2684 2677 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 2685 2678 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 2686 2679 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 2687 2680 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 2688 2681 if (def != data) 2689 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data); 2682 + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 2690 2683 2691 2684 /* set IDLE_POLL_COUNT(0x00900100) */ 2692 - def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2685 + def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 2693 2686 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 2694 2687 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2695 2688 if (def != data) 2696 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2689 + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 2697 2690 } else { 2698 - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 2691 + def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 2699 2692 /* reset CGCG/CGLS bits */ 2700 2693 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 2701 2694 /* disable cgcg and cgls in FSM */ 2702 2695 if (def != data) 2703 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data); 2696 + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 2704 2697 } 2705 2698 2706 2699 adev->gfx.rlc.funcs->exit_safe_mode(adev); ··· 2747 2740 { 2748 2741 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2749 2742 2743 + if (amdgpu_sriov_vf(adev)) 2744 + return 0; 2745 + 2750 2746 switch (adev->asic_type) { 2751 2747 case CHIP_VEGA10: 2752 2748 gfx_v9_0_update_gfx_clock_gating(adev, ··· 2770 2760 *flags = 0; 2771 2761 2772 2762 /* AMD_CG_SUPPORT_GFX_MGCG */ 2773 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 2763 + data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2774 2764 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 
2775 2765 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 2776 2766 2777 2767 /* AMD_CG_SUPPORT_GFX_CGCG */ 2778 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 2768 + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 2779 2769 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 2780 2770 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 2781 2771 ··· 2784 2774 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 2785 2775 2786 2776 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 2787 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 2777 + data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 2788 2778 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 2789 2779 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 2790 2780 2791 2781 /* AMD_CG_SUPPORT_GFX_CP_LS */ 2792 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 2782 + data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 2793 2783 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 2794 2784 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 2795 2785 2796 2786 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 2797 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 2787 + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 2798 2788 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 2799 2789 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 2800 2790 ··· 2817 2807 if (ring->use_doorbell) { 2818 2808 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 2819 2809 } else { 2820 - wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR)); 2821 - wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32; 2810 + wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 2811 + wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 2822 2812 } 2823 2813 2824 2814 return wptr; ··· 2833 2823 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 2834 2824 WDOORBELL64(ring->doorbell_index, ring->wptr); 2835 2825 } else { 2836 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr)); 2837 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr)); 2826 + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2827 + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2838 2828 } 2839 2829 } 2840 2830 ··· 2966 2956 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 2967 2957 unsigned vm_id, uint64_t pd_addr) 2968 2958 { 2959 + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 2969 2960 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 2970 2961 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); 2971 - unsigned eng = ring->idx; 2972 - unsigned i; 2962 + unsigned eng = ring->vm_inv_eng; 2973 2963 2974 2964 pd_addr = pd_addr | 0x1; /* valid bit */ 2975 2965 /* now only use physical base address of PDE and valid */ 2976 2966 BUG_ON(pd_addr & 0xFFFF00000000003EULL); 2977 2967 2978 - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 2979 - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; 2968 + gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2969 + hub->ctx0_ptb_addr_lo32 + (2 * vm_id), 2970 + lower_32_bits(pd_addr)); 2980 2971 2981 - gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2982 - hub->ctx0_ptb_addr_lo32 2983 - + (2 * vm_id), 2984 - lower_32_bits(pd_addr)); 2972 + gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2973 + hub->ctx0_ptb_addr_hi32 + (2 * vm_id), 2974 + upper_32_bits(pd_addr)); 2985 2975 2986 - gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2987 - hub->ctx0_ptb_addr_hi32 2988 - + (2 * vm_id), 2989 - upper_32_bits(pd_addr)); 2976 + 
gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2977 + hub->vm_inv_eng0_req + eng, req); 2990 2978 2991 - gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2992 - hub->vm_inv_eng0_req + eng, req); 2993 - 2994 - /* wait for the invalidate to complete */ 2995 - gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + 2996 - eng, 0, 1 << vm_id, 1 << vm_id, 0x20); 2997 - } 2979 + /* wait for the invalidate to complete */ 2980 + gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + 2981 + eng, 0, 1 << vm_id, 1 << vm_id, 0x20); 2998 2982 2999 2983 /* compute doesn't have PFP */ 3000 2984 if (usepfp) { ··· 3377 3373 enum amdgpu_interrupt_state state) 3378 3374 { 3379 3375 uint32_t tmp, target; 3380 - struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data; 3381 - 3382 - BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); 3376 + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 3383 3377 3384 3378 if (ring->me == 1) 3385 3379 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); ··· 3388 3386 switch (type) { 3389 3387 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 3390 3388 if (state == AMDGPU_IRQ_STATE_DISABLE) { 3391 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL)); 3389 + tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); 3392 3390 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 3393 3391 GENERIC2_INT_ENABLE, 0); 3394 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp); 3392 + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); 3395 3393 3396 3394 tmp = RREG32(target); 3397 3395 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 3398 3396 GENERIC2_INT_ENABLE, 0); 3399 3397 WREG32(target, tmp); 3400 3398 } else { 3401 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL)); 3399 + tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); 3402 3400 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 3403 3401 GENERIC2_INT_ENABLE, 1); 3404 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp); 3402 + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); 3405 3403 3406 3404 tmp = RREG32(target); 3407 3405 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, ··· 3421 3419 struct amdgpu_iv_entry *entry) 3422 3420 { 3423 3421 u8 me_id, pipe_id, queue_id; 3424 - struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data; 3425 - 3426 - BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); 3422 + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 3427 3423 3428 3424 me_id = (entry->ring_id & 0x0c) >> 2; 3429 3425 pipe_id = (entry->ring_id & 0x03) >> 0; ··· 3456 3456 .align_mask = 0xff, 3457 3457 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 3458 3458 .support_64bit_ptrs = true, 3459 + .vmhub = AMDGPU_GFXHUB, 3459 3460 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 3460 3461 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 3461 3462 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 3462 3463 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 3463 3464 5 + /* COND_EXEC */ 3464 3465 7 + /* PIPELINE_SYNC */ 3465 - 46 + /* VM_FLUSH */ 3466 + 24 + /* VM_FLUSH */ 3466 3467 8 + /* FENCE for VM_FLUSH */ 3467 3468 20 + /* GDS switch */ 3468 3469 4 + /* double SWITCH_BUFFER, ··· 3501 3500 .align_mask = 0xff, 3502 3501 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 3503 3502 .support_64bit_ptrs = true, 3503 + .vmhub = AMDGPU_GFXHUB, 3504 3504 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 3505 3505 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 3506 3506 .set_wptr = gfx_v9_0_ring_set_wptr_compute, ··· 3510 3508 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 3511 3509 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ 3512 3510 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 3513 - 64 + /* gfx_v9_0_ring_emit_vm_flush */ 3511 + 
24 + /* gfx_v9_0_ring_emit_vm_flush */ 3514 3512 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 3515 3513 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ 3516 3514 .emit_ib = gfx_v9_0_ring_emit_ib_compute, ··· 3531 3529 .align_mask = 0xff, 3532 3530 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 3533 3531 .support_64bit_ptrs = true, 3532 + .vmhub = AMDGPU_GFXHUB, 3534 3533 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 3535 3534 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 3536 3535 .set_wptr = gfx_v9_0_ring_set_wptr_compute, ··· 3540 3537 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 3541 3538 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ 3542 3539 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 3543 - 64 + /* gfx_v9_0_ring_emit_vm_flush */ 3540 + 24 + /* gfx_v9_0_ring_emit_vm_flush */ 3544 3541 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 3545 3542 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ 3546 3543 .emit_ib = gfx_v9_0_ring_emit_ib_compute, ··· 3615 3612 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 3616 3613 { 3617 3614 /* init asci gds info */ 3618 - adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE)); 3615 + adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 3619 3616 adev->gds.gws.total_size = 64; 3620 3617 adev->gds.oa.total_size = 16; 3621 3618 ··· 3644 3641 { 3645 3642 u32 data, mask; 3646 3643 3647 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG)); 3648 - data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG)); 3644 + data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 3645 + data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 3649 3646 3650 3647 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 3651 3648 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; ··· 3766 3763 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE); 3767 3764 eop_gpu_addr >>= 8; 3768 3765 3769 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr)); 3770 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr)); 3766 + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr)); 3767 + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 3771 3768 mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr); 3772 3769 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr); 3773 3770 3774 3771 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3775 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL)); 3772 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3776 3773 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3777 3774 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 3778 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp); 3775 + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp); 3779 3776 3780 3777 /* enable doorbell? 
*/ 3781 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); 3778 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3782 3779 if (use_doorbell) 3783 3780 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 3784 3781 else 3785 3782 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); 3786 3783 3787 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp); 3784 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); 3788 3785 mqd->cp_hqd_pq_doorbell_control = tmp; 3789 3786 3790 3787 /* disable the queue if it's active */ ··· 3793 3790 mqd->cp_hqd_pq_rptr = 0; 3794 3791 mqd->cp_hqd_pq_wptr_lo = 0; 3795 3792 mqd->cp_hqd_pq_wptr_hi = 0; 3796 - if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) { 3797 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1); 3793 + if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3794 + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3798 3795 for (j = 0; j < adev->usec_timeout; j++) { 3799 - if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1)) 3796 + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3800 3797 break; 3801 3798 udelay(1); 3802 3799 } 3803 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), mqd->cp_hqd_dequeue_request); 3804 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), mqd->cp_hqd_pq_rptr); 3805 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo); 3806 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi); 3800 + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 3801 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 3802 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); 3803 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); 3807 3804 } 3808 3805 3809 3806 /* set the pointer to the MQD */ 3810 3807 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; 3811 3808 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 3812 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo); 3813 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi); 3809 + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 3810 + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 3814 3811 3815 3812 /* set MQD vmid to 0 */ 3816 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL)); 3813 + tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3817 3814 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3818 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp); 3815 + WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp); 3819 3816 mqd->cp_mqd_control = tmp; 3820 3817 3821 3818 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3822 3819 hqd_gpu_addr = ring->gpu_addr >> 8; 3823 3820 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3824 3821 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3825 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo); 3826 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi); 3822 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 3823 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 3827 3824 3828 3825 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3829 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL)); 3826 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3830 3827 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3831 3828 
(order_base_2(ring->ring_size / 4) - 1)); 3832 3829 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, ··· 3838 3835 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3839 3836 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3840 3837 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3841 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp); 3838 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp); 3842 3839 mqd->cp_hqd_pq_control = tmp; 3843 3840 3844 3841 /* set the wb address wether it's enabled or not */ ··· 3846 3843 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3847 3844 mqd->cp_hqd_pq_rptr_report_addr_hi = 3848 3845 upper_32_bits(wb_gpu_addr) & 0xffff; 3849 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR), 3846 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3850 3847 mqd->cp_hqd_pq_rptr_report_addr_lo); 3851 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI), 3848 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3852 3849 mqd->cp_hqd_pq_rptr_report_addr_hi); 3853 3850 3854 3851 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3855 3852 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3856 3853 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3857 3854 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3858 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), 3855 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3859 3856 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3860 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 3857 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3861 3858 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3862 3859 3863 3860 /* enable the doorbell if requested */ 3864 3861 if (use_doorbell) { 3865 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER), 3862 + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3866 3863 (AMDGPU_DOORBELL64_KIQ * 2) << 2); 3867 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER), 3864 + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3868 3865 (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2); 3869 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); 3866 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3870 3867 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3871 3868 DOORBELL_OFFSET, ring->doorbell_index); 3872 3869 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); ··· 3877 3874 } else { 3878 3875 mqd->cp_hqd_pq_doorbell_control = 0; 3879 3876 } 3880 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), 3877 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3881 3878 mqd->cp_hqd_pq_doorbell_control); 3882 3879 3883 3880 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3884 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo); 3885 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi); 3881 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); 3882 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); 3886 3883 3887 3884 /* set the vmid for the queue */ 3888 3885 mqd->cp_hqd_vmid = 0; 3889 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid); 3886 + WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3890 3887 3891 - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE)); 3888 + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3892 3889 tmp = 
REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3893 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp); 3890 + WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp); 3894 3891 mqd->cp_hqd_persistent_state = tmp; 3895 3892 3896 3893 /* activate the queue */ 3897 3894 mqd->cp_hqd_active = 1; 3898 - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active); 3895 + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 3899 3896 3900 3897 soc15_grbm_select(adev, 0, 0, 0, 0); 3901 3898 mutex_unlock(&adev->srbm_mutex);
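Most of the gfx_v9_0.c churn above is mechanical: open-coded WREG32(SOC15_REG_OFFSET(GC, 0, reg), v) / RREG32(SOC15_REG_OFFSET(...)) pairs become WREG32_SOC15() / RREG32_SOC15(). A minimal, self-contained sketch of that wrapper pattern follows; the register base, offsets and MMIO helpers are stand-ins, not the driver's soc15_common.h definitions.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the MMIO accessors and per-IP register base that the real
 * RREG32_SOC15/WREG32_SOC15 wrappers resolve from the SOC15 offset tables.
 * Only the wrapping pattern itself is the point of this sketch. */
static uint32_t GC_base = 0x2000;	/* assumed base of the GC block */
#define mmCP_RB0_WPTR 0x1df		/* placeholder register offset */

static uint32_t mmio_read32(uint32_t offset)
{
	printf("RREG32 0x%05x\n", (unsigned)offset);
	return 0;
}

static void mmio_write32(uint32_t offset, uint32_t value)
{
	printf("WREG32 0x%05x = 0x%08x\n", (unsigned)offset, (unsigned)value);
}

#define SOC15_REG_OFFSET(ip, inst, reg)	(ip##_base + (reg))	/* simplified */
#define RREG32_SOC15(ip, inst, reg)	mmio_read32(SOC15_REG_OFFSET(ip, inst, reg))
#define WREG32_SOC15(ip, inst, reg, v)	mmio_write32(SOC15_REG_OFFSET(ip, inst, reg), (v))

int main(void)
{
	/* Before: WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), 0x10);
	 * After, as in the hunks above: */
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, 0x10);
	(void)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
	return 0;
}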
+3 -2
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
···
 346  346 	 * size equal to the 1024 or vram, whichever is larger.
 347  347 	 */
 348  348 	if (amdgpu_gart_size == -1)
 349      -		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
      349 +		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
      350 +					adev->mc.mc_vram_size);
 350  351 	else
 351  352 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 352  353 
···
 622  621 	 * amdgpu graphics/compute will use VMIDs 1-7
 623  622 	 * amdkfd will use VMIDs 8-15
 624  623 	 */
 625      -	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
      624 +	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 626  625 	adev->vm_manager.num_level = 1;
 627  626 	amdgpu_vm_manager_init(adev);
 628  627 
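The id_mgr[0] change here (repeated for gmc_v7/v8 below, and per hub for gmc_v9) means VMIDs are now tracked per VM hub rather than in a single global pool; pre-gfx9 parts have only one hub, so they populate entry 0. A self-contained sketch of the layout this implies; struct and field names other than num_ids are assumptions, not the exact amdgpu structures.

#include <stdio.h>

#define MAX_VMHUBS_SKETCH   2	/* gfx hub + mm hub on Vega10; one hub before */
#define AMDGPU_NUM_OF_VMIDS 8

struct vmid_mgr_sketch {
	unsigned num_ids;	/* VMIDs this hub may hand out */
};

struct vm_manager_sketch {
	struct vmid_mgr_sketch id_mgr[MAX_VMHUBS_SKETCH];
};

int main(void)
{
	struct vm_manager_sketch mgr = { { { 0 }, { 0 } } };

	/* pre-gfx9 parts (gmc_v6/v7/v8): a single hub, so only entry 0 is set */
	mgr.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;

	printf("hub0 ids=%u hub1 ids=%u\n",
	       mgr.id_mgr[0].num_ids, mgr.id_mgr[1].num_ids);
	return 0;
}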
+3 -2
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
···
 395  395 	 * size equal to the 1024 or vram, whichever is larger.
 396  396 	 */
 397  397 	if (amdgpu_gart_size == -1)
 398      -		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
      398 +		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
      399 +					adev->mc.mc_vram_size);
 399  400 	else
 400  401 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 401  402 
···
 747  746 	 * amdgpu graphics/compute will use VMIDs 1-7
 748  747 	 * amdkfd will use VMIDs 8-15
 749  748 	 */
 750      -	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
      749 +	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 751  750 	adev->vm_manager.num_level = 1;
 752  751 	amdgpu_vm_manager_init(adev);
 753  752 
+3 -2
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
···
 557  557 	 * size equal to the 1024 or vram, whichever is larger.
 558  558 	 */
 559  559 	if (amdgpu_gart_size == -1)
 560      -		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
      560 +		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
      561 +					adev->mc.mc_vram_size);
 561  562 	else
 562  563 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 563  564 
···
 950  949 	 * amdgpu graphics/compute will use VMIDs 1-7
 951  950 	 * amdkfd will use VMIDs 8-15
 952  951 	 */
 953      -	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
      952 +	adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
 954  953 	adev->vm_manager.num_level = 1;
 955  954 	amdgpu_vm_manager_init(adev);
 956  955 
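All three legacy GMC blocks above (gmc_v6/v7/v8), and gmc_v9 below, switch the automatic GTT size from a hard-coded 1 GB to AMDGPU_DEFAULT_GTT_SIZE_MB (3 GB). A stand-alone sketch of the resulting sizing rule when the gart size module parameter is left at its -1 "auto" default:

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL	/* 3 GB default in this series */

/* -1 means "auto", i.e. max(default GTT, VRAM size); anything else is an
 * explicit size in MB from the module parameter. */
static uint64_t pick_gtt_size(long long gart_size_mb, uint64_t vram_size)
{
	if (gart_size_mb == -1) {
		uint64_t def = AMDGPU_DEFAULT_GTT_SIZE_MB << 20;
		return def > vram_size ? def : vram_size;
	}
	return (uint64_t)gart_size_mb << 20;
}

int main(void)
{
	printf("auto, 2GB VRAM -> %llu MB\n",
	       (unsigned long long)(pick_gtt_size(-1, 2ULL << 30) >> 20));
	printf("gartsize=512   -> %llu MB\n",
	       (unsigned long long)(pick_gtt_size(512, 2ULL << 30) >> 20));
	return 0;
}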
+21 -2
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
···
 386  386 static int gmc_v9_0_late_init(void *handle)
 387  387 {
 388  388 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
      389 +	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 3, 3 };
      390 +	unsigned i;
      391 +
      392 +	for(i = 0; i < adev->num_rings; ++i) {
      393 +		struct amdgpu_ring *ring = adev->rings[i];
      394 +		unsigned vmhub = ring->funcs->vmhub;
      395 +
      396 +		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
      397 +		dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
      398 +			 ring->idx, ring->name, ring->vm_inv_eng,
      399 +			 ring->funcs->vmhub);
      400 +	}
      401 +
      402 +	/* Engine 17 is used for GART flushes */
      403 +	for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
      404 +		BUG_ON(vm_inv_eng[i] > 17);
      405 +
 389  406 	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
 390  407 }
 391  408 
···
 486  469 	 * size equal to the 1024 or vram, whichever is larger.
 487  470 	 */
 488  471 	if (amdgpu_gart_size == -1)
 489      -		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
      472 +		adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
      473 +					adev->mc.mc_vram_size);
 490  474 	else
 491  475 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
 492  476 
···
 537  519 	 * amdgpu graphics/compute will use VMIDs 1-7
 538  520 	 * amdkfd will use VMIDs 8-15
 539  521 	 */
 540      -	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
      522 +	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
      523 +	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
 541  524 
 542  525 	/* TODO: fix num_level for APU when updating vm size and block size */
 543  526 	if (adev->flags & AMD_IS_APU)
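gmc_v9_0_late_init() now hands every ring its own VM invalidation engine on its hub (numbering starts at 3 here, with engine 17 kept for GART flushes per the comment), which is what lets the gfx_v9_0 and sdma_v4_0 emit_vm_flush paths elsewhere in this series drop their loop over all hubs. A simplified, self-contained sketch of that single-hub flush, with stand-in types; in the driver the ack is polled via a WAIT_REG_MEM-style packet rather than a helper call.

#include <stdint.h>
#include <stdio.h>

struct vmhub_sketch {
	uint32_t vm_inv_eng0_req;
	uint32_t vm_inv_eng0_ack;
};

struct ring_sketch {
	unsigned vmhub;		/* which hub this ring lives on (0 = gfx, 1 = mm) */
	unsigned vm_inv_eng;	/* engine assigned in gmc_v9_0_late_init() */
};

static void write_reg(uint32_t reg, uint32_t val)
{
	printf("write 0x%04x = 0x%08x\n", (unsigned)reg, (unsigned)val);
}

static void poll_reg(uint32_t reg, uint32_t mask)
{
	printf("poll  0x%04x for 0x%08x\n", (unsigned)reg, (unsigned)mask);
}

static void emit_vm_flush_sketch(const struct vmhub_sketch *hubs,
				 const struct ring_sketch *ring,
				 unsigned vm_id, uint32_t req)
{
	const struct vmhub_sketch *hub = &hubs[ring->vmhub];

	/* kick the invalidation request on this ring's engine only ... */
	write_reg(hub->vm_inv_eng0_req + ring->vm_inv_eng, req);
	/* ... then wait for the matching per-VMID ack bit */
	poll_reg(hub->vm_inv_eng0_ack + ring->vm_inv_eng, 1u << vm_id);
}

int main(void)
{
	struct vmhub_sketch hubs[2] = { { 0x100, 0x120 }, { 0x200, 0x220 } };
	struct ring_sketch gfx = { 0, 3 };	/* first gfx-hub ring gets engine 3 */

	emit_vm_flush_sketch(hubs, &gfx, 1, 0xf);
	return 0;
}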
+3
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
···
 511  511 {
 512  512 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 513  513 
      514 +	if (amdgpu_sriov_vf(adev))
      515 +		return 0;
      516 +
 514  517 	switch (adev->asic_type) {
 515  518 	case CHIP_VEGA10:
 516  519 		mmhub_v1_0_update_medium_grain_clock_gating(adev,
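Like the matching gfx_v9_0 hunk, mmhub_v1_0_set_clockgating_state() now returns early under SR-IOV, leaving clock gating to the host side. A small sketch of the guard pattern; only amdgpu_sriov_vf() is a real helper name, everything else here is a stand-in.

#include <stdio.h>

struct adev_sketch { int sriov_vf; };

static int sriov_vf_sketch(const struct adev_sketch *adev)
{
	return adev->sriov_vf;	/* the real amdgpu_sriov_vf() tests adev->virt */
}

static int set_clockgating_state_sketch(const struct adev_sketch *adev, int enable)
{
	if (sriov_vf_sketch(adev))
		return 0;	/* VF does not touch clock gating */

	printf("programming MGCG/CGCG/CGLS, enable=%d\n", enable);
	return 0;
}

int main(void)
{
	struct adev_sketch bare_metal = { 0 }, vf = { 1 };

	set_clockgating_state_sketch(&bare_metal, 1);
	set_clockgating_state_sketch(&vf, 1);
	return 0;
}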
+57
drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
··· 84 84 uint32_t reg_value; 85 85 }; 86 86 87 + static inline void mmsch_v1_0_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt, 88 + uint32_t *init_table, 89 + uint32_t reg_offset, 90 + uint32_t value) 91 + { 92 + direct_wt->cmd_header.reg_offset = reg_offset; 93 + direct_wt->reg_value = value; 94 + memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write)); 95 + } 96 + 97 + static inline void mmsch_v1_0_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt, 98 + uint32_t *init_table, 99 + uint32_t reg_offset, 100 + uint32_t mask, uint32_t data) 101 + { 102 + direct_rd_mod_wt->cmd_header.reg_offset = reg_offset; 103 + direct_rd_mod_wt->mask_value = mask; 104 + direct_rd_mod_wt->write_data = data; 105 + memcpy((void *)init_table, direct_rd_mod_wt, 106 + sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)); 107 + } 108 + 109 + static inline void mmsch_v1_0_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll, 110 + uint32_t *init_table, 111 + uint32_t reg_offset, 112 + uint32_t mask, uint32_t wait) 113 + { 114 + direct_poll->cmd_header.reg_offset = reg_offset; 115 + direct_poll->mask_value = mask; 116 + direct_poll->wait_value = wait; 117 + memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling)); 118 + } 119 + 120 + #define MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ 121 + mmsch_v1_0_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \ 122 + init_table, (reg), \ 123 + (mask), (data)); \ 124 + init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ 125 + table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ 126 + } 127 + 128 + #define MMSCH_V1_0_INSERT_DIRECT_WT(reg, value) { \ 129 + mmsch_v1_0_insert_direct_wt(&direct_wt, \ 130 + init_table, (reg), \ 131 + (value)); \ 132 + init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ 133 + table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ 134 + } 135 + 136 + #define MMSCH_V1_0_INSERT_DIRECT_POLL(reg, mask, wait) { \ 137 + mmsch_v1_0_insert_direct_poll(&direct_poll, \ 138 + init_table, (reg), \ 139 + (mask), (wait)); \ 140 + init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ 141 + table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ 142 + } 143 + 87 144 #endif
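The MMSCH_V1_0_INSERT_DIRECT_* macros above rely on the caller having init_table, table_size and the scratch command structs (direct_wt, direct_rd_mod_wt, direct_poll) in scope, appending each packed command to a dword table and advancing both counters. A self-contained illustration of that table-building pattern with stand-in types, not the real mmsch command structs:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Stand-in for an mmsch v1.0 direct-write command; the real structs carry a
 * command header with an opcode as well as the register offset. */
struct direct_write_sketch {
	uint32_t reg_offset;
	uint32_t reg_value;
};

#define INSERT_DIRECT_WT_SKETCH(reg, value) do {			\
	struct direct_write_sketch cmd = { (reg), (value) };		\
	memcpy(init_table, &cmd, sizeof(cmd));				\
	init_table += sizeof(cmd) / 4;	/* advance in dwords */		\
	table_size += sizeof(cmd) / 4;					\
} while (0)

int main(void)
{
	uint32_t table[64];
	uint32_t *init_table = table;	/* the macros assume these names */
	uint32_t table_size = 0;

	INSERT_DIRECT_WT_SKETCH(0x1234, 0xdeadbeef);
	INSERT_DIRECT_WT_SKETCH(0x1238, 0x1);

	printf("table holds %u dwords\n", (unsigned)table_size);
	return 0;
}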
+6 -3
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
···
 368  368 	u32 reg;
 369  369 	u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID);
 370  370 
 371      -	reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
 372      -	if (!(reg & mask))
 373      -		return -ENOENT;
      371 +	/* workaround: host driver doesn't set VALID for CMPL now */
      372 +	if (event != IDH_FLR_NOTIFICATION_CMPL) {
      373 +		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
      374 +		if (!(reg & mask))
      375 +			return -ENOENT;
      376 +	}
 374  377 
 375  378 	reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
 376  379 	if (reg != event)
+45 -41
drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
··· 166 166 { 167 167 int ret; 168 168 uint32_t psp_gfxdrv_command_reg = 0; 169 - struct amdgpu_bo *psp_sysdrv; 170 - void *psp_sysdrv_virt = NULL; 171 - uint64_t psp_sysdrv_mem; 172 169 struct amdgpu_device *adev = psp->adev; 173 - uint32_t size, sol_reg; 170 + uint32_t sol_reg; 174 171 175 172 /* Check sOS sign of life register to confirm sys driver and sOS 176 173 * are already been loaded. ··· 182 185 if (ret) 183 186 return ret; 184 187 185 - /* 186 - * Create a 1 meg GART memory to store the psp sys driver 187 - * binary with a 1 meg aligned address 188 - */ 189 - size = (psp->sys_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) & 190 - (~(PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)); 191 - 192 - ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT, 193 - AMDGPU_GEM_DOMAIN_GTT, 194 - &psp_sysdrv, 195 - &psp_sysdrv_mem, 196 - &psp_sysdrv_virt); 197 - if (ret) 198 - return ret; 188 + memset(psp->fw_pri_buf, 0, PSP_1_MEG); 199 189 200 190 /* Copy PSP System Driver binary to memory */ 201 - memcpy(psp_sysdrv_virt, psp->sys_start_addr, psp->sys_bin_size); 191 + memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); 202 192 203 193 /* Provide the sys driver to bootrom */ 204 194 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), 205 - (uint32_t)(psp_sysdrv_mem >> 20)); 195 + (uint32_t)(psp->fw_pri_mc_addr >> 20)); 206 196 psp_gfxdrv_command_reg = 1 << 16; 207 197 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 208 198 psp_gfxdrv_command_reg); ··· 200 216 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 201 217 0x80000000, 0x80000000, false); 202 218 203 - amdgpu_bo_free_kernel(&psp_sysdrv, &psp_sysdrv_mem, &psp_sysdrv_virt); 204 - 205 219 return ret; 206 220 } 207 221 ··· 207 225 { 208 226 int ret; 209 227 unsigned int psp_gfxdrv_command_reg = 0; 210 - struct amdgpu_bo *psp_sos; 211 - void *psp_sos_virt = NULL; 212 - uint64_t psp_sos_mem; 213 228 struct amdgpu_device *adev = psp->adev; 214 - uint32_t size, sol_reg; 229 + uint32_t sol_reg; 215 230 216 231 /* Check sOS sign of life register to confirm sys driver and sOS 217 232 * are already been loaded. 
··· 223 244 if (ret) 224 245 return ret; 225 246 226 - size = (psp->sos_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) & 227 - (~((uint64_t)PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)); 228 - 229 - ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT, 230 - AMDGPU_GEM_DOMAIN_GTT, 231 - &psp_sos, 232 - &psp_sos_mem, 233 - &psp_sos_virt); 234 - if (ret) 235 - return ret; 247 + memset(psp->fw_pri_buf, 0, PSP_1_MEG); 236 248 237 249 /* Copy Secure OS binary to PSP memory */ 238 - memcpy(psp_sos_virt, psp->sos_start_addr, psp->sos_bin_size); 250 + memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); 239 251 240 252 /* Provide the PSP secure OS to bootrom */ 241 253 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), 242 - (uint32_t)(psp_sos_mem >> 20)); 254 + (uint32_t)(psp->fw_pri_mc_addr >> 20)); 243 255 psp_gfxdrv_command_reg = 2 << 16; 244 256 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 245 257 psp_gfxdrv_command_reg); ··· 242 272 RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)), 243 273 0, true); 244 274 #endif 245 - 246 - amdgpu_bo_free_kernel(&psp_sos, &psp_sos_mem, &psp_sos_virt); 247 275 248 276 return ret; 249 277 } ··· 268 300 int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) 269 301 { 270 302 int ret = 0; 271 - unsigned int psp_ring_reg = 0; 272 303 struct psp_ring *ring; 273 304 struct amdgpu_device *adev = psp->adev; 274 305 ··· 286 319 ring->ring_size = 0; 287 320 return ret; 288 321 } 322 + 323 + return 0; 324 + } 325 + 326 + int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) 327 + { 328 + int ret = 0; 329 + unsigned int psp_ring_reg = 0; 330 + struct psp_ring *ring = &psp->km_ring; 331 + struct amdgpu_device *adev = psp->adev; 289 332 290 333 /* Write low address of the ring to C2PMSG_69 */ 291 334 psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); ··· 318 341 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), 319 342 0x80000000, 0x8000FFFF, false); 320 343 344 + return ret; 345 + } 346 + 347 + int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) 348 + { 349 + int ret = 0; 350 + struct psp_ring *ring; 351 + unsigned int psp_ring_reg = 0; 352 + struct amdgpu_device *adev = psp->adev; 353 + 354 + ring = &psp->km_ring; 355 + 356 + /* Write the ring destroy command to C2PMSG_64 */ 357 + psp_ring_reg = 3 << 16; 358 + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); 359 + 360 + /* there might be handshake issue with hardware which needs delay */ 361 + mdelay(20); 362 + 363 + /* Wait for response flag (bit 31) in C2PMSG_64 */ 364 + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), 365 + 0x80000000, 0x80000000, false); 366 + 367 + if (ring->ring_mem) 368 + amdgpu_bo_free_kernel(&adev->firmware.rbuf, 369 + &ring->ring_mem_mc_addr, 370 + (void **)&ring->ring_mem); 321 371 return ret; 322 372 } 323 373
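Both PSP load paths above stop creating and freeing a temporary 1 MB-aligned GTT BO per binary; they memset and reuse the persistent psp->fw_pri_buf, whose GPU address (shifted by 20) is handed to the bootrom. A plain-C sketch of that allocate-once, reuse-per-upload pattern, with heap memory standing in for the BO:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define PSP_1_MEG (1024 * 1024)

struct psp_ctx_sketch {
	void    *fw_pri_buf;	/* CPU address of the staging buffer */
	uint64_t fw_pri_mc_addr;/* GPU address handed to the bootrom */
};

static void upload_binary_sketch(struct psp_ctx_sketch *psp,
				 const void *bin, size_t bin_size)
{
	/* previously: create a fresh 1 MB-aligned BO, copy, free it again */
	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
	memcpy(psp->fw_pri_buf, bin, bin_size);
	/* then program fw_pri_mc_addr >> 20 into the C2PMSG mailbox ... */
}

int main(void)
{
	struct psp_ctx_sketch psp = { malloc(PSP_1_MEG), 0x100000 };
	const char fake_sysdrv[] = "sys_drv.bin payload";

	if (!psp.fw_pri_buf)
		return 1;
	upload_binary_sketch(&psp, fake_sysdrv, sizeof(fake_sysdrv));
	free(psp.fw_pri_buf);
	return 0;
}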
+4
drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
···
  39   39 			  struct psp_gfx_cmd_resp *cmd);
  40   40 extern int psp_v3_1_ring_init(struct psp_context *psp,
  41   41 			      enum psp_ring_type ring_type);
       42 +extern int psp_v3_1_ring_create(struct psp_context *psp,
       43 +				enum psp_ring_type ring_type);
       44 +extern int psp_v3_1_ring_destroy(struct psp_context *psp,
       45 +				 enum psp_ring_type ring_type);
  42   46 extern int psp_v3_1_cmd_submit(struct psp_context *psp,
  43   47 			       struct amdgpu_firmware_info *ucode,
  44   48 			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
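The header now exports separate create and destroy entry points next to ring_init, matching the split in psp_v3_1.c above. How psp.c wires these up is not part of this hunk, so the call-sequence sketch below uses an assumed function-pointer table purely for illustration:

#include <stdio.h>

struct psp_ring_ops_sketch {
	int (*ring_init)(void *psp, int type);		/* allocate ring memory */
	int (*ring_create)(void *psp, int type);	/* C2PMSG_64 create handshake */
	int (*ring_destroy)(void *psp, int type);	/* destroy cmd + free memory */
};

static int stub_init(void *psp, int type)    { (void)psp; (void)type; puts("init");    return 0; }
static int stub_create(void *psp, int type)  { (void)psp; (void)type; puts("create");  return 0; }
static int stub_destroy(void *psp, int type) { (void)psp; (void)type; puts("destroy"); return 0; }

int main(void)
{
	struct psp_ring_ops_sketch ops = { stub_init, stub_create, stub_destroy };

	if (!ops.ring_init(NULL, 0) && !ops.ring_create(NULL, 0)) {
		/* ... submit firmware loads through the ring ... */
		ops.ring_destroy(NULL, 0);
	}
	return 0;
}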
+104 -112
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
··· 48 48 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); 49 49 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); 50 50 51 - static const u32 golden_settings_sdma_4[] = 52 - { 51 + static const u32 golden_settings_sdma_4[] = { 53 52 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, 54 53 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, 55 54 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, ··· 75 76 SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0 76 77 }; 77 78 78 - static const u32 golden_settings_sdma_vg10[] = 79 - { 79 + static const u32 golden_settings_sdma_vg10[] = { 80 80 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, 81 81 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002, 82 82 SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, ··· 85 87 static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) 86 88 { 87 89 u32 base = 0; 90 + 88 91 switch (instance) { 89 - case 0: 90 - base = SDMA0_BASE.instance[0].segment[0]; 91 - break; 92 - case 1: 93 - base = SDMA1_BASE.instance[0].segment[0]; 94 - break; 95 - default: 96 - BUG(); 97 - break; 92 + case 0: 93 + base = SDMA0_BASE.instance[0].segment[0]; 94 + break; 95 + case 1: 96 + base = SDMA1_BASE.instance[0].segment[0]; 97 + break; 98 + default: 99 + BUG(); 100 + break; 98 101 } 99 102 100 103 return base + internal_offset; ··· 158 159 case CHIP_VEGA10: 159 160 chip_name = "vega10"; 160 161 break; 161 - default: BUG(); 162 + default: 163 + BUG(); 162 164 } 163 165 164 166 for (i = 0; i < adev->sdma.num_instances; i++) { ··· 179 179 if (adev->sdma.instance[i].feature_version >= 20) 180 180 adev->sdma.instance[i].burst_nop = true; 181 181 DRM_DEBUG("psp_load == '%s'\n", 182 - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP? "true": "false"); 182 + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); 183 183 184 184 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 185 185 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; ··· 192 192 } 193 193 out: 194 194 if (err) { 195 - printk(KERN_ERR 196 - "sdma_v4_0: Failed to load firmware \"%s\"\n", 197 - fw_name); 195 + DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); 198 196 for (i = 0; i < adev->sdma.num_instances; i++) { 199 197 release_firmware(adev->sdma.instance[i].fw); 200 198 adev->sdma.instance[i].fw = NULL; ··· 210 212 */ 211 213 static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) 212 214 { 213 - u64* rptr; 215 + u64 *rptr; 214 216 215 217 /* XXX check if swapping is necessary on BE */ 216 - rptr =((u64*)&ring->adev->wb.wb[ring->rptr_offs]); 218 + rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]); 217 219 218 220 DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); 219 221 return ((*rptr) >> 2); ··· 229 231 static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) 230 232 { 231 233 struct amdgpu_device *adev = ring->adev; 232 - u64* wptr = NULL; 233 - uint64_t local_wptr=0; 234 + u64 *wptr = NULL; 235 + uint64_t local_wptr = 0; 234 236 235 237 if (ring->use_doorbell) { 236 238 /* XXX check if swapping is necessary on BE */ 237 - wptr = ((u64*)&adev->wb.wb[ring->wptr_offs]); 239 + wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); 238 240 DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); 239 241 *wptr = (*wptr) >> 2; 240 242 DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); 241 243 } else { 242 244 u32 lowbit, highbit; 243 245 int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 244 - wptr=&local_wptr; 246 + 247 + wptr = &local_wptr; 245 248 lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2; 246 249 highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; 247 250 ··· 284 285 WDOORBELL64(ring->doorbell_index, ring->wptr << 2); 285 286 } else { 286 287 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; 288 + 287 289 DRM_DEBUG("Not using doorbell -- " 288 290 "mmSDMA%i_GFX_RB_WPTR == 0x%08x " 289 - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x \n", 290 - me, 291 + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", 291 292 me, 292 293 lower_32_bits(ring->wptr << 2), 294 + me, 293 295 upper_32_bits(ring->wptr << 2)); 294 296 WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); 295 297 WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); ··· 319 319 * Schedule an IB in the DMA ring (VEGA10). 
320 320 */ 321 321 static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, 322 - struct amdgpu_ib *ib, 323 - unsigned vm_id, bool ctx_switch) 322 + struct amdgpu_ib *ib, 323 + unsigned vm_id, bool ctx_switch) 324 324 { 325 - u32 vmid = vm_id & 0xf; 325 + u32 vmid = vm_id & 0xf; 326 326 327 - /* IB packet must end on a 8 DW boundary */ 328 - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 327 + /* IB packet must end on a 8 DW boundary */ 328 + sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 329 329 330 - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 331 - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); 332 - /* base must be 32 byte aligned */ 333 - amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); 334 - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 335 - amdgpu_ring_write(ring, ib->length_dw); 336 - amdgpu_ring_write(ring, 0); 337 - amdgpu_ring_write(ring, 0); 330 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 331 + SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); 332 + /* base must be 32 byte aligned */ 333 + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); 334 + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 335 + amdgpu_ring_write(ring, ib->length_dw); 336 + amdgpu_ring_write(ring, 0); 337 + amdgpu_ring_write(ring, 0); 338 338 339 339 } 340 340 ··· 523 523 u32 doorbell; 524 524 u32 doorbell_offset; 525 525 u32 temp; 526 - int i,r; 526 + int i, r; 527 527 528 528 for (i = 0; i < adev->sdma.num_instances; i++) { 529 529 ring = &adev->sdma.instance[i].ring; ··· 572 572 doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL)); 573 573 doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET)); 574 574 575 - if (ring->use_doorbell){ 575 + if (ring->use_doorbell) { 576 576 doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); 577 577 doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, 578 578 OFFSET, ring->doorbell_index); ··· 694 694 695 695 696 696 for (j = 0; j < fw_size; j++) 697 - { 698 697 WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); 699 - } 700 698 701 699 WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); 702 700 } ··· 742 744 if (r) 743 745 return r; 744 746 r = sdma_v4_0_rlc_resume(adev); 745 - if (r) 746 - return r; 747 747 748 - return 0; 748 + return r; 749 749 } 750 750 751 751 /** ··· 793 797 794 798 for (i = 0; i < adev->usec_timeout; i++) { 795 799 tmp = le32_to_cpu(adev->wb.wb[index]); 796 - if (tmp == 0xDEADBEEF) { 800 + if (tmp == 0xDEADBEEF) 797 801 break; 798 - } 799 802 DRM_UDELAY(1); 800 803 } 801 804 ··· 859 864 if (r) 860 865 goto err1; 861 866 862 - r = dma_fence_wait_timeout(f, false, timeout); 863 - if (r == 0) { 864 - DRM_ERROR("amdgpu: IB test timed out\n"); 865 - r = -ETIMEDOUT; 866 - goto err1; 867 - } else if (r < 0) { 868 - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 869 - goto err1; 870 - } 871 - tmp = le32_to_cpu(adev->wb.wb[index]); 872 - if (tmp == 0xDEADBEEF) { 873 - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 874 - r = 0; 875 - } else { 876 - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); 877 - r = -EINVAL; 878 - } 867 + r = dma_fence_wait_timeout(f, false, timeout); 868 + if (r == 0) { 869 + DRM_ERROR("amdgpu: IB test timed out\n"); 870 + r = -ETIMEDOUT; 871 + goto err1; 872 + } else if (r < 0) { 873 + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 
874 + goto err1; 875 + } 876 + tmp = le32_to_cpu(adev->wb.wb[index]); 877 + if (tmp == 0xDEADBEEF) { 878 + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 879 + r = 0; 880 + } else { 881 + DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); 882 + r = -EINVAL; 883 + } 879 884 err1: 880 - amdgpu_ib_free(adev, &ib, NULL); 881 - dma_fence_put(f); 885 + amdgpu_ib_free(adev, &ib, NULL); 886 + dma_fence_put(f); 882 887 err0: 883 - amdgpu_wb_free(adev, index); 884 - return r; 888 + amdgpu_wb_free(adev, index); 889 + return r; 885 890 } 886 891 887 892 ··· 1034 1039 static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 1035 1040 unsigned vm_id, uint64_t pd_addr) 1036 1041 { 1042 + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 1037 1043 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); 1038 - unsigned eng = ring->idx; 1039 - unsigned i; 1044 + unsigned eng = ring->vm_inv_eng; 1040 1045 1041 1046 pd_addr = pd_addr | 0x1; /* valid bit */ 1042 1047 /* now only use physical base address of PDE and valid */ 1043 1048 BUG_ON(pd_addr & 0xFFFF00000000003EULL); 1044 1049 1045 - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 1046 - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; 1050 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1051 + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1052 + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); 1053 + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 1047 1054 1048 - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1049 - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1050 - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); 1051 - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 1055 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1056 + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1057 + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); 1058 + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 1052 1059 1053 - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1054 - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1055 - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); 1056 - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 1060 + /* flush TLB */ 1061 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1062 + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1063 + amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); 1064 + amdgpu_ring_write(ring, req); 1057 1065 1058 - /* flush TLB */ 1059 - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1060 - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1061 - amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); 1062 - amdgpu_ring_write(ring, req); 1063 - 1064 - /* wait for flush */ 1065 - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1066 - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1067 - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ 1068 - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); 1069 - amdgpu_ring_write(ring, 0); 1070 - amdgpu_ring_write(ring, 1 << vm_id); /* reference */ 1071 - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ 1072 - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1073 - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); 1074 - } 1066 + /* wait for flush */ 1067 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1068 + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1069 + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ 1070 + amdgpu_ring_write(ring, 
(hub->vm_inv_eng0_ack + eng) << 2); 1071 + amdgpu_ring_write(ring, 0); 1072 + amdgpu_ring_write(ring, 1 << vm_id); /* reference */ 1073 + amdgpu_ring_write(ring, 1 << vm_id); /* mask */ 1074 + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1075 + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); 1075 1076 } 1076 1077 1077 1078 static int sdma_v4_0_early_init(void *handle) ··· 1153 1162 sdma_v4_0_init_golden_registers(adev); 1154 1163 1155 1164 r = sdma_v4_0_start(adev); 1156 - if (r) 1157 - return r; 1158 1165 1159 1166 return r; 1160 1167 } ··· 1188 1199 { 1189 1200 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1190 1201 u32 i; 1202 + 1191 1203 for (i = 0; i < adev->sdma.num_instances; i++) { 1192 1204 u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG)); 1205 + 1193 1206 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) 1194 - return false; 1207 + return false; 1195 1208 } 1196 1209 1197 1210 return true; ··· 1202 1211 static int sdma_v4_0_wait_for_idle(void *handle) 1203 1212 { 1204 1213 unsigned i; 1205 - u32 sdma0,sdma1; 1214 + u32 sdma0, sdma1; 1206 1215 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1216 + 1207 1217 for (i = 0; i < adev->usec_timeout; i++) { 1208 1218 sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG)); 1209 1219 sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG)); ··· 1232 1240 1233 1241 u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 1234 1242 sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) : 1235 - sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); 1243 + sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); 1236 1244 1237 1245 sdma_cntl = RREG32(reg_offset); 1238 1246 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, ··· 1324 1332 SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | 1325 1333 SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | 1326 1334 SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); 1327 - if(def != data) 1335 + if (def != data) 1328 1336 WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); 1329 1337 } 1330 1338 } else { ··· 1374 1382 1375 1383 /* 1-not override: enable sdma1 mem light sleep */ 1376 1384 if (adev->asic_type == CHIP_VEGA10) { 1377 - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); 1378 - data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1379 - if (def != data) 1380 - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); 1385 + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); 1386 + data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1387 + if (def != data) 1388 + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); 1381 1389 } 1382 1390 } else { 1383 1391 /* 0-override:disable sdma0 mem light sleep */ 1384 1392 def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); 1385 1393 data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1386 1394 if (def != data) 1387 - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); 1395 + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); 1388 1396 1389 1397 /* 0-override:disable sdma1 mem light sleep */ 1390 1398 if (adev->asic_type == CHIP_VEGA10) { ··· 1465 1473 .align_mask = 0xf, 1466 1474 .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 1467 1475 .support_64bit_ptrs = true, 1476 + .vmhub = AMDGPU_MMHUB, 1468 1477 .get_rptr = sdma_v4_0_ring_get_rptr, 1469 1478 .get_wptr = sdma_v4_0_ring_get_wptr, 1470 1479 .set_wptr = sdma_v4_0_ring_set_wptr, ··· 1473 1480 6 + /* sdma_v4_0_ring_emit_hdp_flush */ 1474 1481 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ 1475 1482 6 + /* 
sdma_v4_0_ring_emit_pipeline_sync */ 1476 - 36 + /* sdma_v4_0_ring_emit_vm_flush */ 1483 + 18 + /* sdma_v4_0_ring_emit_vm_flush */ 1477 1484 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ 1478 1485 .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ 1479 1486 .emit_ib = sdma_v4_0_ring_emit_ib, ··· 1599 1606 } 1600 1607 } 1601 1608 1602 - const struct amdgpu_ip_block_version sdma_v4_0_ip_block = 1603 - { 1609 + const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { 1604 1610 .type = AMD_IP_BLOCK_TYPE_SDMA, 1605 1611 .major = 4, 1606 1612 .minor = 0,
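The reworked sdma_v4_0_ring_emit_vm_flush() above targets only the ring's own hub (the new .vmhub = AMDGPU_MMHUB entry) through the per-ring invalidation engine instead of looping over every hub with the ring index, which is why the vm-flush budget in .emit_frame_size halves from 36 to 18 dwords. The IB emission path is unchanged in substance: the NOP pad (10 - (wptr & 7)) % 8 always moves the write pointer to 2 mod 8, so the six ring writes of the INDIRECT packet end exactly on the required 8-DW boundary. A minimal user-space sketch of that alignment arithmetic; the constants come from the hunk above, everything else (names, the loop) is illustrative:

    #include <assert.h>
    #include <stdio.h>
    #include <stdint.h>

    #define SDMA_IB_PACKET_DW 6   /* the six amdgpu_ring_write() calls in sdma_v4_0_ring_emit_ib() */

    static uint32_t nop_pad(uint32_t wptr)
    {
            /* same expression as the insert_nop() argument above */
            return (10 - (wptr & 7)) % 8;
    }

    int main(void)
    {
            uint32_t wptr;

            for (wptr = 0; wptr < 64; wptr++) {
                    uint32_t start = wptr + nop_pad(wptr);

                    assert(start % 8 == 2);                       /* packet always starts at 2 mod 8 */
                    assert((start + SDMA_IB_PACKET_DW) % 8 == 0); /* and therefore ends on an 8-DW boundary */
            }
            printf("IB padding keeps the 8-DW rule for every wptr residue\n");
            return 0;
    }
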
+4 -5
drivers/gpu/drm/amd/amdgpu/soc15.c
··· 25 25 #include <linux/module.h> 26 26 #include "drmP.h" 27 27 #include "amdgpu.h" 28 - #include "amdgpu_atombios.h" 28 + #include "amdgpu_atomfirmware.h" 29 29 #include "amdgpu_ih.h" 30 30 #include "amdgpu_uvd.h" 31 31 #include "amdgpu_vce.h" ··· 405 405 406 406 static int soc15_asic_reset(struct amdgpu_device *adev) 407 407 { 408 - amdgpu_atombios_scratch_regs_engine_hung(adev, true); 408 + amdgpu_atomfirmware_scratch_regs_engine_hung(adev, true); 409 409 410 410 soc15_gpu_pci_config_reset(adev); 411 411 412 - amdgpu_atombios_scratch_regs_engine_hung(adev, false); 412 + amdgpu_atomfirmware_scratch_regs_engine_hung(adev, false); 413 413 414 414 return 0; 415 415 } ··· 505 505 amdgpu_ip_block_add(adev, &dce_virtual_ip_block); 506 506 amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); 507 507 amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); 508 - if (!amdgpu_sriov_vf(adev)) 509 - amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); 508 + amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); 510 509 amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); 511 510 break; 512 511 default:
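soc15_asic_reset() keeps its shape while switching the scratch-register helpers from the atombios to the atomfirmware variants: the engine-hung flag is raised before soc15_gpu_pci_config_reset() and dropped again afterwards, so the flag brackets the whole reset window. A minimal sketch of that bracket pattern; the callback types, helper names and demo backend are all illustrative, not the driver's real interfaces:

    #include <stdbool.h>
    #include <stdio.h>

    struct reset_ops {
            void (*scratch_regs_engine_hung)(void *ctx, bool hung); /* flag in BIOS scratch regs */
            void (*pci_config_reset)(void *ctx);                    /* the actual reset */
    };

    static void bracketed_reset(const struct reset_ops *ops, void *ctx)
    {
            ops->scratch_regs_engine_hung(ctx, true);   /* mark the reset as in flight */
            ops->pci_config_reset(ctx);
            ops->scratch_regs_engine_hung(ctx, false);  /* clear the flag afterwards */
    }

    /* trivial demo backend, only there to make the sketch runnable */
    static void demo_hung(void *ctx, bool hung)
    {
            (void)ctx;
            printf("engine hung flag -> %d\n", hung);
    }

    static void demo_reset(void *ctx)
    {
            (void)ctx;
            printf("pci config reset\n");
    }

    int main(void)
    {
            const struct reset_ops ops = {
                    .scratch_regs_engine_hung = demo_hung,
                    .pci_config_reset = demo_reset,
            };

            bracketed_reset(&ops, NULL);
            return 0;
    }
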
+19 -1
drivers/gpu/drm/amd/amdgpu/soc15_common.h
··· 45 45 u32 index_offset; 46 46 u32 data_offset; 47 47 }; 48 - // Register Access Macro 48 + 49 + /* Register Access Macros */ 49 50 #define SOC15_REG_OFFSET(ip, inst, reg) (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ 50 51 (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ 51 52 (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ 52 53 (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ 53 54 (ip##_BASE__INST##inst##_SEG4 + reg))))) 55 + 56 + #define WREG32_FIELD15(ip, idx, reg, field, val) \ 57 + WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) 58 + 59 + #define RREG32_SOC15(ip, inst, reg) \ 60 + RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ 61 + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ 62 + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ 63 + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ 64 + (ip##_BASE__INST##inst##_SEG4 + reg)))))) 65 + 66 + #define WREG32_SOC15(ip, inst, reg, value) \ 67 + WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ 68 + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ 69 + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ 70 + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ 71 + (ip##_BASE__INST##inst##_SEG4 + reg))))), value) 54 72 55 73 #endif 56 74
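The new RREG32_SOC15()/WREG32_SOC15() helpers open-code the same BASE_IDX dispatch that SOC15_REG_OFFSET() performs, and WREG32_FIELD15() moves into this header alongside them. Because the token pasting is easy to misread, here is a standalone illustration of how the offset resolves; the FOO block, its segment bases and mmFOO_CTRL are made-up values standing in for the real vega10 *_offset.h definitions:

    #include <stdio.h>

    /* pretend per-instance segment bases for a "FOO" IP block */
    #define FOO_BASE__INST0_SEG0 0x00001000
    #define FOO_BASE__INST0_SEG1 0x00020000
    #define FOO_BASE__INST0_SEG2 0x00300000
    #define FOO_BASE__INST0_SEG3 0x04000000
    #define FOO_BASE__INST0_SEG4 0x50000000

    /* a pretend register that lives in segment 1 of that block */
    #define mmFOO_CTRL          0x0042
    #define mmFOO_CTRL_BASE_IDX 1

    /* same shape as the macro in the hunk above */
    #define SOC15_REG_OFFSET(ip, inst, reg) (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
            (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
            (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
            (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
            (ip##_BASE__INST##inst##_SEG4 + reg)))))

    int main(void)
    {
            /* mmFOO_CTRL_BASE_IDX == 1, so this resolves to SEG1 + reg = 0x20042 */
            printf("mmFOO_CTRL offset = 0x%x\n", SOC15_REG_OFFSET(FOO, 0, mmFOO_CTRL));
            return 0;
    }

With mmFOO_CTRL_BASE_IDX equal to 1 the conditional chain collapses to SEG1 + reg, which is the same expression the RREG32_SOC15()/WREG32_SOC15() bodies expand to.
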
+367 -105
drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
··· 27 27 #include "amdgpu_uvd.h" 28 28 #include "soc15d.h" 29 29 #include "soc15_common.h" 30 + #include "mmsch_v1_0.h" 30 31 31 32 #include "vega10/soc15ip.h" 32 33 #include "vega10/UVD/uvd_7_0_offset.h" 33 34 #include "vega10/UVD/uvd_7_0_sh_mask.h" 35 + #include "vega10/VCE/vce_4_0_offset.h" 36 + #include "vega10/VCE/vce_4_0_default.h" 37 + #include "vega10/VCE/vce_4_0_sh_mask.h" 34 38 #include "vega10/NBIF/nbif_6_1_offset.h" 35 39 #include "vega10/HDP/hdp_4_0_offset.h" 36 40 #include "vega10/MMHUB/mmhub_1_0_offset.h" ··· 45 41 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev); 46 42 static int uvd_v7_0_start(struct amdgpu_device *adev); 47 43 static void uvd_v7_0_stop(struct amdgpu_device *adev); 44 + static int uvd_v7_0_sriov_start(struct amdgpu_device *adev); 48 45 49 46 /** 50 47 * uvd_v7_0_ring_get_rptr - get read pointer ··· 103 98 { 104 99 struct amdgpu_device *adev = ring->adev; 105 100 101 + if (ring->use_doorbell) 102 + return adev->wb.wb[ring->wptr_offs]; 103 + 106 104 if (ring == &adev->uvd.ring_enc[0]) 107 105 return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR)); 108 106 else ··· 136 128 static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) 137 129 { 138 130 struct amdgpu_device *adev = ring->adev; 131 + 132 + if (ring->use_doorbell) { 133 + /* XXX check if swapping is necessary on BE */ 134 + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 135 + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 136 + return; 137 + } 139 138 140 139 if (ring == &adev->uvd.ring_enc[0]) 141 140 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), ··· 368 353 { 369 354 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 370 355 371 - adev->uvd.num_enc_rings = 2; 356 + if (amdgpu_sriov_vf(adev)) 357 + adev->uvd.num_enc_rings = 1; 358 + else 359 + adev->uvd.num_enc_rings = 2; 372 360 uvd_v7_0_set_ring_funcs(adev); 373 361 uvd_v7_0_set_enc_ring_funcs(adev); 374 362 uvd_v7_0_set_irq_funcs(adev); ··· 424 406 r = amdgpu_uvd_resume(adev); 425 407 if (r) 426 408 return r; 427 - 428 - ring = &adev->uvd.ring; 429 - sprintf(ring->name, "uvd"); 430 - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); 431 - if (r) 432 - return r; 433 - 434 - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { 435 - ring = &adev->uvd.ring_enc[i]; 436 - sprintf(ring->name, "uvd_enc%d", i); 409 + if (!amdgpu_sriov_vf(adev)) { 410 + ring = &adev->uvd.ring; 411 + sprintf(ring->name, "uvd"); 437 412 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); 438 413 if (r) 439 414 return r; 440 415 } 416 + 417 + 418 + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { 419 + ring = &adev->uvd.ring_enc[i]; 420 + sprintf(ring->name, "uvd_enc%d", i); 421 + if (amdgpu_sriov_vf(adev)) { 422 + ring->use_doorbell = true; 423 + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; 424 + } 425 + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); 426 + if (r) 427 + return r; 428 + } 429 + 430 + r = amdgpu_virt_alloc_mm_table(adev); 431 + if (r) 432 + return r; 441 433 442 434 return r; 443 435 } ··· 456 428 { 457 429 int i, r; 458 430 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 431 + 432 + amdgpu_virt_free_mm_table(adev); 459 433 460 434 r = amdgpu_uvd_suspend(adev); 461 435 if (r) ··· 485 455 uint32_t tmp; 486 456 int i, r; 487 457 488 - r = uvd_v7_0_start(adev); 458 + if (amdgpu_sriov_vf(adev)) 459 + r = uvd_v7_0_sriov_start(adev); 460 + else 461 + r = uvd_v7_0_start(adev); 489 462 if (r) 490 463 goto done; 491 464 492 - ring->ready = true; 493 - r = 
amdgpu_ring_test_ring(ring); 494 - if (r) { 495 - ring->ready = false; 496 - goto done; 465 + if (!amdgpu_sriov_vf(adev)) { 466 + ring->ready = true; 467 + r = amdgpu_ring_test_ring(ring); 468 + if (r) { 469 + ring->ready = false; 470 + goto done; 471 + } 472 + 473 + r = amdgpu_ring_alloc(ring, 10); 474 + if (r) { 475 + DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); 476 + goto done; 477 + } 478 + 479 + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, 480 + mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); 481 + amdgpu_ring_write(ring, tmp); 482 + amdgpu_ring_write(ring, 0xFFFFF); 483 + 484 + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, 485 + mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); 486 + amdgpu_ring_write(ring, tmp); 487 + amdgpu_ring_write(ring, 0xFFFFF); 488 + 489 + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, 490 + mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); 491 + amdgpu_ring_write(ring, tmp); 492 + amdgpu_ring_write(ring, 0xFFFFF); 493 + 494 + /* Clear timeout status bits */ 495 + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, 496 + mmUVD_SEMA_TIMEOUT_STATUS), 0)); 497 + amdgpu_ring_write(ring, 0x8); 498 + 499 + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, 500 + mmUVD_SEMA_CNTL), 0)); 501 + amdgpu_ring_write(ring, 3); 502 + 503 + amdgpu_ring_commit(ring); 497 504 } 498 - 499 - r = amdgpu_ring_alloc(ring, 10); 500 - if (r) { 501 - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); 502 - goto done; 503 - } 504 - 505 - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, 506 - mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); 507 - amdgpu_ring_write(ring, tmp); 508 - amdgpu_ring_write(ring, 0xFFFFF); 509 - 510 - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, 511 - mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); 512 - amdgpu_ring_write(ring, tmp); 513 - amdgpu_ring_write(ring, 0xFFFFF); 514 - 515 - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, 516 - mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); 517 - amdgpu_ring_write(ring, tmp); 518 - amdgpu_ring_write(ring, 0xFFFFF); 519 - 520 - /* Clear timeout status bits */ 521 - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, 522 - mmUVD_SEMA_TIMEOUT_STATUS), 0)); 523 - amdgpu_ring_write(ring, 0x8); 524 - 525 - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, 526 - mmUVD_SEMA_CNTL), 0)); 527 - amdgpu_ring_write(ring, 3); 528 - 529 - amdgpu_ring_commit(ring); 530 505 531 506 for (i = 0; i < adev->uvd.num_enc_rings; ++i) { 532 507 ring = &adev->uvd.ring_enc[i]; ··· 651 616 adev->gfx.config.gb_addr_config); 652 617 653 618 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); 619 + } 620 + 621 + static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, 622 + struct amdgpu_mm_table *table) 623 + { 624 + uint32_t data = 0, loop; 625 + uint64_t addr = table->gpu_addr; 626 + struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; 627 + uint32_t size; 628 + 629 + size = header->header_size + header->vce_table_size + header->uvd_table_size; 630 + 631 + /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ 632 + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); 633 + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); 634 + 635 + /* 2, update vmid of descriptor */ 636 + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); 637 + data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; 638 + data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ 639 + 
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); 640 + 641 + /* 3, notify mmsch about the size of this descriptor */ 642 + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); 643 + 644 + /* 4, set resp to zero */ 645 + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); 646 + 647 + /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ 648 + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); 649 + 650 + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); 651 + loop = 1000; 652 + while ((data & 0x10000002) != 0x10000002) { 653 + udelay(10); 654 + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); 655 + loop--; 656 + if (!loop) 657 + break; 658 + } 659 + 660 + if (!loop) { 661 + dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); 662 + return -EBUSY; 663 + } 664 + 665 + return 0; 666 + } 667 + 668 + static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) 669 + { 670 + struct amdgpu_ring *ring; 671 + uint32_t offset, size, tmp; 672 + uint32_t table_size = 0; 673 + struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} }; 674 + struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} }; 675 + struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} }; 676 + struct mmsch_v1_0_cmd_end end = { {0} }; 677 + uint32_t *init_table = adev->virt.mm_table.cpu_addr; 678 + struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; 679 + 680 + direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; 681 + direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; 682 + direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING; 683 + end.cmd_header.command_type = MMSCH_COMMAND__END; 684 + 685 + if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) { 686 + header->version = MMSCH_VERSION; 687 + header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2; 688 + 689 + if (header->vce_table_offset == 0 && header->vce_table_size == 0) 690 + header->uvd_table_offset = header->header_size; 691 + else 692 + header->uvd_table_offset = header->vce_table_size + header->vce_table_offset; 693 + 694 + init_table += header->uvd_table_offset; 695 + 696 + ring = &adev->uvd.ring; 697 + size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); 698 + 699 + /* disable clock gating */ 700 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 701 + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK, 0); 702 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 703 + 0xFFFFFFFF, 0x00000004); 704 + /* mc resume*/ 705 + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 706 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 707 + lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); 708 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 709 + upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); 710 + offset = 0; 711 + } else { 712 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 713 + lower_32_bits(adev->uvd.gpu_addr)); 714 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 715 + upper_32_bits(adev->uvd.gpu_addr)); 716 + offset = size; 717 + } 718 + 719 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_VCPU_CACHE_OFFSET0), 720 + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); 721 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); 722 + 723 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 724 + lower_32_bits(adev->uvd.gpu_addr + offset)); 725 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 726 + upper_32_bits(adev->uvd.gpu_addr + offset)); 727 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); 728 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); 729 + 730 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), 731 + lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); 732 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), 733 + upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); 734 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); 735 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), 736 + AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); 737 + 738 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG), 739 + adev->gfx.config.gb_addr_config); 740 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG), 741 + adev->gfx.config.gb_addr_config); 742 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG), 743 + adev->gfx.config.gb_addr_config); 744 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); 745 + /* mc resume end*/ 746 + 747 + /* disable clock gating */ 748 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 749 + ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); 750 + 751 + /* disable interupt */ 752 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 753 + ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); 754 + 755 + /* stall UMC and register bus before resetting VCPU */ 756 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 757 + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 758 + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); 759 + 760 + /* put LMI, VCPU, RBC etc... 
into reset */ 761 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 762 + (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | 763 + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | 764 + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | 765 + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | 766 + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | 767 + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | 768 + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | 769 + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); 770 + 771 + /* initialize UVD memory controller */ 772 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), 773 + (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 774 + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | 775 + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | 776 + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | 777 + UVD_LMI_CTRL__REQ_MODE_MASK | 778 + 0x00100000L)); 779 + 780 + /* disable byte swapping */ 781 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), 0); 782 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), 0); 783 + 784 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040); 785 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0); 786 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040); 787 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0); 788 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0); 789 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88); 790 + 791 + /* take all subblocks out of reset, except VCPU */ 792 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 793 + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); 794 + 795 + /* enable VCPU clock */ 796 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 797 + UVD_VCPU_CNTL__CLK_EN_MASK); 798 + 799 + /* enable UMC */ 800 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 801 + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); 802 + 803 + /* boot up the VCPU */ 804 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); 805 + 806 + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); 807 + 808 + /* enable master interrupt */ 809 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 810 + ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), 811 + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); 812 + 813 + /* clear the bit 4 of UVD_STATUS */ 814 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 815 + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); 816 + 817 + /* force RBC into idle state */ 818 + size = order_base_2(ring->ring_size); 819 + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); 820 + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); 821 + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); 822 + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); 823 + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); 824 + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); 825 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); 826 + 827 + /* set the write pointer delay */ 828 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0); 829 + 830 + /* set the wb address */ 831 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_RBC_RB_RPTR_ADDR), 832 + (upper_32_bits(ring->gpu_addr) >> 2)); 833 + 834 + /* programm the RB_BASE for ring buffer */ 835 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), 836 + lower_32_bits(ring->gpu_addr)); 837 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), 838 + upper_32_bits(ring->gpu_addr)); 839 + 840 + ring->wptr = 0; 841 + ring = &adev->uvd.ring_enc[0]; 842 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); 843 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); 844 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); 845 + 846 + /* add end packet */ 847 + memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); 848 + table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; 849 + header->uvd_table_size = table_size; 850 + 851 + return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table); 852 + } 853 + return -EINVAL; /* already initializaed ? */ 654 854 } 655 855 656 856 /** ··· 1304 1034 static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 1305 1035 unsigned vm_id, uint64_t pd_addr) 1306 1036 { 1037 + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 1307 1038 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); 1308 1039 uint32_t data0, data1, mask; 1309 - unsigned eng = ring->idx; 1310 - unsigned i; 1040 + unsigned eng = ring->vm_inv_eng; 1311 1041 1312 1042 pd_addr = pd_addr | 0x1; /* valid bit */ 1313 1043 /* now only use physical base address of PDE and valid */ 1314 1044 BUG_ON(pd_addr & 0xFFFF00000000003EULL); 1315 1045 1316 - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 1317 - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; 1046 + data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; 1047 + data1 = upper_32_bits(pd_addr); 1048 + uvd_v7_0_vm_reg_write(ring, data0, data1); 1318 1049 1319 - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; 1320 - data1 = upper_32_bits(pd_addr); 1321 - uvd_v7_0_vm_reg_write(ring, data0, data1); 1050 + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; 1051 + data1 = lower_32_bits(pd_addr); 1052 + uvd_v7_0_vm_reg_write(ring, data0, data1); 1322 1053 1323 - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; 1324 - data1 = lower_32_bits(pd_addr); 1325 - uvd_v7_0_vm_reg_write(ring, data0, data1); 1054 + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; 1055 + data1 = lower_32_bits(pd_addr); 1056 + mask = 0xffffffff; 1057 + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); 1326 1058 1327 - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; 1328 - data1 = lower_32_bits(pd_addr); 1329 - mask = 0xffffffff; 1330 - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); 1059 + /* flush TLB */ 1060 + data0 = (hub->vm_inv_eng0_req + eng) << 2; 1061 + data1 = req; 1062 + uvd_v7_0_vm_reg_write(ring, data0, data1); 1331 1063 1332 - /* flush TLB */ 1333 - data0 = (hub->vm_inv_eng0_req + eng) << 2; 1334 - data1 = req; 1335 - uvd_v7_0_vm_reg_write(ring, data0, data1); 1336 - 1337 - /* wait for flush */ 1338 - data0 = (hub->vm_inv_eng0_ack + eng) << 2; 1339 - data1 = 1 << vm_id; 1340 - mask = 1 << vm_id; 1341 - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); 1342 - } 1064 + /* wait for flush */ 1065 + data0 = (hub->vm_inv_eng0_ack + eng) << 2; 1066 + data1 = 1 << vm_id; 1067 + mask = 1 << vm_id; 1068 + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); 1343 1069 } 1344 1070 1345 1071 static void 
uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) ··· 1346 1080 static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, 1347 1081 unsigned int vm_id, uint64_t pd_addr) 1348 1082 { 1083 + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 1349 1084 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); 1350 - unsigned eng = ring->idx; 1351 - unsigned i; 1085 + unsigned eng = ring->vm_inv_eng; 1352 1086 1353 1087 pd_addr = pd_addr | 0x1; /* valid bit */ 1354 1088 /* now only use physical base address of PDE and valid */ 1355 1089 BUG_ON(pd_addr & 0xFFFF00000000003EULL); 1356 1090 1357 - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 1358 - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; 1091 + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); 1092 + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); 1093 + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 1359 1094 1360 - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); 1361 - amdgpu_ring_write(ring, 1362 - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); 1363 - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 1095 + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); 1096 + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); 1097 + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 1364 1098 1365 - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); 1366 - amdgpu_ring_write(ring, 1367 - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); 1368 - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 1099 + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); 1100 + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); 1101 + amdgpu_ring_write(ring, 0xffffffff); 1102 + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 1369 1103 1370 - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); 1371 - amdgpu_ring_write(ring, 1372 - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); 1373 - amdgpu_ring_write(ring, 0xffffffff); 1374 - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 1104 + /* flush TLB */ 1105 + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); 1106 + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); 1107 + amdgpu_ring_write(ring, req); 1375 1108 1376 - /* flush TLB */ 1377 - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); 1378 - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); 1379 - amdgpu_ring_write(ring, req); 1380 - 1381 - /* wait for flush */ 1382 - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); 1383 - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); 1384 - amdgpu_ring_write(ring, 1 << vm_id); 1385 - amdgpu_ring_write(ring, 1 << vm_id); 1386 - } 1109 + /* wait for flush */ 1110 + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); 1111 + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); 1112 + amdgpu_ring_write(ring, 1 << vm_id); 1113 + amdgpu_ring_write(ring, 1 << vm_id); 1387 1114 } 1388 1115 1389 1116 #if 0 ··· 1499 1240 amdgpu_fence_process(&adev->uvd.ring_enc[0]); 1500 1241 break; 1501 1242 case 120: 1502 - amdgpu_fence_process(&adev->uvd.ring_enc[1]); 1243 + if (!amdgpu_sriov_vf(adev)) 1244 + amdgpu_fence_process(&adev->uvd.ring_enc[1]); 1503 1245 break; 1504 1246 default: 1505 1247 DRM_ERROR("Unhandled interrupt: %d %d\n", ··· 1708 1448 .align_mask = 0xf, 1709 1449 .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), 1710 1450 .support_64bit_ptrs = false, 1451 + .vmhub = AMDGPU_MMHUB, 1711 1452 .get_rptr = uvd_v7_0_ring_get_rptr, 1712 1453 .get_wptr = uvd_v7_0_ring_get_wptr, 1713 1454 .set_wptr = uvd_v7_0_ring_set_wptr, 1714 
1455 .emit_frame_size = 1715 1456 2 + /* uvd_v7_0_ring_emit_hdp_flush */ 1716 1457 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */ 1717 - 34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */ 1458 + 34 + /* uvd_v7_0_ring_emit_vm_flush */ 1718 1459 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */ 1719 1460 .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */ 1720 1461 .emit_ib = uvd_v7_0_ring_emit_ib, ··· 1736 1475 .align_mask = 0x3f, 1737 1476 .nop = HEVC_ENC_CMD_NO_OP, 1738 1477 .support_64bit_ptrs = false, 1478 + .vmhub = AMDGPU_MMHUB, 1739 1479 .get_rptr = uvd_v7_0_enc_ring_get_rptr, 1740 1480 .get_wptr = uvd_v7_0_enc_ring_get_wptr, 1741 1481 .set_wptr = uvd_v7_0_enc_ring_set_wptr, 1742 1482 .emit_frame_size = 1743 - 17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */ 1483 + 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */ 1744 1484 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */ 1745 1485 1, /* uvd_v7_0_enc_ring_insert_end */ 1746 1486 .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
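uvd_v7_0_sriov_start() above replaces direct MMIO programming with an MMSCH init table: every register write, read-modify-write and poll becomes an entry appended to adev->virt.mm_table, the header records the table's dword size, and uvd_v7_0_mmsch_start() then passes the table's GPU address and size through the VCE_MMSCH_VF_* registers and polls the mailbox until the MMSCH reports completion. A minimal sketch of the append pattern; the struct layout, command-type value and helper name below are simplified stand-ins, since the real definitions and the MMSCH_V1_0_INSERT_* macros live in mmsch_v1_0.h, which this diff only includes:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* illustrative stand-in, not the real mmsch_v1_0 command layout */
    struct demo_cmd_direct_write {
            uint32_t command_type;   /* stand-in for MMSCH_COMMAND__DIRECT_REG_WRITE */
            uint32_t reg_offset;
            uint32_t reg_value;
    };

    static uint32_t table[256];      /* stand-in for the shared mm_table buffer */
    static uint32_t table_size;      /* running size in dwords, like header->uvd_table_size */

    static void insert_direct_wt(uint32_t reg, uint32_t value)
    {
            struct demo_cmd_direct_write cmd = {
                    .command_type = 1,
                    .reg_offset = reg,
                    .reg_value = value,
            };

            memcpy(&table[table_size], &cmd, sizeof(cmd));
            table_size += sizeof(cmd) / 4;   /* advance in dwords, as the INSERT_* macros do */
    }

    int main(void)
    {
            insert_direct_wt(0x1234, 0xdeadbeef);
            insert_direct_wt(0x1238, 0x0);
            printf("init table holds %u dwords\n", (unsigned int)table_size);
            return 0;
    }
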
+75 -139
drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
··· 49 49 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev); 50 50 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev); 51 51 52 - static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt, 53 - uint32_t *init_table, 54 - uint32_t reg_offset, 55 - uint32_t value) 56 - { 57 - direct_wt->cmd_header.reg_offset = reg_offset; 58 - direct_wt->reg_value = value; 59 - memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write)); 60 - } 61 - 62 - static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt, 63 - uint32_t *init_table, 64 - uint32_t reg_offset, 65 - uint32_t mask, uint32_t data) 66 - { 67 - direct_rd_mod_wt->cmd_header.reg_offset = reg_offset; 68 - direct_rd_mod_wt->mask_value = mask; 69 - direct_rd_mod_wt->write_data = data; 70 - memcpy((void *)init_table, direct_rd_mod_wt, 71 - sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)); 72 - } 73 - 74 - static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll, 75 - uint32_t *init_table, 76 - uint32_t reg_offset, 77 - uint32_t mask, uint32_t wait) 78 - { 79 - direct_poll->cmd_header.reg_offset = reg_offset; 80 - direct_poll->mask_value = mask; 81 - direct_poll->wait_value = wait; 82 - memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling)); 83 - } 84 - 85 - #define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ 86 - mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \ 87 - init_table, (reg), \ 88 - (mask), (data)); \ 89 - init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ 90 - table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ 91 - } 92 - 93 - #define INSERT_DIRECT_WT(reg, value) { \ 94 - mmsch_insert_direct_wt(&direct_wt, \ 95 - init_table, (reg), \ 96 - (value)); \ 97 - init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ 98 - table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ 99 - } 100 - 101 - #define INSERT_DIRECT_POLL(reg, mask, wait) { \ 102 - mmsch_insert_direct_poll(&direct_poll, \ 103 - init_table, (reg), \ 104 - (mask), (wait)); \ 105 - init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ 106 - table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ 107 - } 108 - 109 52 /** 110 53 * vce_v4_0_ring_get_rptr - get read pointer 111 54 * ··· 223 280 init_table += header->vce_table_offset; 224 281 225 282 ring = &adev->vce.ring[0]; 226 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr); 227 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr); 228 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr)); 229 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); 230 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4); 283 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), 284 + lower_32_bits(ring->gpu_addr)); 285 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), 286 + upper_32_bits(ring->gpu_addr)); 287 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), 288 + ring->ring_size / 4); 231 289 232 290 /* BEGING OF MC_RESUME */ 233 - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0); 234 - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000); 235 - 
INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F); 236 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF); 291 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); 292 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); 293 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); 294 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); 295 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); 237 296 238 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); 239 - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); 240 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); 241 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); 242 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); 243 - 244 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8); 245 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8); 246 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8); 297 + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 298 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), 299 + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); 300 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), 301 + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); 302 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), 303 + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); 304 + } else { 305 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), 306 + adev->vce.gpu_addr >> 8); 307 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), 308 + adev->vce.gpu_addr >> 8); 309 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), 310 + adev->vce.gpu_addr >> 8); 311 + } 247 312 248 313 offset = AMDGPU_VCE_FIRMWARE_OFFSET; 249 314 size = VCE_V4_0_FW_SIZE; 250 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF); 251 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); 315 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 316 + offset & 0x7FFFFFFF); 317 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); 252 318 253 319 offset += size; 254 320 size = VCE_V4_0_STACK_SIZE; 255 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF); 256 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); 321 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), 322 + offset & 0x7FFFFFFF); 323 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); 257 324 258 325 offset += size; 259 326 size = VCE_V4_0_DATA_SIZE; 260 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF); 261 - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); 327 + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), 328 + offset & 0x7FFFFFFF); 329 + 
MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); 262 330 263 - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); 264 - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), 265 - 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); 331 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); 332 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), 333 + 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); 266 334 267 335 /* end of MC_RESUME */ 268 - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 269 - ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); 270 - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 271 - ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); 336 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 337 + VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK); 338 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 339 + ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); 340 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 341 + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); 272 342 273 - INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 274 - VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, 275 - VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); 343 + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 344 + VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, 345 + VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); 276 346 277 347 /* clear BUSY flag */ 278 - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 279 - ~VCE_STATUS__JOB_BUSY_MASK, 0); 348 + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 349 + ~VCE_STATUS__JOB_BUSY_MASK, 0); 280 350 281 351 /* add end packet */ 282 352 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); ··· 450 494 return r; 451 495 } 452 496 453 - if (amdgpu_sriov_vf(adev)) { 454 - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, 455 - AMDGPU_GEM_DOMAIN_VRAM, 456 - &adev->virt.mm_table.bo, 457 - &adev->virt.mm_table.gpu_addr, 458 - (void *)&adev->virt.mm_table.cpu_addr); 459 - if (!r) { 460 - memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE); 461 - printk("mm table gpu addr = 0x%llx, cpu addr = %p. 
\n", 462 - adev->virt.mm_table.gpu_addr, 463 - adev->virt.mm_table.cpu_addr); 464 - } 497 + r = amdgpu_virt_alloc_mm_table(adev); 498 + if (r) 465 499 return r; 466 - } 467 500 468 501 return r; 469 502 } ··· 463 518 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 464 519 465 520 /* free MM table */ 466 - if (amdgpu_sriov_vf(adev)) 467 - amdgpu_bo_free_kernel(&adev->virt.mm_table.bo, 468 - &adev->virt.mm_table.gpu_addr, 469 - (void *)&adev->virt.mm_table.cpu_addr); 521 + amdgpu_virt_free_mm_table(adev); 470 522 471 523 r = amdgpu_vce_suspend(adev); 472 524 if (r) ··· 915 973 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, 916 974 unsigned int vm_id, uint64_t pd_addr) 917 975 { 976 + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 918 977 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); 919 - unsigned eng = ring->idx; 920 - unsigned i; 978 + unsigned eng = ring->vm_inv_eng; 921 979 922 980 pd_addr = pd_addr | 0x1; /* valid bit */ 923 981 /* now only use physical base address of PDE and valid */ 924 982 BUG_ON(pd_addr & 0xFFFF00000000003EULL); 925 983 926 - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 927 - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; 984 + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 985 + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); 986 + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 928 987 929 - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 930 - amdgpu_ring_write(ring, 931 - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); 932 - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 988 + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 989 + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); 990 + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 933 991 934 - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 935 - amdgpu_ring_write(ring, 936 - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); 937 - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 992 + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); 993 + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); 994 + amdgpu_ring_write(ring, 0xffffffff); 995 + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 938 996 939 - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); 940 - amdgpu_ring_write(ring, 941 - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); 942 - amdgpu_ring_write(ring, 0xffffffff); 943 - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); 997 + /* flush TLB */ 998 + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 999 + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); 1000 + amdgpu_ring_write(ring, req); 944 1001 945 - /* flush TLB */ 946 - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 947 - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); 948 - amdgpu_ring_write(ring, req); 949 - 950 - /* wait for flush */ 951 - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); 952 - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); 953 - amdgpu_ring_write(ring, 1 << vm_id); 954 - amdgpu_ring_write(ring, 1 << vm_id); 955 - } 1002 + /* wait for flush */ 1003 + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); 1004 + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); 1005 + amdgpu_ring_write(ring, 1 << vm_id); 1006 + amdgpu_ring_write(ring, 1 << vm_id); 956 1007 } 957 1008 958 1009 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, ··· 1013 1078 .align_mask = 0x3f, 1014 1079 .nop = VCE_CMD_NO_OP, 1015 1080 .support_64bit_ptrs = false, 1081 + .vmhub = AMDGPU_MMHUB, 1016 1082 .get_rptr = vce_v4_0_ring_get_rptr, 
1017 1083 .get_wptr = vce_v4_0_ring_get_wptr, 1018 1084 .set_wptr = vce_v4_0_ring_set_wptr, 1019 1085 .parse_cs = amdgpu_vce_ring_parse_cs_vm, 1020 1086 .emit_frame_size = 1021 - 17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */ 1087 + 17 + /* vce_v4_0_emit_vm_flush */ 1022 1088 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1023 1089 1, /* vce_v4_0_ring_insert_end */ 1024 1090 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
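As with the SDMA and UVD rings, vce_v4_0_emit_vm_flush() now addresses a single hub through the ring's own invalidation engine, so .emit_frame_size shrinks from 17 * AMDGPU_MAX_VMHUBS to a flat 17. That 17 follows directly from the command formats in the hunk: VCE_CMD_REG_WRITE emits three dwords and VCE_CMD_REG_WAIT emits four. A standalone check of the accounting, assuming nothing beyond those two packet sizes:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
            /* VCE_CMD_REG_WRITE is cmd + reg + value, VCE_CMD_REG_WAIT is cmd + reg + mask + value */
            unsigned int write_dw = 3, wait_dw = 4;

            unsigned int flush =
                    write_dw +   /* PTB address, high half  */
                    write_dw +   /* PTB address, low half   */
                    wait_dw  +   /* wait for the low half   */
                    write_dw +   /* request the TLB flush   */
                    wait_dw;     /* wait for the flush ack  */

            assert(flush == 17);   /* matches the new .emit_frame_size entry */
            printf("vm flush: %u dwords per ring\n", flush);
            return 0;
    }
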
+6
drivers/gpu/drm/amd/include/amd_shared.h
··· 138 138 uint8_t down_hyst; 139 139 }; 140 140 141 + enum amd_fan_ctrl_mode { 142 + AMD_FAN_CTRL_NONE = 0, 143 + AMD_FAN_CTRL_MANUAL = 1, 144 + AMD_FAN_CTRL_AUTO = 2, 145 + }; 146 + 141 147 /* CG flags */ 142 148 #define AMD_CG_SUPPORT_GFX_MGCG (1 << 0) 143 149 #define AMD_CG_SUPPORT_GFX_MGLS (1 << 1)
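The new amd_fan_ctrl_mode values line up with the standard hwmon pwm1_enable convention (0 = no fan speed control, 1 = manual, 2 = automatic), so a sysfs value can be translated one to one. A small sketch of that translation, assuming the hwmon convention; the parse helper and its error handling are illustrative, not the driver's actual sysfs store:

    #include <stdio.h>

    enum amd_fan_ctrl_mode {
            AMD_FAN_CTRL_NONE = 0,
            AMD_FAN_CTRL_MANUAL = 1,
            AMD_FAN_CTRL_AUTO = 2,
    };

    /* illustrative parse helper */
    static int parse_pwm1_enable(unsigned long val, enum amd_fan_ctrl_mode *mode)
    {
            switch (val) {
            case 0:
                    *mode = AMD_FAN_CTRL_NONE;    /* no fan speed control */
                    return 0;
            case 1:
                    *mode = AMD_FAN_CTRL_MANUAL;  /* caller drives pwm1 directly */
                    return 0;
            case 2:
                    *mode = AMD_FAN_CTRL_AUTO;    /* firmware/SMU controls the fan */
                    return 0;
            default:
                    return -1;                    /* reject anything else */
            }
    }

    int main(void)
    {
            enum amd_fan_ctrl_mode mode;

            if (!parse_pwm1_enable(2, &mode))
                    printf("pwm1_enable=2 -> mode %d (auto)\n", mode);
            return 0;
    }
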
-270
drivers/gpu/drm/amd/include/cgs_common.h
··· 54 54 }; 55 55 56 56 /** 57 - * enum cgs_clock - Clocks controlled by the SMU 58 - */ 59 - enum cgs_clock { 60 - CGS_CLOCK__SCLK, 61 - CGS_CLOCK__MCLK, 62 - CGS_CLOCK__VCLK, 63 - CGS_CLOCK__DCLK, 64 - CGS_CLOCK__ECLK, 65 - CGS_CLOCK__ACLK, 66 - CGS_CLOCK__ICLK, 67 - /* ... */ 68 - }; 69 - 70 - /** 71 57 * enum cgs_engine - Engines that can be statically power-gated 72 58 */ 73 59 enum cgs_engine { ··· 64 78 CGS_ENGINE__ACP_DSP0, 65 79 CGS_ENGINE__ACP_DSP1, 66 80 CGS_ENGINE__ISP, 67 - /* ... */ 68 - }; 69 - 70 - /** 71 - * enum cgs_voltage_planes - Voltage planes for external camera HW 72 - */ 73 - enum cgs_voltage_planes { 74 - CGS_VOLTAGE_PLANE__SENSOR0, 75 - CGS_VOLTAGE_PLANE__SENSOR1, 76 81 /* ... */ 77 82 }; 78 83 ··· 121 144 CGS_RESOURCE_TYPE_IO, 122 145 CGS_RESOURCE_TYPE_DOORBELL, 123 146 CGS_RESOURCE_TYPE_ROM, 124 - }; 125 - 126 - /** 127 - * struct cgs_clock_limits - Clock limits 128 - * 129 - * Clocks are specified in 10KHz units. 130 - */ 131 - struct cgs_clock_limits { 132 - unsigned min; /**< Minimum supported frequency */ 133 - unsigned max; /**< Maxumim supported frequency */ 134 - unsigned sustainable; /**< Thermally sustainable frequency */ 135 147 }; 136 148 137 149 /** ··· 185 219 struct cgs_acpi_method_argument *poutput_argument; 186 220 uint32_t padding[9]; 187 221 }; 188 - 189 - /** 190 - * cgs_gpu_mem_info() - Return information about memory heaps 191 - * @cgs_device: opaque device handle 192 - * @type: memory type 193 - * @mc_start: Start MC address of the heap (output) 194 - * @mc_size: MC address space size (output) 195 - * @mem_size: maximum amount of memory available for allocation (output) 196 - * 197 - * This function returns information about memory heaps. The type 198 - * parameter is used to select the memory heap. The mc_start and 199 - * mc_size for GART heaps may be bigger than the memory available for 200 - * allocation. 201 - * 202 - * mc_start and mc_size are undefined for non-contiguous FB memory 203 - * types, since buffers allocated with these types may or may not be 204 - * GART mapped. 
205 - * 206 - * Return: 0 on success, -errno otherwise 207 - */ 208 - typedef int (*cgs_gpu_mem_info_t)(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, 209 - uint64_t *mc_start, uint64_t *mc_size, 210 - uint64_t *mem_size); 211 - 212 - /** 213 - * cgs_gmap_kmem() - map kernel memory to GART aperture 214 - * @cgs_device: opaque device handle 215 - * @kmem: pointer to kernel memory 216 - * @size: size to map 217 - * @min_offset: minimum offset from start of GART aperture 218 - * @max_offset: maximum offset from start of GART aperture 219 - * @kmem_handle: kernel memory handle (output) 220 - * @mcaddr: MC address (output) 221 - * 222 - * Return: 0 on success, -errno otherwise 223 - */ 224 - typedef int (*cgs_gmap_kmem_t)(struct cgs_device *cgs_device, void *kmem, uint64_t size, 225 - uint64_t min_offset, uint64_t max_offset, 226 - cgs_handle_t *kmem_handle, uint64_t *mcaddr); 227 - 228 - /** 229 - * cgs_gunmap_kmem() - unmap kernel memory 230 - * @cgs_device: opaque device handle 231 - * @kmem_handle: kernel memory handle returned by gmap_kmem 232 - * 233 - * Return: 0 on success, -errno otherwise 234 - */ 235 - typedef int (*cgs_gunmap_kmem_t)(struct cgs_device *cgs_device, cgs_handle_t kmem_handle); 236 222 237 223 /** 238 224 * cgs_alloc_gpu_mem() - Allocate GPU memory ··· 310 392 unsigned index, uint32_t value); 311 393 312 394 /** 313 - * cgs_read_pci_config_byte() - Read byte from PCI configuration space 314 - * @cgs_device: opaque device handle 315 - * @addr: address 316 - * 317 - * Return: Value read 318 - */ 319 - typedef uint8_t (*cgs_read_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr); 320 - 321 - /** 322 - * cgs_read_pci_config_word() - Read word from PCI configuration space 323 - * @cgs_device: opaque device handle 324 - * @addr: address, must be word-aligned 325 - * 326 - * Return: Value read 327 - */ 328 - typedef uint16_t (*cgs_read_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr); 329 - 330 - /** 331 - * cgs_read_pci_config_dword() - Read dword from PCI configuration space 332 - * @cgs_device: opaque device handle 333 - * @addr: address, must be dword-aligned 334 - * 335 - * Return: Value read 336 - */ 337 - typedef uint32_t (*cgs_read_pci_config_dword_t)(struct cgs_device *cgs_device, 338 - unsigned addr); 339 - 340 - /** 341 - * cgs_write_pci_config_byte() - Write byte to PCI configuration space 342 - * @cgs_device: opaque device handle 343 - * @addr: address 344 - * @value: value to write 345 - */ 346 - typedef void (*cgs_write_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr, 347 - uint8_t value); 348 - 349 - /** 350 - * cgs_write_pci_config_word() - Write byte to PCI configuration space 351 - * @cgs_device: opaque device handle 352 - * @addr: address, must be word-aligned 353 - * @value: value to write 354 - */ 355 - typedef void (*cgs_write_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr, 356 - uint16_t value); 357 - 358 - /** 359 - * cgs_write_pci_config_dword() - Write byte to PCI configuration space 360 - * @cgs_device: opaque device handle 361 - * @addr: address, must be dword-aligned 362 - * @value: value to write 363 - */ 364 - typedef void (*cgs_write_pci_config_dword_t)(struct cgs_device *cgs_device, unsigned addr, 365 - uint32_t value); 366 - 367 - 368 - /** 369 395 * cgs_get_pci_resource() - provide access to a device resource (PCI BAR) 370 396 * @cgs_device: opaque device handle 371 397 * @resource_type: Type of Resource (MMIO, IO, ROM, FB, DOORBELL) ··· 363 501 unsigned table, void 
*args); 364 502 365 503 /** 366 - * cgs_create_pm_request() - Create a power management request 367 - * @cgs_device: opaque device handle 368 - * @request: handle of created PM request (output) 369 - * 370 - * Return: 0 on success, -errno otherwise 371 - */ 372 - typedef int (*cgs_create_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t *request); 373 - 374 - /** 375 - * cgs_destroy_pm_request() - Destroy a power management request 376 - * @cgs_device: opaque device handle 377 - * @request: handle of created PM request 378 - * 379 - * Return: 0 on success, -errno otherwise 380 - */ 381 - typedef int (*cgs_destroy_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request); 382 - 383 - /** 384 - * cgs_set_pm_request() - Activate or deactiveate a PM request 385 - * @cgs_device: opaque device handle 386 - * @request: PM request handle 387 - * @active: 0 = deactivate, non-0 = activate 388 - * 389 - * While a PM request is active, its minimum clock requests are taken 390 - * into account as the requested engines are powered up. When the 391 - * request is inactive, the engines may be powered down and clocks may 392 - * be lower, depending on other PM requests by other driver 393 - * components. 394 - * 395 - * Return: 0 on success, -errno otherwise 396 - */ 397 - typedef int (*cgs_set_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request, 398 - int active); 399 - 400 - /** 401 - * cgs_pm_request_clock() - Request a minimum frequency for a specific clock 402 - * @cgs_device: opaque device handle 403 - * @request: PM request handle 404 - * @clock: which clock? 405 - * @freq: requested min. frequency in 10KHz units (0 to clear request) 406 - * 407 - * Return: 0 on success, -errno otherwise 408 - */ 409 - typedef int (*cgs_pm_request_clock_t)(struct cgs_device *cgs_device, cgs_handle_t request, 410 - enum cgs_clock clock, unsigned freq); 411 - 412 - /** 413 - * cgs_pm_request_engine() - Request an engine to be powered up 414 - * @cgs_device: opaque device handle 415 - * @request: PM request handle 416 - * @engine: which engine? 417 - * @powered: 0 = powered down, non-0 = powered up 418 - * 419 - * Return: 0 on success, -errno otherwise 420 - */ 421 - typedef int (*cgs_pm_request_engine_t)(struct cgs_device *cgs_device, cgs_handle_t request, 422 - enum cgs_engine engine, int powered); 423 - 424 - /** 425 - * cgs_pm_query_clock_limits() - Query clock frequency limits 426 - * @cgs_device: opaque device handle 427 - * @clock: which clock? 428 - * @limits: clock limits 429 - * 430 - * Return: 0 on success, -errno otherwise 431 - */ 432 - typedef int (*cgs_pm_query_clock_limits_t)(struct cgs_device *cgs_device, 433 - enum cgs_clock clock, 434 - struct cgs_clock_limits *limits); 435 - 436 - /** 437 - * cgs_set_camera_voltages() - Apply specific voltages to PMIC voltage planes 438 - * @cgs_device: opaque device handle 439 - * @mask: bitmask of voltages to change (1<<CGS_VOLTAGE_PLANE__xyz|...) 
440 - * @voltages: pointer to array of voltage values in 1mV units 441 - * 442 - * Return: 0 on success, -errno otherwise 443 - */ 444 - typedef int (*cgs_set_camera_voltages_t)(struct cgs_device *cgs_device, uint32_t mask, 445 - const uint32_t *voltages); 446 - /** 447 504 * cgs_get_firmware_info - Get the firmware information from core driver 448 505 * @cgs_device: opaque device handle 449 506 * @type: the firmware type ··· 408 627 409 628 struct cgs_ops { 410 629 /* memory management calls (similar to KFD interface) */ 411 - cgs_gpu_mem_info_t gpu_mem_info; 412 - cgs_gmap_kmem_t gmap_kmem; 413 - cgs_gunmap_kmem_t gunmap_kmem; 414 630 cgs_alloc_gpu_mem_t alloc_gpu_mem; 415 631 cgs_free_gpu_mem_t free_gpu_mem; 416 632 cgs_gmap_gpu_mem_t gmap_gpu_mem; ··· 419 641 cgs_write_register_t write_register; 420 642 cgs_read_ind_register_t read_ind_register; 421 643 cgs_write_ind_register_t write_ind_register; 422 - /* PCI configuration space access */ 423 - cgs_read_pci_config_byte_t read_pci_config_byte; 424 - cgs_read_pci_config_word_t read_pci_config_word; 425 - cgs_read_pci_config_dword_t read_pci_config_dword; 426 - cgs_write_pci_config_byte_t write_pci_config_byte; 427 - cgs_write_pci_config_word_t write_pci_config_word; 428 - cgs_write_pci_config_dword_t write_pci_config_dword; 429 644 /* PCI resources */ 430 645 cgs_get_pci_resource_t get_pci_resource; 431 646 /* ATOM BIOS */ 432 647 cgs_atom_get_data_table_t atom_get_data_table; 433 648 cgs_atom_get_cmd_table_revs_t atom_get_cmd_table_revs; 434 649 cgs_atom_exec_cmd_table_t atom_exec_cmd_table; 435 - /* Power management */ 436 - cgs_create_pm_request_t create_pm_request; 437 - cgs_destroy_pm_request_t destroy_pm_request; 438 - cgs_set_pm_request_t set_pm_request; 439 - cgs_pm_request_clock_t pm_request_clock; 440 - cgs_pm_request_engine_t pm_request_engine; 441 - cgs_pm_query_clock_limits_t pm_query_clock_limits; 442 - cgs_set_camera_voltages_t set_camera_voltages; 443 650 /* Firmware Info */ 444 651 cgs_get_firmware_info get_firmware_info; 445 652 cgs_rel_firmware rel_firmware; ··· 459 696 #define CGS_OS_CALL(func,dev,...) 
\ 460 697 (((struct cgs_device *)dev)->os_ops->func(dev, ##__VA_ARGS__)) 461 698 462 - #define cgs_gpu_mem_info(dev,type,mc_start,mc_size,mem_size) \ 463 - CGS_CALL(gpu_mem_info,dev,type,mc_start,mc_size,mem_size) 464 - #define cgs_gmap_kmem(dev,kmem,size,min_off,max_off,kmem_handle,mcaddr) \ 465 - CGS_CALL(gmap_kmem,dev,kmem,size,min_off,max_off,kmem_handle,mcaddr) 466 - #define cgs_gunmap_kmem(dev,kmem_handle) \ 467 - CGS_CALL(gunmap_kmem,dev,keme_handle) 468 699 #define cgs_alloc_gpu_mem(dev,type,size,align,min_off,max_off,handle) \ 469 700 CGS_CALL(alloc_gpu_mem,dev,type,size,align,min_off,max_off,handle) 470 701 #define cgs_free_gpu_mem(dev,handle) \ ··· 481 724 #define cgs_write_ind_register(dev,space,index,value) \ 482 725 CGS_CALL(write_ind_register,dev,space,index,value) 483 726 484 - #define cgs_read_pci_config_byte(dev,addr) \ 485 - CGS_CALL(read_pci_config_byte,dev,addr) 486 - #define cgs_read_pci_config_word(dev,addr) \ 487 - CGS_CALL(read_pci_config_word,dev,addr) 488 - #define cgs_read_pci_config_dword(dev,addr) \ 489 - CGS_CALL(read_pci_config_dword,dev,addr) 490 - #define cgs_write_pci_config_byte(dev,addr,value) \ 491 - CGS_CALL(write_pci_config_byte,dev,addr,value) 492 - #define cgs_write_pci_config_word(dev,addr,value) \ 493 - CGS_CALL(write_pci_config_word,dev,addr,value) 494 - #define cgs_write_pci_config_dword(dev,addr,value) \ 495 - CGS_CALL(write_pci_config_dword,dev,addr,value) 496 - 497 727 #define cgs_atom_get_data_table(dev,table,size,frev,crev) \ 498 728 CGS_CALL(atom_get_data_table,dev,table,size,frev,crev) 499 729 #define cgs_atom_get_cmd_table_revs(dev,table,frev,crev) \ ··· 488 744 #define cgs_atom_exec_cmd_table(dev,table,args) \ 489 745 CGS_CALL(atom_exec_cmd_table,dev,table,args) 490 746 491 - #define cgs_create_pm_request(dev,request) \ 492 - CGS_CALL(create_pm_request,dev,request) 493 - #define cgs_destroy_pm_request(dev,request) \ 494 - CGS_CALL(destroy_pm_request,dev,request) 495 - #define cgs_set_pm_request(dev,request,active) \ 496 - CGS_CALL(set_pm_request,dev,request,active) 497 - #define cgs_pm_request_clock(dev,request,clock,freq) \ 498 - CGS_CALL(pm_request_clock,dev,request,clock,freq) 499 - #define cgs_pm_request_engine(dev,request,engine,powered) \ 500 - CGS_CALL(pm_request_engine,dev,request,engine,powered) 501 - #define cgs_pm_query_clock_limits(dev,clock,limits) \ 502 - CGS_CALL(pm_query_clock_limits,dev,clock,limits) 503 - #define cgs_set_camera_voltages(dev,mask,voltages) \ 504 - CGS_CALL(set_camera_voltages,dev,mask,voltages) 505 747 #define cgs_get_firmware_info(dev, type, info) \ 506 748 CGS_CALL(get_firmware_info, dev, type, info) 507 749 #define cgs_rel_firmware(dev, type) \
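The cgs_common.h hunks above drop callbacks that no longer have users (kernel-memory GART mapping, PCI config space accessors, the PM request API and camera voltages) together with their wrapper macros; what survives is the same thin dispatch pattern, where each cgs_* macro expands through CGS_CALL() into an indirect call on the device's ops table. A minimal user-space model of that pattern follows; the demo_* names and DEMO_CALL macro are illustrative stand-ins, not part of the CGS interface.

#include <stdio.h>

struct demo_device;                        /* opaque handle, like struct cgs_device */

struct demo_ops {                          /* like struct cgs_ops: one pointer per service */
	int (*alloc_gpu_mem)(struct demo_device *dev, unsigned long size);
	int (*free_gpu_mem)(struct demo_device *dev, int handle);
};

struct demo_device {
	const struct demo_ops *ops;        /* filled in by whoever implements the services */
};

/* like CGS_CALL(): hide the indirection behind a macro (##__VA_ARGS__ is a GNU extension) */
#define DEMO_CALL(func, dev, ...) ((dev)->ops->func(dev, ##__VA_ARGS__))

static int demo_alloc(struct demo_device *dev, unsigned long size)
{
	(void)dev;
	printf("alloc %lu bytes\n", size);
	return 42;                         /* pretend handle */
}

static int demo_free(struct demo_device *dev, int handle)
{
	(void)dev;
	printf("free handle %d\n", handle);
	return 0;
}

int main(void)
{
	static const struct demo_ops ops = { demo_alloc, demo_free };
	struct demo_device dev = { &ops };
	int h = DEMO_CALL(alloc_gpu_mem, &dev, 4096UL);

	return DEMO_CALL(free_gpu_mem, &dev, h);
}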
+3 -1
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
··· 251 251 252 252 ret = pp_check(pp_handle); 253 253 254 - if (ret != 0) 254 + if (ret == PP_DPM_DISABLED) 255 + return 0; 256 + else if (ret != 0) 255 257 return ret; 256 258 257 259 eventmgr = pp_handle->eventmgr;
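In the amd_powerplay.c hunk the precheck result is split into two cases: PP_DPM_DISABLED now means "powerplay is not active, succeed quietly", while any other nonzero result still propagates as an error. The guard in isolation looks like the sketch below; DPM_DISABLED is a placeholder sentinel, not the driver's real PP_DPM_DISABLED value.

#include <stdio.h>

#define DPM_DISABLED 0xfeed            /* placeholder for the driver's PP_DPM_DISABLED */

static int precheck(void) { return DPM_DISABLED; }

static int handle_event(void)
{
	int ret = precheck();

	if (ret == DPM_DISABLED)       /* powerplay not active: nothing to do */
		return 0;
	else if (ret != 0)             /* genuine failure: propagate it */
		return ret;

	/* ... normal event handling would follow here ... */
	return 0;
}

int main(void)
{
	printf("%d\n", handle_event());
	return 0;
}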
+1 -1
drivers/gpu/drm/amd/powerplay/eventmgr/eventsubchains.c
··· 219 219 }; 220 220 221 221 const pem_event_action disable_smc_firmware_ctf_tasks[] = { 222 - /* PEM_Task_DisableSMCFirmwareCTF,*/ 222 + pem_task_disable_smc_firmware_ctf, 223 223 NULL 224 224 }; 225 225
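disable_smc_firmware_ctf_tasks[] is one of the NULL-terminated task lists the powerplay event manager runs; the change replaces the commented-out placeholder with the real pem_task_disable_smc_firmware_ctf hook added in eventtasks.c below. A rough user-space sketch of how such a chain is walked; the names here (demo_task, run_chain, task_a) are illustrative, not the powerplay API.

#include <stdio.h>

typedef int (*demo_task)(void *ctx);           /* stand-in for a pem_event_action entry */

static int task_a(void *ctx) { (void)ctx; puts("task_a"); return 0; }
static int task_b(void *ctx) { (void)ctx; puts("task_b"); return 0; }

/* NULL-terminated chain, shaped like disable_smc_firmware_ctf_tasks[] */
static const demo_task demo_chain[] = { task_a, task_b, NULL };

static int run_chain(const demo_task *chain, void *ctx)
{
	int ret;

	for (; *chain; chain++) {              /* the NULL entry terminates the walk */
		ret = (*chain)(ctx);
		if (ret)                       /* bail out on the first failing task */
			return ret;
	}
	return 0;
}

int main(void)
{
	return run_chain(demo_chain, NULL);
}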
+5
drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
··· 173 173 return 0; 174 174 } 175 175 176 + int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data) 177 + { 178 + return phm_disable_smc_firmware_ctf(eventmgr->hwmgr); 179 + } 180 + 176 181 int pem_task_setup_asic(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data) 177 182 { 178 183 return phm_setup_asic(eventmgr->hwmgr);
+1
drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.h
··· 84 84 /*thermal */ 85 85 int pem_task_initialize_thermal_controller(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data); 86 86 int pem_task_uninitialize_thermal_controller(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data); 87 + int pem_task_disable_smc_firmware_ctf(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data); 87 88 88 89 #endif /* _EVENT_TASKS_H_ */
+10
drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
··· 501 501 502 502 return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks); 503 503 } 504 + 505 + int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr) 506 + { 507 + PHM_FUNC_CHECK(hwmgr); 508 + 509 + if (hwmgr->hwmgr_func->disable_smc_firmware_ctf == NULL) 510 + return -EINVAL; 511 + 512 + return hwmgr->hwmgr_func->disable_smc_firmware_ctf(hwmgr); 513 + }
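phm_disable_smc_firmware_ctf() follows the usual phm_* shape: validate the hwmgr, treat the per-ASIC callback as optional, and dispatch through hwmgr_func when it exists (smu7 and vega10 both wire it to their thermal_disable_alert handlers further down). A compact model of that optional-callback guard; the demo_* types are placeholders, not the real pp_hwmgr layout.

#include <errno.h>
#include <stdio.h>

struct demo_hwmgr;

struct demo_funcs {
	int (*disable_ctf)(struct demo_hwmgr *hwmgr);   /* an ASIC may leave this NULL */
};

struct demo_hwmgr {
	const struct demo_funcs *funcs;
};

/* same shape as phm_disable_smc_firmware_ctf(): validate, then dispatch if implemented */
static int demo_disable_ctf(struct demo_hwmgr *hwmgr)
{
	if (!hwmgr || !hwmgr->funcs)        /* roughly what PHM_FUNC_CHECK() guards against */
		return -EINVAL;
	if (!hwmgr->funcs->disable_ctf)
		return -EINVAL;
	return hwmgr->funcs->disable_ctf(hwmgr);
}

int main(void)
{
	struct demo_hwmgr hwmgr = { NULL };

	printf("no callback wired up: %d\n", demo_disable_ctf(&hwmgr));
	return 0;
}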
+35 -14
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
··· 314 314 le32_to_cpu(profile->gb_vdroop_table_ckson_a2); 315 315 param->ulGbFuseTableCksoffM1 = 316 316 le32_to_cpu(profile->avfsgb_fuse_table_cksoff_m1); 317 - param->usGbFuseTableCksoffM2 = 317 + param->ulGbFuseTableCksoffM2 = 318 318 le16_to_cpu(profile->avfsgb_fuse_table_cksoff_m2); 319 319 param->ulGbFuseTableCksoffB = 320 320 le32_to_cpu(profile->avfsgb_fuse_table_cksoff_b); 321 321 param->ulGbFuseTableCksonM1 = 322 322 le32_to_cpu(profile->avfsgb_fuse_table_ckson_m1); 323 - param->usGbFuseTableCksonM2 = 323 + param->ulGbFuseTableCksonM2 = 324 324 le16_to_cpu(profile->avfsgb_fuse_table_ckson_m2); 325 325 param->ulGbFuseTableCksonB = 326 326 le32_to_cpu(profile->avfsgb_fuse_table_ckson_b); 327 - param->usMaxVoltage025mv = 328 - le16_to_cpu(profile->max_voltage_0_25mv); 329 - param->ucEnableGbVdroopTableCksoff = 330 - profile->enable_gb_vdroop_table_cksoff; 327 + 331 328 param->ucEnableGbVdroopTableCkson = 332 329 profile->enable_gb_vdroop_table_ckson; 333 - param->ucEnableGbFuseTableCksoff = 334 - profile->enable_gb_fuse_table_cksoff; 335 330 param->ucEnableGbFuseTableCkson = 336 331 profile->enable_gb_fuse_table_ckson; 337 332 param->usPsmAgeComfactor = 338 333 le16_to_cpu(profile->psm_age_comfactor); 339 - param->ucEnableApplyAvfsCksoffVoltage = 340 - profile->enable_apply_avfs_cksoff_voltage; 341 334 342 335 param->ulDispclk2GfxclkM1 = 343 336 le32_to_cpu(profile->dispclk2gfxclk_a); 344 - param->usDispclk2GfxclkM2 = 337 + param->ulDispclk2GfxclkM2 = 345 338 le16_to_cpu(profile->dispclk2gfxclk_b); 346 339 param->ulDispclk2GfxclkB = 347 340 le32_to_cpu(profile->dispclk2gfxclk_c); 348 341 param->ulDcefclk2GfxclkM1 = 349 342 le32_to_cpu(profile->dcefclk2gfxclk_a); 350 - param->usDcefclk2GfxclkM2 = 343 + param->ulDcefclk2GfxclkM2 = 351 344 le16_to_cpu(profile->dcefclk2gfxclk_b); 352 345 param->ulDcefclk2GfxclkB = 353 346 le32_to_cpu(profile->dcefclk2gfxclk_c); 354 347 param->ulPixelclk2GfxclkM1 = 355 348 le32_to_cpu(profile->pixclk2gfxclk_a); 356 - param->usPixelclk2GfxclkM2 = 349 + param->ulPixelclk2GfxclkM2 = 357 350 le16_to_cpu(profile->pixclk2gfxclk_b); 358 351 param->ulPixelclk2GfxclkB = 359 352 le32_to_cpu(profile->pixclk2gfxclk_c); 360 353 param->ulPhyclk2GfxclkM1 = 361 354 le32_to_cpu(profile->phyclk2gfxclk_a); 362 - param->usPhyclk2GfxclkM2 = 355 + param->ulPhyclk2GfxclkM2 = 363 356 le16_to_cpu(profile->phyclk2gfxclk_b); 364 357 param->ulPhyclk2GfxclkB = 365 358 le32_to_cpu(profile->phyclk2gfxclk_c); ··· 384 391 param->ucVR1HotPolarity = info->vr1hot_polarity; 385 392 param->ucFwCtfGpio = info->fw_ctf_gpio_bit; 386 393 param->ucFwCtfPolarity = info->fw_ctf_polarity; 394 + 395 + return 0; 396 + } 397 + 398 + int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr, 399 + struct pp_atomfwctrl_bios_boot_up_values *boot_values) 400 + { 401 + struct atom_firmware_info_v3_1 *info = NULL; 402 + uint16_t ix; 403 + 404 + ix = GetIndexIntoMasterDataTable(firmwareinfo); 405 + info = (struct atom_firmware_info_v3_1 *) 406 + cgs_atom_get_data_table(hwmgr->device, 407 + ix, NULL, NULL, NULL); 408 + 409 + if (!info) { 410 + pr_info("Error retrieving BIOS firmwareinfo!"); 411 + return -EINVAL; 412 + } 413 + 414 + boot_values->ulRevision = info->firmware_revision; 415 + boot_values->ulGfxClk = info->bootup_sclk_in10khz; 416 + boot_values->ulUClk = info->bootup_mclk_in10khz; 417 + boot_values->ulSocClk = 0; 418 + boot_values->usVddc = info->bootup_vddc_mv; 419 + boot_values->usVddci = info->bootup_vddci_mv; 420 + boot_values->usMvddc = info->bootup_mvddc_mv; 421 + 
boot_values->usVddGfx = info->bootup_vddgfx_mv; 387 422 388 423 return 0; 389 424 }
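pp_atomfwctrl_get_vbios_bootup_values() gives hwmgr code a single call that pulls the VBIOS boot-up clocks and voltages out of the atom_firmware_info_v3_1 table; the vega10_hwmgr.c hunk later in this series caches the result in vbios_boot_state during DPM setup. Below is a self-contained sketch of that read-then-cache pattern, with demo_* stand-ins instead of the real structures and a canned table read in place of the ATOM parsing.

#include <stdint.h>
#include <stdio.h>

/* trimmed-down stand-ins for the structures the hunk above introduces */
struct demo_boot_values { uint32_t gfx_clk_10khz; uint16_t vddc_mv; };
struct demo_boot_state  { uint32_t gfx_clock;     uint16_t vddc;    };

/* pretend table read; the real code parses atom_firmware_info_v3_1 instead */
static int demo_get_boot_values(struct demo_boot_values *v)
{
	v->gfx_clk_10khz = 85000;      /* 850 MHz expressed in 10 kHz units */
	v->vddc_mv = 900;
	return 0;
}

int main(void)
{
	struct demo_boot_values v;
	struct demo_boot_state state = { 0, 0 };

	/* mirror of the vega10_hwmgr.c caller: cache the values only on success */
	if (!demo_get_boot_values(&v)) {
		state.gfx_clock = v.gfx_clk_10khz;
		state.vddc = v.vddc_mv;
	}
	printf("boot gfx clock %u x10kHz, vddc %u mV\n", state.gfx_clock, state.vddc);
	return 0;
}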
+27 -12
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
··· 69 69 struct pp_atomfwctrl_avfs_parameters { 70 70 uint32_t ulMaxVddc; 71 71 uint32_t ulMinVddc; 72 - uint8_t ucMaxVidStep; 72 + 73 73 uint32_t ulMeanNsigmaAcontant0; 74 74 uint32_t ulMeanNsigmaAcontant1; 75 75 uint32_t ulMeanNsigmaAcontant2; ··· 82 82 uint32_t ulGbVdroopTableCksonA0; 83 83 uint32_t ulGbVdroopTableCksonA1; 84 84 uint32_t ulGbVdroopTableCksonA2; 85 + 85 86 uint32_t ulGbFuseTableCksoffM1; 86 - uint16_t usGbFuseTableCksoffM2; 87 - uint32_t ulGbFuseTableCksoffB;\ 87 + uint32_t ulGbFuseTableCksoffM2; 88 + uint32_t ulGbFuseTableCksoffB; 89 + 88 90 uint32_t ulGbFuseTableCksonM1; 89 - uint16_t usGbFuseTableCksonM2; 91 + uint32_t ulGbFuseTableCksonM2; 90 92 uint32_t ulGbFuseTableCksonB; 91 - uint16_t usMaxVoltage025mv; 92 - uint8_t ucEnableGbVdroopTableCksoff; 93 + 93 94 uint8_t ucEnableGbVdroopTableCkson; 94 - uint8_t ucEnableGbFuseTableCksoff; 95 95 uint8_t ucEnableGbFuseTableCkson; 96 96 uint16_t usPsmAgeComfactor; 97 - uint8_t ucEnableApplyAvfsCksoffVoltage; 97 + 98 98 uint32_t ulDispclk2GfxclkM1; 99 - uint16_t usDispclk2GfxclkM2; 99 + uint32_t ulDispclk2GfxclkM2; 100 100 uint32_t ulDispclk2GfxclkB; 101 101 uint32_t ulDcefclk2GfxclkM1; 102 - uint16_t usDcefclk2GfxclkM2; 102 + uint32_t ulDcefclk2GfxclkM2; 103 103 uint32_t ulDcefclk2GfxclkB; 104 104 uint32_t ulPixelclk2GfxclkM1; 105 - uint16_t usPixelclk2GfxclkM2; 105 + uint32_t ulPixelclk2GfxclkM2; 106 106 uint32_t ulPixelclk2GfxclkB; 107 107 uint32_t ulPhyclk2GfxclkM1; 108 - uint16_t usPhyclk2GfxclkM2; 108 + uint32_t ulPhyclk2GfxclkM2; 109 109 uint32_t ulPhyclk2GfxclkB; 110 110 }; 111 111 ··· 119 119 uint8_t ucFwCtfGpio; 120 120 uint8_t ucFwCtfPolarity; 121 121 }; 122 + 123 + struct pp_atomfwctrl_bios_boot_up_values { 124 + uint32_t ulRevision; 125 + uint32_t ulGfxClk; 126 + uint32_t ulUClk; 127 + uint32_t ulSocClk; 128 + uint16_t usVddc; 129 + uint16_t usVddci; 130 + uint16_t usMvddc; 131 + uint16_t usVddGfx; 132 + }; 133 + 122 134 int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr, 123 135 uint32_t clock_type, uint32_t clock_value, 124 136 struct pp_atomfwctrl_clock_dividers_soc15 *dividers); ··· 147 135 struct pp_atomfwctrl_avfs_parameters *param); 148 136 int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr, 149 137 struct pp_atomfwctrl_gpio_parameters *param); 138 + 139 + int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr, 140 + struct pp_atomfwctrl_bios_boot_up_values *boot_values); 150 141 151 142 #endif 152 143
+21 -43
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
··· 4334 4334 4335 4335 static int smu7_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) 4336 4336 { 4337 - if (mode) { 4338 - /* stop auto-manage */ 4339 - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 4340 - PHM_PlatformCaps_MicrocodeFanControl)) 4341 - smu7_fan_ctrl_stop_smc_fan_control(hwmgr); 4342 - smu7_fan_ctrl_set_static_mode(hwmgr, mode); 4343 - } else 4344 - /* restart auto-manage */ 4345 - smu7_fan_ctrl_reset_fan_speed_to_default(hwmgr); 4337 + int result = 0; 4346 4338 4347 - return 0; 4339 + switch (mode) { 4340 + case AMD_FAN_CTRL_NONE: 4341 + result = smu7_fan_ctrl_set_fan_speed_percent(hwmgr, 100); 4342 + break; 4343 + case AMD_FAN_CTRL_MANUAL: 4344 + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 4345 + PHM_PlatformCaps_MicrocodeFanControl)) 4346 + result = smu7_fan_ctrl_stop_smc_fan_control(hwmgr); 4347 + break; 4348 + case AMD_FAN_CTRL_AUTO: 4349 + result = smu7_fan_ctrl_set_static_mode(hwmgr, mode); 4350 + if (!result) 4351 + result = smu7_fan_ctrl_start_smc_fan_control(hwmgr); 4352 + break; 4353 + default: 4354 + break; 4355 + } 4356 + return result; 4348 4357 } 4349 4358 4350 4359 static int smu7_get_fan_control_mode(struct pp_hwmgr *hwmgr) 4351 4360 { 4352 - if (hwmgr->fan_ctrl_is_in_default_mode) 4353 - return hwmgr->fan_ctrl_default_mode; 4354 - else 4355 - return PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, 4356 - CG_FDO_CTRL2, FDO_PWM_MODE); 4361 + return hwmgr->fan_ctrl_enabled ? AMD_FAN_CTRL_AUTO : AMD_FAN_CTRL_MANUAL; 4357 4362 } 4358 4363 4359 4364 static int smu7_get_sclk_od(struct pp_hwmgr *hwmgr) ··· 4527 4522 return 0; 4528 4523 } 4529 4524 4530 - static int smu7_request_firmware(struct pp_hwmgr *hwmgr) 4531 - { 4532 - int ret; 4533 - struct cgs_firmware_info info = {0}; 4534 - 4535 - ret = cgs_get_firmware_info(hwmgr->device, 4536 - smu7_convert_fw_type_to_cgs(UCODE_ID_SMU), 4537 - &info); 4538 - if (ret || !info.kptr) 4539 - return -EINVAL; 4540 - 4541 - return 0; 4542 - } 4543 - 4544 - static int smu7_release_firmware(struct pp_hwmgr *hwmgr) 4545 - { 4546 - int ret; 4547 - 4548 - ret = cgs_rel_firmware(hwmgr->device, 4549 - smu7_convert_fw_type_to_cgs(UCODE_ID_SMU)); 4550 - if (ret) 4551 - return -EINVAL; 4552 - 4553 - return 0; 4554 - } 4555 - 4556 4525 static void smu7_find_min_clock_masks(struct pp_hwmgr *hwmgr, 4557 4526 uint32_t *sclk_mask, uint32_t *mclk_mask, 4558 4527 uint32_t min_sclk, uint32_t min_mclk) ··· 4670 4691 .get_clock_by_type = smu7_get_clock_by_type, 4671 4692 .read_sensor = smu7_read_sensor, 4672 4693 .dynamic_state_management_disable = smu7_disable_dpm_tasks, 4673 - .request_firmware = smu7_request_firmware, 4674 - .release_firmware = smu7_release_firmware, 4675 4694 .set_power_profile_state = smu7_set_power_profile_state, 4676 4695 .avfs_control = smu7_avfs_control, 4696 + .disable_smc_firmware_ctf = smu7_thermal_disable_alert, 4677 4697 }; 4678 4698 4679 4699 uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
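The rewritten smu7_set_fan_control_mode() gives each fan-control mode a distinct action: AMD_FAN_CTRL_NONE pins the fan at 100 percent, AMD_FAN_CTRL_MANUAL stops the SMC's automatic control so the PWM can be driven directly, and AMD_FAN_CTRL_AUTO re-arms SMC fan control (vega10 gets the same treatment later in the series). A user-space sketch of the three-way dispatch; the demo enum values are stand-ins, not the kernel's AMD_FAN_CTRL_* definitions.

#include <stdio.h>

enum demo_fan_mode { FAN_NONE, FAN_MANUAL, FAN_AUTO };

static int set_fan_mode(enum demo_fan_mode mode)
{
	switch (mode) {
	case FAN_NONE:          /* no control: pin the fan at 100% duty */
		puts("set fan speed to 100%");
		break;
	case FAN_MANUAL:        /* userspace drives the PWM: stop SMC fan control */
		puts("stop SMC fan control");
		break;
	case FAN_AUTO:          /* hand the fan curve back to firmware */
		puts("restart SMC fan control");
		break;
	default:                /* unknown modes are ignored, as in the driver */
		break;
	}
	return 0;
}

int main(void)
{
	return set_fan_mode(FAN_AUTO);
}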
+5 -4
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
··· 112 112 */ 113 113 int smu7_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode) 114 114 { 115 - 116 115 if (hwmgr->fan_ctrl_is_in_default_mode) { 117 116 hwmgr->fan_ctrl_default_mode = 118 - PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, 117 + PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, 119 118 CG_FDO_CTRL2, FDO_PWM_MODE); 120 119 hwmgr->tmin = 121 120 PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, ··· 148 149 return 0; 149 150 } 150 151 151 - static int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr) 152 + int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr) 152 153 { 153 154 int result; 154 155 ··· 178 179 PPSMC_MSG_SetFanTemperatureTarget, 179 180 hwmgr->thermal_controller. 180 181 advanceFanControlParameters.ucTargetTemperature); 182 + hwmgr->fan_ctrl_enabled = true; 181 183 182 184 return result; 183 185 } ··· 186 186 187 187 int smu7_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr) 188 188 { 189 + hwmgr->fan_ctrl_enabled = false; 189 190 return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_StopFanControl); 190 191 } 191 192 ··· 281 280 PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, 282 281 CG_TACH_STATUS, TACH_PERIOD, tach_period); 283 282 284 - return smu7_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC); 283 + return smu7_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC_RPM); 285 284 } 286 285 287 286 /**
+1 -1
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.h
··· 54 54 extern int smu7_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr); 55 55 extern int smu7_thermal_enable_alert(struct pp_hwmgr *hwmgr); 56 56 extern int smu7_thermal_disable_alert(struct pp_hwmgr *hwmgr); 57 - 57 + extern int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr); 58 58 #endif 59 59
+252 -160
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
··· 111 111 hwmgr->feature_mask & PP_SOCCLK_DPM_MASK ? false : true; 112 112 data->registry_data.mclk_dpm_key_disabled = 113 113 hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true; 114 + data->registry_data.pcie_dpm_key_disabled = 115 + hwmgr->feature_mask & PP_PCIE_DPM_MASK ? false : true; 114 116 115 117 data->registry_data.dcefclk_dpm_key_disabled = 116 118 hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK ? false : true; ··· 123 121 data->registry_data.enable_tdc_limit_feature = 1; 124 122 } 125 123 126 - data->registry_data.pcie_dpm_key_disabled = 1; 124 + data->registry_data.clock_stretcher_support = 125 + hwmgr->feature_mask & PP_CLOCK_STRETCH_MASK ? false : true; 126 + 127 127 data->registry_data.disable_water_mark = 0; 128 128 129 129 data->registry_data.fan_control_support = 1; ··· 1137 1133 int i; 1138 1134 1139 1135 for (i = 0; i < dep_table->count; i++) { 1140 - if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value != 1136 + if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value <= 1141 1137 dep_table->entries[i].clk) { 1142 1138 dpm_table->dpm_levels[dpm_table->count].value = 1143 1139 dep_table->entries[i].clk; ··· 1182 1178 else 1183 1179 pcie_table->lclk[i] = 1184 1180 bios_pcie_table->entries[i].pcie_sclk; 1185 - 1186 - pcie_table->count++; 1187 1181 } 1188 1182 1189 - if (data->registry_data.pcieSpeedOverride) 1190 - pcie_table->pcie_gen[i] = data->registry_data.pcieSpeedOverride; 1191 - else 1192 - pcie_table->pcie_gen[i] = 1193 - bios_pcie_table->entries[bios_pcie_table->count - 1].gen_speed; 1194 - 1195 - if (data->registry_data.pcieLaneOverride) 1196 - pcie_table->pcie_lane[i] = data->registry_data.pcieLaneOverride; 1197 - else 1198 - pcie_table->pcie_lane[i] = 1199 - bios_pcie_table->entries[bios_pcie_table->count - 1].lane_width; 1200 - 1201 - if (data->registry_data.pcieClockOverride) 1202 - pcie_table->lclk[i] = data->registry_data.pcieClockOverride; 1203 - else 1204 - pcie_table->lclk[i] = 1205 - bios_pcie_table->entries[bios_pcie_table->count - 1].pcie_sclk; 1206 - 1207 - pcie_table->count++; 1183 + pcie_table->count = NUM_LINK_LEVELS; 1208 1184 1209 1185 return 0; 1210 1186 } ··· 1274 1290 dpm_table = &(data->dpm_table.eclk_table); 1275 1291 for (i = 0; i < dep_mm_table->count; i++) { 1276 1292 if (i == 0 || dpm_table->dpm_levels 1277 - [dpm_table->count - 1].value != 1293 + [dpm_table->count - 1].value <= 1278 1294 dep_mm_table->entries[i].eclk) { 1279 1295 dpm_table->dpm_levels[dpm_table->count].value = 1280 1296 dep_mm_table->entries[i].eclk; ··· 1290 1306 dpm_table = &(data->dpm_table.vclk_table); 1291 1307 for (i = 0; i < dep_mm_table->count; i++) { 1292 1308 if (i == 0 || dpm_table->dpm_levels 1293 - [dpm_table->count - 1].value != 1309 + [dpm_table->count - 1].value <= 1294 1310 dep_mm_table->entries[i].vclk) { 1295 1311 dpm_table->dpm_levels[dpm_table->count].value = 1296 1312 dep_mm_table->entries[i].vclk; ··· 1304 1320 dpm_table = &(data->dpm_table.dclk_table); 1305 1321 for (i = 0; i < dep_mm_table->count; i++) { 1306 1322 if (i == 0 || dpm_table->dpm_levels 1307 - [dpm_table->count - 1].value != 1323 + [dpm_table->count - 1].value <= 1308 1324 dep_mm_table->entries[i].dclk) { 1309 1325 dpm_table->dpm_levels[dpm_table->count].value = 1310 1326 dep_mm_table->entries[i].dclk; ··· 1416 1432 (struct phm_ppt_v2_information *)(hwmgr->pptable); 1417 1433 1418 1434 data->smc_state_table.pp_table.UlvOffsetVid = 1419 - (uint8_t)(table_info->us_ulv_voltage_offset * 1420 - VOLTAGE_VID_OFFSET_SCALE2 / 1421 - VOLTAGE_VID_OFFSET_SCALE1); 1435 + 
(uint8_t)table_info->us_ulv_voltage_offset; 1422 1436 1423 1437 data->smc_state_table.pp_table.UlvSmnclkDid = 1424 1438 (uint8_t)(table_info->us_ulv_smnclk_did); ··· 1535 1553 current_gfxclk_level->FbMult = 1536 1554 cpu_to_le32(dividers.ulPll_fb_mult); 1537 1555 /* Spread FB Multiplier bit: bit 0:8 int, bit 31:16 frac */ 1538 - current_gfxclk_level->SsOn = dividers.ucPll_ss_enable; 1556 + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 1557 + PHM_PlatformCaps_EngineSpreadSpectrumSupport)) 1558 + current_gfxclk_level->SsOn = dividers.ucPll_ss_enable; 1559 + else 1560 + current_gfxclk_level->SsOn = 0; 1539 1561 current_gfxclk_level->SsFbMult = 1540 1562 cpu_to_le32(dividers.ulPll_ss_fbsmult); 1541 1563 current_gfxclk_level->SsSlewFrac = ··· 2030 2044 table_info->vdd_dep_on_sclk; 2031 2045 uint32_t i; 2032 2046 2033 - for (i = 0; dep_table->count; i++) { 2047 + for (i = 0; i < dep_table->count; i++) { 2034 2048 pp_table->CksEnable[i] = dep_table->entries[i].cks_enable; 2035 - pp_table->CksVidOffset[i] = convert_to_vid( 2036 - dep_table->entries[i].cks_voffset); 2049 + pp_table->CksVidOffset[i] = (uint8_t)(dep_table->entries[i].cks_voffset 2050 + * VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); 2037 2051 } 2038 2052 2039 2053 return 0; ··· 2059 2073 result = pp_atomfwctrl_get_avfs_information(hwmgr, &avfs_params); 2060 2074 if (!result) { 2061 2075 pp_table->MinVoltageVid = (uint8_t) 2062 - convert_to_vid((uint16_t)(avfs_params.ulMaxVddc)); 2063 - pp_table->MaxVoltageVid = (uint8_t) 2064 2076 convert_to_vid((uint16_t)(avfs_params.ulMinVddc)); 2065 - pp_table->BtcGbVdroopTableCksOn.a0 = 2066 - cpu_to_le32(avfs_params.ulGbVdroopTableCksonA0); 2067 - pp_table->BtcGbVdroopTableCksOn.a1 = 2068 - cpu_to_le32(avfs_params.ulGbVdroopTableCksonA1); 2069 - pp_table->BtcGbVdroopTableCksOn.a2 = 2070 - cpu_to_le32(avfs_params.ulGbVdroopTableCksonA2); 2077 + pp_table->MaxVoltageVid = (uint8_t) 2078 + convert_to_vid((uint16_t)(avfs_params.ulMaxVddc)); 2079 + 2080 + pp_table->AConstant[0] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant0); 2081 + pp_table->AConstant[1] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant1); 2082 + pp_table->AConstant[2] = cpu_to_le32(avfs_params.ulMeanNsigmaAcontant2); 2083 + pp_table->DC_tol_sigma = cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma); 2084 + pp_table->Platform_mean = cpu_to_le16(avfs_params.usMeanNsigmaPlatformMean); 2085 + pp_table->Platform_sigma = cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma); 2086 + pp_table->PSM_Age_CompFactor = cpu_to_le16(avfs_params.usPsmAgeComfactor); 2071 2087 2072 2088 pp_table->BtcGbVdroopTableCksOff.a0 = 2073 2089 cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA0); 2090 + pp_table->BtcGbVdroopTableCksOff.a0_shift = 20; 2074 2091 pp_table->BtcGbVdroopTableCksOff.a1 = 2075 2092 cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA1); 2093 + pp_table->BtcGbVdroopTableCksOff.a1_shift = 20; 2076 2094 pp_table->BtcGbVdroopTableCksOff.a2 = 2077 2095 cpu_to_le32(avfs_params.ulGbVdroopTableCksoffA2); 2096 + pp_table->BtcGbVdroopTableCksOff.a2_shift = 20; 2097 + 2098 + pp_table->OverrideBtcGbCksOn = avfs_params.ucEnableGbVdroopTableCkson; 2099 + pp_table->BtcGbVdroopTableCksOn.a0 = 2100 + cpu_to_le32(avfs_params.ulGbVdroopTableCksonA0); 2101 + pp_table->BtcGbVdroopTableCksOn.a0_shift = 20; 2102 + pp_table->BtcGbVdroopTableCksOn.a1 = 2103 + cpu_to_le32(avfs_params.ulGbVdroopTableCksonA1); 2104 + pp_table->BtcGbVdroopTableCksOn.a1_shift = 20; 2105 + pp_table->BtcGbVdroopTableCksOn.a2 = 2106 + 
cpu_to_le32(avfs_params.ulGbVdroopTableCksonA2); 2107 + pp_table->BtcGbVdroopTableCksOn.a2_shift = 20; 2078 2108 2079 2109 pp_table->AvfsGbCksOn.m1 = 2080 2110 cpu_to_le32(avfs_params.ulGbFuseTableCksonM1); 2081 2111 pp_table->AvfsGbCksOn.m2 = 2082 - cpu_to_le16(avfs_params.usGbFuseTableCksonM2); 2112 + cpu_to_le16(avfs_params.ulGbFuseTableCksonM2); 2083 2113 pp_table->AvfsGbCksOn.b = 2084 2114 cpu_to_le32(avfs_params.ulGbFuseTableCksonB); 2085 2115 pp_table->AvfsGbCksOn.m1_shift = 24; 2086 2116 pp_table->AvfsGbCksOn.m2_shift = 12; 2117 + pp_table->AvfsGbCksOn.b_shift = 0; 2087 2118 2119 + pp_table->OverrideAvfsGbCksOn = 2120 + avfs_params.ucEnableGbFuseTableCkson; 2088 2121 pp_table->AvfsGbCksOff.m1 = 2089 2122 cpu_to_le32(avfs_params.ulGbFuseTableCksoffM1); 2090 2123 pp_table->AvfsGbCksOff.m2 = 2091 - cpu_to_le16(avfs_params.usGbFuseTableCksoffM2); 2124 + cpu_to_le16(avfs_params.ulGbFuseTableCksoffM2); 2092 2125 pp_table->AvfsGbCksOff.b = 2093 2126 cpu_to_le32(avfs_params.ulGbFuseTableCksoffB); 2094 2127 pp_table->AvfsGbCksOff.m1_shift = 24; 2095 2128 pp_table->AvfsGbCksOff.m2_shift = 12; 2129 + pp_table->AvfsGbCksOff.b_shift = 0; 2096 2130 2097 - pp_table->AConstant[0] = 2098 - cpu_to_le32(avfs_params.ulMeanNsigmaAcontant0); 2099 - pp_table->AConstant[1] = 2100 - cpu_to_le32(avfs_params.ulMeanNsigmaAcontant1); 2101 - pp_table->AConstant[2] = 2102 - cpu_to_le32(avfs_params.ulMeanNsigmaAcontant2); 2103 - pp_table->DC_tol_sigma = 2104 - cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma); 2105 - pp_table->Platform_mean = 2106 - cpu_to_le16(avfs_params.usMeanNsigmaPlatformMean); 2107 - pp_table->PSM_Age_CompFactor = 2108 - cpu_to_le16(avfs_params.usPsmAgeComfactor); 2109 - pp_table->Platform_sigma = 2110 - cpu_to_le16(avfs_params.usMeanNsigmaDcTolSigma); 2111 - 2112 - for (i = 0; i < dep_table->count; i++) 2113 - pp_table->StaticVoltageOffsetVid[i] = (uint8_t) 2114 - (dep_table->entries[i].sclk_offset * 2131 + for (i = 0; i < dep_table->count; i++) { 2132 + if (dep_table->entries[i].sclk_offset == 0) 2133 + pp_table->StaticVoltageOffsetVid[i] = 248; 2134 + else 2135 + pp_table->StaticVoltageOffsetVid[i] = 2136 + (uint8_t)(dep_table->entries[i].sclk_offset * 2115 2137 VOLTAGE_VID_OFFSET_SCALE2 / 2116 2138 VOLTAGE_VID_OFFSET_SCALE1); 2117 - 2118 - pp_table->OverrideBtcGbCksOn = 2119 - avfs_params.ucEnableGbVdroopTableCkson; 2120 - pp_table->OverrideAvfsGbCksOn = 2121 - avfs_params.ucEnableGbFuseTableCkson; 2139 + } 2122 2140 2123 2141 if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT != 2124 2142 data->disp_clk_quad_eqn_a) && ··· 2131 2141 pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1 = 2132 2142 (int32_t)data->disp_clk_quad_eqn_a; 2133 2143 pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2 = 2134 - (int16_t)data->disp_clk_quad_eqn_b; 2144 + (int32_t)data->disp_clk_quad_eqn_b; 2135 2145 pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b = 2136 2146 (int32_t)data->disp_clk_quad_eqn_c; 2137 2147 } else { 2138 2148 pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1 = 2139 2149 (int32_t)avfs_params.ulDispclk2GfxclkM1; 2140 2150 pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2 = 2141 - (int16_t)avfs_params.usDispclk2GfxclkM2; 2151 + (int32_t)avfs_params.ulDispclk2GfxclkM2; 2142 2152 pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b = 2143 2153 (int32_t)avfs_params.ulDispclk2GfxclkB; 2144 2154 } 2145 2155 2146 2156 pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m1_shift = 24; 2147 2157 pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].m2_shift = 12; 2158 + pp_table->DisplayClock2Gfxclk[DSPCLK_DISPCLK].b_shift = 12; 
2148 2159 2149 2160 if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT != 2150 2161 data->dcef_clk_quad_eqn_a) && ··· 2154 2163 pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1 = 2155 2164 (int32_t)data->dcef_clk_quad_eqn_a; 2156 2165 pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2 = 2157 - (int16_t)data->dcef_clk_quad_eqn_b; 2166 + (int32_t)data->dcef_clk_quad_eqn_b; 2158 2167 pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b = 2159 2168 (int32_t)data->dcef_clk_quad_eqn_c; 2160 2169 } else { 2161 2170 pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1 = 2162 2171 (int32_t)avfs_params.ulDcefclk2GfxclkM1; 2163 2172 pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2 = 2164 - (int16_t)avfs_params.usDcefclk2GfxclkM2; 2173 + (int32_t)avfs_params.ulDcefclk2GfxclkM2; 2165 2174 pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b = 2166 2175 (int32_t)avfs_params.ulDcefclk2GfxclkB; 2167 2176 } 2168 2177 2169 2178 pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m1_shift = 24; 2170 2179 pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].m2_shift = 12; 2180 + pp_table->DisplayClock2Gfxclk[DSPCLK_DCEFCLK].b_shift = 12; 2171 2181 2172 2182 if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT != 2173 2183 data->pixel_clk_quad_eqn_a) && ··· 2177 2185 pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1 = 2178 2186 (int32_t)data->pixel_clk_quad_eqn_a; 2179 2187 pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2 = 2180 - (int16_t)data->pixel_clk_quad_eqn_b; 2188 + (int32_t)data->pixel_clk_quad_eqn_b; 2181 2189 pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b = 2182 2190 (int32_t)data->pixel_clk_quad_eqn_c; 2183 2191 } else { 2184 2192 pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1 = 2185 2193 (int32_t)avfs_params.ulPixelclk2GfxclkM1; 2186 2194 pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2 = 2187 - (int16_t)avfs_params.usPixelclk2GfxclkM2; 2195 + (int32_t)avfs_params.ulPixelclk2GfxclkM2; 2188 2196 pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b = 2189 2197 (int32_t)avfs_params.ulPixelclk2GfxclkB; 2190 2198 } 2191 2199 2192 2200 pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m1_shift = 24; 2193 2201 pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].m2_shift = 12; 2194 - 2202 + pp_table->DisplayClock2Gfxclk[DSPCLK_PIXCLK].b_shift = 12; 2195 2203 if ((PPREGKEY_VEGA10QUADRATICEQUATION_DFLT != 2196 2204 data->phy_clk_quad_eqn_a) && 2197 2205 (PPREGKEY_VEGA10QUADRATICEQUATION_DFLT != ··· 2199 2207 pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1 = 2200 2208 (int32_t)data->phy_clk_quad_eqn_a; 2201 2209 pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2 = 2202 - (int16_t)data->phy_clk_quad_eqn_b; 2210 + (int32_t)data->phy_clk_quad_eqn_b; 2203 2211 pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b = 2204 2212 (int32_t)data->phy_clk_quad_eqn_c; 2205 2213 } else { 2206 2214 pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1 = 2207 2215 (int32_t)avfs_params.ulPhyclk2GfxclkM1; 2208 2216 pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2 = 2209 - (int16_t)avfs_params.usPhyclk2GfxclkM2; 2217 + (int32_t)avfs_params.ulPhyclk2GfxclkM2; 2210 2218 pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b = 2211 2219 (int32_t)avfs_params.ulPhyclk2GfxclkB; 2212 2220 } 2213 2221 2214 2222 pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1_shift = 24; 2215 2223 pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2_shift = 12; 2224 + pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b_shift = 12; 2216 2225 } else { 2217 2226 data->smu_features[GNLD_AVFS].supported = false; 2218 2227 } ··· 2302 2309 (struct phm_ppt_v2_information *)(hwmgr->pptable); 2303 2310 PPTable_t *pp_table = &(data->smc_state_table.pp_table); 2304 
2311 struct pp_atomfwctrl_voltage_table voltage_table; 2312 + struct pp_atomfwctrl_bios_boot_up_values boot_up_values; 2305 2313 2306 2314 result = vega10_setup_default_dpm_tables(hwmgr); 2307 2315 PP_ASSERT_WITH_CODE(!result, ··· 2325 2331 (uint8_t)(table_info->uc_vce_dpm_voltage_mode); 2326 2332 pp_table->Mp0DpmVoltageMode = 2327 2333 (uint8_t)(table_info->uc_mp0_dpm_voltage_mode); 2334 + 2328 2335 pp_table->DisplayDpmVoltageMode = 2329 2336 (uint8_t)(table_info->uc_dcef_dpm_voltage_mode); 2330 2337 ··· 2367 2372 "Failed to initialize UVD Level!", 2368 2373 return result); 2369 2374 2370 - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 2371 - PHM_PlatformCaps_ClockStretcher)) { 2375 + if (data->registry_data.clock_stretcher_support) { 2372 2376 result = vega10_populate_clock_stretcher_table(hwmgr); 2373 2377 PP_ASSERT_WITH_CODE(!result, 2374 2378 "Failed to populate Clock Stretcher Table!", 2375 2379 return result); 2380 + } 2381 + 2382 + result = pp_atomfwctrl_get_vbios_bootup_values(hwmgr, &boot_up_values); 2383 + if (!result) { 2384 + data->vbios_boot_state.vddc = boot_up_values.usVddc; 2385 + data->vbios_boot_state.vddci = boot_up_values.usVddci; 2386 + data->vbios_boot_state.mvddc = boot_up_values.usMvddc; 2387 + data->vbios_boot_state.gfx_clock = boot_up_values.ulGfxClk; 2388 + data->vbios_boot_state.mem_clock = boot_up_values.ulUClk; 2389 + data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk; 2390 + if (0 != boot_up_values.usVddc) { 2391 + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, 2392 + PPSMC_MSG_SetFloorSocVoltage, 2393 + (boot_up_values.usVddc * 4)); 2394 + data->vbios_boot_state.bsoc_vddc_lock = true; 2395 + } else { 2396 + data->vbios_boot_state.bsoc_vddc_lock = false; 2397 + } 2376 2398 } 2377 2399 2378 2400 result = vega10_populate_avfs_parameters(hwmgr); ··· 2416 2404 PP_ASSERT_WITH_CODE(!result, 2417 2405 "Failed to upload PPtable!", return result); 2418 2406 2419 - if (data->smu_features[GNLD_AVFS].supported) { 2420 - uint32_t features_enabled; 2421 - result = vega10_get_smc_features(hwmgr->smumgr, &features_enabled); 2422 - PP_ASSERT_WITH_CODE(!result, 2423 - "Failed to Retrieve Enabled Features!", 2424 - return result); 2425 - if (!(features_enabled & (1 << FEATURE_AVFS_BIT))) { 2426 - result = vega10_perform_btc(hwmgr->smumgr); 2427 - PP_ASSERT_WITH_CODE(!result, 2428 - "Failed to Perform BTC!", 2407 + result = vega10_avfs_enable(hwmgr, true); 2408 + PP_ASSERT_WITH_CODE(!result, "Attempt to enable AVFS feature Failed!", 2429 2409 return result); 2430 - result = vega10_avfs_enable(hwmgr, true); 2431 - PP_ASSERT_WITH_CODE(!result, 2432 - "Attempt to enable AVFS feature Failed!", 2433 - return result); 2434 - result = vega10_save_vft_table(hwmgr->smumgr, 2435 - (uint8_t *)&(data->smc_state_table.avfs_table)); 2436 - PP_ASSERT_WITH_CODE(!result, 2437 - "Attempt to save VFT table Failed!", 2438 - return result); 2439 - } else { 2440 - data->smu_features[GNLD_AVFS].enabled = true; 2441 - result = vega10_restore_vft_table(hwmgr->smumgr, 2442 - (uint8_t *)&(data->smc_state_table.avfs_table)); 2443 - PP_ASSERT_WITH_CODE(!result, 2444 - "Attempt to restore VFT table Failed!", 2445 - return result;); 2446 - } 2447 - } 2448 2410 2449 2411 return 0; 2450 2412 } ··· 2438 2452 "Enable THERMAL Feature Failed!", 2439 2453 return -1); 2440 2454 data->smu_features[GNLD_THERMAL].enabled = true; 2455 + } 2456 + 2457 + return 0; 2458 + } 2459 + 2460 + static int vega10_disable_thermal_protection(struct pp_hwmgr *hwmgr) 2461 + { 2462 + struct vega10_hwmgr *data 
= (struct vega10_hwmgr *)(hwmgr->backend); 2463 + 2464 + if (data->smu_features[GNLD_THERMAL].supported) { 2465 + if (!data->smu_features[GNLD_THERMAL].enabled) 2466 + pr_info("THERMAL Feature Already disabled!"); 2467 + 2468 + PP_ASSERT_WITH_CODE( 2469 + !vega10_enable_smc_features(hwmgr->smumgr, 2470 + false, 2471 + data->smu_features[GNLD_THERMAL].smu_feature_bitmap), 2472 + "disable THERMAL Feature Failed!", 2473 + return -1); 2474 + data->smu_features[GNLD_THERMAL].enabled = false; 2441 2475 } 2442 2476 2443 2477 return 0; ··· 2541 2535 return 0; 2542 2536 } 2543 2537 2538 + static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap) 2539 + { 2540 + struct vega10_hwmgr *data = 2541 + (struct vega10_hwmgr *)(hwmgr->backend); 2542 + uint32_t i, feature_mask = 0; 2543 + 2544 + 2545 + if(data->smu_features[GNLD_LED_DISPLAY].supported == true){ 2546 + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr, 2547 + true, data->smu_features[GNLD_LED_DISPLAY].smu_feature_bitmap), 2548 + "Attempt to Enable LED DPM feature Failed!", return -EINVAL); 2549 + data->smu_features[GNLD_LED_DISPLAY].enabled = true; 2550 + } 2551 + 2552 + for (i = 0; i < GNLD_DPM_MAX; i++) { 2553 + if (data->smu_features[i].smu_feature_bitmap & bitmap) { 2554 + if (data->smu_features[i].supported) { 2555 + if (data->smu_features[i].enabled) { 2556 + feature_mask |= data->smu_features[i]. 2557 + smu_feature_bitmap; 2558 + data->smu_features[i].enabled = false; 2559 + } 2560 + } 2561 + } 2562 + } 2563 + 2564 + vega10_enable_smc_features(hwmgr->smumgr, false, feature_mask); 2565 + 2566 + return 0; 2567 + } 2568 + 2544 2569 /** 2545 2570 * @brief Tell SMC to enabled the supported DPMs. 2546 2571 * ··· 2613 2576 data->smu_features[GNLD_LED_DISPLAY].enabled = true; 2614 2577 } 2615 2578 2579 + if (data->vbios_boot_state.bsoc_vddc_lock) { 2580 + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, 2581 + PPSMC_MSG_SetFloorSocVoltage, 0); 2582 + data->vbios_boot_state.bsoc_vddc_lock = false; 2583 + } 2584 + 2616 2585 if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 2617 2586 PHM_PlatformCaps_Falcon_QuickTransition)) { 2618 2587 if (data->smu_features[GNLD_ACDC].supported) { ··· 2644 2601 PP_ASSERT_WITH_CODE(!tmp_result, 2645 2602 "Failed to configure telemetry!", 2646 2603 return tmp_result); 2647 - 2648 - vega10_set_tools_address(hwmgr->smumgr); 2649 2604 2650 2605 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, 2651 2606 PPSMC_MSG_NumOfDisplays, 0); ··· 3921 3880 3922 3881 static int vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) 3923 3882 { 3924 - if (mode) { 3925 - /* stop auto-manage */ 3926 - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 3927 - PHM_PlatformCaps_MicrocodeFanControl)) 3928 - vega10_fan_ctrl_stop_smc_fan_control(hwmgr); 3929 - vega10_fan_ctrl_set_static_mode(hwmgr, mode); 3930 - } else 3931 - /* restart auto-manage */ 3932 - vega10_fan_ctrl_reset_fan_speed_to_default(hwmgr); 3883 + int result = 0; 3933 3884 3934 - return 0; 3885 + switch (mode) { 3886 + case AMD_FAN_CTRL_NONE: 3887 + result = vega10_fan_ctrl_set_fan_speed_percent(hwmgr, 100); 3888 + break; 3889 + case AMD_FAN_CTRL_MANUAL: 3890 + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 3891 + PHM_PlatformCaps_MicrocodeFanControl)) 3892 + result = vega10_fan_ctrl_stop_smc_fan_control(hwmgr); 3893 + break; 3894 + case AMD_FAN_CTRL_AUTO: 3895 + result = vega10_fan_ctrl_set_static_mode(hwmgr, mode); 3896 + if (!result) 3897 + result = vega10_fan_ctrl_start_smc_fan_control(hwmgr); 
3898 + break; 3899 + default: 3900 + break; 3901 + } 3902 + return result; 3935 3903 } 3936 3904 3937 3905 static int vega10_get_fan_control_mode(struct pp_hwmgr *hwmgr) 3938 3906 { 3939 - uint32_t reg; 3907 + struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); 3940 3908 3941 - if (hwmgr->fan_ctrl_is_in_default_mode) { 3942 - return hwmgr->fan_ctrl_default_mode; 3943 - } else { 3944 - reg = soc15_get_register_offset(THM_HWID, 0, 3945 - mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2); 3946 - return (cgs_read_register(hwmgr->device, reg) & 3947 - CG_FDO_CTRL2__FDO_PWM_MODE_MASK) >> 3948 - CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; 3949 - } 3909 + if (data->smu_features[GNLD_FAN_CONTROL].enabled == false) 3910 + return AMD_FAN_CTRL_MANUAL; 3911 + else 3912 + return AMD_FAN_CTRL_AUTO; 3950 3913 } 3951 3914 3952 3915 static int vega10_get_dal_power_level(struct pp_hwmgr *hwmgr, ··· 4193 4148 4194 4149 switch (type) { 4195 4150 case PP_SCLK: 4196 - if (data->registry_data.sclk_dpm_key_disabled) 4197 - break; 4198 - 4199 4151 for (i = 0; i < 32; i++) { 4200 4152 if (mask & (1 << i)) 4201 4153 break; 4202 4154 } 4155 + data->smc_state_table.gfx_boot_level = i; 4203 4156 4204 - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter( 4205 - hwmgr->smumgr, 4206 - PPSMC_MSG_SetSoftMinGfxclkByIndex, 4207 - i), 4208 - "Failed to set soft min sclk index!", 4209 - return -1); 4157 + for (i = 31; i >= 0; i--) { 4158 + if (mask & (1 << i)) 4159 + break; 4160 + } 4161 + data->smc_state_table.gfx_max_level = i; 4162 + 4163 + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), 4164 + "Failed to upload boot level to lowest!", 4165 + return -EINVAL); 4166 + 4167 + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), 4168 + "Failed to upload dpm max level to highest!", 4169 + return -EINVAL); 4210 4170 break; 4211 4171 4212 4172 case PP_MCLK: 4213 - if (data->registry_data.mclk_dpm_key_disabled) 4214 - break; 4215 - 4216 4173 for (i = 0; i < 32; i++) { 4217 4174 if (mask & (1 << i)) 4218 4175 break; 4219 4176 } 4220 4177 4221 - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter( 4222 - hwmgr->smumgr, 4223 - PPSMC_MSG_SetSoftMinUclkByIndex, 4224 - i), 4225 - "Failed to set soft min mclk index!", 4226 - return -1); 4178 + for (i = 0; i < 32; i++) { 4179 + if (mask & (1 << i)) 4180 + break; 4181 + } 4182 + data->smc_state_table.mem_boot_level = i; 4183 + 4184 + for (i = 31; i >= 0; i--) { 4185 + if (mask & (1 << i)) 4186 + break; 4187 + } 4188 + data->smc_state_table.mem_max_level = i; 4189 + 4190 + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), 4191 + "Failed to upload boot level to lowest!", 4192 + return -EINVAL); 4193 + 4194 + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), 4195 + "Failed to upload dpm max level to highest!", 4196 + return -EINVAL); 4197 + 4227 4198 break; 4228 4199 4229 4200 case PP_PCIE: 4230 - if (data->registry_data.pcie_dpm_key_disabled) 4231 - break; 4232 - 4233 - for (i = 0; i < 32; i++) { 4234 - if (mask & (1 << i)) 4235 - break; 4236 - } 4237 - 4238 - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter( 4239 - hwmgr->smumgr, 4240 - PPSMC_MSG_SetMinLinkDpmByIndex, 4241 - i), 4242 - "Failed to set min pcie index!", 4243 - return -1); 4244 - break; 4245 4201 default: 4246 4202 break; 4247 4203 } ··· 4441 4395 return is_update_required; 4442 4396 } 4443 4397 4398 + static int vega10_disable_dpm_tasks(struct pp_hwmgr *hwmgr) 4399 + { 4400 + int tmp_result, result = 0; 4401 + 4402 + tmp_result = (vega10_is_dpm_running(hwmgr)) ? 
0 : -1; 4403 + PP_ASSERT_WITH_CODE(tmp_result == 0, 4404 + "DPM is not running right now, no need to disable DPM!", 4405 + return 0); 4406 + 4407 + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 4408 + PHM_PlatformCaps_ThermalController)) 4409 + vega10_disable_thermal_protection(hwmgr); 4410 + 4411 + tmp_result = vega10_disable_power_containment(hwmgr); 4412 + PP_ASSERT_WITH_CODE((tmp_result == 0), 4413 + "Failed to disable power containment!", result = tmp_result); 4414 + 4415 + tmp_result = vega10_avfs_enable(hwmgr, false); 4416 + PP_ASSERT_WITH_CODE((tmp_result == 0), 4417 + "Failed to disable AVFS!", result = tmp_result); 4418 + 4419 + tmp_result = vega10_stop_dpm(hwmgr, SMC_DPM_FEATURES); 4420 + PP_ASSERT_WITH_CODE((tmp_result == 0), 4421 + "Failed to stop DPM!", result = tmp_result); 4422 + 4423 + return result; 4424 + } 4425 + 4426 + static int vega10_power_off_asic(struct pp_hwmgr *hwmgr) 4427 + { 4428 + struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); 4429 + int result; 4430 + 4431 + result = vega10_disable_dpm_tasks(hwmgr); 4432 + PP_ASSERT_WITH_CODE((0 == result), 4433 + "[disable_dpm_tasks] Failed to disable DPM!", 4434 + ); 4435 + data->water_marks_bitmap &= ~(WaterMarksLoaded); 4436 + 4437 + return result; 4438 + } 4439 + 4440 + 4444 4441 static const struct pp_hwmgr_func vega10_hwmgr_funcs = { 4445 4442 .backend_init = vega10_hwmgr_backend_init, 4446 4443 .backend_fini = vega10_hwmgr_backend_fini, 4447 4444 .asic_setup = vega10_setup_asic_task, 4448 4445 .dynamic_state_management_enable = vega10_enable_dpm_tasks, 4446 + .dynamic_state_management_disable = vega10_disable_dpm_tasks, 4449 4447 .get_num_of_pp_table_entries = 4450 4448 vega10_get_number_of_powerplay_table_entries, 4451 4449 .get_power_state_size = vega10_get_power_state_size, ··· 4529 4439 .check_states_equal = vega10_check_states_equal, 4530 4440 .check_smc_update_required_for_display_configuration = 4531 4441 vega10_check_smc_update_required_for_display_configuration, 4442 + .power_off_asic = vega10_power_off_asic, 4443 + .disable_smc_firmware_ctf = vega10_thermal_disable_alert, 4532 4444 }; 4533 4445 4534 4446 int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
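One detail worth calling out in the vega10_hwmgr.c changes: vega10_force_clock_level() now derives both ends of the requested level mask, taking the lowest set bit as the boot (minimum) level and the highest set bit as the maximum level, then uploads both to the SMU instead of sending a per-clock soft-minimum message. The bit scan itself is plain lowest/highest-set-bit extraction, for example:

#include <stdint.h>
#include <stdio.h>

/* lowest and highest set bit of a 32-bit level mask, as the loops in
 * vega10_force_clock_level() compute them (returns 32/-1 for an empty mask) */
static int lowest_level(uint32_t mask)
{
	int i;

	for (i = 0; i < 32; i++)
		if (mask & (1u << i))
			break;
	return i;
}

static int highest_level(uint32_t mask)
{
	int i;

	for (i = 31; i >= 0; i--)
		if (mask & (1u << i))
			break;
	return i;
}

int main(void)
{
	uint32_t mask = 0x0000001C;    /* user asked for levels 2..4 */

	printf("boot level %d, max level %d\n", lowest_level(mask), highest_level(mask));
	return 0;
}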
+3
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
··· 177 177 }; 178 178 179 179 struct vega10_vbios_boot_state { 180 + bool bsoc_vddc_lock; 180 181 uint16_t vddc; 181 182 uint16_t vddci; 183 + uint16_t mvddc; 184 + uint16_t vdd_gfx; 182 185 uint32_t gfx_clock; 183 186 uint32_t mem_clock; 184 187 uint32_t soc_clock;
+25 -2
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
··· 48 48 table->Tliquid1Limit = cpu_to_le16(tdp_table->usTemperatureLimitLiquid1); 49 49 table->Tliquid2Limit = cpu_to_le16(tdp_table->usTemperatureLimitLiquid2); 50 50 table->TplxLimit = cpu_to_le16(tdp_table->usTemperatureLimitPlx); 51 - table->LoadLineResistance = cpu_to_le16( 52 - hwmgr->platform_descriptor.LoadLineSlope); 51 + table->LoadLineResistance = 52 + hwmgr->platform_descriptor.LoadLineSlope * 256; 53 53 table->FitLimit = 0; /* Not used for Vega10 */ 54 54 55 55 table->Liquid1_I2C_address = tdp_table->ucLiquid1_I2C_address; ··· 111 111 } 112 112 113 113 return result; 114 + } 115 + 116 + int vega10_disable_power_containment(struct pp_hwmgr *hwmgr) 117 + { 118 + struct vega10_hwmgr *data = 119 + (struct vega10_hwmgr *)(hwmgr->backend); 120 + 121 + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, 122 + PHM_PlatformCaps_PowerContainment)) { 123 + if (data->smu_features[GNLD_PPT].supported) 124 + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr, 125 + false, data->smu_features[GNLD_PPT].smu_feature_bitmap), 126 + "Attempt to disable PPT feature Failed!", 127 + data->smu_features[GNLD_PPT].supported = false); 128 + 129 + if (data->smu_features[GNLD_TDC].supported) 130 + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr, 131 + false, data->smu_features[GNLD_TDC].smu_feature_bitmap), 132 + "Attempt to disable PPT feature Failed!", 133 + data->smu_features[GNLD_TDC].supported = false); 134 + } 135 + 136 + return 0; 114 137 } 115 138 116 139 static int vega10_set_overdrive_target_percentage(struct pp_hwmgr *hwmgr,
+1
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h
··· 60 60 int vega10_enable_power_containment(struct pp_hwmgr *hwmgr); 61 61 int vega10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n); 62 62 int vega10_power_control_set_level(struct pp_hwmgr *hwmgr); 63 + int vega10_disable_power_containment(struct pp_hwmgr *hwmgr); 63 64 64 65 #endif /* _VEGA10_POWERTUNE_H_ */ 65 66
+2 -2
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
··· 407 407 tdp_table->ucPlx_I2C_address = power_tune_table->ucPlx_I2C_address; 408 408 tdp_table->ucPlx_I2C_Line = power_tune_table->ucPlx_I2C_LineSCL; 409 409 tdp_table->ucPlx_I2C_LineSDA = power_tune_table->ucPlx_I2C_LineSDA; 410 - hwmgr->platform_descriptor.LoadLineSlope = power_tune_table->usLoadLineResistance; 410 + hwmgr->platform_descriptor.LoadLineSlope = le16_to_cpu(power_tune_table->usLoadLineResistance); 411 411 } else { 412 412 power_tune_table_v2 = (ATOM_Vega10_PowerTune_Table_V2 *)table; 413 413 tdp_table->usMaximumPowerDeliveryLimit = le16_to_cpu(power_tune_table_v2->usSocketPowerLimit); ··· 453 453 tdp_table->ucPlx_I2C_LineSDA = sda; 454 454 455 455 hwmgr->platform_descriptor.LoadLineSlope = 456 - power_tune_table_v2->usLoadLineResistance; 456 + le16_to_cpu(power_tune_table_v2->usLoadLineResistance); 457 457 } 458 458 459 459 *info_tdp_table = tdp_table;
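The usLoadLineResistance change in vega10_processpptables.c is a straightforward endianness fix: fields in the ATOM power-tune tables are stored little-endian, so they need le16_to_cpu() before being used as host integers (a no-op on little-endian x86, a byte swap on big-endian hosts). A stand-alone illustration of the conversion, using a hand-rolled read_le16() helper rather than the kernel's le16_to_cpu():

#include <stdint.h>
#include <stdio.h>

/* read a little-endian u16 from a byte buffer, regardless of host endianness */
static uint16_t read_le16(const uint8_t *p)
{
	return (uint16_t)(p[0] | ((uint16_t)p[1] << 8));
}

int main(void)
{
	/* two bytes as they would sit in the VBIOS image: 0x1234 stored LE */
	const uint8_t raw[2] = { 0x34, 0x12 };

	printf("LoadLineSlope = 0x%04x\n", read_le16(raw));
	return 0;
}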
+51 -29
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
··· 381 381 382 382 temp = cgs_read_register(hwmgr->device, reg); 383 383 384 - temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> 385 - CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; 384 + temp = (temp & CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP_MASK) >> 385 + CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP__SHIFT; 386 386 387 - /* Bit 9 means the reading is lower than the lowest usable value. */ 388 - if (temp & 0x200) 389 - temp = VEGA10_THERMAL_MAXIMUM_TEMP_READING; 390 - else 391 - temp = temp & 0x1ff; 387 + temp = temp & 0x1ff; 392 388 393 389 temp *= PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 394 390 ··· 420 424 mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL); 421 425 422 426 val = cgs_read_register(hwmgr->device, reg); 423 - val &= ~(THM_THERMAL_INT_CTRL__DIG_THERM_INTH_MASK); 424 - val |= (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) << 425 - THM_THERMAL_INT_CTRL__DIG_THERM_INTH__SHIFT; 426 - val &= ~(THM_THERMAL_INT_CTRL__DIG_THERM_INTL_MASK); 427 - val |= (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) << 428 - THM_THERMAL_INT_CTRL__DIG_THERM_INTL__SHIFT; 427 + 428 + val &= (~THM_THERMAL_INT_CTRL__MAX_IH_CREDIT_MASK); 429 + val |= (5 << THM_THERMAL_INT_CTRL__MAX_IH_CREDIT__SHIFT); 430 + 431 + val &= (~THM_THERMAL_INT_CTRL__THERM_IH_HW_ENA_MASK); 432 + val |= (1 << THM_THERMAL_INT_CTRL__THERM_IH_HW_ENA__SHIFT); 433 + 434 + val &= (~THM_THERMAL_INT_CTRL__DIG_THERM_INTH_MASK); 435 + val |= ((high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) 436 + << THM_THERMAL_INT_CTRL__DIG_THERM_INTH__SHIFT); 437 + 438 + val &= (~THM_THERMAL_INT_CTRL__DIG_THERM_INTL_MASK); 439 + val |= ((low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) 440 + << THM_THERMAL_INT_CTRL__DIG_THERM_INTL__SHIFT); 441 + 442 + val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); 443 + 429 444 cgs_write_register(hwmgr->device, reg, val); 430 445 431 446 reg = soc15_get_register_offset(THM_HWID, 0, 432 447 mmTHM_TCON_HTC_BASE_IDX, mmTHM_TCON_HTC); 433 - 434 - val = cgs_read_register(hwmgr->device, reg); 435 - val &= ~(THM_TCON_HTC__HTC_TMP_LMT_MASK); 436 - val |= (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES) << 437 - THM_TCON_HTC__HTC_TMP_LMT__SHIFT; 438 - cgs_write_register(hwmgr->device, reg, val); 439 448 440 449 return 0; 441 450 } ··· 483 482 static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr) 484 483 { 485 484 struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); 485 + uint32_t val = 0; 486 + uint32_t reg; 486 487 487 488 if (data->smu_features[GNLD_FW_CTF].supported) { 488 489 if (data->smu_features[GNLD_FW_CTF].enabled) 489 490 printk("[Thermal_EnableAlert] FW CTF Already Enabled!\n"); 491 + 492 + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr, 493 + true, 494 + data->smu_features[GNLD_FW_CTF].smu_feature_bitmap), 495 + "Attempt to Enable FW CTF feature Failed!", 496 + return -1); 497 + data->smu_features[GNLD_FW_CTF].enabled = true; 490 498 } 491 499 492 - PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr, 493 - true, 494 - data->smu_features[GNLD_FW_CTF].smu_feature_bitmap), 495 - "Attempt to Enable FW CTF feature Failed!", 496 - return -1); 497 - data->smu_features[GNLD_FW_CTF].enabled = true; 500 + val |= (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT); 501 + val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT); 502 + val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT); 503 + 504 + reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); 505 + cgs_write_register(hwmgr->device, reg, val); 506 + 498 507 return 0; 499 
508 } 500 509 ··· 512 501 * Disable thermal alerts on the RV770 thermal controller. 513 502 * @param hwmgr The address of the hardware manager. 514 503 */ 515 - static int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr) 504 + int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr) 516 505 { 517 506 struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); 507 + uint32_t reg; 518 508 519 509 if (data->smu_features[GNLD_FW_CTF].supported) { 520 510 if (!data->smu_features[GNLD_FW_CTF].enabled) 521 511 printk("[Thermal_EnableAlert] FW CTF Already disabled!\n"); 522 - } 523 512 524 - PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr, 513 + 514 + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr->smumgr, 525 515 false, 526 516 data->smu_features[GNLD_FW_CTF].smu_feature_bitmap), 527 517 "Attempt to disable FW CTF feature Failed!", 528 518 return -1); 529 - data->smu_features[GNLD_FW_CTF].enabled = false; 519 + data->smu_features[GNLD_FW_CTF].enabled = false; 520 + } 521 + 522 + reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); 523 + cgs_write_register(hwmgr->device, reg, 0); 524 + 530 525 return 0; 531 526 } 532 527 ··· 578 561 advanceFanControlParameters.ulMinFanSCLKAcousticLimit); 579 562 table->FanTargetTemperature = hwmgr->thermal_controller. 580 563 advanceFanControlParameters.usTMax; 564 + 565 + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, 566 + PPSMC_MSG_SetFanTemperatureTarget, 567 + (uint32_t)table->FanTargetTemperature); 568 + 581 569 table->FanPwmMin = hwmgr->thermal_controller. 582 570 advanceFanControlParameters.usPWMMin * 255 / 100; 583 571 table->FanTargetGfxclk = (uint16_t)(hwmgr->thermal_controller.
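The THM_THERMAL_INT_CTRL programming in vega10_thermal.c is a textbook read-modify-write: read the register once, clear each field with its _MASK, OR in the new value shifted by its __SHIFT, and write the result back, so unrelated fields (IH credit, hardware-enable, trigger mask) keep their values. The idiom reduced to a tiny stand-alone helper; the DEMO_* field definitions are made up for illustration.

#include <stdint.h>
#include <stdio.h>

#define DEMO_INTH_MASK   0x000000FFu   /* pretend field: high temperature threshold */
#define DEMO_INTH_SHIFT  0

/* clear the field, then insert the new value, same pattern as the
 * THM_THERMAL_INT_CTRL updates above */
static uint32_t set_field(uint32_t reg, uint32_t mask, unsigned int shift, uint32_t val)
{
	reg &= ~mask;
	reg |= (val << shift) & mask;
	return reg;
}

int main(void)
{
	uint32_t reg = 0xDEAD0000u;    /* bits the update must not disturb */

	reg = set_field(reg, DEMO_INTH_MASK, DEMO_INTH_SHIFT, 90);  /* 90 degrees C */
	printf("0x%08x\n", reg);
	return 0;
}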
+2
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h
··· 78 78 uint32_t *speed); 79 79 extern int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr); 80 80 extern uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr); 81 + extern int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr); 82 + int vega10_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr); 81 83 82 84 #endif 83 85
+1 -1
drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
··· 431 431 struct pp_display_clock_request *clock); 432 432 433 433 extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); 434 - 434 + extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr); 435 435 #endif /* _HARDWARE_MANAGER_H_ */ 436 436
+2 -2
drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
··· 368 368 int (*get_mclk_od)(struct pp_hwmgr *hwmgr); 369 369 int (*set_mclk_od)(struct pp_hwmgr *hwmgr, uint32_t value); 370 370 int (*read_sensor)(struct pp_hwmgr *hwmgr, int idx, void *value, int *size); 371 - int (*request_firmware)(struct pp_hwmgr *hwmgr); 372 - int (*release_firmware)(struct pp_hwmgr *hwmgr); 373 371 int (*set_power_profile_state)(struct pp_hwmgr *hwmgr, 374 372 struct amd_pp_profile *request); 375 373 int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable); 374 + int (*disable_smc_firmware_ctf)(struct pp_hwmgr *hwmgr); 376 375 }; 377 376 378 377 struct pp_table_func { ··· 764 765 struct pp_thermal_controller_info thermal_controller; 765 766 bool fan_ctrl_is_in_default_mode; 766 767 uint32_t fan_ctrl_default_mode; 768 + bool fan_ctrl_enabled; 767 769 uint32_t tmin; 768 770 struct phm_microcode_version_info microcode_version_info; 769 771 uint32_t ps_size;
+16 -2
drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
··· 30 30 * SMU TEAM: Always increment the interface version if 31 31 * any structure is changed in this file 32 32 */ 33 - #define SMU9_DRIVER_IF_VERSION 0xB 33 + #define SMU9_DRIVER_IF_VERSION 0xD 34 34 35 35 #define PPTABLE_V10_SMU_VERSION 1 36 36 ··· 302 302 303 303 uint32_t DpmLevelPowerDelta; 304 304 305 - uint32_t Reserved[19]; 305 + uint8_t EnableBoostState; 306 + uint8_t AConstant_Shift; 307 + uint8_t DC_tol_sigma_Shift; 308 + uint8_t PSM_Age_CompFactor_Shift; 309 + 310 + uint16_t BoostStartTemperature; 311 + uint16_t BoostStopTemperature; 312 + 313 + PllSetting_t GfxBoostState; 314 + 315 + uint32_t Reserved[14]; 306 316 307 317 /* Padding - ignore */ 308 318 uint32_t MmHubPadding[7]; /* SMU internal use */ ··· 473 463 #define DB_IR_SHIFT 25 474 464 #define DB_PCC_SHIFT 26 475 465 #define DB_EDC_SHIFT 27 466 + 467 + #define REMOVE_FMAX_MARGIN_BIT 0x0 468 + #define REMOVE_DCTOL_MARGIN_BIT 0x1 469 + #define REMOVE_PLATFORM_MARGIN_BIT 0x2 476 470 477 471 #endif
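A sizing note on the smu9_driver_if.h hunk: the header itself says the interface version must be bumped whenever a structure changes, hence 0xB to 0xD, and the Reserved[] array shrinks from 19 to 14 dwords to absorb the new fields without growing PPTable_t. The arithmetic only balances if PllSetting_t occupies three 32-bit words: four uint8_t fields (1 dword) plus two uint16_t fields (1 dword) plus GfxBoostState account for the 5 dwords removed from Reserved[]. That size for PllSetting_t is an inference from this arithmetic, not something stated in the diff.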
+4 -1
drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
··· 122 122 #define PPSMC_MSG_SetFanMinPwm 0x52 123 123 #define PPSMC_MSG_ConfigureGfxDidt 0x55 124 124 #define PPSMC_MSG_NumOfDisplays 0x56 125 - #define PPSMC_Message_Count 0x57 125 + #define PPSMC_MSG_ReadSerialNumTop32 0x58 126 + #define PPSMC_MSG_ReadSerialNumBottom32 0x59 127 + #define PPSMC_Message_Count 0x5A 128 + 126 129 127 130 typedef int PPSMC_Msg; 128 131
+122 -102
drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
··· 74 74 return false; 75 75 } 76 76 77 - /** 78 - * Check if SMC has responded to previous message. 79 - * 80 - * @param smumgr the address of the powerplay hardware manager. 81 - * @return TRUE SMC has responded, FALSE otherwise. 82 - */ 77 + /* 78 + * Check if SMC has responded to previous message. 79 + * 80 + * @param smumgr the address of the powerplay hardware manager. 81 + * @return TRUE SMC has responded, FALSE otherwise. 82 + */ 83 83 static uint32_t vega10_wait_for_response(struct pp_smumgr *smumgr) 84 84 { 85 85 uint32_t reg; 86 86 87 87 if (!vega10_is_smc_ram_running(smumgr)) 88 - return -1; 88 + return -EINVAL; 89 89 90 90 reg = soc15_get_register_offset(MP1_HWID, 0, 91 91 mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); ··· 96 96 return cgs_read_register(smumgr->device, reg); 97 97 } 98 98 99 - /** 100 - * Send a message to the SMC, and do not wait for its response. 101 - * 102 - * @param smumgr the address of the powerplay hardware manager. 103 - * @param msg the message to send. 104 - * @return Always return 0. 105 - */ 99 + /* 100 + * Send a message to the SMC, and do not wait for its response. 101 + * @param smumgr the address of the powerplay hardware manager. 102 + * @param msg the message to send. 103 + * @return Always return 0. 104 + */ 106 105 int vega10_send_msg_to_smc_without_waiting(struct pp_smumgr *smumgr, 107 106 uint16_t msg) 108 107 { 109 108 uint32_t reg; 110 109 111 110 if (!vega10_is_smc_ram_running(smumgr)) 112 - return -1; 111 + return -EINVAL; 113 112 114 113 reg = soc15_get_register_offset(MP1_HWID, 0, 115 114 mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66); ··· 117 118 return 0; 118 119 } 119 120 120 - /** 121 - * Send a message to the SMC, and wait for its response. 122 - * 123 - * @param smumgr the address of the powerplay hardware manager. 124 - * @param msg the message to send. 125 - * @return The response that came from the SMC. 126 - */ 121 + /* 122 + * Send a message to the SMC, and wait for its response. 123 + * @param smumgr the address of the powerplay hardware manager. 124 + * @param msg the message to send. 125 + * @return Always return 0. 126 + */ 127 127 int vega10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg) 128 128 { 129 129 uint32_t reg; 130 130 131 131 if (!vega10_is_smc_ram_running(smumgr)) 132 - return -1; 132 + return -EINVAL; 133 133 134 134 vega10_wait_for_response(smumgr); 135 135 ··· 138 140 139 141 vega10_send_msg_to_smc_without_waiting(smumgr, msg); 140 142 141 - PP_ASSERT_WITH_CODE(vega10_wait_for_response(smumgr) == 1, 142 - "Failed to send Message.", 143 - return -1); 143 + if (vega10_wait_for_response(smumgr) != 1) 144 + pr_err("Failed to send message: 0x%x\n", msg); 144 145 145 146 return 0; 146 147 } 147 148 148 - /** 149 + /* 149 150 * Send a message to the SMC with parameter 150 151 * @param smumgr: the address of the powerplay hardware manager. 151 152 * @param msg: the message to send. 152 153 * @param parameter: the parameter to send 153 - * @return The response that came from the SMC. 154 + * @return Always return 0. 
154 155 */ 155 156 int vega10_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr, 156 157 uint16_t msg, uint32_t parameter) ··· 157 160 uint32_t reg; 158 161 159 162 if (!vega10_is_smc_ram_running(smumgr)) 160 - return -1; 163 + return -EINVAL; 161 164 162 165 vega10_wait_for_response(smumgr); 163 166 ··· 171 174 172 175 vega10_send_msg_to_smc_without_waiting(smumgr, msg); 173 176 174 - PP_ASSERT_WITH_CODE(vega10_wait_for_response(smumgr) == 1, 175 - "Failed to send Message.", 176 - return -1); 177 + if (vega10_wait_for_response(smumgr) != 1) 178 + pr_err("Failed to send message: 0x%x\n", msg); 177 179 178 180 return 0; 179 181 } 180 182 181 183 182 - /** 183 - * Send a message to the SMC with parameter, do not wait for response 184 - * 185 - * @param smumgr: the address of the powerplay hardware manager. 186 - * @param msg: the message to send. 187 - * @param parameter: the parameter to send 188 - * @return The response that came from the SMC. 189 - */ 184 + /* 185 + * Send a message to the SMC with parameter, do not wait for response 186 + * @param smumgr: the address of the powerplay hardware manager. 187 + * @param msg: the message to send. 188 + * @param parameter: the parameter to send 189 + * @return The response that came from the SMC. 190 + */ 190 191 int vega10_send_msg_to_smc_with_parameter_without_waiting( 191 192 struct pp_smumgr *smumgr, uint16_t msg, uint32_t parameter) 192 193 { ··· 197 202 return vega10_send_msg_to_smc_without_waiting(smumgr, msg); 198 203 } 199 204 200 - /** 201 - * Retrieve an argument from SMC. 202 - * 203 - * @param smumgr the address of the powerplay hardware manager. 204 - * @param arg pointer to store the argument from SMC. 205 - * @return Always return 0. 206 - */ 205 + /* 206 + * Retrieve an argument from SMC. 207 + * @param smumgr the address of the powerplay hardware manager. 208 + * @param arg pointer to store the argument from SMC. 209 + * @return Always return 0. 
210 + */ 207 211 int vega10_read_arg_from_smc(struct pp_smumgr *smumgr, uint32_t *arg) 208 212 { 209 213 uint32_t reg; ··· 215 221 return 0; 216 222 } 217 223 218 - /** 219 - * Copy table from SMC into driver FB 220 - * @param smumgr the address of the SMC manager 221 - * @param table_id the driver's table ID to copy from 222 - */ 224 + /* 225 + * Copy table from SMC into driver FB 226 + * @param smumgr the address of the SMC manager 227 + * @param table_id the driver's table ID to copy from 228 + */ 223 229 int vega10_copy_table_from_smc(struct pp_smumgr *smumgr, 224 230 uint8_t *table, int16_t table_id) 225 231 { ··· 227 233 (struct vega10_smumgr *)(smumgr->backend); 228 234 229 235 PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE, 230 - "Invalid SMU Table ID!", return -1;); 236 + "Invalid SMU Table ID!", return -EINVAL); 231 237 PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].version != 0, 232 - "Invalid SMU Table version!", return -1;); 238 + "Invalid SMU Table version!", return -EINVAL); 233 239 PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0, 234 - "Invalid SMU Table Length!", return -1;); 240 + "Invalid SMU Table Length!", return -EINVAL); 235 241 PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr, 236 242 PPSMC_MSG_SetDriverDramAddrHigh, 237 243 priv->smu_tables.entry[table_id].table_addr_high) == 0, 238 - "[CopyTableFromSMC] Attempt to Set Dram Addr High Failed!", return -1;); 244 + "[CopyTableFromSMC] Attempt to Set Dram Addr High Failed!", return -EINVAL); 239 245 PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr, 240 246 PPSMC_MSG_SetDriverDramAddrLow, 241 247 priv->smu_tables.entry[table_id].table_addr_low) == 0, 242 248 "[CopyTableFromSMC] Attempt to Set Dram Addr Low Failed!", 243 - return -1;); 249 + return -EINVAL); 244 250 PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr, 245 251 PPSMC_MSG_TransferTableSmu2Dram, 246 252 priv->smu_tables.entry[table_id].table_id) == 0, 247 253 "[CopyTableFromSMC] Attempt to Transfer Table From SMU Failed!", 248 - return -1;); 254 + return -EINVAL); 249 255 250 256 memcpy(table, priv->smu_tables.entry[table_id].table, 251 257 priv->smu_tables.entry[table_id].size); ··· 253 259 return 0; 254 260 } 255 261 256 - /** 257 - * Copy table from Driver FB into SMC 258 - * @param smumgr the address of the SMC manager 259 - * @param table_id the table to copy from 260 - */ 262 + /* 263 + * Copy table from Driver FB into SMC 264 + * @param smumgr the address of the SMC manager 265 + * @param table_id the table to copy from 266 + */ 261 267 int vega10_copy_table_to_smc(struct pp_smumgr *smumgr, 262 268 uint8_t *table, int16_t table_id) 263 269 { ··· 265 271 (struct vega10_smumgr *)(smumgr->backend); 266 272 267 273 PP_ASSERT_WITH_CODE(table_id < MAX_SMU_TABLE, 268 - "Invalid SMU Table ID!", return -1;); 274 + "Invalid SMU Table ID!", return -EINVAL); 269 275 PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].version != 0, 270 - "Invalid SMU Table version!", return -1;); 276 + "Invalid SMU Table version!", return -EINVAL); 271 277 PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0, 272 - "Invalid SMU Table Length!", return -1;); 278 + "Invalid SMU Table Length!", return -EINVAL); 273 279 274 280 memcpy(priv->smu_tables.entry[table_id].table, table, 275 281 priv->smu_tables.entry[table_id].size); ··· 278 284 PPSMC_MSG_SetDriverDramAddrHigh, 279 285 priv->smu_tables.entry[table_id].table_addr_high) == 0, 280 286 "[CopyTableToSMC] Attempt to Set Dram Addr High Failed!", 281 - return 
-1;); 287 + return -EINVAL;); 282 288 PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr, 283 289 PPSMC_MSG_SetDriverDramAddrLow, 284 290 priv->smu_tables.entry[table_id].table_addr_low) == 0, 285 291 "[CopyTableToSMC] Attempt to Set Dram Addr Low Failed!", 286 - return -1;); 292 + return -EINVAL); 287 293 PP_ASSERT_WITH_CODE(vega10_send_msg_to_smc_with_parameter(smumgr, 288 294 PPSMC_MSG_TransferTableDram2Smu, 289 295 priv->smu_tables.entry[table_id].table_id) == 0, 290 296 "[CopyTableToSMC] Attempt to Transfer Table To SMU Failed!", 291 - return -1;); 297 + return -EINVAL); 292 298 293 - return 0; 294 - } 295 - 296 - int vega10_perform_btc(struct pp_smumgr *smumgr) 297 - { 298 - PP_ASSERT_WITH_CODE(!vega10_send_msg_to_smc_with_parameter( 299 - smumgr, PPSMC_MSG_RunBtc, 0), 300 - "Attempt to run DC BTC Failed!", 301 - return -1); 302 299 return 0; 303 300 } 304 301 ··· 297 312 { 298 313 PP_ASSERT_WITH_CODE(avfs_table, 299 314 "No access to SMC AVFS Table", 300 - return -1); 315 + return -EINVAL); 301 316 302 317 return vega10_copy_table_from_smc(smumgr, avfs_table, AVFSTABLE); 303 318 } ··· 306 321 { 307 322 PP_ASSERT_WITH_CODE(avfs_table, 308 323 "No access to SMC AVFS Table", 309 - return -1); 324 + return -EINVAL); 310 325 311 326 return vega10_copy_table_to_smc(smumgr, avfs_table, AVFSTABLE); 312 327 } ··· 324 339 int vega10_get_smc_features(struct pp_smumgr *smumgr, 325 340 uint32_t *features_enabled) 326 341 { 342 + if (features_enabled == NULL) 343 + return -EINVAL; 344 + 327 345 if (!vega10_send_msg_to_smc(smumgr, 328 346 PPSMC_MSG_GetEnabledSmuFeatures)) { 329 - if (!vega10_read_arg_from_smc(smumgr, features_enabled)) 330 - return 0; 347 + vega10_read_arg_from_smc(smumgr, features_enabled); 348 + return 0; 331 349 } 332 350 333 - return -1; 351 + return -EINVAL; 334 352 } 335 353 336 354 int vega10_set_tools_address(struct pp_smumgr *smumgr) ··· 360 372 PP_ASSERT_WITH_CODE(!vega10_send_msg_to_smc(smumgr, 361 373 PPSMC_MSG_GetDriverIfVersion), 362 374 "Attempt to get SMC IF Version Number Failed!", 363 - return -1); 364 - PP_ASSERT_WITH_CODE(!vega10_read_arg_from_smc(smumgr, 365 - &smc_driver_if_version), 366 - "Attempt to read SMC IF Version Number Failed!", 367 - return -1); 375 + return -EINVAL); 376 + vega10_read_arg_from_smc(smumgr, &smc_driver_if_version); 368 377 369 - if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION) 370 - return -1; 378 + if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION) { 379 + pr_err("Your firmware(0x%x) doesn't match \ 380 + SMU9_DRIVER_IF_VERSION(0x%x). \ 381 + Please update your firmware!\n", 382 + smc_driver_if_version, SMU9_DRIVER_IF_VERSION); 383 + return -EINVAL; 384 + } 371 385 372 386 return 0; 373 387 } 374 388 375 - /** 376 - * Write a 32bit value to the SMC SRAM space. 377 - * ALL PARAMETERS ARE IN HOST BYTE ORDER. 378 - * @param smumgr the address of the powerplay hardware manager. 379 - * @param smc_addr the address in the SMC RAM to access. 380 - * @param value to write to the SMC SRAM. 
381 - */ 382 389 static int vega10_smu_init(struct pp_smumgr *smumgr) 383 390 { 384 391 struct vega10_smumgr *priv; ··· 410 427 kfree(smumgr->backend); 411 428 cgs_free_gpu_mem(smumgr->device, 412 429 (cgs_handle_t)handle); 413 - return -1); 430 + return -EINVAL); 414 431 415 432 priv->smu_tables.entry[PPTABLE].version = 0x01; 416 433 priv->smu_tables.entry[PPTABLE].size = sizeof(PPTable_t); ··· 438 455 (cgs_handle_t)priv->smu_tables.entry[PPTABLE].handle); 439 456 cgs_free_gpu_mem(smumgr->device, 440 457 (cgs_handle_t)handle); 441 - return -1); 458 + return -EINVAL); 442 459 443 460 priv->smu_tables.entry[WMTABLE].version = 0x01; 444 461 priv->smu_tables.entry[WMTABLE].size = sizeof(Watermarks_t); ··· 468 485 (cgs_handle_t)priv->smu_tables.entry[WMTABLE].handle); 469 486 cgs_free_gpu_mem(smumgr->device, 470 487 (cgs_handle_t)handle); 471 - return -1); 488 + return -EINVAL); 472 489 473 490 priv->smu_tables.entry[AVFSTABLE].version = 0x01; 474 491 priv->smu_tables.entry[AVFSTABLE].size = sizeof(AvfsTable_t); ··· 480 497 priv->smu_tables.entry[AVFSTABLE].table = kaddr; 481 498 priv->smu_tables.entry[AVFSTABLE].handle = handle; 482 499 483 - tools_size = 0; 500 + tools_size = 0x19000; 484 501 if (tools_size) { 485 502 smu_allocate_memory(smumgr->device, 486 503 tools_size, ··· 500 517 smu_lower_32_bits(mc_addr); 501 518 priv->smu_tables.entry[TOOLSTABLE].table = kaddr; 502 519 priv->smu_tables.entry[TOOLSTABLE].handle = handle; 520 + vega10_set_tools_address(smumgr); 503 521 } 504 522 } 523 + 524 + /* allocate space for AVFS Fuse table */ 525 + smu_allocate_memory(smumgr->device, 526 + sizeof(AvfsFuseOverride_t), 527 + CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB, 528 + PAGE_SIZE, 529 + &mc_addr, 530 + &kaddr, 531 + &handle); 532 + 533 + PP_ASSERT_WITH_CODE(kaddr, 534 + "[vega10_smu_init] Out of memory for avfs fuse table.", 535 + kfree(smumgr->backend); 536 + cgs_free_gpu_mem(smumgr->device, 537 + (cgs_handle_t)priv->smu_tables.entry[PPTABLE].handle); 538 + cgs_free_gpu_mem(smumgr->device, 539 + (cgs_handle_t)priv->smu_tables.entry[WMTABLE].handle); 540 + cgs_free_gpu_mem(smumgr->device, 541 + (cgs_handle_t)priv->smu_tables.entry[AVFSTABLE].handle); 542 + cgs_free_gpu_mem(smumgr->device, 543 + (cgs_handle_t)priv->smu_tables.entry[TOOLSTABLE].handle); 544 + cgs_free_gpu_mem(smumgr->device, 545 + (cgs_handle_t)handle); 546 + return -EINVAL); 547 + 548 + priv->smu_tables.entry[AVFSFUSETABLE].version = 0x01; 549 + priv->smu_tables.entry[AVFSFUSETABLE].size = sizeof(AvfsFuseOverride_t); 550 + priv->smu_tables.entry[AVFSFUSETABLE].table_id = TABLE_AVFS_FUSE_OVERRIDE; 551 + priv->smu_tables.entry[AVFSFUSETABLE].table_addr_high = 552 + smu_upper_32_bits(mc_addr); 553 + priv->smu_tables.entry[AVFSFUSETABLE].table_addr_low = 554 + smu_lower_32_bits(mc_addr); 555 + priv->smu_tables.entry[AVFSFUSETABLE].table = kaddr; 556 + priv->smu_tables.entry[AVFSFUSETABLE].handle = handle; 505 557 506 558 return 0; 507 559 } ··· 556 538 if (priv->smu_tables.entry[TOOLSTABLE].table) 557 539 cgs_free_gpu_mem(smumgr->device, 558 540 (cgs_handle_t)priv->smu_tables.entry[TOOLSTABLE].handle); 541 + cgs_free_gpu_mem(smumgr->device, 542 + (cgs_handle_t)priv->smu_tables.entry[AVFSFUSETABLE].handle); 559 543 kfree(smumgr->backend); 560 544 smumgr->backend = NULL; 561 545 } ··· 568 548 { 569 549 PP_ASSERT_WITH_CODE(!vega10_verify_smc_interface(smumgr), 570 550 "Failed to verify SMC interface!", 571 - return -1); 551 + return -EINVAL); 572 552 return 0; 573 553 } 574 554
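Beyond switching the error returns from -1 to -EINVAL, the helpers above give hwmgr code a simple request/response interface. A minimal sketch of a caller using the now NULL-checked vega10_get_smc_features(); the function below is hypothetical and only illustrates the calling convention.

    static void vega10_dump_smu_features(struct pp_smumgr *smumgr)
    {
            uint32_t features = 0;

            /* Returns 0 on success, -EINVAL if the pointer is NULL or the
             * GetEnabledSmuFeatures message fails. */
            if (vega10_get_smc_features(smumgr, &features))
                    pr_err("Failed to query SMU feature mask\n");
            else
                    pr_info("SMU features enabled: 0x%08x\n", features);
    }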
+1 -1
drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h
··· 30 30 WMTABLE, 31 31 AVFSTABLE, 32 32 TOOLSTABLE, 33 + AVFSFUSETABLE, 33 34 MAX_SMU_TABLE, 34 35 }; 35 36 ··· 63 62 uint32_t *features_enabled); 64 63 int vega10_save_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table); 65 64 int vega10_restore_vft_table(struct pp_smumgr *smumgr, uint8_t *avfs_table); 66 - int vega10_perform_btc(struct pp_smumgr *smumgr); 67 65 68 66 int vega10_set_tools_address(struct pp_smumgr *smumgr); 69 67
+21 -2
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
··· 236 236 dma_fence_put(f); 237 237 } 238 238 239 + bool amd_sched_dependency_optimized(struct dma_fence* fence, 240 + struct amd_sched_entity *entity) 241 + { 242 + struct amd_gpu_scheduler *sched = entity->sched; 243 + struct amd_sched_fence *s_fence; 244 + 245 + if (!fence || dma_fence_is_signaled(fence)) 246 + return false; 247 + if (fence->context == entity->fence_context) 248 + return true; 249 + s_fence = to_amd_sched_fence(fence); 250 + if (s_fence && s_fence->sched == sched) 251 + return true; 252 + 253 + return false; 254 + } 255 + 239 256 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) 240 257 { 241 258 struct amd_gpu_scheduler *sched = entity->sched; ··· 404 387 405 388 spin_lock(&sched->job_list_lock); 406 389 list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { 407 - if (dma_fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) { 390 + if (s_job->s_fence->parent && 391 + dma_fence_remove_callback(s_job->s_fence->parent, 392 + &s_job->s_fence->cb)) { 408 393 dma_fence_put(s_job->s_fence->parent); 409 394 s_job->s_fence->parent = NULL; 410 395 } ··· 479 460 job->sched = sched; 480 461 job->s_entity = entity; 481 462 job->s_fence = amd_sched_fence_create(entity, owner); 482 - job->id = atomic64_inc_return(&sched->job_id_count); 483 463 if (!job->s_fence) 484 464 return -ENOMEM; 465 + job->id = atomic64_inc_return(&sched->job_id_count); 485 466 486 467 INIT_WORK(&job->finish_work, amd_sched_job_finish); 487 468 INIT_LIST_HEAD(&job->node);
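amd_sched_dependency_optimized() reports whether a dependency fence is already ordered by this scheduler (same fence context or same amd_gpu_scheduler), which lets the driver skip an explicit pipeline sync for such dependencies. A minimal sketch of the intended decision, assuming the kernel scheduler and dma-fence headers; the helper name is illustrative and the real driver-side bookkeeping is outside this hunk.

    /* Sketch: does this dependency require the driver to insert its own sync? */
    static bool needs_explicit_sync(struct dma_fence *fence,
                                    struct amd_sched_entity *entity)
    {
            /* No dependency, or it already completed: nothing to wait for. */
            if (!fence || dma_fence_is_signaled(fence))
                    return false;

            /* Same context or same scheduler: ordering is implicit. */
            if (amd_sched_dependency_optimized(fence, entity))
                    return false;

            return true;
    }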
+2
drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
··· 158 158 void *owner); 159 159 void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched); 160 160 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); 161 + bool amd_sched_dependency_optimized(struct dma_fence* fence, 162 + struct amd_sched_entity *entity); 161 163 #endif
+8
drivers/gpu/drm/drm_edid.c
··· 80 80 #define EDID_QUIRK_FORCE_12BPC (1 << 9) 81 81 /* Force 6bpc */ 82 82 #define EDID_QUIRK_FORCE_6BPC (1 << 10) 83 + /* Force 10bpc */ 84 + #define EDID_QUIRK_FORCE_10BPC (1 << 11) 83 85 84 86 struct detailed_mode_closure { 85 87 struct drm_connector *connector; ··· 123 121 /* Funai Electronics PM36B */ 124 122 { "FCM", 13600, EDID_QUIRK_PREFER_LARGE_75 | 125 123 EDID_QUIRK_DETAILED_IN_CM }, 124 + 125 + /* LGD panel of HP zBook 17 G2, eDP 10 bpc, but reports unknown bpc */ 126 + { "LGD", 764, EDID_QUIRK_FORCE_10BPC }, 126 127 127 128 /* LG Philips LCD LP154W01-A5 */ 128 129 { "LPL", 0, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE }, ··· 4248 4243 4249 4244 if (quirks & EDID_QUIRK_FORCE_8BPC) 4250 4245 connector->display_info.bpc = 8; 4246 + 4247 + if (quirks & EDID_QUIRK_FORCE_10BPC) 4248 + connector->display_info.bpc = 10; 4251 4249 4252 4250 if (quirks & EDID_QUIRK_FORCE_12BPC) 4253 4251 connector->display_info.bpc = 12;
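The new quirk follows the existing FORCE_6/8/12BPC pattern: a (vendor, product-id) entry in the quirk list sets a flag, and the bpc override is applied when the display info is filled in from the EDID. A minimal sketch of how a driver observes the result after parsing; the helper is hypothetical, and only drm_add_edid_modes() and connector->display_info come from the DRM core.

    static void report_panel_depth(struct drm_connector *connector,
                                   struct edid *edid)
    {
            drm_add_edid_modes(connector, edid);

            /* For the quirked LGD panel this now reads 10 rather than the
             * unknown value reported by the EDID itself. */
            DRM_DEBUG_KMS("panel reports %u bpc after quirks\n",
                          connector->display_info.bpc);
    }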
+13
drivers/gpu/drm/i915/Kconfig.debug
··· 87 87 and also analyze the request dependency resolving timeline. 88 88 89 89 If in doubt, say "N". 90 + 91 + config DRM_I915_DEBUG_VBLANK_EVADE 92 + bool "Enable extra debug warnings for vblank evasion" 93 + depends on DRM_I915 94 + default n 95 + help 96 + Choose this option to turn on extra debug warnings for the 97 + vblank evade mechanism. This gives a warning every time the 98 + deadline allotted for the vblank evade critical section 99 + is exceeded, even if there isn't an actual risk of missing 100 + the vblank. 101 + 102 + If in doubt, say "N".
+5 -2
drivers/gpu/drm/i915/intel_sprite.c
··· 198 198 ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time), 199 199 crtc->debug.min_vbl, crtc->debug.max_vbl, 200 200 crtc->debug.scanline_start, scanline_end); 201 - } else if (ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) > 202 - VBLANK_EVASION_TIME_US) 201 + } 202 + #ifdef CONFIG_DRM_I915_DEBUG_VBLANK_EVADE 203 + else if (ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) > 204 + VBLANK_EVASION_TIME_US) 203 205 DRM_WARN("Atomic update on pipe (%c) took %lld us, max time under evasion is %u us\n", 204 206 pipe_name(pipe), 205 207 ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time), 206 208 VBLANK_EVASION_TIME_US); 209 + #endif 207 210 } 208 211 209 212 static void
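With the warning now compiled out unless CONFIG_DRM_I915_DEBUG_VBLANK_EVADE is set, the slow-update diagnostic becomes opt-in. Purely as an illustration, the same gating could also be expressed without the preprocessor block by using IS_ENABLED(), sketched below; the patch itself uses the #ifdef form shown above.

    } else if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) &&
               ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) >
               VBLANK_EVASION_TIME_US)
            DRM_WARN("Atomic update on pipe (%c) took %lld us, max time under evasion is %u us\n",
                     pipe_name(pipe),
                     ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time),
                     VBLANK_EVASION_TIME_US);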
+15 -14
drivers/gpu/drm/nouveau/nv50_display.c
··· 831 831 static int 832 832 nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, 833 833 struct nv50_wndw_atom *asyw, 834 - struct nv50_head_atom *asyh, 835 - u32 pflip_flags) 834 + struct nv50_head_atom *asyh) 836 835 { 837 836 struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb); 838 837 struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev); ··· 847 848 asyw->image.h = fb->base.height; 848 849 asyw->image.kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8; 849 850 850 - asyw->interval = pflip_flags & DRM_MODE_PAGE_FLIP_ASYNC ? 0 : 1; 851 + if (asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) 852 + asyw->interval = 0; 853 + else 854 + asyw->interval = 1; 851 855 852 856 if (asyw->image.kind) { 853 857 asyw->image.layout = 0; ··· 889 887 struct nv50_head_atom *harm = NULL, *asyh = NULL; 890 888 bool varm = false, asyv = false, asym = false; 891 889 int ret; 892 - u32 pflip_flags = 0; 893 890 894 891 NV_ATOMIC(drm, "%s atomic_check\n", plane->name); 895 892 if (asyw->state.crtc) { ··· 897 896 return PTR_ERR(asyh); 898 897 asym = drm_atomic_crtc_needs_modeset(&asyh->state); 899 898 asyv = asyh->state.active; 900 - pflip_flags = asyh->state.pageflip_flags; 901 899 } 902 900 903 901 if (armw->state.crtc) { ··· 912 912 if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point))) 913 913 asyw->set.point = true; 914 914 915 - if (!varm || asym || armw->state.fb != asyw->state.fb) { 916 - ret = nv50_wndw_atomic_check_acquire( 917 - wndw, asyw, asyh, pflip_flags); 918 - if (ret) 919 - return ret; 920 - } 915 + ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh); 916 + if (ret) 917 + return ret; 921 918 } else 922 919 if (varm) { 923 920 nv50_wndw_atomic_check_release(wndw, asyw, harm); ··· 1119 1122 nv50_curs_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh, 1120 1123 struct nv50_wndw_atom *asyw) 1121 1124 { 1122 - asyh->curs.handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle; 1123 - asyh->curs.offset = asyw->image.offset; 1124 - asyh->set.curs = asyh->curs.visible; 1125 + u32 handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle; 1126 + u32 offset = asyw->image.offset; 1127 + if (asyh->curs.handle != handle || asyh->curs.offset != offset) { 1128 + asyh->curs.handle = handle; 1129 + asyh->curs.offset = offset; 1130 + asyh->set.curs = asyh->curs.visible; 1131 + } 1125 1132 } 1126 1133 1127 1134 static void
+1 -1
drivers/gpu/drm/nouveau/nvkm/core/object.c
··· 295 295 INIT_LIST_HEAD(&object->head); 296 296 INIT_LIST_HEAD(&object->tree); 297 297 RB_CLEAR_NODE(&object->node); 298 - WARN_ON(oclass->engine && !object->engine); 298 + WARN_ON(IS_ERR(object->engine)); 299 299 } 300 300 301 301 int
-1
drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
··· 638 638 return ret; 639 639 } 640 640 641 - ram->ranks = (nvkm_rd32(device, 0x10f200) & 0x00000004) ? 2 : 1; 642 641 return 0; 643 642 } 644 643
+1 -1
drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
··· 146 146 poll = false; 147 147 } 148 148 149 - if (list_empty(&therm->alarm.head) && poll) 149 + if (poll) 150 150 nvkm_timer_alarm(tmr, 1000000000ULL, &therm->alarm); 151 151 spin_unlock_irqrestore(&therm->lock, flags); 152 152
+1 -1
drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c
··· 83 83 spin_unlock_irqrestore(&fan->lock, flags); 84 84 85 85 /* schedule next fan update, if not at target speed already */ 86 - if (list_empty(&fan->alarm.head) && target != duty) { 86 + if (target != duty) { 87 87 u16 bump_period = fan->bios.bump_period; 88 88 u16 slow_down_period = fan->bios.slow_down_period; 89 89 u64 delay;
+1 -1
drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c
··· 53 53 duty = !nvkm_gpio_get(gpio, 0, DCB_GPIO_FAN, 0xff); 54 54 nvkm_gpio_set(gpio, 0, DCB_GPIO_FAN, 0xff, duty); 55 55 56 - if (list_empty(&fan->alarm.head) && percent != (duty * 100)) { 56 + if (percent != (duty * 100)) { 57 57 u64 next_change = (percent * fan->period_us) / 100; 58 58 if (!duty) 59 59 next_change = fan->period_us - next_change;
+1 -1
drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c
··· 185 185 spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags); 186 186 187 187 /* schedule the next poll in one second */ 188 - if (therm->func->temp_get(therm) >= 0 && list_empty(&alarm->head)) 188 + if (therm->func->temp_get(therm) >= 0) 189 189 nvkm_timer_alarm(tmr, 1000000000ULL, alarm); 190 190 } 191 191
+39 -20
drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
··· 36 36 unsigned long flags; 37 37 LIST_HEAD(exec); 38 38 39 - /* move any due alarms off the pending list */ 39 + /* Process pending alarms. */ 40 40 spin_lock_irqsave(&tmr->lock, flags); 41 41 list_for_each_entry_safe(alarm, atemp, &tmr->alarms, head) { 42 - if (alarm->timestamp <= nvkm_timer_read(tmr)) 43 - list_move_tail(&alarm->head, &exec); 42 + /* Have we hit the earliest alarm that hasn't gone off? */ 43 + if (alarm->timestamp > nvkm_timer_read(tmr)) { 44 + /* Schedule it. If we didn't race, we're done. */ 45 + tmr->func->alarm_init(tmr, alarm->timestamp); 46 + if (alarm->timestamp > nvkm_timer_read(tmr)) 47 + break; 48 + } 49 + 50 + /* Move to completed list. We'll drop the lock before 51 + * executing the callback so it can reschedule itself. 52 + */ 53 + list_move_tail(&alarm->head, &exec); 44 54 } 45 55 46 - /* reschedule interrupt for next alarm time */ 47 - if (!list_empty(&tmr->alarms)) { 48 - alarm = list_first_entry(&tmr->alarms, typeof(*alarm), head); 49 - tmr->func->alarm_init(tmr, alarm->timestamp); 50 - } else { 56 + /* Shut down interrupt if no more pending alarms. */ 57 + if (list_empty(&tmr->alarms)) 51 58 tmr->func->alarm_fini(tmr); 52 - } 53 59 spin_unlock_irqrestore(&tmr->lock, flags); 54 60 55 - /* execute any pending alarm handlers */ 61 + /* Execute completed callbacks. */ 56 62 list_for_each_entry_safe(alarm, atemp, &exec, head) { 57 63 list_del_init(&alarm->head); 58 64 alarm->func(alarm); ··· 71 65 struct nvkm_alarm *list; 72 66 unsigned long flags; 73 67 74 - alarm->timestamp = nvkm_timer_read(tmr) + nsec; 75 - 76 - /* append new alarm to list, in soonest-alarm-first order */ 68 + /* Remove alarm from pending list. 69 + * 70 + * This both protects against the corruption of the list, 71 + * and implements alarm rescheduling/cancellation. 72 + */ 77 73 spin_lock_irqsave(&tmr->lock, flags); 78 - if (!nsec) { 79 - if (!list_empty(&alarm->head)) 80 - list_del(&alarm->head); 81 - } else { 74 + list_del_init(&alarm->head); 75 + 76 + if (nsec) { 77 + /* Insert into pending list, ordered earliest to latest. */ 78 + alarm->timestamp = nvkm_timer_read(tmr) + nsec; 82 79 list_for_each_entry(list, &tmr->alarms, head) { 83 80 if (list->timestamp > alarm->timestamp) 84 81 break; 85 82 } 83 + 86 84 list_add_tail(&alarm->head, &list->head); 85 + 86 + /* Update HW if this is now the earliest alarm. */ 87 + list = list_first_entry(&tmr->alarms, typeof(*list), head); 88 + if (list == alarm) { 89 + tmr->func->alarm_init(tmr, alarm->timestamp); 90 + /* This shouldn't happen if callers aren't stupid. 91 + * 92 + * Worst case scenario is that it'll take roughly 93 + * 4 seconds for the next alarm to trigger. 94 + */ 95 + WARN_ON(alarm->timestamp <= nvkm_timer_read(tmr)); 96 + } 87 97 } 88 98 spin_unlock_irqrestore(&tmr->lock, flags); 89 - 90 - /* process pending alarms */ 91 - nvkm_timer_alarm_trigger(tmr); 92 99 } 93 100 94 101 void
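The reworked nvkm_timer_alarm() first does list_del_init() (which makes rescheduling and cancellation safe), then inserts the alarm into the pending list sorted by timestamp: it walks to the first entry that expires later and adds the new alarm immediately before it. A minimal, generic sketch of that sorted-insert idiom using the same <linux/list.h> primitives; the struct and function names are illustrative.

    #include <linux/list.h>
    #include <linux/types.h>

    struct sorted_item {
            struct list_head head;
            u64 timestamp;
    };

    /* Insert 'item' into 'queue' keeping ascending timestamp order. */
    static void sorted_insert(struct list_head *queue, struct sorted_item *item)
    {
            struct sorted_item *cur;

            list_for_each_entry(cur, queue, head) {
                    if (cur->timestamp > item->timestamp)
                            break;
            }
            /* If the loop fell through, &cur->head is 'queue' itself, so the
             * item is appended at the tail; otherwise it lands just before
             * the first later entry. */
            list_add_tail(&item->head, &cur->head);
    }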
+1 -1
drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c
··· 76 76 u32 stat = nvkm_rd32(device, NV04_PTIMER_INTR_0); 77 77 78 78 if (stat & 0x00000001) { 79 - nvkm_timer_alarm_trigger(tmr); 80 79 nvkm_wr32(device, NV04_PTIMER_INTR_0, 0x00000001); 80 + nvkm_timer_alarm_trigger(tmr); 81 81 stat &= ~0x00000001; 82 82 } 83 83
+8 -21
drivers/gpu/drm/radeon/cik.c
··· 9150 9150 a.full = dfixed_const(available_bandwidth); 9151 9151 b.full = dfixed_const(wm->num_heads); 9152 9152 a.full = dfixed_div(a, b); 9153 + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); 9154 + tmp = min(dfixed_trunc(a), tmp); 9153 9155 9154 - b.full = dfixed_const(mc_latency + 512); 9155 - c.full = dfixed_const(wm->disp_clk); 9156 - b.full = dfixed_div(b, c); 9157 - 9158 - c.full = dfixed_const(dmif_size); 9159 - b.full = dfixed_div(c, b); 9160 - 9161 - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); 9162 - 9163 - b.full = dfixed_const(1000); 9164 - c.full = dfixed_const(wm->disp_clk); 9165 - b.full = dfixed_div(c, b); 9166 - c.full = dfixed_const(wm->bytes_per_pixel); 9167 - b.full = dfixed_mul(b, c); 9168 - 9169 - lb_fill_bw = min(tmp, dfixed_trunc(b)); 9156 + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); 9170 9157 9171 9158 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); 9172 9159 b.full = dfixed_const(1000); ··· 9261 9274 { 9262 9275 struct drm_display_mode *mode = &radeon_crtc->base.mode; 9263 9276 struct dce8_wm_params wm_low, wm_high; 9264 - u32 pixel_period; 9277 + u32 active_time; 9265 9278 u32 line_time = 0; 9266 9279 u32 latency_watermark_a = 0, latency_watermark_b = 0; 9267 9280 u32 tmp, wm_mask; 9268 9281 9269 9282 if (radeon_crtc->base.enabled && num_heads && mode) { 9270 - pixel_period = 1000000 / (u32)mode->clock; 9271 - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); 9283 + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; 9284 + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); 9272 9285 9273 9286 /* watermark for high clocks */ 9274 9287 if ((rdev->pm.pm_method == PM_METHOD_DPM) && ··· 9284 9297 9285 9298 wm_high.disp_clk = mode->clock; 9286 9299 wm_high.src_width = mode->crtc_hdisplay; 9287 - wm_high.active_time = mode->crtc_hdisplay * pixel_period; 9300 + wm_high.active_time = active_time; 9288 9301 wm_high.blank_time = line_time - wm_high.active_time; 9289 9302 wm_high.interlaced = false; 9290 9303 if (mode->flags & DRM_MODE_FLAG_INTERLACE) ··· 9324 9337 9325 9338 wm_low.disp_clk = mode->clock; 9326 9339 wm_low.src_width = mode->crtc_hdisplay; 9327 - wm_low.active_time = mode->crtc_hdisplay * pixel_period; 9340 + wm_low.active_time = active_time; 9328 9341 wm_low.blank_time = line_time - wm_low.active_time; 9329 9342 wm_low.interlaced = false; 9330 9343 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
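The radeon watermark changes here (and the matching evergreen.c and si.c hunks below) replace the truncating integer pixel_period with a multiply-before-divide, and move the dmif term to div_u64() instead of 20.12 fixed point. The precision gain is easy to see with a worked example; the clock value below is an arbitrary illustration, not taken from the patch.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t clock_khz = 148500;  /* e.g. a 1080p@60 pixel clock */
            uint32_t hdisplay  = 1920;

            /* Old path: truncate the per-pixel period (ns) first. */
            uint32_t pixel_period = 1000000 / clock_khz;              /* 6 ns    */
            uint32_t old_active   = hdisplay * pixel_period;          /* 11520   */

            /* New path: multiply before dividing. */
            uint32_t new_active   = 1000000UL * hdisplay / clock_khz; /* 12929   */

            printf("active_time old=%u ns, new=%u ns\n",
                   (unsigned)old_active, (unsigned)new_active);
            return 0;
    }

In this example the old computation under-reports the active time by roughly 11%, which is the kind of error the more accurate calculation removes.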
+6 -12
drivers/gpu/drm/radeon/evergreen.c
··· 2188 2188 b.full = dfixed_const(wm->num_heads); 2189 2189 a.full = dfixed_div(a, b); 2190 2190 2191 - b.full = dfixed_const(1000); 2192 - c.full = dfixed_const(wm->disp_clk); 2193 - b.full = dfixed_div(c, b); 2194 - c.full = dfixed_const(wm->bytes_per_pixel); 2195 - b.full = dfixed_mul(b, c); 2196 - 2197 - lb_fill_bw = min(dfixed_trunc(a), dfixed_trunc(b)); 2191 + lb_fill_bw = min(dfixed_trunc(a), wm->disp_clk * wm->bytes_per_pixel / 1000); 2198 2192 2199 2193 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); 2200 2194 b.full = dfixed_const(1000); ··· 2255 2261 struct drm_display_mode *mode = &radeon_crtc->base.mode; 2256 2262 struct evergreen_wm_params wm_low, wm_high; 2257 2263 u32 dram_channels; 2258 - u32 pixel_period; 2264 + u32 active_time; 2259 2265 u32 line_time = 0; 2260 2266 u32 latency_watermark_a = 0, latency_watermark_b = 0; 2261 2267 u32 priority_a_mark = 0, priority_b_mark = 0; ··· 2266 2272 fixed20_12 a, b, c; 2267 2273 2268 2274 if (radeon_crtc->base.enabled && num_heads && mode) { 2269 - pixel_period = 1000000 / (u32)mode->clock; 2270 - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); 2275 + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; 2276 + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); 2271 2277 priority_a_cnt = 0; 2272 2278 priority_b_cnt = 0; 2273 2279 dram_channels = evergreen_get_number_of_dram_channels(rdev); ··· 2285 2291 2286 2292 wm_high.disp_clk = mode->clock; 2287 2293 wm_high.src_width = mode->crtc_hdisplay; 2288 - wm_high.active_time = mode->crtc_hdisplay * pixel_period; 2294 + wm_high.active_time = active_time; 2289 2295 wm_high.blank_time = line_time - wm_high.active_time; 2290 2296 wm_high.interlaced = false; 2291 2297 if (mode->flags & DRM_MODE_FLAG_INTERLACE) ··· 2312 2318 2313 2319 wm_low.disp_clk = mode->clock; 2314 2320 wm_low.src_width = mode->crtc_hdisplay; 2315 - wm_low.active_time = mode->crtc_hdisplay * pixel_period; 2321 + wm_low.active_time = active_time; 2316 2322 wm_low.blank_time = line_time - wm_low.active_time; 2317 2323 wm_low.interlaced = false; 2318 2324 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+6 -2
drivers/gpu/drm/radeon/r420.c
··· 203 203 204 204 static void r420_cp_errata_init(struct radeon_device *rdev) 205 205 { 206 + int r; 206 207 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 207 208 208 209 /* RV410 and R420 can lock up if CP DMA to host memory happens ··· 213 212 * of the CP init, apparently. 214 213 */ 215 214 radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch); 216 - radeon_ring_lock(rdev, ring, 8); 215 + r = radeon_ring_lock(rdev, ring, 8); 216 + WARN_ON(r); 217 217 radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1)); 218 218 radeon_ring_write(ring, rdev->config.r300.resync_scratch); 219 219 radeon_ring_write(ring, 0xDEADBEEF); ··· 223 221 224 222 static void r420_cp_errata_fini(struct radeon_device *rdev) 225 223 { 224 + int r; 226 225 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 227 226 228 227 /* Catch the RESYNC we dispatched all the way back, 229 228 * at the very beginning of the CP init. 230 229 */ 231 - radeon_ring_lock(rdev, ring, 8); 230 + r = radeon_ring_lock(rdev, ring, 8); 231 + WARN_ON(r); 232 232 radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); 233 233 radeon_ring_write(ring, R300_RB3D_DC_FINISH); 234 234 radeon_ring_unlock_commit(rdev, ring, false);
+6 -4
drivers/gpu/drm/radeon/radeon_cs.c
··· 117 117 priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2 118 118 + !!r->write_domain; 119 119 120 - /* the first reloc of an UVD job is the msg and that must be in 121 - VRAM, also but everything into VRAM on AGP cards and older 122 - IGP chips to avoid image corruptions */ 120 + /* The first reloc of an UVD job is the msg and that must be in 121 + * VRAM, the second reloc is the DPB and for WMV that must be in 122 + * VRAM as well. Also put everything into VRAM on AGP cards and older 123 + * IGP chips to avoid image corruptions 124 + */ 123 125 if (p->ring == R600_RING_TYPE_UVD_INDEX && 124 - (i == 0 || pci_find_capability(p->rdev->ddev->pdev, 126 + (i <= 0 || pci_find_capability(p->rdev->ddev->pdev, 125 127 PCI_CAP_ID_AGP) || 126 128 p->rdev->family == CHIP_RS780 || 127 129 p->rdev->family == CHIP_RS880)) {
+1 -1
drivers/gpu/drm/radeon/radeon_object.c
··· 81 81 list_del_init(&bo->list); 82 82 mutex_unlock(&bo->rdev->gem.mutex); 83 83 radeon_bo_clear_surface_reg(bo); 84 - WARN_ON(!list_empty(&bo->va)); 84 + WARN_ON_ONCE(!list_empty(&bo->va)); 85 85 drm_gem_object_release(&bo->gem_base); 86 86 kfree(bo); 87 87 }
+6 -1
drivers/gpu/drm/radeon/radeon_test.c
··· 298 298 DRM_ERROR("Failed to lock ring A %d\n", ring->idx); 299 299 return r; 300 300 } 301 - radeon_fence_emit(rdev, fence, ring->idx); 301 + r = radeon_fence_emit(rdev, fence, ring->idx); 302 + if (r) { 303 + DRM_ERROR("Failed to emit fence\n"); 304 + radeon_ring_unlock_undo(rdev, ring); 305 + return r; 306 + } 302 307 radeon_ring_unlock_commit(rdev, ring, false); 303 308 } 304 309 return 0;
+1 -1
drivers/gpu/drm/radeon/radeon_uvd.c
··· 621 621 } 622 622 623 623 /* TODO: is this still necessary on NI+ ? */ 624 - if ((cmd == 0 || cmd == 0x3) && 624 + if ((cmd == 0 || cmd == 1 || cmd == 0x3) && 625 625 (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { 626 626 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 627 627 start, end);
+8 -21
drivers/gpu/drm/radeon/si.c
··· 2204 2204 a.full = dfixed_const(available_bandwidth); 2205 2205 b.full = dfixed_const(wm->num_heads); 2206 2206 a.full = dfixed_div(a, b); 2207 + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); 2208 + tmp = min(dfixed_trunc(a), tmp); 2207 2209 2208 - b.full = dfixed_const(mc_latency + 512); 2209 - c.full = dfixed_const(wm->disp_clk); 2210 - b.full = dfixed_div(b, c); 2211 - 2212 - c.full = dfixed_const(dmif_size); 2213 - b.full = dfixed_div(c, b); 2214 - 2215 - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); 2216 - 2217 - b.full = dfixed_const(1000); 2218 - c.full = dfixed_const(wm->disp_clk); 2219 - b.full = dfixed_div(c, b); 2220 - c.full = dfixed_const(wm->bytes_per_pixel); 2221 - b.full = dfixed_mul(b, c); 2222 - 2223 - lb_fill_bw = min(tmp, dfixed_trunc(b)); 2210 + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); 2224 2211 2225 2212 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); 2226 2213 b.full = dfixed_const(1000); ··· 2274 2287 struct drm_display_mode *mode = &radeon_crtc->base.mode; 2275 2288 struct dce6_wm_params wm_low, wm_high; 2276 2289 u32 dram_channels; 2277 - u32 pixel_period; 2290 + u32 active_time; 2278 2291 u32 line_time = 0; 2279 2292 u32 latency_watermark_a = 0, latency_watermark_b = 0; 2280 2293 u32 priority_a_mark = 0, priority_b_mark = 0; ··· 2284 2297 fixed20_12 a, b, c; 2285 2298 2286 2299 if (radeon_crtc->base.enabled && num_heads && mode) { 2287 - pixel_period = 1000000 / (u32)mode->clock; 2288 - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); 2300 + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; 2301 + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); 2289 2302 priority_a_cnt = 0; 2290 2303 priority_b_cnt = 0; 2291 2304 ··· 2307 2320 2308 2321 wm_high.disp_clk = mode->clock; 2309 2322 wm_high.src_width = mode->crtc_hdisplay; 2310 - wm_high.active_time = mode->crtc_hdisplay * pixel_period; 2323 + wm_high.active_time = active_time; 2311 2324 wm_high.blank_time = line_time - wm_high.active_time; 2312 2325 wm_high.interlaced = false; 2313 2326 if (mode->flags & DRM_MODE_FLAG_INTERLACE) ··· 2334 2347 2335 2348 wm_low.disp_clk = mode->clock; 2336 2349 wm_low.src_width = mode->crtc_hdisplay; 2337 - wm_low.active_time = mode->crtc_hdisplay * pixel_period; 2350 + wm_low.active_time = active_time; 2338 2351 wm_low.blank_time = line_time - wm_low.active_time; 2339 2352 wm_low.interlaced = false; 2340 2353 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+1 -2
drivers/gpu/drm/ttm/ttm_bo.c
··· 1394 1394 int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, 1395 1395 unsigned long p_size) 1396 1396 { 1397 - int ret = -EINVAL; 1397 + int ret; 1398 1398 struct ttm_mem_type_manager *man; 1399 1399 unsigned i; 1400 1400 ··· 1412 1412 return ret; 1413 1413 man->bdev = bdev; 1414 1414 1415 - ret = 0; 1416 1415 if (type != TTM_PL_SYSTEM) { 1417 1416 ret = (*man->func->init)(man, p_size); 1418 1417 if (ret)
+23 -1
include/uapi/drm/amdgpu_drm.h
··· 295 295 }; 296 296 297 297 struct drm_amdgpu_wait_cs_in { 298 - /** Command submission handle */ 298 + /* Command submission handle 299 + * handle equals 0 means none to wait for 300 + * handle equals ~0ull means wait for the latest sequence number 301 + */ 299 302 __u64 handle; 300 303 /** Absolute timeout to wait */ 301 304 __u64 timeout; ··· 767 764 __u64 cntl_sb_buf_gpu_addr; 768 765 /* NGG Parameter Cache */ 769 766 __u64 param_buf_gpu_addr; 767 + __u32 prim_buf_size; 768 + __u32 pos_buf_size; 769 + __u32 cntl_sb_buf_size; 770 + __u32 param_buf_size; 771 + /* wavefront size*/ 772 + __u32 wave_front_size; 773 + /* shader visible vgprs*/ 774 + __u32 num_shader_visible_vgprs; 775 + /* CU per shader array*/ 776 + __u32 num_cu_per_sh; 777 + /* number of tcc blocks*/ 778 + __u32 num_tcc_blocks; 779 + /* gs vgt table depth*/ 780 + __u32 gs_vgt_table_depth; 781 + /* gs primitive buffer depth*/ 782 + __u32 gs_prim_buffer_depth; 783 + /* max gs wavefront per vgt*/ 784 + __u32 max_gs_waves_per_vgt; 785 + __u32 _pad1; 770 786 }; 771 787 772 788 struct drm_amdgpu_info_hw_ip {
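The extended drm_amdgpu_info_device lets userspace discover the NGG buffer sizes and the additional CU/GS limits. A minimal sketch of a consumer printing the new fields, assuming the structure has already been filled in by the AMDGPU_INFO_DEV_INFO query (the ioctl/libdrm plumbing is omitted and is not part of this patch).

    #include <stdio.h>
    #include <drm/amdgpu_drm.h>   /* this header */

    static void print_new_dev_info(const struct drm_amdgpu_info_device *dev)
    {
            printf("wavefront size:        %u\n", dev->wave_front_size);
            printf("CUs per shader array:  %u\n", dev->num_cu_per_sh);
            printf("TCC blocks:            %u\n", dev->num_tcc_blocks);
            printf("GS VGT table depth:    %u\n", dev->gs_vgt_table_depth);
            printf("GS prim buffer depth:  %u\n", dev->gs_prim_buffer_depth);
            printf("max GS waves per VGT:  %u\n", dev->max_gs_waves_per_vgt);
            printf("NGG prim/pos buffers:  %u / %u\n",
                   dev->prim_buf_size, dev->pos_buf_size);
    }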