Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v4.8-rc4 862 lines 23 kB view raw
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include <linux/module.h> 24#include <linux/fdtable.h> 25#include <linux/uaccess.h> 26#include <drm/drmP.h> 27#include "radeon.h" 28#include "cikd.h" 29#include "cik_reg.h" 30#include "radeon_kfd.h" 31#include "radeon_ucode.h" 32#include <linux/firmware.h> 33#include "cik_structs.h" 34 35#define CIK_PIPE_PER_MEC (4) 36 37static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { 38 TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL, 39 TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL, 40 TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL, 41 TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL 42}; 43 44struct kgd_mem { 45 struct radeon_bo *bo; 46 uint64_t gpu_addr; 47 void *cpu_ptr; 48}; 49 50 51static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 52 void **mem_obj, uint64_t *gpu_addr, 53 void **cpu_ptr); 54 55static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); 56 57static uint64_t get_vmem_size(struct kgd_dev *kgd); 58static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); 59 60static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); 61static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); 62 63/* 64 * Register access functions 65 */ 66 67static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, 68 uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, 69 uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); 70 71static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, 72 unsigned int vmid); 73 74static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, 75 uint32_t hpd_size, uint64_t hpd_gpu_addr); 76static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); 77static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 78 uint32_t queue_id, uint32_t __user *wptr); 79static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); 80static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 81 uint32_t pipe_id, uint32_t queue_id); 82 83static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 84 unsigned int timeout, uint32_t pipe_id, 85 uint32_t queue_id); 86static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); 87static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 88 unsigned int timeout); 89static int kgd_address_watch_disable(struct kgd_dev *kgd); 90static int kgd_address_watch_execute(struct kgd_dev *kgd, 91 unsigned int watch_point_id, 92 uint32_t cntl_val, 93 uint32_t addr_hi, 94 uint32_t addr_lo); 95static int kgd_wave_control_execute(struct kgd_dev *kgd, 96 uint32_t gfx_index_val, 97 uint32_t sq_cmd); 98static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, 99 unsigned int watch_point_id, 100 unsigned int reg_offset); 101 102static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); 103static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 104 uint8_t vmid); 105static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); 106 107static const struct kfd2kgd_calls kfd2kgd = { 108 .init_gtt_mem_allocation = alloc_gtt_mem, 109 .free_gtt_mem = free_gtt_mem, 110 .get_vmem_size = get_vmem_size, 111 .get_gpu_clock_counter = get_gpu_clock_counter, 112 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, 113 .program_sh_mem_settings = kgd_program_sh_mem_settings, 114 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 115 .init_pipeline = kgd_init_pipeline, 116 .init_interrupts = kgd_init_interrupts, 117 .hqd_load = kgd_hqd_load, 118 .hqd_sdma_load = kgd_hqd_sdma_load, 119 .hqd_is_occupied = kgd_hqd_is_occupied, 120 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, 121 .hqd_destroy = kgd_hqd_destroy, 122 .hqd_sdma_destroy = kgd_hqd_sdma_destroy, 123 .address_watch_disable = kgd_address_watch_disable, 124 .address_watch_execute = kgd_address_watch_execute, 125 .wave_control_execute = kgd_wave_control_execute, 126 .address_watch_get_offset = kgd_address_watch_get_offset, 127 .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, 128 .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, 129 .write_vmid_invalidate_request = write_vmid_invalidate_request, 130 .get_fw_version = get_fw_version 131}; 132 133static const struct kgd2kfd_calls *kgd2kfd; 134 135int radeon_kfd_init(void) 136{ 137 int ret; 138 139#if defined(CONFIG_HSA_AMD_MODULE) 140 int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); 141 142 kgd2kfd_init_p = symbol_request(kgd2kfd_init); 143 144 if (kgd2kfd_init_p == NULL) 145 return -ENOENT; 146 147 ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd); 148 if (ret) { 149 symbol_put(kgd2kfd_init); 150 kgd2kfd = NULL; 151 } 152 153#elif defined(CONFIG_HSA_AMD) 154 ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); 155 if (ret) 156 kgd2kfd = NULL; 157 158#else 159 ret = -ENOENT; 160#endif 161 162 return ret; 163} 164 165void radeon_kfd_fini(void) 166{ 167 if (kgd2kfd) { 168 kgd2kfd->exit(); 169 symbol_put(kgd2kfd_init); 170 } 171} 172 173void radeon_kfd_device_probe(struct radeon_device *rdev) 174{ 175 if (kgd2kfd) 176 rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, 177 rdev->pdev, &kfd2kgd); 178} 179 180void radeon_kfd_device_init(struct radeon_device *rdev) 181{ 182 if (rdev->kfd) { 183 struct kgd2kfd_shared_resources gpu_resources = { 184 .compute_vmid_bitmap = 0xFF00, 185 186 .first_compute_pipe = 1, 187 .compute_pipe_count = 4 - 1, 188 }; 189 190 radeon_doorbell_get_kfd_info(rdev, 191 &gpu_resources.doorbell_physical_address, 192 &gpu_resources.doorbell_aperture_size, 193 &gpu_resources.doorbell_start_offset); 194 195 kgd2kfd->device_init(rdev->kfd, &gpu_resources); 196 } 197} 198 199void radeon_kfd_device_fini(struct radeon_device *rdev) 200{ 201 if (rdev->kfd) { 202 kgd2kfd->device_exit(rdev->kfd); 203 rdev->kfd = NULL; 204 } 205} 206 207void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry) 208{ 209 if (rdev->kfd) 210 kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); 211} 212 213void radeon_kfd_suspend(struct radeon_device *rdev) 214{ 215 if (rdev->kfd) 216 kgd2kfd->suspend(rdev->kfd); 217} 218 219int radeon_kfd_resume(struct radeon_device *rdev) 220{ 221 int r = 0; 222 223 if (rdev->kfd) 224 r = kgd2kfd->resume(rdev->kfd); 225 226 return r; 227} 228 229static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 230 void **mem_obj, uint64_t *gpu_addr, 231 void **cpu_ptr) 232{ 233 struct radeon_device *rdev = (struct radeon_device *)kgd; 234 struct kgd_mem **mem = (struct kgd_mem **) mem_obj; 235 int r; 236 237 BUG_ON(kgd == NULL); 238 BUG_ON(gpu_addr == NULL); 239 BUG_ON(cpu_ptr == NULL); 240 241 *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); 242 if ((*mem) == NULL) 243 return -ENOMEM; 244 245 r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, 246 RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo); 247 if (r) { 248 dev_err(rdev->dev, 249 "failed to allocate BO for amdkfd (%d)\n", r); 250 return r; 251 } 252 253 /* map the buffer */ 254 r = radeon_bo_reserve((*mem)->bo, true); 255 if (r) { 256 dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); 257 goto allocate_mem_reserve_bo_failed; 258 } 259 260 r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT, 261 &(*mem)->gpu_addr); 262 if (r) { 263 dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); 264 goto allocate_mem_pin_bo_failed; 265 } 266 *gpu_addr = (*mem)->gpu_addr; 267 268 r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); 269 if (r) { 270 dev_err(rdev->dev, 271 "(%d) failed to map bo to kernel for amdkfd\n", r); 272 goto allocate_mem_kmap_bo_failed; 273 } 274 *cpu_ptr = (*mem)->cpu_ptr; 275 276 radeon_bo_unreserve((*mem)->bo); 277 278 return 0; 279 280allocate_mem_kmap_bo_failed: 281 radeon_bo_unpin((*mem)->bo); 282allocate_mem_pin_bo_failed: 283 radeon_bo_unreserve((*mem)->bo); 284allocate_mem_reserve_bo_failed: 285 radeon_bo_unref(&(*mem)->bo); 286 287 return r; 288} 289 290static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) 291{ 292 struct kgd_mem *mem = (struct kgd_mem *) mem_obj; 293 294 BUG_ON(mem == NULL); 295 296 radeon_bo_reserve(mem->bo, true); 297 radeon_bo_kunmap(mem->bo); 298 radeon_bo_unpin(mem->bo); 299 radeon_bo_unreserve(mem->bo); 300 radeon_bo_unref(&(mem->bo)); 301 kfree(mem); 302} 303 304static uint64_t get_vmem_size(struct kgd_dev *kgd) 305{ 306 struct radeon_device *rdev = (struct radeon_device *)kgd; 307 308 BUG_ON(kgd == NULL); 309 310 return rdev->mc.real_vram_size; 311} 312 313static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) 314{ 315 struct radeon_device *rdev = (struct radeon_device *)kgd; 316 317 return rdev->asic->get_gpu_clock_counter(rdev); 318} 319 320static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) 321{ 322 struct radeon_device *rdev = (struct radeon_device *)kgd; 323 324 /* The sclk is in quantas of 10kHz */ 325 return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; 326} 327 328static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd) 329{ 330 return (struct radeon_device *)kgd; 331} 332 333static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value) 334{ 335 struct radeon_device *rdev = get_radeon_device(kgd); 336 337 writel(value, (void __iomem *)(rdev->rmmio + offset)); 338} 339 340static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset) 341{ 342 struct radeon_device *rdev = get_radeon_device(kgd); 343 344 return readl((void __iomem *)(rdev->rmmio + offset)); 345} 346 347static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, 348 uint32_t queue, uint32_t vmid) 349{ 350 struct radeon_device *rdev = get_radeon_device(kgd); 351 uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); 352 353 mutex_lock(&rdev->srbm_mutex); 354 write_register(kgd, SRBM_GFX_CNTL, value); 355} 356 357static void unlock_srbm(struct kgd_dev *kgd) 358{ 359 struct radeon_device *rdev = get_radeon_device(kgd); 360 361 write_register(kgd, SRBM_GFX_CNTL, 0); 362 mutex_unlock(&rdev->srbm_mutex); 363} 364 365static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, 366 uint32_t queue_id) 367{ 368 uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; 369 uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); 370 371 lock_srbm(kgd, mec, pipe, queue_id, 0); 372} 373 374static void release_queue(struct kgd_dev *kgd) 375{ 376 unlock_srbm(kgd); 377} 378 379static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, 380 uint32_t sh_mem_config, 381 uint32_t sh_mem_ape1_base, 382 uint32_t sh_mem_ape1_limit, 383 uint32_t sh_mem_bases) 384{ 385 lock_srbm(kgd, 0, 0, 0, vmid); 386 387 write_register(kgd, SH_MEM_CONFIG, sh_mem_config); 388 write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base); 389 write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit); 390 write_register(kgd, SH_MEM_BASES, sh_mem_bases); 391 392 unlock_srbm(kgd); 393} 394 395static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, 396 unsigned int vmid) 397{ 398 /* 399 * We have to assume that there is no outstanding mapping. 400 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 401 * because a mapping is in progress or because a mapping finished and 402 * the SW cleared it. 403 * So the protocol is to always wait & clear. 404 */ 405 uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | 406 ATC_VMID_PASID_MAPPING_VALID_MASK; 407 408 write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), 409 pasid_mapping); 410 411 while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & 412 (1U << vmid))) 413 cpu_relax(); 414 write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); 415 416 /* Mapping vmid to pasid also for IH block */ 417 write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), 418 pasid_mapping); 419 420 return 0; 421} 422 423static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, 424 uint32_t hpd_size, uint64_t hpd_gpu_addr) 425{ 426 uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; 427 uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); 428 429 lock_srbm(kgd, mec, pipe, 0, 0); 430 write_register(kgd, CP_HPD_EOP_BASE_ADDR, 431 lower_32_bits(hpd_gpu_addr >> 8)); 432 write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI, 433 upper_32_bits(hpd_gpu_addr >> 8)); 434 write_register(kgd, CP_HPD_EOP_VMID, 0); 435 write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size); 436 unlock_srbm(kgd); 437 438 return 0; 439} 440 441static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) 442{ 443 uint32_t mec; 444 uint32_t pipe; 445 446 mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; 447 pipe = (pipe_id % CIK_PIPE_PER_MEC); 448 449 lock_srbm(kgd, mec, pipe, 0, 0); 450 451 write_register(kgd, CPC_INT_CNTL, 452 TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE); 453 454 unlock_srbm(kgd); 455 456 return 0; 457} 458 459static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) 460{ 461 uint32_t retval; 462 463 retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + 464 m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; 465 466 pr_debug("kfd: sdma base address: 0x%x\n", retval); 467 468 return retval; 469} 470 471static inline struct cik_mqd *get_mqd(void *mqd) 472{ 473 return (struct cik_mqd *)mqd; 474} 475 476static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) 477{ 478 return (struct cik_sdma_rlc_registers *)mqd; 479} 480 481static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 482 uint32_t queue_id, uint32_t __user *wptr) 483{ 484 uint32_t wptr_shadow, is_wptr_shadow_valid; 485 struct cik_mqd *m; 486 487 m = get_mqd(mqd); 488 489 is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); 490 491 acquire_queue(kgd, pipe_id, queue_id); 492 write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); 493 write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); 494 write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control); 495 496 write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); 497 write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); 498 write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); 499 500 write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control); 501 write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); 502 write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); 503 504 write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); 505 506 write_register(kgd, CP_HQD_PERSISTENT_STATE, 507 m->cp_hqd_persistent_state); 508 write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); 509 write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type); 510 511 write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO, 512 m->cp_hqd_atomic0_preop_lo); 513 514 write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI, 515 m->cp_hqd_atomic0_preop_hi); 516 517 write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO, 518 m->cp_hqd_atomic1_preop_lo); 519 520 write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI, 521 m->cp_hqd_atomic1_preop_hi); 522 523 write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR, 524 m->cp_hqd_pq_rptr_report_addr_lo); 525 526 write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 527 m->cp_hqd_pq_rptr_report_addr_hi); 528 529 write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); 530 531 write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR, 532 m->cp_hqd_pq_wptr_poll_addr_lo); 533 534 write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI, 535 m->cp_hqd_pq_wptr_poll_addr_hi); 536 537 write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 538 m->cp_hqd_pq_doorbell_control); 539 540 write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid); 541 542 write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum); 543 544 write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); 545 write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); 546 547 write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); 548 549 if (is_wptr_shadow_valid) 550 write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow); 551 552 write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active); 553 release_queue(kgd); 554 555 return 0; 556} 557 558static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) 559{ 560 struct cik_sdma_rlc_registers *m; 561 uint32_t sdma_base_addr; 562 563 m = get_sdma_mqd(mqd); 564 sdma_base_addr = get_sdma_base_addr(m); 565 566 write_register(kgd, 567 sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR, 568 m->sdma_rlc_virtual_addr); 569 570 write_register(kgd, 571 sdma_base_addr + SDMA0_RLC0_RB_BASE, 572 m->sdma_rlc_rb_base); 573 574 write_register(kgd, 575 sdma_base_addr + SDMA0_RLC0_RB_BASE_HI, 576 m->sdma_rlc_rb_base_hi); 577 578 write_register(kgd, 579 sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO, 580 m->sdma_rlc_rb_rptr_addr_lo); 581 582 write_register(kgd, 583 sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI, 584 m->sdma_rlc_rb_rptr_addr_hi); 585 586 write_register(kgd, 587 sdma_base_addr + SDMA0_RLC0_DOORBELL, 588 m->sdma_rlc_doorbell); 589 590 write_register(kgd, 591 sdma_base_addr + SDMA0_RLC0_RB_CNTL, 592 m->sdma_rlc_rb_cntl); 593 594 return 0; 595} 596 597static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 598 uint32_t pipe_id, uint32_t queue_id) 599{ 600 uint32_t act; 601 bool retval = false; 602 uint32_t low, high; 603 604 acquire_queue(kgd, pipe_id, queue_id); 605 act = read_register(kgd, CP_HQD_ACTIVE); 606 if (act) { 607 low = lower_32_bits(queue_address >> 8); 608 high = upper_32_bits(queue_address >> 8); 609 610 if (low == read_register(kgd, CP_HQD_PQ_BASE) && 611 high == read_register(kgd, CP_HQD_PQ_BASE_HI)) 612 retval = true; 613 } 614 release_queue(kgd); 615 return retval; 616} 617 618static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) 619{ 620 struct cik_sdma_rlc_registers *m; 621 uint32_t sdma_base_addr; 622 uint32_t sdma_rlc_rb_cntl; 623 624 m = get_sdma_mqd(mqd); 625 sdma_base_addr = get_sdma_base_addr(m); 626 627 sdma_rlc_rb_cntl = read_register(kgd, 628 sdma_base_addr + SDMA0_RLC0_RB_CNTL); 629 630 if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE) 631 return true; 632 633 return false; 634} 635 636static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 637 unsigned int timeout, uint32_t pipe_id, 638 uint32_t queue_id) 639{ 640 uint32_t temp; 641 642 acquire_queue(kgd, pipe_id, queue_id); 643 write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0); 644 645 write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type); 646 647 while (true) { 648 temp = read_register(kgd, CP_HQD_ACTIVE); 649 if (temp & 0x1) 650 break; 651 if (timeout == 0) { 652 pr_err("kfd: cp queue preemption time out (%dms)\n", 653 temp); 654 release_queue(kgd); 655 return -ETIME; 656 } 657 msleep(20); 658 timeout -= 20; 659 } 660 661 release_queue(kgd); 662 return 0; 663} 664 665static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 666 unsigned int timeout) 667{ 668 struct cik_sdma_rlc_registers *m; 669 uint32_t sdma_base_addr; 670 uint32_t temp; 671 672 m = get_sdma_mqd(mqd); 673 sdma_base_addr = get_sdma_base_addr(m); 674 675 temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL); 676 temp = temp & ~SDMA_RB_ENABLE; 677 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp); 678 679 while (true) { 680 temp = read_register(kgd, sdma_base_addr + 681 SDMA0_RLC0_CONTEXT_STATUS); 682 if (temp & SDMA_RLC_IDLE) 683 break; 684 if (timeout == 0) 685 return -ETIME; 686 msleep(20); 687 timeout -= 20; 688 } 689 690 write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0); 691 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0); 692 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0); 693 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0); 694 695 return 0; 696} 697 698static int kgd_address_watch_disable(struct kgd_dev *kgd) 699{ 700 union TCP_WATCH_CNTL_BITS cntl; 701 unsigned int i; 702 703 cntl.u32All = 0; 704 705 cntl.bitfields.valid = 0; 706 cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 707 cntl.bitfields.atc = 1; 708 709 /* Turning off this address until we set all the registers */ 710 for (i = 0; i < MAX_WATCH_ADDRESSES; i++) 711 write_register(kgd, 712 watchRegs[i * ADDRESS_WATCH_REG_MAX + 713 ADDRESS_WATCH_REG_CNTL], 714 cntl.u32All); 715 716 return 0; 717} 718 719static int kgd_address_watch_execute(struct kgd_dev *kgd, 720 unsigned int watch_point_id, 721 uint32_t cntl_val, 722 uint32_t addr_hi, 723 uint32_t addr_lo) 724{ 725 union TCP_WATCH_CNTL_BITS cntl; 726 727 cntl.u32All = cntl_val; 728 729 /* Turning off this watch point until we set all the registers */ 730 cntl.bitfields.valid = 0; 731 write_register(kgd, 732 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 733 ADDRESS_WATCH_REG_CNTL], 734 cntl.u32All); 735 736 write_register(kgd, 737 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 738 ADDRESS_WATCH_REG_ADDR_HI], 739 addr_hi); 740 741 write_register(kgd, 742 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 743 ADDRESS_WATCH_REG_ADDR_LO], 744 addr_lo); 745 746 /* Enable the watch point */ 747 cntl.bitfields.valid = 1; 748 749 write_register(kgd, 750 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 751 ADDRESS_WATCH_REG_CNTL], 752 cntl.u32All); 753 754 return 0; 755} 756 757static int kgd_wave_control_execute(struct kgd_dev *kgd, 758 uint32_t gfx_index_val, 759 uint32_t sq_cmd) 760{ 761 struct radeon_device *rdev = get_radeon_device(kgd); 762 uint32_t data; 763 764 mutex_lock(&rdev->grbm_idx_mutex); 765 766 write_register(kgd, GRBM_GFX_INDEX, gfx_index_val); 767 write_register(kgd, SQ_CMD, sq_cmd); 768 769 /* Restore the GRBM_GFX_INDEX register */ 770 771 data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 772 SE_BROADCAST_WRITES; 773 774 write_register(kgd, GRBM_GFX_INDEX, data); 775 776 mutex_unlock(&rdev->grbm_idx_mutex); 777 778 return 0; 779} 780 781static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, 782 unsigned int watch_point_id, 783 unsigned int reg_offset) 784{ 785 return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; 786} 787 788static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) 789{ 790 uint32_t reg; 791 struct radeon_device *rdev = (struct radeon_device *) kgd; 792 793 reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); 794 return reg & ATC_VMID_PASID_MAPPING_VALID_MASK; 795} 796 797static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 798 uint8_t vmid) 799{ 800 uint32_t reg; 801 struct radeon_device *rdev = (struct radeon_device *) kgd; 802 803 reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); 804 return reg & ATC_VMID_PASID_MAPPING_PASID_MASK; 805} 806 807static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) 808{ 809 struct radeon_device *rdev = (struct radeon_device *) kgd; 810 811 return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid); 812} 813 814static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) 815{ 816 struct radeon_device *rdev = (struct radeon_device *) kgd; 817 const union radeon_firmware_header *hdr; 818 819 BUG_ON(kgd == NULL || rdev->mec_fw == NULL); 820 821 switch (type) { 822 case KGD_ENGINE_PFP: 823 hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data; 824 break; 825 826 case KGD_ENGINE_ME: 827 hdr = (const union radeon_firmware_header *) rdev->me_fw->data; 828 break; 829 830 case KGD_ENGINE_CE: 831 hdr = (const union radeon_firmware_header *) rdev->ce_fw->data; 832 break; 833 834 case KGD_ENGINE_MEC1: 835 hdr = (const union radeon_firmware_header *) rdev->mec_fw->data; 836 break; 837 838 case KGD_ENGINE_MEC2: 839 hdr = (const union radeon_firmware_header *) 840 rdev->mec2_fw->data; 841 break; 842 843 case KGD_ENGINE_RLC: 844 hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data; 845 break; 846 847 case KGD_ENGINE_SDMA1: 848 case KGD_ENGINE_SDMA2: 849 hdr = (const union radeon_firmware_header *) 850 rdev->sdma_fw->data; 851 break; 852 853 default: 854 return 0; 855 } 856 857 if (hdr == NULL) 858 return 0; 859 860 /* Only 12 bit in use*/ 861 return hdr->common.ucode_version; 862}