Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v4.5-rc5 863 lines 23 kB view raw
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include <linux/module.h> 24#include <linux/fdtable.h> 25#include <linux/uaccess.h> 26#include <drm/drmP.h> 27#include "radeon.h" 28#include "cikd.h" 29#include "cik_reg.h" 30#include "radeon_kfd.h" 31#include "radeon_ucode.h" 32#include <linux/firmware.h> 33#include "cik_structs.h" 34 35#define CIK_PIPE_PER_MEC (4) 36 37static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { 38 TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL, 39 TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL, 40 TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL, 41 TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL 42}; 43 44struct kgd_mem { 45 struct radeon_bo *bo; 46 uint64_t gpu_addr; 47 void *cpu_ptr; 48}; 49 50 51static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 52 void **mem_obj, uint64_t *gpu_addr, 53 void **cpu_ptr); 54 55static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); 56 57static uint64_t get_vmem_size(struct kgd_dev *kgd); 58static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); 59 60static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); 61static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); 62 63/* 64 * Register access functions 65 */ 66 67static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, 68 uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, 69 uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); 70 71static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, 72 unsigned int vmid); 73 74static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, 75 uint32_t hpd_size, uint64_t hpd_gpu_addr); 76static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); 77static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 78 uint32_t queue_id, uint32_t __user *wptr); 79static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); 80static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 81 uint32_t pipe_id, uint32_t queue_id); 82 83static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 84 unsigned int timeout, uint32_t pipe_id, 85 uint32_t queue_id); 86static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); 87static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 88 unsigned int timeout); 89static int kgd_address_watch_disable(struct kgd_dev *kgd); 90static int kgd_address_watch_execute(struct kgd_dev *kgd, 91 unsigned int watch_point_id, 92 uint32_t cntl_val, 93 uint32_t addr_hi, 94 uint32_t addr_lo); 95static int kgd_wave_control_execute(struct kgd_dev *kgd, 96 uint32_t gfx_index_val, 97 uint32_t sq_cmd); 98static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, 99 unsigned int watch_point_id, 100 unsigned int reg_offset); 101 102static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); 103static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 104 uint8_t vmid); 105static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); 106 107static const struct kfd2kgd_calls kfd2kgd = { 108 .init_gtt_mem_allocation = alloc_gtt_mem, 109 .free_gtt_mem = free_gtt_mem, 110 .get_vmem_size = get_vmem_size, 111 .get_gpu_clock_counter = get_gpu_clock_counter, 112 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, 113 .program_sh_mem_settings = kgd_program_sh_mem_settings, 114 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 115 .init_pipeline = kgd_init_pipeline, 116 .init_interrupts = kgd_init_interrupts, 117 .hqd_load = kgd_hqd_load, 118 .hqd_sdma_load = kgd_hqd_sdma_load, 119 .hqd_is_occupied = kgd_hqd_is_occupied, 120 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, 121 .hqd_destroy = kgd_hqd_destroy, 122 .hqd_sdma_destroy = kgd_hqd_sdma_destroy, 123 .address_watch_disable = kgd_address_watch_disable, 124 .address_watch_execute = kgd_address_watch_execute, 125 .wave_control_execute = kgd_wave_control_execute, 126 .address_watch_get_offset = kgd_address_watch_get_offset, 127 .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, 128 .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, 129 .write_vmid_invalidate_request = write_vmid_invalidate_request, 130 .get_fw_version = get_fw_version 131}; 132 133static const struct kgd2kfd_calls *kgd2kfd; 134 135bool radeon_kfd_init(void) 136{ 137#if defined(CONFIG_HSA_AMD_MODULE) 138 bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); 139 140 kgd2kfd_init_p = symbol_request(kgd2kfd_init); 141 142 if (kgd2kfd_init_p == NULL) 143 return false; 144 145 if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) { 146 symbol_put(kgd2kfd_init); 147 kgd2kfd = NULL; 148 149 return false; 150 } 151 152 return true; 153#elif defined(CONFIG_HSA_AMD) 154 if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) { 155 kgd2kfd = NULL; 156 157 return false; 158 } 159 160 return true; 161#else 162 return false; 163#endif 164} 165 166void radeon_kfd_fini(void) 167{ 168 if (kgd2kfd) { 169 kgd2kfd->exit(); 170 symbol_put(kgd2kfd_init); 171 } 172} 173 174void radeon_kfd_device_probe(struct radeon_device *rdev) 175{ 176 if (kgd2kfd) 177 rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, 178 rdev->pdev, &kfd2kgd); 179} 180 181void radeon_kfd_device_init(struct radeon_device *rdev) 182{ 183 if (rdev->kfd) { 184 struct kgd2kfd_shared_resources gpu_resources = { 185 .compute_vmid_bitmap = 0xFF00, 186 187 .first_compute_pipe = 1, 188 .compute_pipe_count = 4 - 1, 189 }; 190 191 radeon_doorbell_get_kfd_info(rdev, 192 &gpu_resources.doorbell_physical_address, 193 &gpu_resources.doorbell_aperture_size, 194 &gpu_resources.doorbell_start_offset); 195 196 kgd2kfd->device_init(rdev->kfd, &gpu_resources); 197 } 198} 199 200void radeon_kfd_device_fini(struct radeon_device *rdev) 201{ 202 if (rdev->kfd) { 203 kgd2kfd->device_exit(rdev->kfd); 204 rdev->kfd = NULL; 205 } 206} 207 208void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry) 209{ 210 if (rdev->kfd) 211 kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); 212} 213 214void radeon_kfd_suspend(struct radeon_device *rdev) 215{ 216 if (rdev->kfd) 217 kgd2kfd->suspend(rdev->kfd); 218} 219 220int radeon_kfd_resume(struct radeon_device *rdev) 221{ 222 int r = 0; 223 224 if (rdev->kfd) 225 r = kgd2kfd->resume(rdev->kfd); 226 227 return r; 228} 229 230static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 231 void **mem_obj, uint64_t *gpu_addr, 232 void **cpu_ptr) 233{ 234 struct radeon_device *rdev = (struct radeon_device *)kgd; 235 struct kgd_mem **mem = (struct kgd_mem **) mem_obj; 236 int r; 237 238 BUG_ON(kgd == NULL); 239 BUG_ON(gpu_addr == NULL); 240 BUG_ON(cpu_ptr == NULL); 241 242 *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); 243 if ((*mem) == NULL) 244 return -ENOMEM; 245 246 r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, 247 RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo); 248 if (r) { 249 dev_err(rdev->dev, 250 "failed to allocate BO for amdkfd (%d)\n", r); 251 return r; 252 } 253 254 /* map the buffer */ 255 r = radeon_bo_reserve((*mem)->bo, true); 256 if (r) { 257 dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); 258 goto allocate_mem_reserve_bo_failed; 259 } 260 261 r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT, 262 &(*mem)->gpu_addr); 263 if (r) { 264 dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); 265 goto allocate_mem_pin_bo_failed; 266 } 267 *gpu_addr = (*mem)->gpu_addr; 268 269 r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); 270 if (r) { 271 dev_err(rdev->dev, 272 "(%d) failed to map bo to kernel for amdkfd\n", r); 273 goto allocate_mem_kmap_bo_failed; 274 } 275 *cpu_ptr = (*mem)->cpu_ptr; 276 277 radeon_bo_unreserve((*mem)->bo); 278 279 return 0; 280 281allocate_mem_kmap_bo_failed: 282 radeon_bo_unpin((*mem)->bo); 283allocate_mem_pin_bo_failed: 284 radeon_bo_unreserve((*mem)->bo); 285allocate_mem_reserve_bo_failed: 286 radeon_bo_unref(&(*mem)->bo); 287 288 return r; 289} 290 291static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) 292{ 293 struct kgd_mem *mem = (struct kgd_mem *) mem_obj; 294 295 BUG_ON(mem == NULL); 296 297 radeon_bo_reserve(mem->bo, true); 298 radeon_bo_kunmap(mem->bo); 299 radeon_bo_unpin(mem->bo); 300 radeon_bo_unreserve(mem->bo); 301 radeon_bo_unref(&(mem->bo)); 302 kfree(mem); 303} 304 305static uint64_t get_vmem_size(struct kgd_dev *kgd) 306{ 307 struct radeon_device *rdev = (struct radeon_device *)kgd; 308 309 BUG_ON(kgd == NULL); 310 311 return rdev->mc.real_vram_size; 312} 313 314static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) 315{ 316 struct radeon_device *rdev = (struct radeon_device *)kgd; 317 318 return rdev->asic->get_gpu_clock_counter(rdev); 319} 320 321static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) 322{ 323 struct radeon_device *rdev = (struct radeon_device *)kgd; 324 325 /* The sclk is in quantas of 10kHz */ 326 return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; 327} 328 329static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd) 330{ 331 return (struct radeon_device *)kgd; 332} 333 334static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value) 335{ 336 struct radeon_device *rdev = get_radeon_device(kgd); 337 338 writel(value, (void __iomem *)(rdev->rmmio + offset)); 339} 340 341static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset) 342{ 343 struct radeon_device *rdev = get_radeon_device(kgd); 344 345 return readl((void __iomem *)(rdev->rmmio + offset)); 346} 347 348static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, 349 uint32_t queue, uint32_t vmid) 350{ 351 struct radeon_device *rdev = get_radeon_device(kgd); 352 uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); 353 354 mutex_lock(&rdev->srbm_mutex); 355 write_register(kgd, SRBM_GFX_CNTL, value); 356} 357 358static void unlock_srbm(struct kgd_dev *kgd) 359{ 360 struct radeon_device *rdev = get_radeon_device(kgd); 361 362 write_register(kgd, SRBM_GFX_CNTL, 0); 363 mutex_unlock(&rdev->srbm_mutex); 364} 365 366static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, 367 uint32_t queue_id) 368{ 369 uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; 370 uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); 371 372 lock_srbm(kgd, mec, pipe, queue_id, 0); 373} 374 375static void release_queue(struct kgd_dev *kgd) 376{ 377 unlock_srbm(kgd); 378} 379 380static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, 381 uint32_t sh_mem_config, 382 uint32_t sh_mem_ape1_base, 383 uint32_t sh_mem_ape1_limit, 384 uint32_t sh_mem_bases) 385{ 386 lock_srbm(kgd, 0, 0, 0, vmid); 387 388 write_register(kgd, SH_MEM_CONFIG, sh_mem_config); 389 write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base); 390 write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit); 391 write_register(kgd, SH_MEM_BASES, sh_mem_bases); 392 393 unlock_srbm(kgd); 394} 395 396static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, 397 unsigned int vmid) 398{ 399 /* 400 * We have to assume that there is no outstanding mapping. 401 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 402 * because a mapping is in progress or because a mapping finished and 403 * the SW cleared it. 404 * So the protocol is to always wait & clear. 405 */ 406 uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | 407 ATC_VMID_PASID_MAPPING_VALID_MASK; 408 409 write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), 410 pasid_mapping); 411 412 while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & 413 (1U << vmid))) 414 cpu_relax(); 415 write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); 416 417 /* Mapping vmid to pasid also for IH block */ 418 write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), 419 pasid_mapping); 420 421 return 0; 422} 423 424static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, 425 uint32_t hpd_size, uint64_t hpd_gpu_addr) 426{ 427 uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; 428 uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); 429 430 lock_srbm(kgd, mec, pipe, 0, 0); 431 write_register(kgd, CP_HPD_EOP_BASE_ADDR, 432 lower_32_bits(hpd_gpu_addr >> 8)); 433 write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI, 434 upper_32_bits(hpd_gpu_addr >> 8)); 435 write_register(kgd, CP_HPD_EOP_VMID, 0); 436 write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size); 437 unlock_srbm(kgd); 438 439 return 0; 440} 441 442static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) 443{ 444 uint32_t mec; 445 uint32_t pipe; 446 447 mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; 448 pipe = (pipe_id % CIK_PIPE_PER_MEC); 449 450 lock_srbm(kgd, mec, pipe, 0, 0); 451 452 write_register(kgd, CPC_INT_CNTL, 453 TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE); 454 455 unlock_srbm(kgd); 456 457 return 0; 458} 459 460static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) 461{ 462 uint32_t retval; 463 464 retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + 465 m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; 466 467 pr_debug("kfd: sdma base address: 0x%x\n", retval); 468 469 return retval; 470} 471 472static inline struct cik_mqd *get_mqd(void *mqd) 473{ 474 return (struct cik_mqd *)mqd; 475} 476 477static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) 478{ 479 return (struct cik_sdma_rlc_registers *)mqd; 480} 481 482static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 483 uint32_t queue_id, uint32_t __user *wptr) 484{ 485 uint32_t wptr_shadow, is_wptr_shadow_valid; 486 struct cik_mqd *m; 487 488 m = get_mqd(mqd); 489 490 is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); 491 492 acquire_queue(kgd, pipe_id, queue_id); 493 write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); 494 write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); 495 write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control); 496 497 write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); 498 write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); 499 write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); 500 501 write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control); 502 write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); 503 write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); 504 505 write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); 506 507 write_register(kgd, CP_HQD_PERSISTENT_STATE, 508 m->cp_hqd_persistent_state); 509 write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); 510 write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type); 511 512 write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO, 513 m->cp_hqd_atomic0_preop_lo); 514 515 write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI, 516 m->cp_hqd_atomic0_preop_hi); 517 518 write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO, 519 m->cp_hqd_atomic1_preop_lo); 520 521 write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI, 522 m->cp_hqd_atomic1_preop_hi); 523 524 write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR, 525 m->cp_hqd_pq_rptr_report_addr_lo); 526 527 write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 528 m->cp_hqd_pq_rptr_report_addr_hi); 529 530 write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); 531 532 write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR, 533 m->cp_hqd_pq_wptr_poll_addr_lo); 534 535 write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI, 536 m->cp_hqd_pq_wptr_poll_addr_hi); 537 538 write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 539 m->cp_hqd_pq_doorbell_control); 540 541 write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid); 542 543 write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum); 544 545 write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); 546 write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); 547 548 write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); 549 550 if (is_wptr_shadow_valid) 551 write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow); 552 553 write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active); 554 release_queue(kgd); 555 556 return 0; 557} 558 559static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) 560{ 561 struct cik_sdma_rlc_registers *m; 562 uint32_t sdma_base_addr; 563 564 m = get_sdma_mqd(mqd); 565 sdma_base_addr = get_sdma_base_addr(m); 566 567 write_register(kgd, 568 sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR, 569 m->sdma_rlc_virtual_addr); 570 571 write_register(kgd, 572 sdma_base_addr + SDMA0_RLC0_RB_BASE, 573 m->sdma_rlc_rb_base); 574 575 write_register(kgd, 576 sdma_base_addr + SDMA0_RLC0_RB_BASE_HI, 577 m->sdma_rlc_rb_base_hi); 578 579 write_register(kgd, 580 sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO, 581 m->sdma_rlc_rb_rptr_addr_lo); 582 583 write_register(kgd, 584 sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI, 585 m->sdma_rlc_rb_rptr_addr_hi); 586 587 write_register(kgd, 588 sdma_base_addr + SDMA0_RLC0_DOORBELL, 589 m->sdma_rlc_doorbell); 590 591 write_register(kgd, 592 sdma_base_addr + SDMA0_RLC0_RB_CNTL, 593 m->sdma_rlc_rb_cntl); 594 595 return 0; 596} 597 598static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 599 uint32_t pipe_id, uint32_t queue_id) 600{ 601 uint32_t act; 602 bool retval = false; 603 uint32_t low, high; 604 605 acquire_queue(kgd, pipe_id, queue_id); 606 act = read_register(kgd, CP_HQD_ACTIVE); 607 if (act) { 608 low = lower_32_bits(queue_address >> 8); 609 high = upper_32_bits(queue_address >> 8); 610 611 if (low == read_register(kgd, CP_HQD_PQ_BASE) && 612 high == read_register(kgd, CP_HQD_PQ_BASE_HI)) 613 retval = true; 614 } 615 release_queue(kgd); 616 return retval; 617} 618 619static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) 620{ 621 struct cik_sdma_rlc_registers *m; 622 uint32_t sdma_base_addr; 623 uint32_t sdma_rlc_rb_cntl; 624 625 m = get_sdma_mqd(mqd); 626 sdma_base_addr = get_sdma_base_addr(m); 627 628 sdma_rlc_rb_cntl = read_register(kgd, 629 sdma_base_addr + SDMA0_RLC0_RB_CNTL); 630 631 if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE) 632 return true; 633 634 return false; 635} 636 637static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 638 unsigned int timeout, uint32_t pipe_id, 639 uint32_t queue_id) 640{ 641 uint32_t temp; 642 643 acquire_queue(kgd, pipe_id, queue_id); 644 write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0); 645 646 write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type); 647 648 while (true) { 649 temp = read_register(kgd, CP_HQD_ACTIVE); 650 if (temp & 0x1) 651 break; 652 if (timeout == 0) { 653 pr_err("kfd: cp queue preemption time out (%dms)\n", 654 temp); 655 release_queue(kgd); 656 return -ETIME; 657 } 658 msleep(20); 659 timeout -= 20; 660 } 661 662 release_queue(kgd); 663 return 0; 664} 665 666static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 667 unsigned int timeout) 668{ 669 struct cik_sdma_rlc_registers *m; 670 uint32_t sdma_base_addr; 671 uint32_t temp; 672 673 m = get_sdma_mqd(mqd); 674 sdma_base_addr = get_sdma_base_addr(m); 675 676 temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL); 677 temp = temp & ~SDMA_RB_ENABLE; 678 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp); 679 680 while (true) { 681 temp = read_register(kgd, sdma_base_addr + 682 SDMA0_RLC0_CONTEXT_STATUS); 683 if (temp & SDMA_RLC_IDLE) 684 break; 685 if (timeout == 0) 686 return -ETIME; 687 msleep(20); 688 timeout -= 20; 689 } 690 691 write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0); 692 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0); 693 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0); 694 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0); 695 696 return 0; 697} 698 699static int kgd_address_watch_disable(struct kgd_dev *kgd) 700{ 701 union TCP_WATCH_CNTL_BITS cntl; 702 unsigned int i; 703 704 cntl.u32All = 0; 705 706 cntl.bitfields.valid = 0; 707 cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 708 cntl.bitfields.atc = 1; 709 710 /* Turning off this address until we set all the registers */ 711 for (i = 0; i < MAX_WATCH_ADDRESSES; i++) 712 write_register(kgd, 713 watchRegs[i * ADDRESS_WATCH_REG_MAX + 714 ADDRESS_WATCH_REG_CNTL], 715 cntl.u32All); 716 717 return 0; 718} 719 720static int kgd_address_watch_execute(struct kgd_dev *kgd, 721 unsigned int watch_point_id, 722 uint32_t cntl_val, 723 uint32_t addr_hi, 724 uint32_t addr_lo) 725{ 726 union TCP_WATCH_CNTL_BITS cntl; 727 728 cntl.u32All = cntl_val; 729 730 /* Turning off this watch point until we set all the registers */ 731 cntl.bitfields.valid = 0; 732 write_register(kgd, 733 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 734 ADDRESS_WATCH_REG_CNTL], 735 cntl.u32All); 736 737 write_register(kgd, 738 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 739 ADDRESS_WATCH_REG_ADDR_HI], 740 addr_hi); 741 742 write_register(kgd, 743 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 744 ADDRESS_WATCH_REG_ADDR_LO], 745 addr_lo); 746 747 /* Enable the watch point */ 748 cntl.bitfields.valid = 1; 749 750 write_register(kgd, 751 watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 752 ADDRESS_WATCH_REG_CNTL], 753 cntl.u32All); 754 755 return 0; 756} 757 758static int kgd_wave_control_execute(struct kgd_dev *kgd, 759 uint32_t gfx_index_val, 760 uint32_t sq_cmd) 761{ 762 struct radeon_device *rdev = get_radeon_device(kgd); 763 uint32_t data; 764 765 mutex_lock(&rdev->grbm_idx_mutex); 766 767 write_register(kgd, GRBM_GFX_INDEX, gfx_index_val); 768 write_register(kgd, SQ_CMD, sq_cmd); 769 770 /* Restore the GRBM_GFX_INDEX register */ 771 772 data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 773 SE_BROADCAST_WRITES; 774 775 write_register(kgd, GRBM_GFX_INDEX, data); 776 777 mutex_unlock(&rdev->grbm_idx_mutex); 778 779 return 0; 780} 781 782static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, 783 unsigned int watch_point_id, 784 unsigned int reg_offset) 785{ 786 return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; 787} 788 789static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) 790{ 791 uint32_t reg; 792 struct radeon_device *rdev = (struct radeon_device *) kgd; 793 794 reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); 795 return reg & ATC_VMID_PASID_MAPPING_VALID_MASK; 796} 797 798static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 799 uint8_t vmid) 800{ 801 uint32_t reg; 802 struct radeon_device *rdev = (struct radeon_device *) kgd; 803 804 reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); 805 return reg & ATC_VMID_PASID_MAPPING_PASID_MASK; 806} 807 808static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) 809{ 810 struct radeon_device *rdev = (struct radeon_device *) kgd; 811 812 return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid); 813} 814 815static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) 816{ 817 struct radeon_device *rdev = (struct radeon_device *) kgd; 818 const union radeon_firmware_header *hdr; 819 820 BUG_ON(kgd == NULL || rdev->mec_fw == NULL); 821 822 switch (type) { 823 case KGD_ENGINE_PFP: 824 hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data; 825 break; 826 827 case KGD_ENGINE_ME: 828 hdr = (const union radeon_firmware_header *) rdev->me_fw->data; 829 break; 830 831 case KGD_ENGINE_CE: 832 hdr = (const union radeon_firmware_header *) rdev->ce_fw->data; 833 break; 834 835 case KGD_ENGINE_MEC1: 836 hdr = (const union radeon_firmware_header *) rdev->mec_fw->data; 837 break; 838 839 case KGD_ENGINE_MEC2: 840 hdr = (const union radeon_firmware_header *) 841 rdev->mec2_fw->data; 842 break; 843 844 case KGD_ENGINE_RLC: 845 hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data; 846 break; 847 848 case KGD_ENGINE_SDMA1: 849 case KGD_ENGINE_SDMA2: 850 hdr = (const union radeon_firmware_header *) 851 rdev->sdma_fw->data; 852 break; 853 854 default: 855 return 0; 856 } 857 858 if (hdr == NULL) 859 return 0; 860 861 /* Only 12 bit in use*/ 862 return hdr->common.ucode_version; 863}