/* Source snapshot at v4.17 (846 lines, 24 kB). */
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 */ 23 24#include <linux/types.h> 25#include <linux/kernel.h> 26#include <linux/log2.h> 27#include <linux/sched.h> 28#include <linux/slab.h> 29#include <linux/mutex.h> 30#include <linux/device.h> 31 32#include "kfd_pm4_headers.h" 33#include "kfd_pm4_headers_diq.h" 34#include "kfd_kernel_queue.h" 35#include "kfd_priv.h" 36#include "kfd_pm4_opcodes.h" 37#include "cik_regs.h" 38#include "kfd_dbgmgr.h" 39#include "kfd_dbgdev.h" 40#include "kfd_device_queue_manager.h" 41#include "../../radeon/cik_reg.h" 42 43static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) 44{ 45 dev->kfd2kgd->address_watch_disable(dev->kgd); 46} 47 48static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, 49 unsigned int pasid, uint64_t vmid0_address, 50 uint32_t *packet_buff, size_t size_in_bytes) 51{ 52 struct pm4__release_mem *rm_packet; 53 struct pm4__indirect_buffer_pasid *ib_packet; 54 struct kfd_mem_obj *mem_obj; 55 size_t pq_packets_size_in_bytes; 56 union ULARGE_INTEGER *largep; 57 union ULARGE_INTEGER addr; 58 struct kernel_queue *kq; 59 uint64_t *rm_state; 60 unsigned int *ib_packet_buff; 61 int status; 62 63 if (WARN_ON(!size_in_bytes)) 64 return -EINVAL; 65 66 kq = dbgdev->kq; 67 68 pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + 69 sizeof(struct pm4__indirect_buffer_pasid); 70 71 /* 72 * We acquire a buffer from DIQ 73 * The receive packet buff will be sitting on the Indirect Buffer 74 * and in the PQ we put the IB packet + sync packet(s). 
75 */ 76 status = kq->ops.acquire_packet_buffer(kq, 77 pq_packets_size_in_bytes / sizeof(uint32_t), 78 &ib_packet_buff); 79 if (status) { 80 pr_err("acquire_packet_buffer failed\n"); 81 return status; 82 } 83 84 memset(ib_packet_buff, 0, pq_packets_size_in_bytes); 85 86 ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); 87 88 ib_packet->header.count = 3; 89 ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; 90 ib_packet->header.type = PM4_TYPE_3; 91 92 largep = (union ULARGE_INTEGER *) &vmid0_address; 93 94 ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; 95 ib_packet->bitfields3.ib_base_hi = largep->u.high_part; 96 97 ib_packet->control = (1 << 23) | (1 << 31) | 98 ((size_in_bytes / 4) & 0xfffff); 99 100 ib_packet->bitfields5.pasid = pasid; 101 102 /* 103 * for now we use release mem for GPU-CPU synchronization 104 * Consider WaitRegMem + WriteData as a better alternative 105 * we get a GART allocations ( gpu/cpu mapping), 106 * for the sync variable, and wait until: 107 * (a) Sync with HW 108 * (b) Sync var is written by CP to mem. 
109 */ 110 rm_packet = (struct pm4__release_mem *) (ib_packet_buff + 111 (sizeof(struct pm4__indirect_buffer_pasid) / 112 sizeof(unsigned int))); 113 114 status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), 115 &mem_obj); 116 117 if (status) { 118 pr_err("Failed to allocate GART memory\n"); 119 kq->ops.rollback_packet(kq); 120 return status; 121 } 122 123 rm_state = (uint64_t *) mem_obj->cpu_ptr; 124 125 *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; 126 127 rm_packet->header.opcode = IT_RELEASE_MEM; 128 rm_packet->header.type = PM4_TYPE_3; 129 rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; 130 131 rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; 132 rm_packet->bitfields2.event_index = 133 event_index___release_mem__end_of_pipe; 134 135 rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; 136 rm_packet->bitfields2.atc = 0; 137 rm_packet->bitfields2.tc_wb_action_ena = 1; 138 139 addr.quad_part = mem_obj->gpu_addr; 140 141 rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; 142 rm_packet->address_hi = addr.u.high_part; 143 144 rm_packet->bitfields3.data_sel = 145 data_sel___release_mem__send_64_bit_data; 146 147 rm_packet->bitfields3.int_sel = 148 int_sel___release_mem__send_data_after_write_confirm; 149 150 rm_packet->bitfields3.dst_sel = 151 dst_sel___release_mem__memory_controller; 152 153 rm_packet->data_lo = QUEUESTATE__ACTIVE; 154 155 kq->ops.submit_packet(kq); 156 157 /* Wait till CP writes sync code: */ 158 status = amdkfd_fence_wait_timeout( 159 (unsigned int *) rm_state, 160 QUEUESTATE__ACTIVE, 1500); 161 162 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 163 164 return status; 165} 166 167static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) 168{ 169 /* 170 * no action is needed in this case, 171 * just make sure diq will not be used 172 */ 173 174 dbgdev->kq = NULL; 175 176 return 0; 177} 178 179static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) 180{ 181 struct queue_properties 
properties; 182 unsigned int qid; 183 struct kernel_queue *kq = NULL; 184 int status; 185 186 properties.type = KFD_QUEUE_TYPE_DIQ; 187 188 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, 189 &properties, &qid); 190 191 if (status) { 192 pr_err("Failed to create DIQ\n"); 193 return status; 194 } 195 196 pr_debug("DIQ Created with queue id: %d\n", qid); 197 198 kq = pqm_get_kernel_queue(dbgdev->pqm, qid); 199 200 if (!kq) { 201 pr_err("Error getting DIQ\n"); 202 pqm_destroy_queue(dbgdev->pqm, qid); 203 return -EFAULT; 204 } 205 206 dbgdev->kq = kq; 207 208 return status; 209} 210 211static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) 212{ 213 /* disable watch address */ 214 dbgdev_address_watch_disable_nodiq(dbgdev->dev); 215 return 0; 216} 217 218static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) 219{ 220 /* todo - disable address watch */ 221 int status; 222 223 status = pqm_destroy_queue(dbgdev->pqm, 224 dbgdev->kq->queue->properties.queue_id); 225 dbgdev->kq = NULL; 226 227 return status; 228} 229 230static void dbgdev_address_watch_set_registers( 231 const struct dbg_address_watch_info *adw_info, 232 union TCP_WATCH_ADDR_H_BITS *addrHi, 233 union TCP_WATCH_ADDR_L_BITS *addrLo, 234 union TCP_WATCH_CNTL_BITS *cntl, 235 unsigned int index, unsigned int vmid) 236{ 237 union ULARGE_INTEGER addr; 238 239 addr.quad_part = 0; 240 addrHi->u32All = 0; 241 addrLo->u32All = 0; 242 cntl->u32All = 0; 243 244 if (adw_info->watch_mask) 245 cntl->bitfields.mask = 246 (uint32_t) (adw_info->watch_mask[index] & 247 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); 248 else 249 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 250 251 addr.quad_part = (unsigned long long) adw_info->watch_address[index]; 252 253 addrHi->bitfields.addr = addr.u.high_part & 254 ADDRESS_WATCH_REG_ADDHIGH_MASK; 255 addrLo->bitfields.addr = 256 (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); 257 258 cntl->bitfields.mode = adw_info->watch_mode[index]; 259 
cntl->bitfields.vmid = (uint32_t) vmid; 260 /* for now assume it is an ATC address */ 261 cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; 262 263 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); 264 pr_debug("\t\t%20s %08x\n", "set reg add high :", 265 addrHi->bitfields.addr); 266 pr_debug("\t\t%20s %08x\n", "set reg add low :", 267 addrLo->bitfields.addr); 268} 269 270static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, 271 struct dbg_address_watch_info *adw_info) 272{ 273 union TCP_WATCH_ADDR_H_BITS addrHi; 274 union TCP_WATCH_ADDR_L_BITS addrLo; 275 union TCP_WATCH_CNTL_BITS cntl; 276 struct kfd_process_device *pdd; 277 unsigned int i; 278 279 /* taking the vmid for that process on the safe way using pdd */ 280 pdd = kfd_get_process_device_data(dbgdev->dev, 281 adw_info->process); 282 if (!pdd) { 283 pr_err("Failed to get pdd for wave control no DIQ\n"); 284 return -EFAULT; 285 } 286 287 addrHi.u32All = 0; 288 addrLo.u32All = 0; 289 cntl.u32All = 0; 290 291 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 292 (adw_info->num_watch_points == 0)) { 293 pr_err("num_watch_points is invalid\n"); 294 return -EINVAL; 295 } 296 297 if (!adw_info->watch_mode || !adw_info->watch_address) { 298 pr_err("adw_info fields are not valid\n"); 299 return -EINVAL; 300 } 301 302 for (i = 0; i < adw_info->num_watch_points; i++) { 303 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, 304 &cntl, i, pdd->qpd.vmid); 305 306 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 307 pr_debug("\t\t%20s %08x\n", "register index :", i); 308 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); 309 pr_debug("\t\t%20s %08x\n", "Address Low is :", 310 addrLo.bitfields.addr); 311 pr_debug("\t\t%20s %08x\n", "Address high is :", 312 addrHi.bitfields.addr); 313 pr_debug("\t\t%20s %08x\n", "Address high is :", 314 addrHi.bitfields.addr); 315 pr_debug("\t\t%20s %08x\n", "Control Mask is :", 316 cntl.bitfields.mask); 317 
pr_debug("\t\t%20s %08x\n", "Control Mode is :", 318 cntl.bitfields.mode); 319 pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 320 cntl.bitfields.vmid); 321 pr_debug("\t\t%20s %08x\n", "Control atc is :", 322 cntl.bitfields.atc); 323 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 324 325 pdd->dev->kfd2kgd->address_watch_execute( 326 dbgdev->dev->kgd, 327 i, 328 cntl.u32All, 329 addrHi.u32All, 330 addrLo.u32All); 331 } 332 333 return 0; 334} 335 336static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, 337 struct dbg_address_watch_info *adw_info) 338{ 339 struct pm4__set_config_reg *packets_vec; 340 union TCP_WATCH_ADDR_H_BITS addrHi; 341 union TCP_WATCH_ADDR_L_BITS addrLo; 342 union TCP_WATCH_CNTL_BITS cntl; 343 struct kfd_mem_obj *mem_obj; 344 unsigned int aw_reg_add_dword; 345 uint32_t *packet_buff_uint; 346 unsigned int i; 347 int status; 348 size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; 349 /* we do not control the vmid in DIQ mode, just a place holder */ 350 unsigned int vmid = 0; 351 352 addrHi.u32All = 0; 353 addrLo.u32All = 0; 354 cntl.u32All = 0; 355 356 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 357 (adw_info->num_watch_points == 0)) { 358 pr_err("num_watch_points is invalid\n"); 359 return -EINVAL; 360 } 361 362 if (!adw_info->watch_mode || !adw_info->watch_address) { 363 pr_err("adw_info fields are not valid\n"); 364 return -EINVAL; 365 } 366 367 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 368 369 if (status) { 370 pr_err("Failed to allocate GART memory\n"); 371 return status; 372 } 373 374 packet_buff_uint = mem_obj->cpu_ptr; 375 376 memset(packet_buff_uint, 0, ib_size); 377 378 packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); 379 380 packets_vec[0].header.count = 1; 381 packets_vec[0].header.opcode = IT_SET_CONFIG_REG; 382 packets_vec[0].header.type = PM4_TYPE_3; 383 packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 384 
packets_vec[0].bitfields2.insert_vmid = 1; 385 packets_vec[1].ordinal1 = packets_vec[0].ordinal1; 386 packets_vec[1].bitfields2.insert_vmid = 0; 387 packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 388 packets_vec[2].bitfields2.insert_vmid = 0; 389 packets_vec[3].ordinal1 = packets_vec[0].ordinal1; 390 packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 391 packets_vec[3].bitfields2.insert_vmid = 1; 392 393 for (i = 0; i < adw_info->num_watch_points; i++) { 394 dbgdev_address_watch_set_registers(adw_info, 395 &addrHi, 396 &addrLo, 397 &cntl, 398 i, 399 vmid); 400 401 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 402 pr_debug("\t\t%20s %08x\n", "register index :", i); 403 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); 404 pr_debug("\t\t%20s %p\n", "Add ptr is :", 405 adw_info->watch_address); 406 pr_debug("\t\t%20s %08llx\n", "Add is :", 407 adw_info->watch_address[i]); 408 pr_debug("\t\t%20s %08x\n", "Address Low is :", 409 addrLo.bitfields.addr); 410 pr_debug("\t\t%20s %08x\n", "Address high is :", 411 addrHi.bitfields.addr); 412 pr_debug("\t\t%20s %08x\n", "Control Mask is :", 413 cntl.bitfields.mask); 414 pr_debug("\t\t%20s %08x\n", "Control Mode is :", 415 cntl.bitfields.mode); 416 pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 417 cntl.bitfields.vmid); 418 pr_debug("\t\t%20s %08x\n", "Control atc is :", 419 cntl.bitfields.atc); 420 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 421 422 aw_reg_add_dword = 423 dbgdev->dev->kfd2kgd->address_watch_get_offset( 424 dbgdev->dev->kgd, 425 i, 426 ADDRESS_WATCH_REG_CNTL); 427 428 packets_vec[0].bitfields2.reg_offset = 429 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 430 431 packets_vec[0].reg_data[0] = cntl.u32All; 432 433 aw_reg_add_dword = 434 dbgdev->dev->kfd2kgd->address_watch_get_offset( 435 dbgdev->dev->kgd, 436 i, 437 ADDRESS_WATCH_REG_ADDR_HI); 438 439 packets_vec[1].bitfields2.reg_offset = 440 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 441 packets_vec[1].reg_data[0] = 
addrHi.u32All; 442 443 aw_reg_add_dword = 444 dbgdev->dev->kfd2kgd->address_watch_get_offset( 445 dbgdev->dev->kgd, 446 i, 447 ADDRESS_WATCH_REG_ADDR_LO); 448 449 packets_vec[2].bitfields2.reg_offset = 450 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 451 packets_vec[2].reg_data[0] = addrLo.u32All; 452 453 /* enable watch flag if address is not zero*/ 454 if (adw_info->watch_address[i] > 0) 455 cntl.bitfields.valid = 1; 456 else 457 cntl.bitfields.valid = 0; 458 459 aw_reg_add_dword = 460 dbgdev->dev->kfd2kgd->address_watch_get_offset( 461 dbgdev->dev->kgd, 462 i, 463 ADDRESS_WATCH_REG_CNTL); 464 465 packets_vec[3].bitfields2.reg_offset = 466 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 467 packets_vec[3].reg_data[0] = cntl.u32All; 468 469 status = dbgdev_diq_submit_ib( 470 dbgdev, 471 adw_info->process->pasid, 472 mem_obj->gpu_addr, 473 packet_buff_uint, 474 ib_size); 475 476 if (status) { 477 pr_err("Failed to submit IB to DIQ\n"); 478 break; 479 } 480 } 481 482 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 483 return status; 484} 485 486static int dbgdev_wave_control_set_registers( 487 struct dbg_wave_control_info *wac_info, 488 union SQ_CMD_BITS *in_reg_sq_cmd, 489 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) 490{ 491 int status = 0; 492 union SQ_CMD_BITS reg_sq_cmd; 493 union GRBM_GFX_INDEX_BITS reg_gfx_index; 494 struct HsaDbgWaveMsgAMDGen2 *pMsg; 495 496 reg_sq_cmd.u32All = 0; 497 reg_gfx_index.u32All = 0; 498 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; 499 500 switch (wac_info->mode) { 501 /* Send command to single wave */ 502 case HSA_DBG_WAVEMODE_SINGLE: 503 /* 504 * Limit access to the process waves only, 505 * by setting vmid check 506 */ 507 reg_sq_cmd.bits.check_vmid = 1; 508 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; 509 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; 510 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; 511 512 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 513 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 514 
reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 515 516 break; 517 518 /* Send command to all waves with matching VMID */ 519 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: 520 521 reg_gfx_index.bits.sh_broadcast_writes = 1; 522 reg_gfx_index.bits.se_broadcast_writes = 1; 523 reg_gfx_index.bits.instance_broadcast_writes = 1; 524 525 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 526 527 break; 528 529 /* Send command to all CU waves with matching VMID */ 530 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: 531 532 reg_sq_cmd.bits.check_vmid = 1; 533 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 534 535 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 536 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 537 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 538 539 break; 540 541 default: 542 return -EINVAL; 543 } 544 545 switch (wac_info->operand) { 546 case HSA_DBG_WAVEOP_HALT: 547 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; 548 break; 549 550 case HSA_DBG_WAVEOP_RESUME: 551 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; 552 break; 553 554 case HSA_DBG_WAVEOP_KILL: 555 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 556 break; 557 558 case HSA_DBG_WAVEOP_DEBUG: 559 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; 560 break; 561 562 case HSA_DBG_WAVEOP_TRAP: 563 if (wac_info->trapId < MAX_TRAPID) { 564 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; 565 reg_sq_cmd.bits.trap_id = wac_info->trapId; 566 } else { 567 status = -EINVAL; 568 } 569 break; 570 571 default: 572 status = -EINVAL; 573 break; 574 } 575 576 if (status == 0) { 577 *in_reg_sq_cmd = reg_sq_cmd; 578 *in_reg_gfx_index = reg_gfx_index; 579 } 580 581 return status; 582} 583 584static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, 585 struct dbg_wave_control_info *wac_info) 586{ 587 588 int status; 589 union SQ_CMD_BITS reg_sq_cmd; 590 union GRBM_GFX_INDEX_BITS reg_gfx_index; 591 struct kfd_mem_obj *mem_obj; 592 uint32_t *packet_buff_uint; 593 struct pm4__set_config_reg *packets_vec; 594 size_t 
ib_size = sizeof(struct pm4__set_config_reg) * 3; 595 596 reg_sq_cmd.u32All = 0; 597 598 status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd, 599 &reg_gfx_index); 600 if (status) { 601 pr_err("Failed to set wave control registers\n"); 602 return status; 603 } 604 605 /* we do not control the VMID in DIQ, so reset it to a known value */ 606 reg_sq_cmd.bits.vm_id = 0; 607 608 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 609 610 pr_debug("\t\t mode is: %u\n", wac_info->mode); 611 pr_debug("\t\t operand is: %u\n", wac_info->operand); 612 pr_debug("\t\t trap id is: %u\n", wac_info->trapId); 613 pr_debug("\t\t msg value is: %u\n", 614 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 615 pr_debug("\t\t vmid is: N/A\n"); 616 617 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); 618 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); 619 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); 620 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); 621 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); 622 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); 623 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); 624 625 pr_debug("\t\t ibw is : %u\n", 626 reg_gfx_index.bitfields.instance_broadcast_writes); 627 pr_debug("\t\t ii is : %u\n", 628 reg_gfx_index.bitfields.instance_index); 629 pr_debug("\t\t sebw is : %u\n", 630 reg_gfx_index.bitfields.se_broadcast_writes); 631 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); 632 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); 633 pr_debug("\t\t sbw is : %u\n", 634 reg_gfx_index.bitfields.sh_broadcast_writes); 635 636 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 637 638 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 639 640 if (status != 0) { 641 pr_err("Failed to allocate GART memory\n"); 642 return status; 643 } 644 645 
packet_buff_uint = mem_obj->cpu_ptr; 646 647 memset(packet_buff_uint, 0, ib_size); 648 649 packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; 650 packets_vec[0].header.count = 1; 651 packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; 652 packets_vec[0].header.type = PM4_TYPE_3; 653 packets_vec[0].bitfields2.reg_offset = 654 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; 655 656 packets_vec[0].bitfields2.insert_vmid = 0; 657 packets_vec[0].reg_data[0] = reg_gfx_index.u32All; 658 659 packets_vec[1].header.count = 1; 660 packets_vec[1].header.opcode = IT_SET_CONFIG_REG; 661 packets_vec[1].header.type = PM4_TYPE_3; 662 packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; 663 664 packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; 665 packets_vec[1].bitfields2.insert_vmid = 1; 666 packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; 667 668 /* Restore the GRBM_GFX_INDEX register */ 669 670 reg_gfx_index.u32All = 0; 671 reg_gfx_index.bits.sh_broadcast_writes = 1; 672 reg_gfx_index.bits.instance_broadcast_writes = 1; 673 reg_gfx_index.bits.se_broadcast_writes = 1; 674 675 676 packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 677 packets_vec[2].bitfields2.reg_offset = 678 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; 679 680 packets_vec[2].bitfields2.insert_vmid = 0; 681 packets_vec[2].reg_data[0] = reg_gfx_index.u32All; 682 683 status = dbgdev_diq_submit_ib( 684 dbgdev, 685 wac_info->process->pasid, 686 mem_obj->gpu_addr, 687 packet_buff_uint, 688 ib_size); 689 690 if (status) 691 pr_err("Failed to submit IB to DIQ\n"); 692 693 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 694 695 return status; 696} 697 698static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, 699 struct dbg_wave_control_info *wac_info) 700{ 701 int status; 702 union SQ_CMD_BITS reg_sq_cmd; 703 union GRBM_GFX_INDEX_BITS reg_gfx_index; 704 struct kfd_process_device *pdd; 705 706 reg_sq_cmd.u32All = 0; 707 708 /* taking the VMID for that process on the safe way using PDD */ 709 
pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); 710 711 if (!pdd) { 712 pr_err("Failed to get pdd for wave control no DIQ\n"); 713 return -EFAULT; 714 } 715 status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd, 716 &reg_gfx_index); 717 if (status) { 718 pr_err("Failed to set wave control registers\n"); 719 return status; 720 } 721 722 /* for non DIQ we need to patch the VMID: */ 723 724 reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; 725 726 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 727 728 pr_debug("\t\t mode is: %u\n", wac_info->mode); 729 pr_debug("\t\t operand is: %u\n", wac_info->operand); 730 pr_debug("\t\t trap id is: %u\n", wac_info->trapId); 731 pr_debug("\t\t msg value is: %u\n", 732 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 733 pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); 734 735 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); 736 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); 737 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); 738 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); 739 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); 740 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); 741 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); 742 743 pr_debug("\t\t ibw is : %u\n", 744 reg_gfx_index.bitfields.instance_broadcast_writes); 745 pr_debug("\t\t ii is : %u\n", 746 reg_gfx_index.bitfields.instance_index); 747 pr_debug("\t\t sebw is : %u\n", 748 reg_gfx_index.bitfields.se_broadcast_writes); 749 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); 750 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); 751 pr_debug("\t\t sbw is : %u\n", 752 reg_gfx_index.bitfields.sh_broadcast_writes); 753 754 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 755 756 return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, 757 
reg_gfx_index.u32All, 758 reg_sq_cmd.u32All); 759} 760 761int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 762{ 763 int status = 0; 764 unsigned int vmid; 765 union SQ_CMD_BITS reg_sq_cmd; 766 union GRBM_GFX_INDEX_BITS reg_gfx_index; 767 struct kfd_process_device *pdd; 768 struct dbg_wave_control_info wac_info; 769 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 770 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 771 772 reg_sq_cmd.u32All = 0; 773 status = 0; 774 775 wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; 776 wac_info.operand = HSA_DBG_WAVEOP_KILL; 777 778 pr_debug("Killing all process wavefronts\n"); 779 780 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 781 * ATC_VMID15_PASID_MAPPING 782 * to check which VMID the current process is mapped to. 783 */ 784 785 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 786 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid 787 (dev->kgd, vmid)) { 788 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid 789 (dev->kgd, vmid) == p->pasid) { 790 pr_debug("Killing wave fronts of vmid %d and pasid %d\n", 791 vmid, p->pasid); 792 break; 793 } 794 } 795 } 796 797 if (vmid > last_vmid_to_scan) { 798 pr_err("Didn't find vmid for pasid %d\n", p->pasid); 799 return -EFAULT; 800 } 801 802 /* taking the VMID for that process on the safe way using PDD */ 803 pdd = kfd_get_process_device_data(dev, p); 804 if (!pdd) 805 return -EFAULT; 806 807 status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd, 808 &reg_gfx_index); 809 if (status != 0) 810 return -EINVAL; 811 812 /* for non DIQ we need to patch the VMID: */ 813 reg_sq_cmd.bits.vm_id = vmid; 814 815 dev->kfd2kgd->wave_control_execute(dev->kgd, 816 reg_gfx_index.u32All, 817 reg_sq_cmd.u32All); 818 819 return 0; 820} 821 822void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, 823 enum DBGDEV_TYPE type) 824{ 825 pdbgdev->dev = pdev; 826 pdbgdev->kq = NULL; 827 pdbgdev->type = type; 
828 pdbgdev->pqm = NULL; 829 830 switch (type) { 831 case DBGDEV_TYPE_NODIQ: 832 pdbgdev->dbgdev_register = dbgdev_register_nodiq; 833 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; 834 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; 835 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; 836 break; 837 case DBGDEV_TYPE_DIQ: 838 default: 839 pdbgdev->dbgdev_register = dbgdev_register_diq; 840 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; 841 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; 842 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; 843 break; 844 } 845 846}