Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.11 886 lines 26 kB view raw
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24#include <linux/types.h> 25#include <linux/kernel.h> 26#include <linux/log2.h> 27#include <linux/sched.h> 28#include <linux/slab.h> 29#include <linux/mutex.h> 30#include <linux/device.h> 31 32#include "kfd_pm4_headers.h" 33#include "kfd_pm4_headers_diq.h" 34#include "kfd_kernel_queue.h" 35#include "kfd_priv.h" 36#include "kfd_pm4_opcodes.h" 37#include "cik_regs.h" 38#include "kfd_dbgmgr.h" 39#include "kfd_dbgdev.h" 40#include "kfd_device_queue_manager.h" 41#include "../../radeon/cik_reg.h" 42 43static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) 44{ 45 BUG_ON(!dev || !dev->kfd2kgd); 46 47 dev->kfd2kgd->address_watch_disable(dev->kgd); 48} 49 50static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, 51 unsigned int pasid, uint64_t vmid0_address, 52 uint32_t *packet_buff, size_t size_in_bytes) 53{ 54 struct pm4__release_mem *rm_packet; 55 struct pm4__indirect_buffer_pasid *ib_packet; 56 struct kfd_mem_obj *mem_obj; 57 size_t pq_packets_size_in_bytes; 58 union ULARGE_INTEGER *largep; 59 union ULARGE_INTEGER addr; 60 struct kernel_queue *kq; 61 uint64_t *rm_state; 62 unsigned int *ib_packet_buff; 63 int status; 64 65 BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes); 66 67 kq = dbgdev->kq; 68 69 pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + 70 sizeof(struct pm4__indirect_buffer_pasid); 71 72 /* 73 * We acquire a buffer from DIQ 74 * The receive packet buff will be sitting on the Indirect Buffer 75 * and in the PQ we put the IB packet + sync packet(s). 76 */ 77 status = kq->ops.acquire_packet_buffer(kq, 78 pq_packets_size_in_bytes / sizeof(uint32_t), 79 &ib_packet_buff); 80 if (status != 0) { 81 pr_err("amdkfd: acquire_packet_buffer failed\n"); 82 return status; 83 } 84 85 memset(ib_packet_buff, 0, pq_packets_size_in_bytes); 86 87 ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); 88 89 ib_packet->header.count = 3; 90 ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; 91 ib_packet->header.type = PM4_TYPE_3; 92 93 largep = (union ULARGE_INTEGER *) &vmid0_address; 94 95 ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; 96 ib_packet->bitfields3.ib_base_hi = largep->u.high_part; 97 98 ib_packet->control = (1 << 23) | (1 << 31) | 99 ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); 100 101 ib_packet->bitfields5.pasid = pasid; 102 103 /* 104 * for now we use release mem for GPU-CPU synchronization 105 * Consider WaitRegMem + WriteData as a better alternative 106 * we get a GART allocations ( gpu/cpu mapping), 107 * for the sync variable, and wait until: 108 * (a) Sync with HW 109 * (b) Sync var is written by CP to mem. 110 */ 111 rm_packet = (struct pm4__release_mem *) (ib_packet_buff + 112 (sizeof(struct pm4__indirect_buffer_pasid) / 113 sizeof(unsigned int))); 114 115 status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), 116 &mem_obj); 117 118 if (status != 0) { 119 pr_err("amdkfd: Failed to allocate GART memory\n"); 120 kq->ops.rollback_packet(kq); 121 return status; 122 } 123 124 rm_state = (uint64_t *) mem_obj->cpu_ptr; 125 126 *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; 127 128 rm_packet->header.opcode = IT_RELEASE_MEM; 129 rm_packet->header.type = PM4_TYPE_3; 130 rm_packet->header.count = sizeof(struct pm4__release_mem) / 131 sizeof(unsigned int) - 2; 132 133 rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; 134 rm_packet->bitfields2.event_index = 135 event_index___release_mem__end_of_pipe; 136 137 rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; 138 rm_packet->bitfields2.atc = 0; 139 rm_packet->bitfields2.tc_wb_action_ena = 1; 140 141 addr.quad_part = mem_obj->gpu_addr; 142 143 rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; 144 rm_packet->address_hi = addr.u.high_part; 145 146 rm_packet->bitfields3.data_sel = 147 data_sel___release_mem__send_64_bit_data; 148 149 rm_packet->bitfields3.int_sel = 150 int_sel___release_mem__send_data_after_write_confirm; 151 152 rm_packet->bitfields3.dst_sel = 153 dst_sel___release_mem__memory_controller; 154 155 rm_packet->data_lo = QUEUESTATE__ACTIVE; 156 157 kq->ops.submit_packet(kq); 158 159 /* Wait till CP writes sync code: */ 160 status = amdkfd_fence_wait_timeout( 161 (unsigned int *) rm_state, 162 QUEUESTATE__ACTIVE, 1500); 163 164 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 165 166 return status; 167} 168 169static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) 170{ 171 BUG_ON(!dbgdev); 172 173 /* 174 * no action is needed in this case, 175 * just make sure diq will not be used 176 */ 177 178 dbgdev->kq = NULL; 179 180 return 0; 181} 182 183static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) 184{ 185 struct queue_properties properties; 186 unsigned int qid; 187 struct kernel_queue *kq = NULL; 188 int status; 189 190 BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev); 191 192 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, 193 &properties, 0, KFD_QUEUE_TYPE_DIQ, 194 &qid); 195 196 if (status) { 197 pr_err("amdkfd: Failed to create DIQ\n"); 198 return status; 199 } 200 201 pr_debug("DIQ Created with queue id: %d\n", qid); 202 203 kq = pqm_get_kernel_queue(dbgdev->pqm, qid); 204 205 if (kq == NULL) { 206 pr_err("amdkfd: Error getting DIQ\n"); 207 pqm_destroy_queue(dbgdev->pqm, qid); 208 return -EFAULT; 209 } 210 211 dbgdev->kq = kq; 212 213 return status; 214} 215 216static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) 217{ 218 BUG_ON(!dbgdev || !dbgdev->dev); 219 220 /* disable watch address */ 221 dbgdev_address_watch_disable_nodiq(dbgdev->dev); 222 return 0; 223} 224 225static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) 226{ 227 /* todo - disable address watch */ 228 int status; 229 230 BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq); 231 232 status = pqm_destroy_queue(dbgdev->pqm, 233 dbgdev->kq->queue->properties.queue_id); 234 dbgdev->kq = NULL; 235 236 return status; 237} 238 239static void dbgdev_address_watch_set_registers( 240 const struct dbg_address_watch_info *adw_info, 241 union TCP_WATCH_ADDR_H_BITS *addrHi, 242 union TCP_WATCH_ADDR_L_BITS *addrLo, 243 union TCP_WATCH_CNTL_BITS *cntl, 244 unsigned int index, unsigned int vmid) 245{ 246 union ULARGE_INTEGER addr; 247 248 BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); 249 250 addr.quad_part = 0; 251 addrHi->u32All = 0; 252 addrLo->u32All = 0; 253 cntl->u32All = 0; 254 255 if (adw_info->watch_mask != NULL) 256 cntl->bitfields.mask = 257 (uint32_t) (adw_info->watch_mask[index] & 258 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); 259 else 260 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 261 262 addr.quad_part = (unsigned long long) adw_info->watch_address[index]; 263 264 addrHi->bitfields.addr = addr.u.high_part & 265 ADDRESS_WATCH_REG_ADDHIGH_MASK; 266 addrLo->bitfields.addr = 267 (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); 268 269 cntl->bitfields.mode = adw_info->watch_mode[index]; 270 cntl->bitfields.vmid = (uint32_t) vmid; 271 /* for now assume it is an ATC address */ 272 cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; 273 274 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); 275 pr_debug("\t\t%20s %08x\n", "set reg add high :", 276 addrHi->bitfields.addr); 277 pr_debug("\t\t%20s %08x\n", "set reg add low :", 278 addrLo->bitfields.addr); 279} 280 281static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, 282 struct dbg_address_watch_info *adw_info) 283{ 284 union TCP_WATCH_ADDR_H_BITS addrHi; 285 union TCP_WATCH_ADDR_L_BITS addrLo; 286 union TCP_WATCH_CNTL_BITS cntl; 287 struct kfd_process_device *pdd; 288 unsigned int i; 289 290 BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); 291 292 /* taking the vmid for that process on the safe way using pdd */ 293 pdd = kfd_get_process_device_data(dbgdev->dev, 294 adw_info->process); 295 if (!pdd) { 296 pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); 297 return -EFAULT; 298 } 299 300 addrHi.u32All = 0; 301 addrLo.u32All = 0; 302 cntl.u32All = 0; 303 304 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 305 (adw_info->num_watch_points == 0)) { 306 pr_err("amdkfd: num_watch_points is invalid\n"); 307 return -EINVAL; 308 } 309 310 if ((adw_info->watch_mode == NULL) || 311 (adw_info->watch_address == NULL)) { 312 pr_err("amdkfd: adw_info fields are not valid\n"); 313 return -EINVAL; 314 } 315 316 for (i = 0 ; i < adw_info->num_watch_points ; i++) { 317 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, 318 &cntl, i, pdd->qpd.vmid); 319 320 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 321 pr_debug("\t\t%20s %08x\n", "register index :", i); 322 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); 323 pr_debug("\t\t%20s %08x\n", "Address Low is :", 324 addrLo.bitfields.addr); 325 pr_debug("\t\t%20s %08x\n", "Address high is :", 326 addrHi.bitfields.addr); 327 pr_debug("\t\t%20s %08x\n", "Address high is :", 328 addrHi.bitfields.addr); 329 pr_debug("\t\t%20s %08x\n", "Control Mask is :", 330 cntl.bitfields.mask); 331 pr_debug("\t\t%20s %08x\n", "Control Mode is :", 332 cntl.bitfields.mode); 333 pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 334 cntl.bitfields.vmid); 335 pr_debug("\t\t%20s %08x\n", "Control atc is :", 336 cntl.bitfields.atc); 337 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 338 339 pdd->dev->kfd2kgd->address_watch_execute( 340 dbgdev->dev->kgd, 341 i, 342 cntl.u32All, 343 addrHi.u32All, 344 addrLo.u32All); 345 } 346 347 return 0; 348} 349 350static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, 351 struct dbg_address_watch_info *adw_info) 352{ 353 struct pm4__set_config_reg *packets_vec; 354 union TCP_WATCH_ADDR_H_BITS addrHi; 355 union TCP_WATCH_ADDR_L_BITS addrLo; 356 union TCP_WATCH_CNTL_BITS cntl; 357 struct kfd_mem_obj *mem_obj; 358 unsigned int aw_reg_add_dword; 359 uint32_t *packet_buff_uint; 360 unsigned int i; 361 int status; 362 size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; 363 /* we do not control the vmid in DIQ mode, just a place holder */ 364 unsigned int vmid = 0; 365 366 BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); 367 368 addrHi.u32All = 0; 369 addrLo.u32All = 0; 370 cntl.u32All = 0; 371 372 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 373 (adw_info->num_watch_points == 0)) { 374 pr_err("amdkfd: num_watch_points is invalid\n"); 375 return -EINVAL; 376 } 377 378 if ((NULL == adw_info->watch_mode) || 379 (NULL == adw_info->watch_address)) { 380 pr_err("amdkfd: adw_info fields are not valid\n"); 381 return -EINVAL; 382 } 383 384 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 385 386 if (status != 0) { 387 pr_err("amdkfd: Failed to allocate GART memory\n"); 388 return status; 389 } 390 391 packet_buff_uint = mem_obj->cpu_ptr; 392 393 memset(packet_buff_uint, 0, ib_size); 394 395 packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); 396 397 packets_vec[0].header.count = 1; 398 packets_vec[0].header.opcode = IT_SET_CONFIG_REG; 399 packets_vec[0].header.type = PM4_TYPE_3; 400 packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 401 packets_vec[0].bitfields2.insert_vmid = 1; 402 packets_vec[1].ordinal1 = packets_vec[0].ordinal1; 403 packets_vec[1].bitfields2.insert_vmid = 0; 404 packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 405 packets_vec[2].bitfields2.insert_vmid = 0; 406 packets_vec[3].ordinal1 = packets_vec[0].ordinal1; 407 packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 408 packets_vec[3].bitfields2.insert_vmid = 1; 409 410 for (i = 0; i < adw_info->num_watch_points; i++) { 411 dbgdev_address_watch_set_registers(adw_info, 412 &addrHi, 413 &addrLo, 414 &cntl, 415 i, 416 vmid); 417 418 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 419 pr_debug("\t\t%20s %08x\n", "register index :", i); 420 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); 421 pr_debug("\t\t%20s %p\n", "Add ptr is :", 422 adw_info->watch_address); 423 pr_debug("\t\t%20s %08llx\n", "Add is :", 424 adw_info->watch_address[i]); 425 pr_debug("\t\t%20s %08x\n", "Address Low is :", 426 addrLo.bitfields.addr); 427 pr_debug("\t\t%20s %08x\n", "Address high is :", 428 addrHi.bitfields.addr); 429 pr_debug("\t\t%20s %08x\n", "Control Mask is :", 430 cntl.bitfields.mask); 431 pr_debug("\t\t%20s %08x\n", "Control Mode is :", 432 cntl.bitfields.mode); 433 pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 434 cntl.bitfields.vmid); 435 pr_debug("\t\t%20s %08x\n", "Control atc is :", 436 cntl.bitfields.atc); 437 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 438 439 aw_reg_add_dword = 440 dbgdev->dev->kfd2kgd->address_watch_get_offset( 441 dbgdev->dev->kgd, 442 i, 443 ADDRESS_WATCH_REG_CNTL); 444 445 aw_reg_add_dword /= sizeof(uint32_t); 446 447 packets_vec[0].bitfields2.reg_offset = 448 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 449 450 packets_vec[0].reg_data[0] = cntl.u32All; 451 452 aw_reg_add_dword = 453 dbgdev->dev->kfd2kgd->address_watch_get_offset( 454 dbgdev->dev->kgd, 455 i, 456 ADDRESS_WATCH_REG_ADDR_HI); 457 458 aw_reg_add_dword /= sizeof(uint32_t); 459 460 packets_vec[1].bitfields2.reg_offset = 461 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 462 packets_vec[1].reg_data[0] = addrHi.u32All; 463 464 aw_reg_add_dword = 465 dbgdev->dev->kfd2kgd->address_watch_get_offset( 466 dbgdev->dev->kgd, 467 i, 468 ADDRESS_WATCH_REG_ADDR_LO); 469 470 aw_reg_add_dword /= sizeof(uint32_t); 471 472 packets_vec[2].bitfields2.reg_offset = 473 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 474 packets_vec[2].reg_data[0] = addrLo.u32All; 475 476 /* enable watch flag if address is not zero*/ 477 if (adw_info->watch_address[i] > 0) 478 cntl.bitfields.valid = 1; 479 else 480 cntl.bitfields.valid = 0; 481 482 aw_reg_add_dword = 483 dbgdev->dev->kfd2kgd->address_watch_get_offset( 484 dbgdev->dev->kgd, 485 i, 486 ADDRESS_WATCH_REG_CNTL); 487 488 aw_reg_add_dword /= sizeof(uint32_t); 489 490 packets_vec[3].bitfields2.reg_offset = 491 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 492 packets_vec[3].reg_data[0] = cntl.u32All; 493 494 status = dbgdev_diq_submit_ib( 495 dbgdev, 496 adw_info->process->pasid, 497 mem_obj->gpu_addr, 498 packet_buff_uint, 499 ib_size); 500 501 if (status != 0) { 502 pr_err("amdkfd: Failed to submit IB to DIQ\n"); 503 break; 504 } 505 } 506 507 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 508 return status; 509} 510 511static int dbgdev_wave_control_set_registers( 512 struct dbg_wave_control_info *wac_info, 513 union SQ_CMD_BITS *in_reg_sq_cmd, 514 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) 515{ 516 int status = 0; 517 union SQ_CMD_BITS reg_sq_cmd; 518 union GRBM_GFX_INDEX_BITS reg_gfx_index; 519 struct HsaDbgWaveMsgAMDGen2 *pMsg; 520 521 BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index); 522 523 reg_sq_cmd.u32All = 0; 524 reg_gfx_index.u32All = 0; 525 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; 526 527 switch (wac_info->mode) { 528 /* Send command to single wave */ 529 case HSA_DBG_WAVEMODE_SINGLE: 530 /* 531 * Limit access to the process waves only, 532 * by setting vmid check 533 */ 534 reg_sq_cmd.bits.check_vmid = 1; 535 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; 536 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; 537 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; 538 539 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 540 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 541 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 542 543 break; 544 545 /* Send command to all waves with matching VMID */ 546 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: 547 548 reg_gfx_index.bits.sh_broadcast_writes = 1; 549 reg_gfx_index.bits.se_broadcast_writes = 1; 550 reg_gfx_index.bits.instance_broadcast_writes = 1; 551 552 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 553 554 break; 555 556 /* Send command to all CU waves with matching VMID */ 557 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: 558 559 reg_sq_cmd.bits.check_vmid = 1; 560 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 561 562 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 563 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 564 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 565 566 break; 567 568 default: 569 return -EINVAL; 570 } 571 572 switch (wac_info->operand) { 573 case HSA_DBG_WAVEOP_HALT: 574 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; 575 break; 576 577 case HSA_DBG_WAVEOP_RESUME: 578 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; 579 break; 580 581 case HSA_DBG_WAVEOP_KILL: 582 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 583 break; 584 585 case HSA_DBG_WAVEOP_DEBUG: 586 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; 587 break; 588 589 case HSA_DBG_WAVEOP_TRAP: 590 if (wac_info->trapId < MAX_TRAPID) { 591 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; 592 reg_sq_cmd.bits.trap_id = wac_info->trapId; 593 } else { 594 status = -EINVAL; 595 } 596 break; 597 598 default: 599 status = -EINVAL; 600 break; 601 } 602 603 if (status == 0) { 604 *in_reg_sq_cmd = reg_sq_cmd; 605 *in_reg_gfx_index = reg_gfx_index; 606 } 607 608 return status; 609} 610 611static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, 612 struct dbg_wave_control_info *wac_info) 613{ 614 615 int status; 616 union SQ_CMD_BITS reg_sq_cmd; 617 union GRBM_GFX_INDEX_BITS reg_gfx_index; 618 struct kfd_mem_obj *mem_obj; 619 uint32_t *packet_buff_uint; 620 struct pm4__set_config_reg *packets_vec; 621 size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; 622 623 BUG_ON(!dbgdev || !wac_info); 624 625 reg_sq_cmd.u32All = 0; 626 627 status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd, 628 &reg_gfx_index); 629 if (status) { 630 pr_err("amdkfd: Failed to set wave control registers\n"); 631 return status; 632 } 633 634 /* we do not control the VMID in DIQ,so reset it to a known value */ 635 reg_sq_cmd.bits.vm_id = 0; 636 637 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 638 639 pr_debug("\t\t mode is: %u\n", wac_info->mode); 640 pr_debug("\t\t operand is: %u\n", wac_info->operand); 641 pr_debug("\t\t trap id is: %u\n", wac_info->trapId); 642 pr_debug("\t\t msg value is: %u\n", 643 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 644 pr_debug("\t\t vmid is: N/A\n"); 645 646 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); 647 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); 648 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); 649 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); 650 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); 651 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); 652 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); 653 654 pr_debug("\t\t ibw is : %u\n", 655 reg_gfx_index.bitfields.instance_broadcast_writes); 656 pr_debug("\t\t ii is : %u\n", 657 reg_gfx_index.bitfields.instance_index); 658 pr_debug("\t\t sebw is : %u\n", 659 reg_gfx_index.bitfields.se_broadcast_writes); 660 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); 661 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); 662 pr_debug("\t\t sbw is : %u\n", 663 reg_gfx_index.bitfields.sh_broadcast_writes); 664 665 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 666 667 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 668 669 if (status != 0) { 670 pr_err("amdkfd: Failed to allocate GART memory\n"); 671 return status; 672 } 673 674 packet_buff_uint = mem_obj->cpu_ptr; 675 676 memset(packet_buff_uint, 0, ib_size); 677 678 packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; 679 packets_vec[0].header.count = 1; 680 packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; 681 packets_vec[0].header.type = PM4_TYPE_3; 682 packets_vec[0].bitfields2.reg_offset = 683 GRBM_GFX_INDEX / (sizeof(uint32_t)) - 684 USERCONFIG_REG_BASE; 685 686 packets_vec[0].bitfields2.insert_vmid = 0; 687 packets_vec[0].reg_data[0] = reg_gfx_index.u32All; 688 689 packets_vec[1].header.count = 1; 690 packets_vec[1].header.opcode = IT_SET_CONFIG_REG; 691 packets_vec[1].header.type = PM4_TYPE_3; 692 packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - 693 AMD_CONFIG_REG_BASE; 694 695 packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; 696 packets_vec[1].bitfields2.insert_vmid = 1; 697 packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; 698 699 /* Restore the GRBM_GFX_INDEX register */ 700 701 reg_gfx_index.u32All = 0; 702 reg_gfx_index.bits.sh_broadcast_writes = 1; 703 reg_gfx_index.bits.instance_broadcast_writes = 1; 704 reg_gfx_index.bits.se_broadcast_writes = 1; 705 706 707 packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 708 packets_vec[2].bitfields2.reg_offset = 709 GRBM_GFX_INDEX / (sizeof(uint32_t)) - 710 USERCONFIG_REG_BASE; 711 712 packets_vec[2].bitfields2.insert_vmid = 0; 713 packets_vec[2].reg_data[0] = reg_gfx_index.u32All; 714 715 status = dbgdev_diq_submit_ib( 716 dbgdev, 717 wac_info->process->pasid, 718 mem_obj->gpu_addr, 719 packet_buff_uint, 720 ib_size); 721 722 if (status != 0) 723 pr_err("amdkfd: Failed to submit IB to DIQ\n"); 724 725 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 726 727 return status; 728} 729 730static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, 731 struct dbg_wave_control_info *wac_info) 732{ 733 int status; 734 union SQ_CMD_BITS reg_sq_cmd; 735 union GRBM_GFX_INDEX_BITS reg_gfx_index; 736 struct kfd_process_device *pdd; 737 738 BUG_ON(!dbgdev || !dbgdev->dev || !wac_info); 739 740 reg_sq_cmd.u32All = 0; 741 742 /* taking the VMID for that process on the safe way using PDD */ 743 pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); 744 745 if (!pdd) { 746 pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); 747 return -EFAULT; 748 } 749 status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd, 750 &reg_gfx_index); 751 if (status) { 752 pr_err("amdkfd: Failed to set wave control registers\n"); 753 return status; 754 } 755 756 /* for non DIQ we need to patch the VMID: */ 757 758 reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; 759 760 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 761 762 pr_debug("\t\t mode is: %u\n", wac_info->mode); 763 pr_debug("\t\t operand is: %u\n", wac_info->operand); 764 pr_debug("\t\t trap id is: %u\n", wac_info->trapId); 765 pr_debug("\t\t msg value is: %u\n", 766 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 767 pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); 768 769 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); 770 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); 771 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); 772 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); 773 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); 774 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); 775 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); 776 777 pr_debug("\t\t ibw is : %u\n", 778 reg_gfx_index.bitfields.instance_broadcast_writes); 779 pr_debug("\t\t ii is : %u\n", 780 reg_gfx_index.bitfields.instance_index); 781 pr_debug("\t\t sebw is : %u\n", 782 reg_gfx_index.bitfields.se_broadcast_writes); 783 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); 784 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); 785 pr_debug("\t\t sbw is : %u\n", 786 reg_gfx_index.bitfields.sh_broadcast_writes); 787 788 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 789 790 return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, 791 reg_gfx_index.u32All, 792 reg_sq_cmd.u32All); 793} 794 795int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 796{ 797 int status = 0; 798 unsigned int vmid; 799 union SQ_CMD_BITS reg_sq_cmd; 800 union GRBM_GFX_INDEX_BITS reg_gfx_index; 801 struct kfd_process_device *pdd; 802 struct dbg_wave_control_info wac_info; 803 int temp; 804 int first_vmid_to_scan = 8; 805 int last_vmid_to_scan = 15; 806 807 first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1; 808 temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan; 809 last_vmid_to_scan = first_vmid_to_scan + ffz(temp); 810 811 reg_sq_cmd.u32All = 0; 812 status = 0; 813 814 wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; 815 wac_info.operand = HSA_DBG_WAVEOP_KILL; 816 817 pr_debug("Killing all process wavefronts\n"); 818 819 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 820 * ATC_VMID15_PASID_MAPPING 821 * to check which VMID the current process is mapped to. */ 822 823 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 824 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid 825 (dev->kgd, vmid)) { 826 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid 827 (dev->kgd, vmid) == p->pasid) { 828 pr_debug("Killing wave fronts of vmid %d and pasid %d\n", 829 vmid, p->pasid); 830 break; 831 } 832 } 833 } 834 835 if (vmid > last_vmid_to_scan) { 836 pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid); 837 return -EFAULT; 838 } 839 840 /* taking the VMID for that process on the safe way using PDD */ 841 pdd = kfd_get_process_device_data(dev, p); 842 if (!pdd) 843 return -EFAULT; 844 845 status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd, 846 &reg_gfx_index); 847 if (status != 0) 848 return -EINVAL; 849 850 /* for non DIQ we need to patch the VMID: */ 851 reg_sq_cmd.bits.vm_id = vmid; 852 853 dev->kfd2kgd->wave_control_execute(dev->kgd, 854 reg_gfx_index.u32All, 855 reg_sq_cmd.u32All); 856 857 return 0; 858} 859 860void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, 861 enum DBGDEV_TYPE type) 862{ 863 BUG_ON(!pdbgdev || !pdev); 864 865 pdbgdev->dev = pdev; 866 pdbgdev->kq = NULL; 867 pdbgdev->type = type; 868 pdbgdev->pqm = NULL; 869 870 switch (type) { 871 case DBGDEV_TYPE_NODIQ: 872 pdbgdev->dbgdev_register = dbgdev_register_nodiq; 873 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; 874 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; 875 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; 876 break; 877 case DBGDEV_TYPE_DIQ: 878 default: 879 pdbgdev->dbgdev_register = dbgdev_register_diq; 880 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; 881 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; 882 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; 883 break; 884 } 885 886}