Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-amdkfd-next-2015-06-03' of git://people.freedesktop.org/~gabbayo/linux into drm-next

drm-amdkfd-next-2015-06-03:

- Add the H/W debugger support module, including new IOCTLs to:
- register/unregister a process as a debugged process
- Set address watch-point in the debugged process's GPU kernel
- Do a wave control operation in the debugged process's waves
See the commit messages for more details on the available operations.

The debugged process can only perform debug operations on itself. It is
blocked by the amdkfd+H/W from performing operations on other processes'
waves or GPU kernels. The blocking is done by setting the VMID and PASID of
the debugged process in the packets that are sent to the CP with the debug
instructions.

- Add support for static user-mode queues. These queues are regular queues,
but because they belong to the debugged process, we need to make sure the CP
doesn't preempt them during a debug operation. Therefore, we mark them as
static so that the CP ignores them during preemption.

- Support killing all the waves when a process is terminated. This is needed
in case a process is terminated but we can't UNMAP its queues (can occur due
to several reasons). In that case, the CP could be stuck unless we kill all
its waves. This function is *very* important as it provides the kernel a high
level of control over the GPU. The reason we didn't upstream this function
so far, is because it is implemented using the H/W debugger module functions,
so we had to wait until we can upstream the H/W debugger module.

- Replace declaration of bitmap from unsigned long to standard DECLARE_BITMAP

* tag 'drm-amdkfd-next-2015-06-03' of git://people.freedesktop.org/~gabbayo/linux:
drm/amdkfd: Enforce kill all waves on process termination
drm/radeon: Add ATC VMID<-->PASID functions to kfd->kgd
drm/amdkfd: Implement address watch debugger IOCTL
drm/amdkfd: Implement wave control debugger IOCTL
drm/amdkfd: Implement (un)register debugger IOCTLs
drm/amdkfd: Add address watch operation to debugger
drm/amdkfd: Add wave control operation to debugger
drm/amdkfd: Add skeleton H/W debugger module support
drm/amdkfd: Add static user-mode queues support
drm/amdkfd: add H/W debugger IOCTL set definitions
drm/radeon: Add H/W debugger kfd->kgd functions
drm/amdkfd: Use DECLARE_BITMAP

+2551 -35
+2 -1
drivers/gpu/drm/amd/amdkfd/Makefile
··· 12 12 kfd_kernel_queue_vi.o kfd_packet_manager.o \ 13 13 kfd_process_queue_manager.o kfd_device_queue_manager.o \ 14 14 kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ 15 - kfd_interrupt.o kfd_events.o cik_event_interrupt.o 15 + kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ 16 + kfd_dbgdev.o kfd_dbgmgr.o 16 17 17 18 obj-$(CONFIG_HSA_AMD) += amdkfd.o
+308
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
··· 35 35 #include <asm/processor.h> 36 36 #include "kfd_priv.h" 37 37 #include "kfd_device_queue_manager.h" 38 + #include "kfd_dbgmgr.h" 38 39 39 40 static long kfd_ioctl(struct file *, unsigned int, unsigned long); 40 41 static int kfd_open(struct inode *, struct file *); ··· 433 432 return err; 434 433 } 435 434 435 + static int kfd_ioctl_dbg_register(struct file *filep, 436 + struct kfd_process *p, void *data) 437 + { 438 + struct kfd_ioctl_dbg_register_args *args = data; 439 + struct kfd_dev *dev; 440 + struct kfd_dbgmgr *dbgmgr_ptr; 441 + struct kfd_process_device *pdd; 442 + bool create_ok; 443 + long status = 0; 444 + 445 + dev = kfd_device_by_id(args->gpu_id); 446 + if (dev == NULL) 447 + return -EINVAL; 448 + 449 + if (dev->device_info->asic_family == CHIP_CARRIZO) { 450 + pr_debug("kfd_ioctl_dbg_register not supported on CZ\n"); 451 + return -EINVAL; 452 + } 453 + 454 + mutex_lock(kfd_get_dbgmgr_mutex()); 455 + mutex_lock(&p->mutex); 456 + 457 + /* 458 + * make sure that we have pdd, if this the first queue created for 459 + * this process 460 + */ 461 + pdd = kfd_bind_process_to_device(dev, p); 462 + if (IS_ERR(pdd)) { 463 + mutex_unlock(&p->mutex); 464 + mutex_unlock(kfd_get_dbgmgr_mutex()); 465 + return PTR_ERR(pdd); 466 + } 467 + 468 + if (dev->dbgmgr == NULL) { 469 + /* In case of a legal call, we have no dbgmgr yet */ 470 + create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); 471 + if (create_ok) { 472 + status = kfd_dbgmgr_register(dbgmgr_ptr, p); 473 + if (status != 0) 474 + kfd_dbgmgr_destroy(dbgmgr_ptr); 475 + else 476 + dev->dbgmgr = dbgmgr_ptr; 477 + } 478 + } else { 479 + pr_debug("debugger already registered\n"); 480 + status = -EINVAL; 481 + } 482 + 483 + mutex_unlock(&p->mutex); 484 + mutex_unlock(kfd_get_dbgmgr_mutex()); 485 + 486 + return status; 487 + } 488 + 489 + static int kfd_ioctl_dbg_unrgesiter(struct file *filep, 490 + struct kfd_process *p, void *data) 491 + { 492 + struct kfd_ioctl_dbg_unregister_args *args = data; 493 + struct 
kfd_dev *dev; 494 + long status; 495 + 496 + dev = kfd_device_by_id(args->gpu_id); 497 + if (dev == NULL) 498 + return -EINVAL; 499 + 500 + if (dev->device_info->asic_family == CHIP_CARRIZO) { 501 + pr_debug("kfd_ioctl_dbg_unrgesiter not supported on CZ\n"); 502 + return -EINVAL; 503 + } 504 + 505 + mutex_lock(kfd_get_dbgmgr_mutex()); 506 + 507 + status = kfd_dbgmgr_unregister(dev->dbgmgr, p); 508 + if (status == 0) { 509 + kfd_dbgmgr_destroy(dev->dbgmgr); 510 + dev->dbgmgr = NULL; 511 + } 512 + 513 + mutex_unlock(kfd_get_dbgmgr_mutex()); 514 + 515 + return status; 516 + } 517 + 518 + /* 519 + * Parse and generate variable size data structure for address watch. 520 + * Total size of the buffer and # watch points is limited in order 521 + * to prevent kernel abuse. (no bearing to the much smaller HW limitation 522 + * which is enforced by dbgdev module) 523 + * please also note that the watch address itself are not "copied from user", 524 + * since it be set into the HW in user mode values. 
525 + * 526 + */ 527 + static int kfd_ioctl_dbg_address_watch(struct file *filep, 528 + struct kfd_process *p, void *data) 529 + { 530 + struct kfd_ioctl_dbg_address_watch_args *args = data; 531 + struct kfd_dev *dev; 532 + struct dbg_address_watch_info aw_info; 533 + unsigned char *args_buff; 534 + long status; 535 + void __user *cmd_from_user; 536 + uint64_t watch_mask_value = 0; 537 + unsigned int args_idx = 0; 538 + 539 + memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); 540 + 541 + dev = kfd_device_by_id(args->gpu_id); 542 + if (dev == NULL) 543 + return -EINVAL; 544 + 545 + if (dev->device_info->asic_family == CHIP_CARRIZO) { 546 + pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); 547 + return -EINVAL; 548 + } 549 + 550 + cmd_from_user = (void __user *) args->content_ptr; 551 + 552 + /* Validate arguments */ 553 + 554 + if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) || 555 + (args->buf_size_in_bytes <= sizeof(*args)) || 556 + (cmd_from_user == NULL)) 557 + return -EINVAL; 558 + 559 + /* this is the actual buffer to work with */ 560 + 561 + args_buff = kmalloc(args->buf_size_in_bytes - 562 + sizeof(*args), GFP_KERNEL); 563 + if (args_buff == NULL) 564 + return -ENOMEM; 565 + 566 + status = copy_from_user(args_buff, cmd_from_user, 567 + args->buf_size_in_bytes - sizeof(*args)); 568 + 569 + if (status != 0) { 570 + pr_debug("Failed to copy address watch user data\n"); 571 + kfree(args_buff); 572 + return -EINVAL; 573 + } 574 + 575 + aw_info.process = p; 576 + 577 + aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx])); 578 + args_idx += sizeof(aw_info.num_watch_points); 579 + 580 + aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; 581 + args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points; 582 + 583 + /* 584 + * set watch address base pointer to point on the array base 585 + * within args_buff 586 + */ 587 + aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; 588 + 
589 + /* skip over the addresses buffer */ 590 + args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; 591 + 592 + if (args_idx >= args->buf_size_in_bytes) { 593 + kfree(args_buff); 594 + return -EINVAL; 595 + } 596 + 597 + watch_mask_value = (uint64_t) args_buff[args_idx]; 598 + 599 + if (watch_mask_value > 0) { 600 + /* 601 + * There is an array of masks. 602 + * set watch mask base pointer to point on the array base 603 + * within args_buff 604 + */ 605 + aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; 606 + 607 + /* skip over the masks buffer */ 608 + args_idx += sizeof(aw_info.watch_mask) * 609 + aw_info.num_watch_points; 610 + } else { 611 + /* just the NULL mask, set to NULL and skip over it */ 612 + aw_info.watch_mask = NULL; 613 + args_idx += sizeof(aw_info.watch_mask); 614 + } 615 + 616 + if (args_idx > args->buf_size_in_bytes) { 617 + kfree(args_buff); 618 + return -EINVAL; 619 + } 620 + 621 + /* Currently HSA Event is not supported for DBG */ 622 + aw_info.watch_event = NULL; 623 + 624 + mutex_lock(kfd_get_dbgmgr_mutex()); 625 + 626 + status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); 627 + 628 + mutex_unlock(kfd_get_dbgmgr_mutex()); 629 + 630 + kfree(args_buff); 631 + 632 + return status; 633 + } 634 + 635 + /* Parse and generate fixed size data structure for wave control */ 636 + static int kfd_ioctl_dbg_wave_control(struct file *filep, 637 + struct kfd_process *p, void *data) 638 + { 639 + struct kfd_ioctl_dbg_wave_control_args *args = data; 640 + struct kfd_dev *dev; 641 + struct dbg_wave_control_info wac_info; 642 + unsigned char *args_buff; 643 + uint32_t computed_buff_size; 644 + long status; 645 + void __user *cmd_from_user; 646 + unsigned int args_idx = 0; 647 + 648 + memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info)); 649 + 650 + /* we use compact form, independent of the packing attribute value */ 651 + computed_buff_size = sizeof(*args) + 652 + sizeof(wac_info.mode) + 653 + 
sizeof(wac_info.operand) + 654 + sizeof(wac_info.dbgWave_msg.DbgWaveMsg) + 655 + sizeof(wac_info.dbgWave_msg.MemoryVA) + 656 + sizeof(wac_info.trapId); 657 + 658 + dev = kfd_device_by_id(args->gpu_id); 659 + if (dev == NULL) 660 + return -EINVAL; 661 + 662 + if (dev->device_info->asic_family == CHIP_CARRIZO) { 663 + pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); 664 + return -EINVAL; 665 + } 666 + 667 + /* input size must match the computed "compact" size */ 668 + if (args->buf_size_in_bytes != computed_buff_size) { 669 + pr_debug("size mismatch, computed : actual %u : %u\n", 670 + args->buf_size_in_bytes, computed_buff_size); 671 + return -EINVAL; 672 + } 673 + 674 + cmd_from_user = (void __user *) args->content_ptr; 675 + 676 + if (cmd_from_user == NULL) 677 + return -EINVAL; 678 + 679 + /* this is the actual buffer to work with */ 680 + 681 + args_buff = kmalloc(args->buf_size_in_bytes - sizeof(*args), 682 + GFP_KERNEL); 683 + 684 + if (args_buff == NULL) 685 + return -ENOMEM; 686 + 687 + /* Now copy the entire buffer from user */ 688 + status = copy_from_user(args_buff, cmd_from_user, 689 + args->buf_size_in_bytes - sizeof(*args)); 690 + if (status != 0) { 691 + pr_debug("Failed to copy wave control user data\n"); 692 + kfree(args_buff); 693 + return -EINVAL; 694 + } 695 + 696 + /* move ptr to the start of the "pay-load" area */ 697 + wac_info.process = p; 698 + 699 + wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); 700 + args_idx += sizeof(wac_info.operand); 701 + 702 + wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); 703 + args_idx += sizeof(wac_info.mode); 704 + 705 + wac_info.trapId = *((uint32_t *)(&args_buff[args_idx])); 706 + args_idx += sizeof(wac_info.trapId); 707 + 708 + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = 709 + *((uint32_t *)(&args_buff[args_idx])); 710 + wac_info.dbgWave_msg.MemoryVA = NULL; 711 + 712 + mutex_lock(kfd_get_dbgmgr_mutex()); 713 + 714 + pr_debug("Calling dbg 
manager process %p, operand %u, mode %u, trapId %u, message %u\n", 715 + wac_info.process, wac_info.operand, 716 + wac_info.mode, wac_info.trapId, 717 + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 718 + 719 + status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); 720 + 721 + pr_debug("Returned status of dbg manager is %ld\n", status); 722 + 723 + mutex_unlock(kfd_get_dbgmgr_mutex()); 724 + 725 + kfree(args_buff); 726 + 727 + return status; 728 + } 729 + 436 730 static int kfd_ioctl_get_clock_counters(struct file *filep, 437 731 struct kfd_process *p, void *data) 438 732 { ··· 908 612 909 613 AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, 910 614 kfd_ioctl_wait_events, 0), 615 + 616 + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER, 617 + kfd_ioctl_dbg_register, 0), 618 + 619 + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER, 620 + kfd_ioctl_dbg_unrgesiter, 0), 621 + 622 + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH, 623 + kfd_ioctl_dbg_address_watch, 0), 624 + 625 + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, 626 + kfd_ioctl_dbg_wave_control, 0), 911 627 }; 912 628 913 629 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
+886
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
··· 1 + /* 2 + * Copyright 2014 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 
21 + * 22 + */ 23 + 24 + #include <linux/types.h> 25 + #include <linux/kernel.h> 26 + #include <linux/log2.h> 27 + #include <linux/sched.h> 28 + #include <linux/slab.h> 29 + #include <linux/mutex.h> 30 + #include <linux/device.h> 31 + 32 + #include "kfd_pm4_headers.h" 33 + #include "kfd_pm4_headers_diq.h" 34 + #include "kfd_kernel_queue.h" 35 + #include "kfd_priv.h" 36 + #include "kfd_pm4_opcodes.h" 37 + #include "cik_regs.h" 38 + #include "kfd_dbgmgr.h" 39 + #include "kfd_dbgdev.h" 40 + #include "kfd_device_queue_manager.h" 41 + #include "../../radeon/cik_reg.h" 42 + 43 + static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) 44 + { 45 + BUG_ON(!dev || !dev->kfd2kgd); 46 + 47 + dev->kfd2kgd->address_watch_disable(dev->kgd); 48 + } 49 + 50 + static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, 51 + unsigned int pasid, uint64_t vmid0_address, 52 + uint32_t *packet_buff, size_t size_in_bytes) 53 + { 54 + struct pm4__release_mem *rm_packet; 55 + struct pm4__indirect_buffer_pasid *ib_packet; 56 + struct kfd_mem_obj *mem_obj; 57 + size_t pq_packets_size_in_bytes; 58 + union ULARGE_INTEGER *largep; 59 + union ULARGE_INTEGER addr; 60 + struct kernel_queue *kq; 61 + uint64_t *rm_state; 62 + unsigned int *ib_packet_buff; 63 + int status; 64 + 65 + BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes); 66 + 67 + kq = dbgdev->kq; 68 + 69 + pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + 70 + sizeof(struct pm4__indirect_buffer_pasid); 71 + 72 + /* 73 + * We acquire a buffer from DIQ 74 + * The receive packet buff will be sitting on the Indirect Buffer 75 + * and in the PQ we put the IB packet + sync packet(s). 
76 + */ 77 + status = kq->ops.acquire_packet_buffer(kq, 78 + pq_packets_size_in_bytes / sizeof(uint32_t), 79 + &ib_packet_buff); 80 + if (status != 0) { 81 + pr_err("amdkfd: acquire_packet_buffer failed\n"); 82 + return status; 83 + } 84 + 85 + memset(ib_packet_buff, 0, pq_packets_size_in_bytes); 86 + 87 + ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); 88 + 89 + ib_packet->header.count = 3; 90 + ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; 91 + ib_packet->header.type = PM4_TYPE_3; 92 + 93 + largep = (union ULARGE_INTEGER *) &vmid0_address; 94 + 95 + ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; 96 + ib_packet->bitfields3.ib_base_hi = largep->u.high_part; 97 + 98 + ib_packet->control = (1 << 23) | (1 << 31) | 99 + ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); 100 + 101 + ib_packet->bitfields5.pasid = pasid; 102 + 103 + /* 104 + * for now we use release mem for GPU-CPU synchronization 105 + * Consider WaitRegMem + WriteData as a better alternative 106 + * we get a GART allocations ( gpu/cpu mapping), 107 + * for the sync variable, and wait until: 108 + * (a) Sync with HW 109 + * (b) Sync var is written by CP to mem. 
110 + */ 111 + rm_packet = (struct pm4__release_mem *) (ib_packet_buff + 112 + (sizeof(struct pm4__indirect_buffer_pasid) / 113 + sizeof(unsigned int))); 114 + 115 + status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), 116 + &mem_obj); 117 + 118 + if (status != 0) { 119 + pr_err("amdkfd: Failed to allocate GART memory\n"); 120 + kq->ops.rollback_packet(kq); 121 + return status; 122 + } 123 + 124 + rm_state = (uint64_t *) mem_obj->cpu_ptr; 125 + 126 + *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; 127 + 128 + rm_packet->header.opcode = IT_RELEASE_MEM; 129 + rm_packet->header.type = PM4_TYPE_3; 130 + rm_packet->header.count = sizeof(struct pm4__release_mem) / 131 + sizeof(unsigned int) - 2; 132 + 133 + rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; 134 + rm_packet->bitfields2.event_index = 135 + event_index___release_mem__end_of_pipe; 136 + 137 + rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; 138 + rm_packet->bitfields2.atc = 0; 139 + rm_packet->bitfields2.tc_wb_action_ena = 1; 140 + 141 + addr.quad_part = mem_obj->gpu_addr; 142 + 143 + rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; 144 + rm_packet->address_hi = addr.u.high_part; 145 + 146 + rm_packet->bitfields3.data_sel = 147 + data_sel___release_mem__send_64_bit_data; 148 + 149 + rm_packet->bitfields3.int_sel = 150 + int_sel___release_mem__send_data_after_write_confirm; 151 + 152 + rm_packet->bitfields3.dst_sel = 153 + dst_sel___release_mem__memory_controller; 154 + 155 + rm_packet->data_lo = QUEUESTATE__ACTIVE; 156 + 157 + kq->ops.submit_packet(kq); 158 + 159 + /* Wait till CP writes sync code: */ 160 + status = amdkfd_fence_wait_timeout( 161 + (unsigned int *) rm_state, 162 + QUEUESTATE__ACTIVE, 1500); 163 + 164 + kfd_gtt_sa_free(dbgdev->dev, mem_obj); 165 + 166 + return status; 167 + } 168 + 169 + static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) 170 + { 171 + BUG_ON(!dbgdev); 172 + 173 + /* 174 + * no action is needed in this case, 175 
+ * just make sure diq will not be used 176 + */ 177 + 178 + dbgdev->kq = NULL; 179 + 180 + return 0; 181 + } 182 + 183 + static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) 184 + { 185 + struct queue_properties properties; 186 + unsigned int qid; 187 + struct kernel_queue *kq = NULL; 188 + int status; 189 + 190 + BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev); 191 + 192 + status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, 193 + &properties, 0, KFD_QUEUE_TYPE_DIQ, 194 + &qid); 195 + 196 + if (status) { 197 + pr_err("amdkfd: Failed to create DIQ\n"); 198 + return status; 199 + } 200 + 201 + pr_debug("DIQ Created with queue id: %d\n", qid); 202 + 203 + kq = pqm_get_kernel_queue(dbgdev->pqm, qid); 204 + 205 + if (kq == NULL) { 206 + pr_err("amdkfd: Error getting DIQ\n"); 207 + pqm_destroy_queue(dbgdev->pqm, qid); 208 + return -EFAULT; 209 + } 210 + 211 + dbgdev->kq = kq; 212 + 213 + return status; 214 + } 215 + 216 + static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) 217 + { 218 + BUG_ON(!dbgdev || !dbgdev->dev); 219 + 220 + /* disable watch address */ 221 + dbgdev_address_watch_disable_nodiq(dbgdev->dev); 222 + return 0; 223 + } 224 + 225 + static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) 226 + { 227 + /* todo - disable address watch */ 228 + int status; 229 + 230 + BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq); 231 + 232 + status = pqm_destroy_queue(dbgdev->pqm, 233 + dbgdev->kq->queue->properties.queue_id); 234 + dbgdev->kq = NULL; 235 + 236 + return status; 237 + } 238 + 239 + static void dbgdev_address_watch_set_registers( 240 + const struct dbg_address_watch_info *adw_info, 241 + union TCP_WATCH_ADDR_H_BITS *addrHi, 242 + union TCP_WATCH_ADDR_L_BITS *addrLo, 243 + union TCP_WATCH_CNTL_BITS *cntl, 244 + unsigned int index, unsigned int vmid) 245 + { 246 + union ULARGE_INTEGER addr; 247 + 248 + BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); 249 + 250 + addr.quad_part = 0; 251 + addrHi->u32All = 0; 252 + addrLo->u32All = 0; 
253 + cntl->u32All = 0; 254 + 255 + if (adw_info->watch_mask != NULL) 256 + cntl->bitfields.mask = 257 + (uint32_t) (adw_info->watch_mask[index] & 258 + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); 259 + else 260 + cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 261 + 262 + addr.quad_part = (unsigned long long) adw_info->watch_address[index]; 263 + 264 + addrHi->bitfields.addr = addr.u.high_part & 265 + ADDRESS_WATCH_REG_ADDHIGH_MASK; 266 + addrLo->bitfields.addr = 267 + (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); 268 + 269 + cntl->bitfields.mode = adw_info->watch_mode[index]; 270 + cntl->bitfields.vmid = (uint32_t) vmid; 271 + /* for now assume it is an ATC address */ 272 + cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; 273 + 274 + pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); 275 + pr_debug("\t\t%20s %08x\n", "set reg add high :", 276 + addrHi->bitfields.addr); 277 + pr_debug("\t\t%20s %08x\n", "set reg add low :", 278 + addrLo->bitfields.addr); 279 + } 280 + 281 + static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, 282 + struct dbg_address_watch_info *adw_info) 283 + { 284 + union TCP_WATCH_ADDR_H_BITS addrHi; 285 + union TCP_WATCH_ADDR_L_BITS addrLo; 286 + union TCP_WATCH_CNTL_BITS cntl; 287 + struct kfd_process_device *pdd; 288 + unsigned int i; 289 + 290 + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); 291 + 292 + /* taking the vmid for that process on the safe way using pdd */ 293 + pdd = kfd_get_process_device_data(dbgdev->dev, 294 + adw_info->process); 295 + if (!pdd) { 296 + pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); 297 + return -EFAULT; 298 + } 299 + 300 + addrHi.u32All = 0; 301 + addrLo.u32All = 0; 302 + cntl.u32All = 0; 303 + 304 + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 305 + (adw_info->num_watch_points == 0)) { 306 + pr_err("amdkfd: num_watch_points is invalid\n"); 307 + return -EINVAL; 308 + } 309 + 310 + if ((adw_info->watch_mode == NULL) || 311 + 
(adw_info->watch_address == NULL)) { 312 + pr_err("amdkfd: adw_info fields are not valid\n"); 313 + return -EINVAL; 314 + } 315 + 316 + for (i = 0 ; i < adw_info->num_watch_points ; i++) { 317 + dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, 318 + &cntl, i, pdd->qpd.vmid); 319 + 320 + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 321 + pr_debug("\t\t%20s %08x\n", "register index :", i); 322 + pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); 323 + pr_debug("\t\t%20s %08x\n", "Address Low is :", 324 + addrLo.bitfields.addr); 325 + pr_debug("\t\t%20s %08x\n", "Address high is :", 326 + addrHi.bitfields.addr); 327 + pr_debug("\t\t%20s %08x\n", "Address high is :", 328 + addrHi.bitfields.addr); 329 + pr_debug("\t\t%20s %08x\n", "Control Mask is :", 330 + cntl.bitfields.mask); 331 + pr_debug("\t\t%20s %08x\n", "Control Mode is :", 332 + cntl.bitfields.mode); 333 + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 334 + cntl.bitfields.vmid); 335 + pr_debug("\t\t%20s %08x\n", "Control atc is :", 336 + cntl.bitfields.atc); 337 + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 338 + 339 + pdd->dev->kfd2kgd->address_watch_execute( 340 + dbgdev->dev->kgd, 341 + i, 342 + cntl.u32All, 343 + addrHi.u32All, 344 + addrLo.u32All); 345 + } 346 + 347 + return 0; 348 + } 349 + 350 + static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, 351 + struct dbg_address_watch_info *adw_info) 352 + { 353 + struct pm4__set_config_reg *packets_vec; 354 + union TCP_WATCH_ADDR_H_BITS addrHi; 355 + union TCP_WATCH_ADDR_L_BITS addrLo; 356 + union TCP_WATCH_CNTL_BITS cntl; 357 + struct kfd_mem_obj *mem_obj; 358 + unsigned int aw_reg_add_dword; 359 + uint32_t *packet_buff_uint; 360 + unsigned int i; 361 + int status; 362 + size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; 363 + /* we do not control the vmid in DIQ mode, just a place holder */ 364 + unsigned int vmid = 0; 365 + 366 + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); 367 + 
368 + addrHi.u32All = 0; 369 + addrLo.u32All = 0; 370 + cntl.u32All = 0; 371 + 372 + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 373 + (adw_info->num_watch_points == 0)) { 374 + pr_err("amdkfd: num_watch_points is invalid\n"); 375 + return -EINVAL; 376 + } 377 + 378 + if ((NULL == adw_info->watch_mode) || 379 + (NULL == adw_info->watch_address)) { 380 + pr_err("amdkfd: adw_info fields are not valid\n"); 381 + return -EINVAL; 382 + } 383 + 384 + status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 385 + 386 + if (status != 0) { 387 + pr_err("amdkfd: Failed to allocate GART memory\n"); 388 + return status; 389 + } 390 + 391 + packet_buff_uint = mem_obj->cpu_ptr; 392 + 393 + memset(packet_buff_uint, 0, ib_size); 394 + 395 + packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); 396 + 397 + packets_vec[0].header.count = 1; 398 + packets_vec[0].header.opcode = IT_SET_CONFIG_REG; 399 + packets_vec[0].header.type = PM4_TYPE_3; 400 + packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 401 + packets_vec[0].bitfields2.insert_vmid = 1; 402 + packets_vec[1].ordinal1 = packets_vec[0].ordinal1; 403 + packets_vec[1].bitfields2.insert_vmid = 0; 404 + packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 405 + packets_vec[2].bitfields2.insert_vmid = 0; 406 + packets_vec[3].ordinal1 = packets_vec[0].ordinal1; 407 + packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 408 + packets_vec[3].bitfields2.insert_vmid = 1; 409 + 410 + for (i = 0; i < adw_info->num_watch_points; i++) { 411 + dbgdev_address_watch_set_registers(adw_info, 412 + &addrHi, 413 + &addrLo, 414 + &cntl, 415 + i, 416 + vmid); 417 + 418 + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 419 + pr_debug("\t\t%20s %08x\n", "register index :", i); 420 + pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); 421 + pr_debug("\t\t%20s %p\n", "Add ptr is :", 422 + adw_info->watch_address); 423 + pr_debug("\t\t%20s %08llx\n", "Add is :", 424 + 
adw_info->watch_address[i]); 425 + pr_debug("\t\t%20s %08x\n", "Address Low is :", 426 + addrLo.bitfields.addr); 427 + pr_debug("\t\t%20s %08x\n", "Address high is :", 428 + addrHi.bitfields.addr); 429 + pr_debug("\t\t%20s %08x\n", "Control Mask is :", 430 + cntl.bitfields.mask); 431 + pr_debug("\t\t%20s %08x\n", "Control Mode is :", 432 + cntl.bitfields.mode); 433 + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 434 + cntl.bitfields.vmid); 435 + pr_debug("\t\t%20s %08x\n", "Control atc is :", 436 + cntl.bitfields.atc); 437 + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 438 + 439 + aw_reg_add_dword = 440 + dbgdev->dev->kfd2kgd->address_watch_get_offset( 441 + dbgdev->dev->kgd, 442 + i, 443 + ADDRESS_WATCH_REG_CNTL); 444 + 445 + aw_reg_add_dword /= sizeof(uint32_t); 446 + 447 + packets_vec[0].bitfields2.reg_offset = 448 + aw_reg_add_dword - CONFIG_REG_BASE; 449 + 450 + packets_vec[0].reg_data[0] = cntl.u32All; 451 + 452 + aw_reg_add_dword = 453 + dbgdev->dev->kfd2kgd->address_watch_get_offset( 454 + dbgdev->dev->kgd, 455 + i, 456 + ADDRESS_WATCH_REG_ADDR_HI); 457 + 458 + aw_reg_add_dword /= sizeof(uint32_t); 459 + 460 + packets_vec[1].bitfields2.reg_offset = 461 + aw_reg_add_dword - CONFIG_REG_BASE; 462 + packets_vec[1].reg_data[0] = addrHi.u32All; 463 + 464 + aw_reg_add_dword = 465 + dbgdev->dev->kfd2kgd->address_watch_get_offset( 466 + dbgdev->dev->kgd, 467 + i, 468 + ADDRESS_WATCH_REG_ADDR_LO); 469 + 470 + aw_reg_add_dword /= sizeof(uint32_t); 471 + 472 + packets_vec[2].bitfields2.reg_offset = 473 + aw_reg_add_dword - CONFIG_REG_BASE; 474 + packets_vec[2].reg_data[0] = addrLo.u32All; 475 + 476 + /* enable watch flag if address is not zero*/ 477 + if (adw_info->watch_address[i] > 0) 478 + cntl.bitfields.valid = 1; 479 + else 480 + cntl.bitfields.valid = 0; 481 + 482 + aw_reg_add_dword = 483 + dbgdev->dev->kfd2kgd->address_watch_get_offset( 484 + dbgdev->dev->kgd, 485 + i, 486 + ADDRESS_WATCH_REG_CNTL); 487 + 488 + aw_reg_add_dword /= 
sizeof(uint32_t); 489 + 490 + packets_vec[3].bitfields2.reg_offset = 491 + aw_reg_add_dword - CONFIG_REG_BASE; 492 + packets_vec[3].reg_data[0] = cntl.u32All; 493 + 494 + status = dbgdev_diq_submit_ib( 495 + dbgdev, 496 + adw_info->process->pasid, 497 + mem_obj->gpu_addr, 498 + packet_buff_uint, 499 + ib_size); 500 + 501 + if (status != 0) { 502 + pr_err("amdkfd: Failed to submit IB to DIQ\n"); 503 + break; 504 + } 505 + } 506 + 507 + kfd_gtt_sa_free(dbgdev->dev, mem_obj); 508 + return status; 509 + } 510 + 511 + static int dbgdev_wave_control_set_registers( 512 + struct dbg_wave_control_info *wac_info, 513 + union SQ_CMD_BITS *in_reg_sq_cmd, 514 + union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) 515 + { 516 + int status; 517 + union SQ_CMD_BITS reg_sq_cmd; 518 + union GRBM_GFX_INDEX_BITS reg_gfx_index; 519 + struct HsaDbgWaveMsgAMDGen2 *pMsg; 520 + 521 + BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index); 522 + 523 + reg_sq_cmd.u32All = 0; 524 + reg_gfx_index.u32All = 0; 525 + pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; 526 + 527 + switch (wac_info->mode) { 528 + /* Send command to single wave */ 529 + case HSA_DBG_WAVEMODE_SINGLE: 530 + /* 531 + * Limit access to the process waves only, 532 + * by setting vmid check 533 + */ 534 + reg_sq_cmd.bits.check_vmid = 1; 535 + reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; 536 + reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; 537 + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; 538 + 539 + reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 540 + reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 541 + reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 542 + 543 + break; 544 + 545 + /* Send command to all waves with matching VMID */ 546 + case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: 547 + 548 + reg_gfx_index.bits.sh_broadcast_writes = 1; 549 + reg_gfx_index.bits.se_broadcast_writes = 1; 550 + reg_gfx_index.bits.instance_broadcast_writes = 1; 551 + 552 + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 553 + 
/*
 * Send a wave-control command through the Debug Interface Queue (DIQ).
 *
 * Builds an indirect buffer of three PM4 SET_(U)CONFIG_REG packets:
 *   [0] program GRBM_GFX_INDEX to select the target SE/SH/CU,
 *   [1] write the wave-control command into SQ_CMD (the CP inserts the
 *       VMID of the submitting process at SQ_CMD_VMID_OFFSET),
 *   [2] restore GRBM_GFX_INDEX to broadcast-to-all mode,
 * then submits the IB on the DIQ on behalf of the debugged process's
 * PASID.  Returns 0 on success or a negative errno on failure.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{

	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	/* three packets: set GRBM_GFX_INDEX, write SQ_CMD, restore index */
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	BUG_ON(!dbgdev || !wac_info);

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	/* dump the register values that are about to be programmed */
	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: N/A\n");

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	/* the IB lives in GART memory so the GPU can fetch it */
	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("amdkfd: Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;

	/* packet 0: select the SE/SH/CU via GRBM_GFX_INDEX (uconfig space) */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	/* packet 1: issue the SQ_CMD; CP inserts the VMID at the given shift */
	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
						CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;

	/* packet 2: same header as packet 0, broadcast value */
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
				USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status != 0)
		pr_err("amdkfd: Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
/*
 * Send a wave-control command directly through the kgd interface
 * (no-HWS path, no DIQ available).  The VMID is read from the process
 * device data and patched into the SQ_CMD value so the command only
 * affects the debugged process's waves.
 *
 * Returns 0 on success, -EFAULT if the process has no device data on
 * this device, or the status of the register setup / kgd call.
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("amdkfd: Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	/* dump the register values that are about to be programmed */
	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode is: %u\n", wac_info->mode);
	pr_debug("\t\t operand is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind is : %u\n",
			reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
reg_gfx_index.bitfields.sh_index); 785 + pr_debug("\t\t sbw is : %u\n", 786 + reg_gfx_index.bitfields.sh_broadcast_writes); 787 + 788 + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 789 + 790 + return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, 791 + reg_gfx_index.u32All, 792 + reg_sq_cmd.u32All); 793 + } 794 + 795 + int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 796 + { 797 + int status = 0; 798 + unsigned int vmid; 799 + union SQ_CMD_BITS reg_sq_cmd; 800 + union GRBM_GFX_INDEX_BITS reg_gfx_index; 801 + struct kfd_process_device *pdd; 802 + struct dbg_wave_control_info wac_info; 803 + int temp; 804 + int first_vmid_to_scan = 8; 805 + int last_vmid_to_scan = 15; 806 + 807 + first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1; 808 + temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan; 809 + last_vmid_to_scan = first_vmid_to_scan + ffz(temp); 810 + 811 + reg_sq_cmd.u32All = 0; 812 + status = 0; 813 + 814 + wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; 815 + wac_info.operand = HSA_DBG_WAVEOP_KILL; 816 + 817 + pr_debug("Killing all process wavefronts\n"); 818 + 819 + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 820 + * ATC_VMID15_PASID_MAPPING 821 + * to check which VMID the current process is mapped to. 
*/ 822 + 823 + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 824 + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid 825 + (dev->kgd, vmid)) { 826 + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid 827 + (dev->kgd, vmid) == p->pasid) { 828 + pr_debug("Killing wave fronts of vmid %d and pasid %d\n", 829 + vmid, p->pasid); 830 + break; 831 + } 832 + } 833 + } 834 + 835 + if (vmid > last_vmid_to_scan) { 836 + pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid); 837 + return -EFAULT; 838 + } 839 + 840 + /* taking the VMID for that process on the safe way using PDD */ 841 + pdd = kfd_get_process_device_data(dev, p); 842 + if (!pdd) 843 + return -EFAULT; 844 + 845 + status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd, 846 + &reg_gfx_index); 847 + if (status != 0) 848 + return -EINVAL; 849 + 850 + /* for non DIQ we need to patch the VMID: */ 851 + reg_sq_cmd.bits.vm_id = vmid; 852 + 853 + dev->kfd2kgd->wave_control_execute(dev->kgd, 854 + reg_gfx_index.u32All, 855 + reg_sq_cmd.u32All); 856 + 857 + return 0; 858 + } 859 + 860 + void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, 861 + enum DBGDEV_TYPE type) 862 + { 863 + BUG_ON(!pdbgdev || !pdev); 864 + 865 + pdbgdev->dev = pdev; 866 + pdbgdev->kq = NULL; 867 + pdbgdev->type = type; 868 + pdbgdev->pqm = NULL; 869 + 870 + switch (type) { 871 + case DBGDEV_TYPE_NODIQ: 872 + pdbgdev->dbgdev_register = dbgdev_register_nodiq; 873 + pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; 874 + pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; 875 + pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; 876 + break; 877 + case DBGDEV_TYPE_DIQ: 878 + default: 879 + pdbgdev->dbgdev_register = dbgdev_register_diq; 880 + pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; 881 + pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; 882 + pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; 883 + break; 884 + } 885 + 886 + }
+193
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef KFD_DBGDEV_H_
#define KFD_DBGDEV_H_

/*
 * H/W debugger device internals: register space layouts, SQ indirect
 * command encodings and hardware bitfield overlays used by kfd_dbgdev.c.
 * The bitfield unions below mirror CIK hardware register layouts and
 * must not be reordered or resized.
 */

/* bit positions the CP uses when inserting fields into PM4 packets */
enum {
	SQ_CMD_VMID_OFFSET = 28,
	ADDRESS_WATCH_CNTL_OFFSET = 24
};

/* time to wait (ms) for the privileged (DIQ) queue to sync */
enum {
	PRIV_QUEUE_SYNC_TIME_MS = 200
};

/* CONTEXT reg space definition */
enum {
	CONTEXT_REG_BASE = 0xA000,
	CONTEXT_REG_END = 0xA400,
	CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
};

/* USER CONFIG reg space definition */
enum {
	USERCONFIG_REG_BASE = 0xC000,
	USERCONFIG_REG_END = 0x10000,
	USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
};

/* CONFIG reg space definition */
enum {
	CONFIG_REG_BASE = 0x2000,	/* in dwords */
	CONFIG_REG_END = 0x2B00,
	CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE
};

/* SH reg space definition */
enum {
	SH_REG_BASE = 0x2C00,
	SH_REG_END = 0x3000,
	SH_REG_SIZE = SH_REG_END - SH_REG_BASE
};

/* SQ indirect command opcodes written into SQ_CMD.cmd */
enum SQ_IND_CMD_CMD {
	SQ_IND_CMD_CMD_NULL = 0x00000000,
	SQ_IND_CMD_CMD_HALT = 0x00000001,
	SQ_IND_CMD_CMD_RESUME = 0x00000002,
	SQ_IND_CMD_CMD_KILL = 0x00000003,
	SQ_IND_CMD_CMD_DEBUG = 0x00000004,
	SQ_IND_CMD_CMD_TRAP = 0x00000005,
};

/* targeting modes for an SQ indirect command (SQ_CMD.mode) */
enum SQ_IND_CMD_MODE {
	SQ_IND_CMD_MODE_SINGLE = 0x00000000,
	SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
	SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
	SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
	SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
};

/* SQ_IND_INDEX register bit layout */
union SQ_IND_INDEX_BITS {
	struct {
		uint32_t wave_id:4;
		uint32_t simd_id:2;
		uint32_t thread_id:6;
		uint32_t:1;
		uint32_t force_read:1;
		uint32_t read_timeout:1;
		uint32_t unindexed:1;
		uint32_t index:16;

	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

/* SQ_IND_CMD register (raw data word) */
union SQ_IND_CMD_BITS {
	struct {
		uint32_t data:32;
	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

/* SQ_CMD register bit layout (wave-control command word) */
union SQ_CMD_BITS {
	struct {
		uint32_t cmd:3;
		uint32_t:1;
		uint32_t mode:3;
		uint32_t check_vmid:1;
		uint32_t trap_id:3;
		uint32_t:5;
		uint32_t wave_id:4;
		uint32_t simd_id:2;
		uint32_t:2;
		uint32_t queue_id:3;
		uint32_t:1;
		uint32_t vm_id:4;
	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

/* SQ_IND_DATA register (raw data word) */
union SQ_IND_DATA_BITS {
	struct {
		uint32_t data:32;
	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

/* GRBM_GFX_INDEX register: selects SE/SH/CU or broadcast writes */
union GRBM_GFX_INDEX_BITS {
	struct {
		uint32_t instance_index:8;
		uint32_t sh_index:8;
		uint32_t se_index:8;
		uint32_t:5;
		uint32_t sh_broadcast_writes:1;
		uint32_t instance_broadcast_writes:1;
		uint32_t se_broadcast_writes:1;
	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

/* high 16 bits of a TCP address-watch address */
union TCP_WATCH_ADDR_H_BITS {
	struct {
		uint32_t addr:16;
		uint32_t:16;

	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

/* low bits of a TCP address-watch address (64-byte aligned) */
union TCP_WATCH_ADDR_L_BITS {
	struct {
		uint32_t:6;
		uint32_t addr:26;
	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

enum {
	QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
	QUEUESTATE__ACTIVE_COMPLETION_PENDING,
	QUEUESTATE__ACTIVE
};

/* 64-bit value addressable as two 32-bit halves */
union ULARGE_INTEGER {
	struct {
		uint32_t low_part;
		uint32_t high_part;
	} u;
	unsigned long long quad_part;
};


/* VMIDs 8..15 are reserved for compute (KFD) on CIK */
#define KFD_CIK_VMID_START_OFFSET (8)
#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))


void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
			enum DBGDEV_TYPE type);

#endif	/* KFD_DBGDEV_H_ */
+168
drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/*
 * H/W debugger manager: per-device front-end that owns a kfd_dbgdev,
 * tracks which process (by PASID) currently holds the debugger, and
 * dispatches register/unregister/wave-control/address-watch requests
 * to the dbgdev after validating the requester.
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/device.h>

#include "kfd_priv.h"
#include "cik_regs.h"
#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_diq.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"

/* serializes all debug-manager operations */
static DEFINE_MUTEX(kfd_dbgmgr_mutex);

struct mutex *kfd_get_dbgmgr_mutex(void)
{
	return &kfd_dbgmgr_mutex;
}


/* release the dbgdev and reset the manager to its unregistered state */
static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
{
	BUG_ON(!pmgr);

	kfree(pmgr->dbgdev);

	pmgr->dbgdev = NULL;
	pmgr->pasid = 0;
	pmgr->dev = NULL;
}

void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
{
	if (pmgr != NULL) {
		kfd_dbgmgr_uninitialize(pmgr);
		kfree(pmgr);
	}
}

/*
 * Allocate and initialize a debug manager (and its dbgdev) for @pdev.
 * DIQ-based debugging is chosen unless the scheduler runs without HWS.
 * Returns true on success, false on allocation failure.
 */
bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
{
	enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
	struct kfd_dbgmgr *new_buff;

	BUG_ON(pdev == NULL);
	BUG_ON(!pdev->init_complete);

	new_buff = kfd_alloc_struct(new_buff);
	if (!new_buff) {
		pr_err("amdkfd: Failed to allocate dbgmgr instance\n");
		return false;
	}

	new_buff->pasid = 0;
	new_buff->dev = pdev;
	new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
	if (!new_buff->dbgdev) {
		pr_err("amdkfd: Failed to allocate dbgdev instance\n");
		kfree(new_buff);
		return false;
	}

	/* get actual type of DBGDevice cpsch or not */
	if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
		type = DBGDEV_TYPE_NODIQ;

	kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
	*ppmgr = new_buff;

	return true;
}

/*
 * Register @p as the (single) debugged process on this device.
 * Returns -EBUSY if another process already holds the debugger.
 */
long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
{
	BUG_ON(!p || !pmgr || !pmgr->dbgdev);

	if (pmgr->pasid != 0) {
		pr_debug("H/W debugger is already active using pasid %d\n",
				pmgr->pasid);
		return -EBUSY;
	}

	/* remember pasid */
	pmgr->pasid = p->pasid;

	/* provide the pqm for diq generation */
	pmgr->dbgdev->pqm = &p->pqm;

	/* activate the actual registering */
	pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);

	return 0;
}

/*
 * Unregister @p; only the process that registered may unregister.
 * Returns -EINVAL if @p is not the registered debugger owner.
 */
long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
{
	BUG_ON(!p || !pmgr || !pmgr->dbgdev);

	/* Is the requests coming from the already registered process? */
	if (pmgr->pasid != p->pasid) {
		pr_debug("H/W debugger is not registered by calling pasid %d\n",
				p->pasid);
		return -EINVAL;
	}

	pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);

	pmgr->pasid = 0;

	return 0;
}

/*
 * Forward a wave-control request to the dbgdev after checking that the
 * requester is the registered debugged process.
 */
long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
				struct dbg_wave_control_info *wac_info)
{
	BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info);

	/* Is the requests coming from the already registered process? */
	if (pmgr->pasid != wac_info->process->pasid) {
		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
				wac_info->process->pasid);
		return -EINVAL;
	}

	return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
}

/*
 * Forward an address-watch request to the dbgdev after checking that
 * the requester is the registered debugged process.
 */
long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
				struct dbg_address_watch_info *adw_info)
{
	BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info);


	/* Is the requests coming from the already registered process? */
	if (pmgr->pasid != adw_info->process->pasid) {
		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
				adw_info->process->pasid);
		return -EINVAL;
	}

	return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
							adw_info);
}
+294
drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef KFD_DBGMGR_H_
#define KFD_DBGMGR_H_

#include "kfd_priv.h"

/*
 * Public H/W debugger manager interface plus the HSA debug type
 * definitions shared with the user-mode thunk.
 */

/* must align with hsakmttypes definition */
#pragma pack(push, 4)

enum HSA_DBG_WAVEOP {
	HSA_DBG_WAVEOP_HALT = 1,	/* Halts a wavefront		*/
	HSA_DBG_WAVEOP_RESUME = 2,	/* Resumes a wavefront		*/
	HSA_DBG_WAVEOP_KILL = 3,	/* Kills a wavefront		*/
	HSA_DBG_WAVEOP_DEBUG = 4,	/* Causes wavefront to enter
					   debug mode			*/
	HSA_DBG_WAVEOP_TRAP = 5,	/* Causes wavefront to take
					   a trap			*/
	HSA_DBG_NUM_WAVEOP = 5,
	HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
};

enum HSA_DBG_WAVEMODE {
	/* send command to a single wave */
	HSA_DBG_WAVEMODE_SINGLE = 0,
	/*
	 * Broadcast to all wavefronts of all processes is not
	 * supported for HSA user mode
	 */

	/* send to waves within current process */
	HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
	/* send to waves within current process on CU  */
	HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
	HSA_DBG_NUM_WAVEMODE = 3,
	HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
};

enum HSA_DBG_WAVEMSG_TYPE {
	HSA_DBG_WAVEMSG_AUTO = 0,
	HSA_DBG_WAVEMSG_USER = 1,
	HSA_DBG_WAVEMSG_ERROR = 2,
	HSA_DBG_NUM_WAVEMSG,
	HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
};

enum HSA_DBG_WATCH_MODE {
	HSA_DBG_WATCH_READ = 0,		/* Read operations only */
	HSA_DBG_WATCH_NONREAD = 1,	/* Write or Atomic operations only */
	HSA_DBG_WATCH_ATOMIC = 2,	/* Atomic Operations only */
	HSA_DBG_WATCH_ALL = 3,		/* Read, Write or Atomic operations */
	HSA_DBG_WATCH_NUM,
	HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
};

/* This structure is hardware specific and may change in the future */
struct HsaDbgWaveMsgAMDGen2 {
	union {
		struct ui32 {
			uint32_t UserData:8;	/* user data */
			uint32_t ShaderArray:1;	/* Shader array */
			uint32_t Priv:1;	/* Privileged */
			uint32_t Reserved0:4;	/* This field is reserved,
						   should be 0 */
			uint32_t WaveId:4;	/* wave id */
			uint32_t SIMD:2;	/* SIMD id */
			uint32_t HSACU:4;	/* Compute unit */
			uint32_t ShaderEngine:2;/* Shader engine */
			uint32_t MessageType:2;	/* see HSA_DBG_WAVEMSG_TYPE */
			uint32_t Reserved1:4;	/* This field is reserved,
						   should be 0 */
		} ui32;
		uint32_t Value;
	};
	uint32_t Reserved2;
};

union HsaDbgWaveMessageAMD {
	struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
	/* for future HsaDbgWaveMsgAMDGen3; */
};

struct HsaDbgWaveMessage {
	void *MemoryVA;		/* ptr to associated host-accessible data */
	union HsaDbgWaveMessageAMD DbgWaveMsg;
};

/*
 * TODO: This definitions to be MOVED to kfd_event, once it is implemented.
 *
 * HSA sync primitive, Event and HW Exception notification API definitions.
 * The API functions allow the runtime to define a so-called sync-primitive,
 * a SW object combining a user-mode provided "syncvar" and a scheduler event
 * that can be signaled through a defined GPU interrupt. A syncvar is
 * a process virtual memory location of a certain size that can be accessed
 * by CPU and GPU shader code within the process to set and query the content
 * within that memory. The definition of the content is determined by the HSA
 * runtime and potentially GPU shader code interfacing with the HSA runtime.
 * The syncvar values may be commonly written through an PM4 WRITE_DATA packet
 * in the user mode instruction stream. The OS scheduler event is typically
 * associated and signaled by an interrupt issued by the GPU, but other HSA
 * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
 * by the KFD by this mechanism, too.
 */

/* these are the new definitions for events */
enum HSA_EVENTTYPE {
	HSA_EVENTTYPE_SIGNAL = 0,	/* user-mode generated GPU signal */
	HSA_EVENTTYPE_NODECHANGE = 1,	/* HSA node change (attach/detach) */
	HSA_EVENTTYPE_DEVICESTATECHANGE = 2,	/* HSA device state change
						   (start/stop) */
	HSA_EVENTTYPE_HW_EXCEPTION = 3,	/* GPU shader exception event */
	HSA_EVENTTYPE_SYSTEM_EVENT = 4,	/* GPU SYSCALL with parameter info */
	HSA_EVENTTYPE_DEBUG_EVENT = 5,	/* GPU signal for debugging */
	HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
	HSA_EVENTTYPE_QUEUE_EVENT = 7,	/* GPU signal queue idle state
					   (EOP pm4) */
	/* ...  */
	HSA_EVENTTYPE_MAXID,
	HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
};

/* Sub-definitions for various event types: Syncvar */
struct HsaSyncVar {
	union SyncVar {
		void *UserData;	/* pointer to user mode data */
		uint64_t UserDataPtrValue; /* 64bit compatibility of value */
	} SyncVar;
	uint64_t SyncVarSize;
};

/* Sub-definitions for various event types: NodeChange */

enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
	HSA_EVENTTYPE_NODECHANGE_ADD = 0,
	HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
	HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
};

struct HsaNodeChange {
	/* HSA node added/removed on the platform */
	enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
};

/* Sub-definitions for various event types: DeviceStateChange */
enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
	/* device started (and available) */
	HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
	/* device stopped (i.e. unavailable) */
	HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
	HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
};

enum HSA_DEVICE {
	HSA_DEVICE_CPU = 0,
	HSA_DEVICE_GPU = 1,
	MAX_HSA_DEVICE = 2
};

struct HsaDeviceStateChange {
	uint32_t NodeId;	/* F-NUMA node that contains the device */
	enum HSA_DEVICE Device;	/* device type: GPU or CPU */
	enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
};

struct HsaEventData {
	enum HSA_EVENTTYPE EventType; /* event type */
	union EventData {
		/*
		 * return data associated with HSA_EVENTTYPE_SIGNAL
		 * and other events
		 */
		struct HsaSyncVar SyncVar;

		/* data associated with HSA_EVENTTYPE_NODE_CHANGE */
		struct HsaNodeChange NodeChangeState;

		/* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
		struct HsaDeviceStateChange DeviceState;
	} EventData;

	/* the following data entries are internal to the KFD & thunk itself */

	/* internal thunk store for Event data (OsEventHandle) */
	uint64_t HWData1;
	/* internal thunk store for Event data (HWAddress) */
	uint64_t HWData2;
	/* internal thunk store for Event data (HWData) */
	uint32_t HWData3;
};

struct HsaEventDescriptor {
	/* event type to allocate */
	enum HSA_EVENTTYPE EventType;
	/* H-NUMA node containing GPU device that is event source */
	uint32_t NodeId;
	/* pointer to user mode syncvar data, syncvar->UserDataPtrValue
	 * may be NULL
	 */
	struct HsaSyncVar SyncVar;
};

struct HsaEvent {
	uint32_t EventId;
	struct HsaEventData EventData;
};

#pragma pack(pop)

enum DBGDEV_TYPE {
	DBGDEV_TYPE_ILLEGAL = 0,
	DBGDEV_TYPE_NODIQ = 1,
	DBGDEV_TYPE_DIQ = 2,
	DBGDEV_TYPE_TEST = 3
};

/* parameters for an address-watch request (per watch point arrays) */
struct dbg_address_watch_info {
	struct kfd_process *process;
	enum HSA_DBG_WATCH_MODE *watch_mode;
	uint64_t *watch_address;
	uint64_t *watch_mask;
	struct HsaEvent *watch_event;
	uint32_t num_watch_points;
};

/* parameters for a wave-control request */
struct dbg_wave_control_info {
	struct kfd_process *process;
	uint32_t trapId;
	enum HSA_DBG_WAVEOP operand;
	enum HSA_DBG_WAVEMODE mode;
	struct HsaDbgWaveMessage dbgWave_msg;
};

struct kfd_dbgdev {

	/* The device that owns this data. */
	struct kfd_dev *dev;

	/* kernel queue for DIQ */
	struct kernel_queue *kq;

	/* a pointer to the pqm of the calling process */
	struct process_queue_manager *pqm;

	/* type of debug device ( DIQ, non DIQ, etc. ) */
	enum DBGDEV_TYPE type;

	/* virtualized function pointers to device dbg */
	int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
	int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
	int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
				struct dbg_address_watch_info *adw_info);
	int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
				struct dbg_wave_control_info *wac_info);

};

struct kfd_dbgmgr {
	unsigned int pasid;	/* PASID of the registered debugged process */
	struct kfd_dev *dev;
	struct kfd_dbgdev *dbgdev;
};

/* prototypes for debug manager functions */
struct mutex *kfd_get_dbgmgr_mutex(void);
void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
				struct dbg_wave_control_info *wac_info);
long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
			struct dbg_address_watch_info *adw_info);

#endif /* KFD_DBGMGR_H_ */
+5
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 33 33 static const struct kfd_device_info kaveri_device_info = { 34 34 .asic_family = CHIP_KAVERI, 35 35 .max_pasid_bits = 16, 36 + /* max num of queues for KV.TODO should be a dynamic value */ 37 + .max_no_of_hqd = 24, 36 38 .ih_ring_entry_size = 4 * sizeof(uint32_t), 37 39 .event_interrupt_class = &event_interrupt_class_cik, 40 + .num_of_watch_points = 4, 38 41 .mqd_size_aligned = MQD_SIZE_ALIGNED 39 42 }; 40 43 ··· 296 293 kfd->pdev->vendor, kfd->pdev->device); 297 294 goto dqm_start_error; 298 295 } 296 + 297 + kfd->dbgmgr = NULL; 299 298 300 299 kfd->init_complete = true; 301 300 dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
+39 -9
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 45 45 struct qcm_process_device *qpd); 46 46 47 47 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); 48 - static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock); 48 + static int destroy_queues_cpsch(struct device_queue_manager *dqm, 49 + bool preempt_static_queues, bool lock); 49 50 50 51 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, 51 52 struct queue *q, ··· 776 775 777 776 BUG_ON(!dqm); 778 777 779 - destroy_queues_cpsch(dqm, true); 778 + destroy_queues_cpsch(dqm, true, true); 780 779 781 780 list_for_each_entry(node, &dqm->queues, list) { 782 781 pdd = qpd_to_pdd(node->qpd); ··· 830 829 pr_debug("kfd: In %s\n", __func__); 831 830 832 831 mutex_lock(&dqm->lock); 833 - destroy_queues_cpsch(dqm, false); 832 + /* here we actually preempt the DIQ */ 833 + destroy_queues_cpsch(dqm, true, false); 834 834 list_del(&kq->list); 835 835 dqm->queue_count--; 836 836 qpd->is_debug = false; ··· 915 913 return retval; 916 914 } 917 915 918 - static int amdkfd_fence_wait_timeout(unsigned int *fence_addr, 916 + int amdkfd_fence_wait_timeout(unsigned int *fence_addr, 919 917 unsigned int fence_value, 920 918 unsigned long timeout) 921 919 { ··· 937 935 unsigned int sdma_engine) 938 936 { 939 937 return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, 940 - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 938 + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false, 941 939 sdma_engine); 942 940 } 943 941 944 - static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) 942 + static int destroy_queues_cpsch(struct device_queue_manager *dqm, 943 + bool preempt_static_queues, bool lock) 945 944 { 946 945 int retval; 946 + enum kfd_preempt_type_filter preempt_type; 947 + struct kfd_process *p; 947 948 948 949 BUG_ON(!dqm); 949 950 ··· 965 960 destroy_sdma_queues(dqm, 1); 966 961 } 967 962 963 + preempt_type = preempt_static_queues ? 
964 + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES : 965 + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES; 966 + 968 967 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, 969 - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0); 968 + preempt_type, 0, false, 0); 970 969 if (retval != 0) 971 970 goto out; 972 971 ··· 978 969 pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, 979 970 KFD_FENCE_COMPLETED); 980 971 /* should be timed out */ 981 - amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 972 + retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 982 973 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); 974 + if (retval != 0) { 975 + p = kfd_get_process(current); 976 + p->reset_wavefronts = true; 977 + goto out; 978 + } 983 979 pm_release_ib(&dqm->packets); 984 980 dqm->active_runlist = false; 985 981 ··· 1003 989 if (lock) 1004 990 mutex_lock(&dqm->lock); 1005 991 1006 - retval = destroy_queues_cpsch(dqm, false); 992 + retval = destroy_queues_cpsch(dqm, false, false); 1007 993 if (retval != 0) { 1008 994 pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption"); 1009 995 goto out; ··· 1038 1024 { 1039 1025 int retval; 1040 1026 struct mqd_manager *mqd; 1027 + bool preempt_all_queues; 1041 1028 1042 1029 BUG_ON(!dqm || !qpd || !q); 1030 + 1031 + preempt_all_queues = false; 1043 1032 1044 1033 retval = 0; 1045 1034 1046 1035 /* remove queue from list to prevent rescheduling after preemption */ 1047 1036 mutex_lock(&dqm->lock); 1037 + 1038 + if (qpd->is_debug) { 1039 + /* 1040 + * error, currently we do not allow to destroy a queue 1041 + * of a currently debugged process 1042 + */ 1043 + retval = -EBUSY; 1044 + goto failed_try_destroy_debugged_queue; 1045 + 1046 + } 1047 + 1048 1048 mqd = dqm->ops.get_mqd_manager(dqm, 1049 1049 get_mqd_type_from_queue_type(q->properties.type)); 1050 1050 if (!mqd) { ··· 1090 1062 return 0; 1091 1063 1092 1064 failed: 1065 + failed_try_destroy_debugged_queue: 1066 + 
1093 1067 mutex_unlock(&dqm->lock); 1094 1068 return retval; 1095 1069 }
+6
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
··· 88 88 struct queue *q, 89 89 struct qcm_process_device *qpd, 90 90 int *allocate_vmid); 91 + 91 92 int (*destroy_queue)(struct device_queue_manager *dqm, 92 93 struct qcm_process_device *qpd, 93 94 struct queue *q); 95 + 94 96 int (*update_queue)(struct device_queue_manager *dqm, 95 97 struct queue *q); 96 98 ··· 102 100 103 101 int (*register_process)(struct device_queue_manager *dqm, 104 102 struct qcm_process_device *qpd); 103 + 105 104 int (*unregister_process)(struct device_queue_manager *dqm, 106 105 struct qcm_process_device *qpd); 106 + 107 107 int (*initialize)(struct device_queue_manager *dqm); 108 108 int (*start)(struct device_queue_manager *dqm); 109 109 int (*stop)(struct device_queue_manager *dqm); ··· 113 109 int (*create_kernel_queue)(struct device_queue_manager *dqm, 114 110 struct kernel_queue *kq, 115 111 struct qcm_process_device *qpd); 112 + 116 113 void (*destroy_kernel_queue)(struct device_queue_manager *dqm, 117 114 struct kernel_queue *kq, 118 115 struct qcm_process_device *qpd); 116 + 119 117 bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, 120 118 struct qcm_process_device *qpd, 121 119 enum cache_policy default_policy,
+36 -10
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
··· 163 163 num_queues = 0; 164 164 list_for_each_entry(cur, &qpd->queues_list, list) 165 165 num_queues++; 166 - packet->bitfields10.num_queues = num_queues; 166 + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues; 167 167 168 168 packet->sh_mem_config = qpd->sh_mem_config; 169 169 packet->sh_mem_bases = qpd->sh_mem_bases; ··· 177 177 } 178 178 179 179 static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, 180 - struct queue *q) 180 + struct queue *q, bool is_static) 181 181 { 182 182 struct pm4_map_queues *packet; 183 + bool use_static = is_static; 183 184 184 185 BUG_ON(!pm || !buffer || !q); 185 186 ··· 210 209 case KFD_QUEUE_TYPE_SDMA: 211 210 packet->bitfields2.engine_sel = 212 211 engine_sel__mes_map_queues__sdma0; 212 + use_static = false; /* no static queues under SDMA */ 213 213 break; 214 214 default: 215 215 BUG(); ··· 219 217 220 218 packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = 221 219 q->properties.doorbell_off; 220 + 221 + packet->mes_map_queues_ordinals[0].bitfields3.is_static = 222 + (use_static == true) ? 
1 : 0; 222 223 223 224 packet->mes_map_queues_ordinals[0].mqd_addr_lo = 224 225 lower_32_bits(q->gart_mqd_addr); ··· 276 271 pm_release_ib(pm); 277 272 return -ENOMEM; 278 273 } 274 + 279 275 retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); 280 276 if (retval != 0) 281 277 return retval; 278 + 282 279 proccesses_mapped++; 283 280 inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), 284 281 alloc_size_bytes); ··· 288 281 list_for_each_entry(kq, &qpd->priv_queue_list, list) { 289 282 if (kq->queue->properties.is_active != true) 290 283 continue; 284 + 285 + pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", 286 + kq->queue->queue, qpd->is_debug); 287 + 291 288 retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], 292 - kq->queue); 289 + kq->queue, qpd->is_debug); 293 290 if (retval != 0) 294 291 return retval; 295 - inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), 296 - alloc_size_bytes); 292 + 293 + inc_wptr(&rl_wptr, 294 + sizeof(struct pm4_map_queues), 295 + alloc_size_bytes); 297 296 } 298 297 299 298 list_for_each_entry(q, &qpd->queues_list, list) { 300 299 if (q->properties.is_active != true) 301 300 continue; 302 - retval = pm_create_map_queue(pm, 303 - &rl_buffer[rl_wptr], q); 301 + 302 + pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", 303 + q->queue, qpd->is_debug); 304 + 305 + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], 306 + q, qpd->is_debug); 307 + 304 308 if (retval != 0) 305 309 return retval; 306 - inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), 307 - alloc_size_bytes); 310 + 311 + inc_wptr(&rl_wptr, 312 + sizeof(struct pm4_map_queues), 313 + alloc_size_bytes); 308 314 } 309 315 } 310 316 ··· 508 488 509 489 packet = (struct pm4_unmap_queues *)buffer; 510 490 memset(buffer, 0, sizeof(struct pm4_unmap_queues)); 511 - 491 + pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", 492 + mode, reset, type); 512 493 packet->header.u32all = 
build_pm4_header(IT_UNMAP_QUEUES, 513 494 sizeof(struct pm4_unmap_queues)); 514 495 switch (type) { ··· 549 528 case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES: 550 529 packet->bitfields2.queue_sel = 551 530 queue_sel__mes_unmap_queues__perform_request_on_all_active_queues; 531 + break; 532 + case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: 533 + /* in this case, we do not preempt static queues */ 534 + packet->bitfields2.queue_sel = 535 + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only; 552 536 break; 553 537 default: 554 538 BUG();
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
··· 237 237 struct { 238 238 union { 239 239 struct { 240 - uint32_t reserved5:2; 240 + uint32_t is_static:1; 241 + uint32_t reserved5:1; 241 242 uint32_t doorbell_offset:21; 242 243 uint32_t reserved6:3; 243 244 uint32_t queue:6; ··· 329 328 enum unmap_queues_queue_sel_enum { 330 329 queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, 331 330 queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, 332 - queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2 331 + queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2, 332 + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3 333 333 }; 334 334 335 335 enum unmap_queues_engine_sel_enum {
+290
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
··· 1 + /* 2 + * Copyright 2014 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 
21 + * 22 + */ 23 + 24 + #ifndef KFD_PM4_HEADERS_DIQ_H_ 25 + #define KFD_PM4_HEADERS_DIQ_H_ 26 + 27 + /*--------------------_INDIRECT_BUFFER-------------------- */ 28 + 29 + #ifndef _PM4__INDIRECT_BUFFER_DEFINED 30 + #define _PM4__INDIRECT_BUFFER_DEFINED 31 + enum _INDIRECT_BUFFER_cache_policy_enum { 32 + cache_policy___indirect_buffer__lru = 0, 33 + cache_policy___indirect_buffer__stream = 1, 34 + cache_policy___indirect_buffer__bypass = 2 35 + }; 36 + 37 + enum { 38 + IT_INDIRECT_BUFFER_PASID = 0x5C 39 + }; 40 + 41 + struct pm4__indirect_buffer_pasid { 42 + union { 43 + union PM4_MES_TYPE_3_HEADER header; /* header */ 44 + unsigned int ordinal1; 45 + }; 46 + 47 + union { 48 + struct { 49 + unsigned int reserved1:2; 50 + unsigned int ib_base_lo:30; 51 + } bitfields2; 52 + unsigned int ordinal2; 53 + }; 54 + 55 + union { 56 + struct { 57 + unsigned int ib_base_hi:16; 58 + unsigned int reserved2:16; 59 + } bitfields3; 60 + unsigned int ordinal3; 61 + }; 62 + 63 + union { 64 + unsigned int control; 65 + unsigned int ordinal4; 66 + }; 67 + 68 + union { 69 + struct { 70 + unsigned int pasid:10; 71 + unsigned int reserved4:22; 72 + } bitfields5; 73 + unsigned int ordinal5; 74 + }; 75 + 76 + }; 77 + 78 + #endif 79 + 80 + /*--------------------_RELEASE_MEM-------------------- */ 81 + 82 + #ifndef _PM4__RELEASE_MEM_DEFINED 83 + #define _PM4__RELEASE_MEM_DEFINED 84 + enum _RELEASE_MEM_event_index_enum { 85 + event_index___release_mem__end_of_pipe = 5, 86 + event_index___release_mem__shader_done = 6 87 + }; 88 + 89 + enum _RELEASE_MEM_cache_policy_enum { 90 + cache_policy___release_mem__lru = 0, 91 + cache_policy___release_mem__stream = 1, 92 + cache_policy___release_mem__bypass = 2 93 + }; 94 + 95 + enum _RELEASE_MEM_dst_sel_enum { 96 + dst_sel___release_mem__memory_controller = 0, 97 + dst_sel___release_mem__tc_l2 = 1, 98 + dst_sel___release_mem__queue_write_pointer_register = 2, 99 + dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3 100 + }; 101 + 102 + enum 
_RELEASE_MEM_int_sel_enum { 103 + int_sel___release_mem__none = 0, 104 + int_sel___release_mem__send_interrupt_only = 1, 105 + int_sel___release_mem__send_interrupt_after_write_confirm = 2, 106 + int_sel___release_mem__send_data_after_write_confirm = 3 107 + }; 108 + 109 + enum _RELEASE_MEM_data_sel_enum { 110 + data_sel___release_mem__none = 0, 111 + data_sel___release_mem__send_32_bit_low = 1, 112 + data_sel___release_mem__send_64_bit_data = 2, 113 + data_sel___release_mem__send_gpu_clock_counter = 3, 114 + data_sel___release_mem__send_cp_perfcounter_hi_lo = 4, 115 + data_sel___release_mem__store_gds_data_to_memory = 5 116 + }; 117 + 118 + struct pm4__release_mem { 119 + union { 120 + union PM4_MES_TYPE_3_HEADER header; /*header */ 121 + unsigned int ordinal1; 122 + }; 123 + 124 + union { 125 + struct { 126 + unsigned int event_type:6; 127 + unsigned int reserved1:2; 128 + enum _RELEASE_MEM_event_index_enum event_index:4; 129 + unsigned int tcl1_vol_action_ena:1; 130 + unsigned int tc_vol_action_ena:1; 131 + unsigned int reserved2:1; 132 + unsigned int tc_wb_action_ena:1; 133 + unsigned int tcl1_action_ena:1; 134 + unsigned int tc_action_ena:1; 135 + unsigned int reserved3:6; 136 + unsigned int atc:1; 137 + enum _RELEASE_MEM_cache_policy_enum cache_policy:2; 138 + unsigned int reserved4:5; 139 + } bitfields2; 140 + unsigned int ordinal2; 141 + }; 142 + 143 + union { 144 + struct { 145 + unsigned int reserved5:16; 146 + enum _RELEASE_MEM_dst_sel_enum dst_sel:2; 147 + unsigned int reserved6:6; 148 + enum _RELEASE_MEM_int_sel_enum int_sel:3; 149 + unsigned int reserved7:2; 150 + enum _RELEASE_MEM_data_sel_enum data_sel:3; 151 + } bitfields3; 152 + unsigned int ordinal3; 153 + }; 154 + 155 + union { 156 + struct { 157 + unsigned int reserved8:2; 158 + unsigned int address_lo_32b:30; 159 + } bitfields4; 160 + struct { 161 + unsigned int reserved9:3; 162 + unsigned int address_lo_64b:29; 163 + } bitfields5; 164 + unsigned int ordinal4; 165 + }; 166 + 167 + unsigned int 
address_hi; 168 + 169 + unsigned int data_lo; 170 + 171 + unsigned int data_hi; 172 + 173 + }; 174 + #endif 175 + 176 + 177 + /*--------------------_SET_CONFIG_REG-------------------- */ 178 + 179 + #ifndef _PM4__SET_CONFIG_REG_DEFINED 180 + #define _PM4__SET_CONFIG_REG_DEFINED 181 + 182 + struct pm4__set_config_reg { 183 + union { 184 + union PM4_MES_TYPE_3_HEADER header; /*header */ 185 + unsigned int ordinal1; 186 + }; 187 + 188 + union { 189 + struct { 190 + unsigned int reg_offset:16; 191 + unsigned int reserved1:7; 192 + unsigned int vmid_shift:5; 193 + unsigned int insert_vmid:1; 194 + unsigned int reserved2:3; 195 + } bitfields2; 196 + unsigned int ordinal2; 197 + }; 198 + 199 + unsigned int reg_data[1]; /*1..N of these fields */ 200 + 201 + }; 202 + #endif 203 + 204 + /*--------------------_WAIT_REG_MEM-------------------- */ 205 + 206 + #ifndef _PM4__WAIT_REG_MEM_DEFINED 207 + #define _PM4__WAIT_REG_MEM_DEFINED 208 + enum _WAIT_REG_MEM_function_enum { 209 + function___wait_reg_mem__always_pass = 0, 210 + function___wait_reg_mem__less_than_ref_value = 1, 211 + function___wait_reg_mem__less_than_equal_to_the_ref_value = 2, 212 + function___wait_reg_mem__equal_to_the_reference_value = 3, 213 + function___wait_reg_mem__not_equal_reference_value = 4, 214 + function___wait_reg_mem__greater_than_or_equal_reference_value = 5, 215 + function___wait_reg_mem__greater_than_reference_value = 6, 216 + function___wait_reg_mem__reserved = 7 217 + }; 218 + 219 + enum _WAIT_REG_MEM_mem_space_enum { 220 + mem_space___wait_reg_mem__register_space = 0, 221 + mem_space___wait_reg_mem__memory_space = 1 222 + }; 223 + 224 + enum _WAIT_REG_MEM_operation_enum { 225 + operation___wait_reg_mem__wait_reg_mem = 0, 226 + operation___wait_reg_mem__wr_wait_wr_reg = 1 227 + }; 228 + 229 + struct pm4__wait_reg_mem { 230 + union { 231 + union PM4_MES_TYPE_3_HEADER header; /*header */ 232 + unsigned int ordinal1; 233 + }; 234 + 235 + union { 236 + struct { 237 + enum 
_WAIT_REG_MEM_function_enum function:3; 238 + unsigned int reserved1:1; 239 + enum _WAIT_REG_MEM_mem_space_enum mem_space:2; 240 + enum _WAIT_REG_MEM_operation_enum operation:2; 241 + unsigned int reserved2:24; 242 + } bitfields2; 243 + unsigned int ordinal2; 244 + }; 245 + 246 + union { 247 + struct { 248 + unsigned int reserved3:2; 249 + unsigned int memory_poll_addr_lo:30; 250 + } bitfields3; 251 + struct { 252 + unsigned int register_poll_addr:16; 253 + unsigned int reserved4:16; 254 + } bitfields4; 255 + struct { 256 + unsigned int register_write_addr:16; 257 + unsigned int reserved5:16; 258 + } bitfields5; 259 + unsigned int ordinal3; 260 + }; 261 + 262 + union { 263 + struct { 264 + unsigned int poll_address_hi:16; 265 + unsigned int reserved6:16; 266 + } bitfields6; 267 + struct { 268 + unsigned int register_write_addr:16; 269 + unsigned int reserved7:16; 270 + } bitfields7; 271 + unsigned int ordinal4; 272 + }; 273 + 274 + unsigned int reference; 275 + 276 + unsigned int mask; 277 + 278 + union { 279 + struct { 280 + unsigned int poll_interval:16; 281 + unsigned int reserved8:16; 282 + } bitfields8; 283 + unsigned int ordinal7; 284 + }; 285 + 286 + }; 287 + #endif 288 + 289 + 290 + #endif /* KFD_PM4_HEADERS_DIQ_H_ */
+20 -4
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 128 128 unsigned int asic_family; 129 129 const struct kfd_event_interrupt_class *event_interrupt_class; 130 130 unsigned int max_pasid_bits; 131 + unsigned int max_no_of_hqd; 131 132 size_t ih_ring_entry_size; 132 133 uint8_t num_of_watch_points; 133 134 uint16_t mqd_size_aligned; ··· 168 167 169 168 const struct kfd2kgd_calls *kfd2kgd; 170 169 struct mutex doorbell_mutex; 171 - unsigned long doorbell_available_index[DIV_ROUND_UP( 172 - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; 170 + DECLARE_BITMAP(doorbell_available_index, 171 + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 173 172 174 173 void *gtt_mem; 175 174 uint64_t gtt_start_gpu_addr; ··· 196 195 * from the HW ring into a SW ring. 197 196 */ 198 197 bool interrupts_active; 198 + 199 + /* Debug manager */ 200 + struct kfd_dbgmgr *dbgmgr; 199 201 }; 200 202 201 203 /* KGD2KFD callbacks */ ··· 235 231 enum kfd_preempt_type_filter { 236 232 KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, 237 233 KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 234 + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 238 235 KFD_PREEMPT_TYPE_FILTER_BY_PASID 239 236 }; 240 237 ··· 508 503 /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. 
*/ 509 504 struct kfd_queue **queues; 510 505 511 - unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; 512 - 513 506 /*Is the user space process 32 bit?*/ 514 507 bool is_32bit_user_mode; 515 508 ··· 519 516 event_pages */ 520 517 u32 next_nonsignal_event_id; 521 518 size_t signal_event_count; 519 + /* 520 + * This flag tells if we should reset all wavefronts on 521 + * process termination 522 + */ 523 + bool reset_wavefronts; 522 524 }; 523 525 524 526 /** ··· 658 650 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); 659 651 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, 660 652 struct queue_properties *p); 653 + struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, 654 + unsigned int qid); 655 + 656 + int amdkfd_fence_wait_timeout(unsigned int *fence_addr, 657 + unsigned int fence_value, 658 + unsigned long timeout); 661 659 662 660 /* Packet Manager */ 663 661 ··· 730 716 uint32_t *event_id, uint32_t *event_trigger_data, 731 717 uint64_t *event_page_offset, uint32_t *event_slot_index); 732 718 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); 719 + 720 + int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); 733 721 734 722 #endif
+11
drivers/gpu/drm/amd/amdkfd/kfd_process.c
··· 31 31 struct mm_struct; 32 32 33 33 #include "kfd_priv.h" 34 + #include "kfd_dbgmgr.h" 34 35 35 36 /* 36 37 * Initial size for the array of queues. ··· 173 172 pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", 174 173 pdd->dev->id, p->pasid); 175 174 175 + if (p->reset_wavefronts) 176 + dbgdev_wave_reset_wavefronts(pdd->dev, p); 177 + 176 178 amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); 177 179 list_del(&pdd->per_device_list); 178 180 ··· 305 301 if (kfd_init_apertures(process) != 0) 306 302 goto err_init_apretures; 307 303 304 + process->reset_wavefronts = false; 305 + 308 306 return process; 309 307 310 308 err_init_apretures: ··· 405 399 406 400 mutex_lock(&p->mutex); 407 401 402 + if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) 403 + kfd_dbgmgr_destroy(dev->dbgmgr); 404 + 408 405 pqm_uninit(&p->pqm); 406 + if (p->reset_wavefronts) 407 + dbgdev_wave_reset_wavefronts(dev, p); 409 408 410 409 pdd = kfd_get_process_device_data(dev, p); 411 410
+17 -1
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 158 158 struct queue *q; 159 159 struct process_queue_node *pqn; 160 160 struct kernel_queue *kq; 161 + int num_queues = 0; 162 + struct queue *cur; 161 163 162 164 BUG_ON(!pqm || !dev || !properties || !qid); 163 165 ··· 172 170 if (!pdd) { 173 171 pr_err("Process device data doesn't exist\n"); 174 172 return -1; 173 + } 174 + 175 + /* 176 + * for debug process, verify that it is within the static queues limit 177 + * currently limit is set to half of the total avail HQD slots 178 + * If we are just about to create DIQ, the is_debug flag is not set yet 179 + * Hence we also check the type as well 180 + */ 181 + if ((pdd->qpd.is_debug) || 182 + (type == KFD_QUEUE_TYPE_DIQ)) { 183 + list_for_each_entry(cur, &pdd->qpd.queues_list, list) 184 + num_queues++; 185 + if (num_queues >= dev->device_info->max_no_of_hqd/2) 186 + return (-ENOSPC); 175 187 } 176 188 177 189 retval = find_available_queue_slot(pqm, qid); ··· 357 341 return 0; 358 342 } 359 343 360 - static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue( 344 + struct kernel_queue *pqm_get_kernel_queue( 361 345 struct process_queue_manager *pqm, 362 346 unsigned int qid) 363 347 {
+21
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
··· 163 163 int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd, 164 164 unsigned int timeout); 165 165 166 + int (*address_watch_disable)(struct kgd_dev *kgd); 167 + int (*address_watch_execute)(struct kgd_dev *kgd, 168 + unsigned int watch_point_id, 169 + uint32_t cntl_val, 170 + uint32_t addr_hi, 171 + uint32_t addr_lo); 172 + int (*wave_control_execute)(struct kgd_dev *kgd, 173 + uint32_t gfx_index_val, 174 + uint32_t sq_cmd); 175 + uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, 176 + unsigned int watch_point_id, 177 + unsigned int reg_offset); 178 + bool (*get_atc_vmid_pasid_mapping_valid)( 179 + struct kgd_dev *kgd, 180 + uint8_t vmid); 181 + uint16_t (*get_atc_vmid_pasid_mapping_pasid)( 182 + struct kgd_dev *kgd, 183 + uint8_t vmid); 184 + void (*write_vmid_invalidate_request)(struct kgd_dev *kgd, 185 + uint8_t vmid); 186 + 166 187 uint16_t (*get_fw_version)(struct kgd_dev *kgd, 167 188 enum kgd_engine_type type); 168 189 };
+55 -1
drivers/gpu/drm/radeon/cik_reg.h
··· 149 149 150 150 #define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 151 151 152 + #define SQ_IND_INDEX 0x8DE0 153 + #define SQ_CMD 0x8DEC 154 + #define SQ_IND_DATA 0x8DE4 155 + 156 + /* 157 + * The TCP_WATCHx_xxxx addresses that are shown here are in dwords, 158 + * and that's why they are multiplied by 4 159 + */ 160 + #define TCP_WATCH0_ADDR_H (0x32A0*4) 161 + #define TCP_WATCH1_ADDR_H (0x32A3*4) 162 + #define TCP_WATCH2_ADDR_H (0x32A6*4) 163 + #define TCP_WATCH3_ADDR_H (0x32A9*4) 164 + #define TCP_WATCH0_ADDR_L (0x32A1*4) 165 + #define TCP_WATCH1_ADDR_L (0x32A4*4) 166 + #define TCP_WATCH2_ADDR_L (0x32A7*4) 167 + #define TCP_WATCH3_ADDR_L (0x32AA*4) 168 + #define TCP_WATCH0_CNTL (0x32A2*4) 169 + #define TCP_WATCH1_CNTL (0x32A5*4) 170 + #define TCP_WATCH2_CNTL (0x32A8*4) 171 + #define TCP_WATCH3_CNTL (0x32AB*4) 172 + 152 173 #define CPC_INT_CNTL 0xC2D0 153 174 154 175 #define CP_HQD_IQ_RPTR 0xC970u 155 - #define AQL_ENABLE (1U << 0) 156 176 #define SDMA0_RLC0_RB_CNTL 0xD400u 157 177 #define SDMA_RB_VMID(x) (x << 24) 158 178 #define SDMA0_RLC0_RB_BASE 0xD404u ··· 205 185 #define SDMA0_RLC0_WATERMARK 0xD4A8u 206 186 #define SDMA0_CNTL 0xD010 207 187 #define SDMA1_CNTL 0xD810 188 + 189 + enum { 190 + MAX_TRAPID = 8, /* 3 bits in the bitfield. 
*/ 191 + MAX_WATCH_ADDRESSES = 4 192 + }; 193 + 194 + enum { 195 + ADDRESS_WATCH_REG_ADDR_HI = 0, 196 + ADDRESS_WATCH_REG_ADDR_LO, 197 + ADDRESS_WATCH_REG_CNTL, 198 + ADDRESS_WATCH_REG_MAX 199 + }; 200 + 201 + enum { /* not defined in the CI/KV reg file */ 202 + ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, 203 + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, 204 + ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, 205 + /* extend the mask to 26 bits in order to match the low address field */ 206 + ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, 207 + ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF 208 + }; 209 + 210 + union TCP_WATCH_CNTL_BITS { 211 + struct { 212 + uint32_t mask:24; 213 + uint32_t vmid:4; 214 + uint32_t atc:1; 215 + uint32_t mode:2; 216 + uint32_t valid:1; 217 + } bitfields, bits; 218 + uint32_t u32All; 219 + signed int i32All; 220 + float f32All; 221 + }; 208 222 209 223 #endif
+6 -3
drivers/gpu/drm/radeon/cikd.h
··· 2148 2148 #define VCE_CMD_IB_AUTO 0x00000005 2149 2149 #define VCE_CMD_SEMAPHORE 0x00000006 2150 2150 2151 - #define ATC_VMID0_PASID_MAPPING 0x339Cu 2152 - #define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u 2153 - #define ATC_VMID_PASID_MAPPING_VALID (1U << 31) 2151 + #define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u 2152 + #define ATC_VMID0_PASID_MAPPING 0x339Cu 2153 + #define ATC_VMID_PASID_MAPPING_PASID_MASK (0xFFFF) 2154 + #define ATC_VMID_PASID_MAPPING_PASID_SHIFT 0 2155 + #define ATC_VMID_PASID_MAPPING_VALID_MASK (0x1 << 31) 2156 + #define ATC_VMID_PASID_MAPPING_VALID_SHIFT 31 2154 2157 2155 2158 #define ATC_VM_APERTURE0_CNTL 0x3310u 2156 2159 #define ATS_ACCESS_MODE_NEVER 0
+149 -2
drivers/gpu/drm/radeon/radeon_kfd.c
··· 34 34 35 35 #define CIK_PIPE_PER_MEC (4) 36 36 37 + static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { 38 + TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL, 39 + TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL, 40 + TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL, 41 + TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL 42 + }; 43 + 37 44 struct kgd_mem { 38 45 struct radeon_bo *bo; 39 46 uint64_t gpu_addr; ··· 86 79 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); 87 80 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 88 81 unsigned int timeout); 82 + static int kgd_address_watch_disable(struct kgd_dev *kgd); 83 + static int kgd_address_watch_execute(struct kgd_dev *kgd, 84 + unsigned int watch_point_id, 85 + uint32_t cntl_val, 86 + uint32_t addr_hi, 87 + uint32_t addr_lo); 88 + static int kgd_wave_control_execute(struct kgd_dev *kgd, 89 + uint32_t gfx_index_val, 90 + uint32_t sq_cmd); 91 + static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, 92 + unsigned int watch_point_id, 93 + unsigned int reg_offset); 94 + 95 + static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); 96 + static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 97 + uint8_t vmid); 98 + static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); 89 99 90 100 static const struct kfd2kgd_calls kfd2kgd = { 91 101 .init_gtt_mem_allocation = alloc_gtt_mem, ··· 120 96 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, 121 97 .hqd_destroy = kgd_hqd_destroy, 122 98 .hqd_sdma_destroy = kgd_hqd_sdma_destroy, 99 + .address_watch_disable = kgd_address_watch_disable, 100 + .address_watch_execute = kgd_address_watch_execute, 101 + .wave_control_execute = kgd_wave_control_execute, 102 + .address_watch_get_offset = kgd_address_watch_get_offset, 103 + .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, 104 + 
.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, 105 + .write_vmid_invalidate_request = write_vmid_invalidate_request, 123 106 .get_fw_version = get_fw_version 124 107 }; 125 108 ··· 403 372 * the SW cleared it. 404 373 * So the protocol is to always wait & clear. 405 374 */ 406 - uint32_t pasid_mapping = (pasid == 0) ? 0 : 407 - (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID; 375 + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | 376 + ATC_VMID_PASID_MAPPING_VALID_MASK; 408 377 409 378 write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), 410 379 pasid_mapping); ··· 694 663 write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0); 695 664 696 665 return 0; 666 + } 667 + 668 + static int kgd_address_watch_disable(struct kgd_dev *kgd) 669 + { 670 + union TCP_WATCH_CNTL_BITS cntl; 671 + unsigned int i; 672 + 673 + cntl.u32All = 0; 674 + 675 + cntl.bitfields.valid = 0; 676 + cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 677 + cntl.bitfields.atc = 1; 678 + 679 + /* Turning off this address until we set all the registers */ 680 + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) 681 + write_register(kgd, 682 + watchRegs[i * ADDRESS_WATCH_REG_MAX + 683 + ADDRESS_WATCH_REG_CNTL], 684 + cntl.u32All); 685 + 686 + return 0; 687 + } 688 + 689 + static int kgd_address_watch_execute(struct kgd_dev *kgd, 690 + unsigned int watch_point_id, 691 + uint32_t cntl_val, 692 + uint32_t addr_hi, 693 + uint32_t addr_lo) 694 + { 695 + union TCP_WATCH_CNTL_BITS cntl; 696 + 697 + cntl.u32All = cntl_val; 698 + 699 + /* Turning off this watch point until we set all the registers */ 700 + cntl.bitfields.valid = 0; 701 + write_register(kgd, 702 + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 703 + ADDRESS_WATCH_REG_CNTL], 704 + cntl.u32All); 705 + 706 + write_register(kgd, 707 + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 708 + ADDRESS_WATCH_REG_ADDR_HI], 709 + addr_hi); 710 + 711 + write_register(kgd, 712 + 
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 713 + ADDRESS_WATCH_REG_ADDR_LO], 714 + addr_lo); 715 + 716 + /* Enable the watch point */ 717 + cntl.bitfields.valid = 1; 718 + 719 + write_register(kgd, 720 + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + 721 + ADDRESS_WATCH_REG_CNTL], 722 + cntl.u32All); 723 + 724 + return 0; 725 + } 726 + 727 + static int kgd_wave_control_execute(struct kgd_dev *kgd, 728 + uint32_t gfx_index_val, 729 + uint32_t sq_cmd) 730 + { 731 + struct radeon_device *rdev = get_radeon_device(kgd); 732 + uint32_t data; 733 + 734 + mutex_lock(&rdev->grbm_idx_mutex); 735 + 736 + write_register(kgd, GRBM_GFX_INDEX, gfx_index_val); 737 + write_register(kgd, SQ_CMD, sq_cmd); 738 + 739 + /* Restore the GRBM_GFX_INDEX register */ 740 + 741 + data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 742 + SE_BROADCAST_WRITES; 743 + 744 + write_register(kgd, GRBM_GFX_INDEX, data); 745 + 746 + mutex_unlock(&rdev->grbm_idx_mutex); 747 + 748 + return 0; 749 + } 750 + 751 + static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, 752 + unsigned int watch_point_id, 753 + unsigned int reg_offset) 754 + { 755 + return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; 756 + } 757 + 758 + static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) 759 + { 760 + uint32_t reg; 761 + struct radeon_device *rdev = (struct radeon_device *) kgd; 762 + 763 + reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); 764 + return reg & ATC_VMID_PASID_MAPPING_VALID_MASK; 765 + } 766 + 767 + static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 768 + uint8_t vmid) 769 + { 770 + uint32_t reg; 771 + struct radeon_device *rdev = (struct radeon_device *) kgd; 772 + 773 + reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); 774 + return reg & ATC_VMID_PASID_MAPPING_PASID_MASK; 775 + } 776 + 777 + static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) 778 + { 779 + struct radeon_device *rdev = (struct 
radeon_device *) kgd; 780 + 781 + return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid); 697 782 } 698 783 699 784 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
/* include/uapi/linux/kfd_ioctl.h — H/W debugger IOCTL additions */

/*
 * Upper bounds on debugger-supplied payloads; KFD rejects larger
 * requests before copying them from user space.
 */
#define MAX_ALLOWED_NUM_POINTS    100
#define MAX_ALLOWED_AW_BUFF_SIZE 4096
#define MAX_ALLOWED_WAC_BUFF_SIZE 128

/* Register the calling process as debugged on one GPU. */
struct kfd_ioctl_dbg_register_args {
	uint32_t gpu_id;		/* to KFD */
	uint32_t pad;			/* explicit padding to 8 bytes */
};

/* Undo a previous AMDKFD_IOC_DBG_REGISTER for one GPU. */
struct kfd_ioctl_dbg_unregister_args {
	uint32_t gpu_id;		/* to KFD */
	uint32_t pad;			/* explicit padding to 8 bytes */
};

/* Set address watch-points in the debugged process's GPU kernel. */
struct kfd_ioctl_dbg_address_watch_args {
	uint64_t content_ptr;		/* a pointer to the actual content */
	uint32_t gpu_id;		/* to KFD */
	uint32_t buf_size_in_bytes;	/* including gpu_id and buf_size */
};

/* Perform a wave-control operation on the debugged process's waves. */
struct kfd_ioctl_dbg_wave_control_args {
	uint64_t content_ptr;		/* a pointer to the actual content */
	uint32_t gpu_id;		/* to KFD */
	uint32_t buf_size_in_bytes;	/* including gpu_id and buf_size */
};

/* Matching HSA_EVENTTYPE */
#define KFD_IOC_EVENT_SIGNAL		0
#define KFD_IOC_EVENT_NODECHANGE	1

struct kfd_ioctl_wait_events_args {
	uint64_t events_ptr;		/* points to struct
					   kfd_event_data array, to KFD */
	uint32_t num_events;		/* to KFD */
	uint32_t wait_for_all;		/* to KFD */
	uint32_t timeout;		/* to KFD */
	uint32_t wait_result;		/* from KFD */
};

#define AMDKFD_IOC_WAIT_EVENTS			\
		AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)

/*
 * Debugger ioctls 0x0D-0x10; all are write-only (_IOW) since results,
 * if any, travel through the user-supplied content buffers.
 */
#define AMDKFD_IOC_DBG_REGISTER			\
		AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)

#define AMDKFD_IOC_DBG_UNREGISTER		\
		AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)

#define AMDKFD_IOC_DBG_ADDRESS_WATCH		\
		AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)

#define AMDKFD_IOC_DBG_WAVE_CONTROL		\
		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)

#define AMDKFD_COMMAND_START		0x01
/* one past the highest used ioctl nr (0x10) */
#define AMDKFD_COMMAND_END		0x11

#endif