Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Add static user-mode queues support

This patch adds support for static user-mode queues in QCM.
Queues which are designated as static can NOT be preempted by
the CP microcode when it is executing its scheduling algorithm.

This is needed for supporting the debugger feature, because we
can't allow the CP to preempt queues which are currently being debugged.

The number of queues that can be designated as static is limited by the
number of HQDs (Hardware Queue Descriptors).

Signed-off-by: Yair Shachar <yair.shachar@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Authored by Yair Shachar; committed by Oded Gabbay.
992839ad aef11009

+97 -19
+2
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 33 33 static const struct kfd_device_info kaveri_device_info = { 34 34 .asic_family = CHIP_KAVERI, 35 35 .max_pasid_bits = 16, 36 + /* max num of queues for KV.TODO should be a dynamic value */ 37 + .max_no_of_hqd = 24, 36 38 .ih_ring_entry_size = 4 * sizeof(uint32_t), 37 39 .event_interrupt_class = &event_interrupt_class_cik, 38 40 .mqd_size_aligned = MQD_SIZE_ALIGNED
+31 -7
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 45 45 struct qcm_process_device *qpd); 46 46 47 47 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); 48 - static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock); 48 + static int destroy_queues_cpsch(struct device_queue_manager *dqm, 49 + bool preempt_static_queues, bool lock); 49 50 50 51 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, 51 52 struct queue *q, ··· 776 775 777 776 BUG_ON(!dqm); 778 777 779 - destroy_queues_cpsch(dqm, true); 778 + destroy_queues_cpsch(dqm, true, true); 780 779 781 780 list_for_each_entry(node, &dqm->queues, list) { 782 781 pdd = qpd_to_pdd(node->qpd); ··· 830 829 pr_debug("kfd: In %s\n", __func__); 831 830 832 831 mutex_lock(&dqm->lock); 833 - destroy_queues_cpsch(dqm, false); 832 + /* here we actually preempt the DIQ */ 833 + destroy_queues_cpsch(dqm, true, false); 834 834 list_del(&kq->list); 835 835 dqm->queue_count--; 836 836 qpd->is_debug = false; ··· 937 935 unsigned int sdma_engine) 938 936 { 939 937 return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, 940 - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 938 + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false, 941 939 sdma_engine); 942 940 } 943 941 944 - static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) 942 + static int destroy_queues_cpsch(struct device_queue_manager *dqm, 943 + bool preempt_static_queues, bool lock) 945 944 { 946 945 int retval; 946 + enum kfd_preempt_type_filter preempt_type; 947 947 948 948 BUG_ON(!dqm); 949 949 ··· 964 960 destroy_sdma_queues(dqm, 1); 965 961 } 966 962 963 + preempt_type = preempt_static_queues ? 
964 + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES : 965 + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES; 966 + 967 967 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, 968 - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0); 968 + preempt_type, 0, false, 0); 969 969 if (retval != 0) 970 970 goto out; 971 971 ··· 997 989 if (lock) 998 990 mutex_lock(&dqm->lock); 999 991 1000 - retval = destroy_queues_cpsch(dqm, false); 992 + retval = destroy_queues_cpsch(dqm, false, false); 1001 993 if (retval != 0) { 1002 994 pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption"); 1003 995 goto out; ··· 1032 1024 { 1033 1025 int retval; 1034 1026 struct mqd_manager *mqd; 1027 + bool preempt_all_queues; 1035 1028 1036 1029 BUG_ON(!dqm || !qpd || !q); 1030 + 1031 + preempt_all_queues = false; 1037 1032 1038 1033 retval = 0; 1039 1034 1040 1035 /* remove queue from list to prevent rescheduling after preemption */ 1041 1036 mutex_lock(&dqm->lock); 1037 + 1038 + if (qpd->is_debug) { 1039 + /* 1040 + * error, currently we do not allow to destroy a queue 1041 + * of a currently debugged process 1042 + */ 1043 + retval = -EBUSY; 1044 + goto failed_try_destroy_debugged_queue; 1045 + 1046 + } 1047 + 1042 1048 mqd = dqm->ops.get_mqd_manager(dqm, 1043 1049 get_mqd_type_from_queue_type(q->properties.type)); 1044 1050 if (!mqd) { ··· 1084 1062 return 0; 1085 1063 1086 1064 failed: 1065 + failed_try_destroy_debugged_queue: 1066 + 1087 1067 mutex_unlock(&dqm->lock); 1088 1068 return retval; 1089 1069 }
+6
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
··· 88 88 struct queue *q, 89 89 struct qcm_process_device *qpd, 90 90 int *allocate_vmid); 91 + 91 92 int (*destroy_queue)(struct device_queue_manager *dqm, 92 93 struct qcm_process_device *qpd, 93 94 struct queue *q); 95 + 94 96 int (*update_queue)(struct device_queue_manager *dqm, 95 97 struct queue *q); 96 98 ··· 102 100 103 101 int (*register_process)(struct device_queue_manager *dqm, 104 102 struct qcm_process_device *qpd); 103 + 105 104 int (*unregister_process)(struct device_queue_manager *dqm, 106 105 struct qcm_process_device *qpd); 106 + 107 107 int (*initialize)(struct device_queue_manager *dqm); 108 108 int (*start)(struct device_queue_manager *dqm); 109 109 int (*stop)(struct device_queue_manager *dqm); ··· 113 109 int (*create_kernel_queue)(struct device_queue_manager *dqm, 114 110 struct kernel_queue *kq, 115 111 struct qcm_process_device *qpd); 112 + 116 113 void (*destroy_kernel_queue)(struct device_queue_manager *dqm, 117 114 struct kernel_queue *kq, 118 115 struct qcm_process_device *qpd); 116 + 119 117 bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, 120 118 struct qcm_process_device *qpd, 121 119 enum cache_policy default_policy,
+36 -10
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
··· 163 163 num_queues = 0; 164 164 list_for_each_entry(cur, &qpd->queues_list, list) 165 165 num_queues++; 166 - packet->bitfields10.num_queues = num_queues; 166 + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues; 167 167 168 168 packet->sh_mem_config = qpd->sh_mem_config; 169 169 packet->sh_mem_bases = qpd->sh_mem_bases; ··· 177 177 } 178 178 179 179 static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, 180 - struct queue *q) 180 + struct queue *q, bool is_static) 181 181 { 182 182 struct pm4_map_queues *packet; 183 + bool use_static = is_static; 183 184 184 185 BUG_ON(!pm || !buffer || !q); 185 186 ··· 210 209 case KFD_QUEUE_TYPE_SDMA: 211 210 packet->bitfields2.engine_sel = 212 211 engine_sel__mes_map_queues__sdma0; 212 + use_static = false; /* no static queues under SDMA */ 213 213 break; 214 214 default: 215 215 BUG(); ··· 219 217 220 218 packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = 221 219 q->properties.doorbell_off; 220 + 221 + packet->mes_map_queues_ordinals[0].bitfields3.is_static = 222 + (use_static == true) ? 
1 : 0; 222 223 223 224 packet->mes_map_queues_ordinals[0].mqd_addr_lo = 224 225 lower_32_bits(q->gart_mqd_addr); ··· 276 271 pm_release_ib(pm); 277 272 return -ENOMEM; 278 273 } 274 + 279 275 retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); 280 276 if (retval != 0) 281 277 return retval; 278 + 282 279 proccesses_mapped++; 283 280 inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), 284 281 alloc_size_bytes); ··· 288 281 list_for_each_entry(kq, &qpd->priv_queue_list, list) { 289 282 if (kq->queue->properties.is_active != true) 290 283 continue; 284 + 285 + pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", 286 + kq->queue->queue, qpd->is_debug); 287 + 291 288 retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], 292 - kq->queue); 289 + kq->queue, qpd->is_debug); 293 290 if (retval != 0) 294 291 return retval; 295 - inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), 296 - alloc_size_bytes); 292 + 293 + inc_wptr(&rl_wptr, 294 + sizeof(struct pm4_map_queues), 295 + alloc_size_bytes); 297 296 } 298 297 299 298 list_for_each_entry(q, &qpd->queues_list, list) { 300 299 if (q->properties.is_active != true) 301 300 continue; 302 - retval = pm_create_map_queue(pm, 303 - &rl_buffer[rl_wptr], q); 301 + 302 + pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", 303 + q->queue, qpd->is_debug); 304 + 305 + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], 306 + q, qpd->is_debug); 307 + 304 308 if (retval != 0) 305 309 return retval; 306 - inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), 307 - alloc_size_bytes); 310 + 311 + inc_wptr(&rl_wptr, 312 + sizeof(struct pm4_map_queues), 313 + alloc_size_bytes); 308 314 } 309 315 } 310 316 ··· 508 488 509 489 packet = (struct pm4_unmap_queues *)buffer; 510 490 memset(buffer, 0, sizeof(struct pm4_unmap_queues)); 511 - 491 + pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", 492 + mode, reset, type); 512 493 packet->header.u32all = 
build_pm4_header(IT_UNMAP_QUEUES, 513 494 sizeof(struct pm4_unmap_queues)); 514 495 switch (type) { ··· 549 528 case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES: 550 529 packet->bitfields2.queue_sel = 551 530 queue_sel__mes_unmap_queues__perform_request_on_all_active_queues; 531 + break; 532 + case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: 533 + /* in this case, we do not preempt static queues */ 534 + packet->bitfields2.queue_sel = 535 + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only; 552 536 break; 553 537 default: 554 538 BUG();
+4 -2
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
··· 237 237 struct { 238 238 union { 239 239 struct { 240 - uint32_t reserved5:2; 240 + uint32_t is_static:1; 241 + uint32_t reserved5:1; 241 242 uint32_t doorbell_offset:21; 242 243 uint32_t reserved6:3; 243 244 uint32_t queue:6; ··· 329 328 enum unmap_queues_queue_sel_enum { 330 329 queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, 331 330 queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, 332 - queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2 331 + queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2, 332 + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3 333 333 }; 334 334 335 335 enum unmap_queues_engine_sel_enum {
+2
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 128 128 unsigned int asic_family; 129 129 const struct kfd_event_interrupt_class *event_interrupt_class; 130 130 unsigned int max_pasid_bits; 131 + unsigned int max_no_of_hqd; 131 132 size_t ih_ring_entry_size; 132 133 uint8_t num_of_watch_points; 133 134 uint16_t mqd_size_aligned; ··· 232 231 enum kfd_preempt_type_filter { 233 232 KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, 234 233 KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 234 + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 235 235 KFD_PREEMPT_TYPE_FILTER_BY_PASID 236 236 }; 237 237
+16
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
··· 158 158 struct queue *q; 159 159 struct process_queue_node *pqn; 160 160 struct kernel_queue *kq; 161 + int num_queues = 0; 162 + struct queue *cur; 161 163 162 164 BUG_ON(!pqm || !dev || !properties || !qid); 163 165 ··· 172 170 if (!pdd) { 173 171 pr_err("Process device data doesn't exist\n"); 174 172 return -1; 173 + } 174 + 175 + /* 176 + * for debug process, verify that it is within the static queues limit 177 + * currently limit is set to half of the total avail HQD slots 178 + * If we are just about to create DIQ, the is_debug flag is not set yet 179 + * Hence we also check the type as well 180 + */ 181 + if ((pdd->qpd.is_debug) || 182 + (type == KFD_QUEUE_TYPE_DIQ)) { 183 + list_for_each_entry(cur, &pdd->qpd.queues_list, list) 184 + num_queues++; 185 + if (num_queues >= dev->device_info->max_no_of_hqd/2) 186 + return (-ENOSPC); 175 187 } 176 188 177 189 retval = find_available_queue_slot(pqm, qid);