// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}
/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * scheduling done G2H is being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define ENGINE_STATE_ENABLED			(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define ENGINE_STATE_SUSPENDED			(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define ENGINE_STATE_KILLED			(1 << 7)
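
/*
 * Typical lifecycle, derived from the submit and G2H paths below: enabling a
 * queue sets PENDING_ENABLE + ENABLED and sends a SCHED_CONTEXT_MODE_SET H2G;
 * the scheduling done G2H (handle_sched_done) clears the pending bit. Disable
 * mirrors this with PENDING_DISABLE, optionally followed by DESTROYED plus a
 * context deregister. SUSPENDED, RESET and KILLED can be set concurrently
 * from other paths, hence the atomic.
 */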

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	q->flags |= EXEC_QUEUE_FLAG_BANNED;
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
{
	return exec_queue_killed(q) || exec_queue_banned(q);
}

#ifdef CONFIG_PROVE_LOCKING
static int alloc_submit_wq(struct xe_guc *guc)
{
	int i;

	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
		guc->submission_state.submit_wq_pool[i] =
			alloc_ordered_workqueue("submit_wq", 0);
		if (!guc->submission_state.submit_wq_pool[i])
			goto err_free;
	}

	return 0;

err_free:
	while (i)
		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);

	return -ENOMEM;
}

static void free_submit_wq(struct xe_guc *guc)
{
	int i;

	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
}

static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;

	return guc->submission_state.submit_wq_pool[idx];
}
#else
static int alloc_submit_wq(struct xe_guc *guc)
{
	return 0;
}

static void free_submit_wq(struct xe_guc *guc)
{
}

static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
	return NULL;
}
#endif
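
/*
 * Note (intent inferred from the code above): under CONFIG_PROVE_LOCKING the
 * schedulers round-robin over a small pool of pre-allocated ordered
 * workqueues via get_submit_wq(); otherwise get_submit_wq() returns NULL and
 * the scheduler is presumably left to allocate its own submit workqueue in
 * xe_sched_init().
 */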

static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;

	xa_destroy(&guc->submission_state.exec_queue_lookup);
	free_submit_wq(guc);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	might_lock(&guc->submission_state.suspend.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

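/**
 * xe_guc_submit_init - Initialize the GuC submission backend
 * @guc: the GuC object
 *
 * Set up the submission state (lock, GuC ID manager, submit workqueues,
 * exec queue lookup xarray) and register the driver-managed cleanup action.
 *
 * Return: 0 on success, negative error code on failure.
 */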
int xe_guc_submit_init(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0);
	if (err)
		return err;

	err = alloc_submit_wq(guc);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	spin_lock_init(&guc->submission_state.suspend.lock);
	guc->submission_state.suspend.context = dma_fence_context_alloc(1);

	primelockdep(guc);

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	void *ptr;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling
	 * path; worst case the user gets -ENOMEM on engine create and has to
	 * try again.
	 *
	 * FIXME: Have the caller pre-alloc or post-alloc with GFP_KERNEL to
	 * prevent this failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
			       q->guc->id + i, q, GFP_NOWAIT);
		if (IS_ERR(ptr)) {
			ret = PTR_ERR(ptr);
			goto err_release;
		}
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}
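
/*
 * Worked example: a width-4 parallel queue that reserves base id 8 occupies
 * GuC ids 8..11, and alloc_guc_id() stores the same exec queue pointer at
 * each of those indices in the lookup xarray, so any of the four ids seen in
 * a G2H resolves back to q.
 */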

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD
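
/*
 * The three expansions above generate, respectively:
 *
 *	__guc_exec_queue_policy_add_execution_quantum(policy, data)
 *	__guc_exec_queue_policy_add_preemption_timeout(policy, data)
 *	__guc_exec_queue_policy_add_priority(policy, data)
 *
 * each appending one KLV (key plus a 1-dword length, then the value) to the
 * H2G policy message and bumping policy->count.
 */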

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_assert(xe, exec_queue_registered(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

static void __register_mlrc_engine(struct xe_guc *guc,
				   struct xe_exec_queue *q,
				   struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE	(13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	struct xe_device *xe = guc_to_xe(guc);
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_assert(xe, xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc + i;

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_engine(struct xe_guc *guc,
			      struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void register_engine(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc;
	struct guc_ctxt_registration_info info;

	xe_assert(xe, !exec_queue_registered(q));

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/*
	 * We must keep a reference for LR engines if the engine is registered
	 * with the GuC, as jobs signal immediately and we can't destroy an
	 * engine while the GuC still has a reference to it.
	 */
	if (xe_exec_queue_is_lr(q))
		xe_exec_queue_get(q);

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_parallel(q))
		__register_mlrc_engine(guc, q, &info);
	else
		__register_engine(guc, &info);
	init_policies(guc, q);
}

static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}
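
/*
 * Worked example of the wrap math above: with wqi_tail at WQ_SIZE - 16 there
 * are 16 bytes (4 dwords) left before the ring wraps, so the NOOP header
 * encodes len_dw = 4 - 1 = 3 (the length excludes the header dword), the
 * NOOP pads out the remainder of the ring, and wqi_tail resets to 0.
 */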

static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc + j;

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_assert(xe, i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}
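
/*
 * Sizing example for the MULTI_LRC work item above: a width-2 queue emits
 * WQ_HEADER_SIZE - 1 + 2 = 5 dwords (header, LRC descriptor, guc_id/ring
 * tail, a zero dword, then one extra ring tail), i.e. wqi_size = 20 bytes
 * and len_dw = 4.
 */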

#define RESUME_PENDING	~0x0ull
static void submit_exec_queue(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc;
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_assert(xe, exec_queue_registered(q));

	if (xe_exec_queue_is_parallel(q))
		wq_item_append(q);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	bool lr = xe_exec_queue_is_lr(q);

	xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		  exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) {
		if (!exec_queue_registered(q))
			register_engine(q);
		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
			q->ring_ops->emit_job(job);
		submit_exec_queue(q);
	}

	if (lr) {
		xe_sched_job_set_error(job, -EOPNOTSUPP);
		return NULL;
	} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
		return job->fence;
	} else {
		return dma_fence_get(job->fence);
	}
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

static int guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}
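
/*
 * For example, MAKE_SCHED_CONTEXT_ACTION(q, DISABLE) expands to:
 *
 *	u32 action[] = {
 *		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
 *		q->guc->id,
 *		GUC_CONTEXT_DISABLE,
 *	}
 *
 * which is why the callers below can declare it alongside their locals.
 */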

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	set_min_preemption_timeout(guc, q);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
				 guc_read_stopped(guc), HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		drm_warn(&xe->drm, "Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);
	/*
	 * Reserve space for both G2Hs here as the second G2H is sent from a
	 * G2H handler and we are not allowed to reserve G2H space in
	 * handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct drm_printer p = drm_err_printer(&xe->drm, NULL);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = q->logical_mask;
	u32 width_mask = (0x1 << q->width) - 1;
	int i;
	bool cookie;

	if (q->vm && !q->vm->error_capture.capture_once) {
		q->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += q->width;
			} else {
				++i;
			}
		}

		if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL))
			xe_gt_info(guc_to_gt(guc),
				   "failed to get forcewake for error capture");
		xe_guc_ct_print(&guc->ct, &p, true);
		guc_exec_queue_print(q, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != q->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print(hwe, &p);
		}
		xe_analyze_vm(&p, q->vm, q->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_exec_queue *q)
{
}
#endif

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gpu_scheduler *sched = &ge->sched;

	xe_assert(xe, xe_exec_queue_is_lr(q));
	trace_xe_exec_queue_lr_cleanup(q);

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to mark
	 * the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	xe_sched_submission_start(sched);
}

static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
	int err = -ETIME;
	int i = 0;

	/*
	 * TDR has fired before the free job worker. Common if the exec queue
	 * was closed immediately after the last fence signaled.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		guc_exec_queue_free_job(drm_job);

		return DRM_GPU_SCHED_STAT_NOMINAL;
	}

	drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
		   xe_sched_job_seqno(job), q->guc->id, q->flags);
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");

	simple_error_capture(q);
	xe_devcoredump(job);

	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (exec_queue_registered(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;
		set_exec_queue_banned(q);
		if (!exec_queue_destroyed(q)) {
			xe_exec_queue_get(q);
			disable_scheduling_deregister(guc, q);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret || guc_read_stopped(guc)) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void __guc_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, fini_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	kfree(ge);
	xe_exec_queue_fini(q);
}

static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
{
	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
		__guc_exec_queue_fini_async(&q->guc->fini_async);
	else
		queue_work(system_wq, &q->guc->fini_async);
}

static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, and the scheduler
	 * can't complete fini within itself (circular dependency). Async
	 * resolves this, and we don't really care when everything is fini'd,
	 * just that it is.
	 */
	guc_exec_queue_fini_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	if (exec_queue_registered(q))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_fini(guc, q);
}

static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
}

static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
		  guc_read_stopped(guc));
	xe_assert(xe, q->guc->suspend_pending);

	q->guc->suspend_pending = false;
	smp_wmb();
	wake_up(&q->guc->suspend_wait);
}

static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
	    exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
			   guc_read_stopped(guc));

		if (!guc_read_stopped(guc)) {
			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && q->guc->resume_time)
				msleep(wait_ms);

			set_exec_queue_suspended(q);
			clear_exec_queue_enabled(q);
			set_exec_queue_pending_disable(q);
			trace_xe_exec_queue_scheduling_disable(q);

			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
		}
	} else if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q)) {
		MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);

		q->guc->resume_time = RESUME_PENDING;
		clear_exec_queue_suspended(q);
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);

		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
	} else {
		clear_exec_queue_suspended(q);
	}
}

#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4

static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err;

	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_waitqueue_head(&ge->suspend_wait);

	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    get_submit_wq(guc),
			    q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	if (xe_exec_queue_is_lr(q))
		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	if (guc_read_stopped(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	trace_xe_exec_queue_create(q);

	return 0;

err_entity:
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}

static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
	trace_xe_exec_queue_kill(q);
	set_exec_queue_killed(q);
	xe_guc_exec_queue_trigger_cleanup(q);
}

static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
				   u32 opcode)
{
	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode;
	msg->private_data = q;

	trace_xe_sched_msg_add(msg);
	xe_sched_add_msg(&q->guc->sched, msg);
}

#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT))
		guc_exec_queue_add_msg(q, msg, CLEANUP);
	else
		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
}

static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
				       enum xe_exec_queue_priority priority)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us ||
	    exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    exec_queue_killed_or_banned(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending)
		return -EINVAL;

	q->guc->suspend_pending = true;
	guc_exec_queue_add_msg(q, msg, SUSPEND);

	return 0;
}

static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
		   guc_read_stopped(guc));
}

static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !q->guc->suspend_pending);

	guc_exec_queue_add_msg(q, msg, RESUME);
}

static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
{
	return exec_queue_reset(q);
}

/*
 * All of these functions are an abstraction layer which other parts of XE can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
};

static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
		    xe_exec_queue_is_lr(q))
			xe_exec_queue_put(q);
		else if (exec_queue_destroyed(q))
			__guc_exec_queue_fini(guc, q);
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel engines and engines used for VM
	 * ops) that has a started but not completed job, or whose jobs have
	 * gone through a GT reset more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				xe_sched_tdr_queue_imm(&q->guc->sched);
				set_exec_queue_banned(q);
			}
		}
	}
}

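/**
 * xe_guc_submit_reset_prepare - Mark GuC submission as stopped ahead of a reset
 * @guc: the GuC object
 *
 * Return: the previous value of submission_state.stopped, so callers can tell
 * whether a stop was already in flight.
 */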
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to prevent concurrent reset and
	 * releasing any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, !guc_read_stopped(guc));
}

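/**
 * xe_guc_submit_stop - Stop all exec queues in the submission backend
 * @guc: the GuC object
 *
 * Expects guc_read_stopped() to already be 1 (see
 * xe_guc_submit_reset_prepare()). Walks every registered exec queue and
 * stops its scheduler.
 *
 * Return: 0 on success.
 */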
int xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_stop(guc, q);

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

	return 0;
}

static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned(q)) {
		int i;

		trace_xe_exec_queue_resubmit(q);
		for (i = 0; i < q->width; ++i)
			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
}

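/**
 * xe_guc_submit_start - Restart submission after a GuC / GT reset
 * @guc: the GuC object
 *
 * Clears the stopped state, resubmits pending jobs on every exec queue that
 * is not killed or banned, and wakes anyone waiting in
 * xe_guc_submit_reset_wait().
 *
 * Return: 0 on success.
 */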
int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_start(q);
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
		return NULL;
	}

	xe_assert(xe, guc_id >= q->guc->id);
	xe_assert(xe, guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (exec_queue_pending_enable(q)) {
		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		clear_exec_queue_pending_disable(q);
		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			deregister_exec_queue(guc, q);
		}
	}
}

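/**
 * xe_guc_sched_done_handler - Handle a scheduling done G2H from the GuC
 * @guc: the GuC object
 * @msg: G2H message payload, msg[0] is the guc_id
 * @len: length of @msg in dwords
 *
 * Completes a pending scheduling enable or disable via handle_sched_done().
 *
 * Return: 0 on success, -EPROTO on a malformed message or unexpected state.
 */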
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 2)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&q->guc->state));
		return -EPROTO;
	}

	handle_sched_done(guc, q);

	return 0;
}

static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);

	if (exec_queue_banned(q) || xe_exec_queue_is_lr(q))
		xe_exec_queue_put(q);
	else
		__guc_exec_queue_fini(guc, q);
}

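/**
 * xe_guc_deregister_done_handler - Handle a context deregister done G2H
 * @guc: the GuC object
 * @msg: G2H message payload, msg[0] is the guc_id
 * @len: length of @msg in dwords
 *
 * Releases the final reference or finalizes the exec queue via
 * handle_deregister_done().
 *
 * Return: 0 on success, -EPROTO on a malformed message or unexpected state.
 */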
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&q->guc->state));
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}

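/**
 * xe_guc_exec_queue_reset_handler - Handle an engine reset G2H
 * @guc: the GuC object
 * @msg: G2H message payload, msg[0] is the guc_id
 * @len: length of @msg in dwords
 *
 * Marks the exec queue as reset and, unless it is already banned, triggers
 * cleanup so outstanding jobs get cancelled.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */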
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);

	/* FIXME: Do error capture, most likely async */

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (it came from
	 * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
	 * cancel jobs by queueing the TDR immediately, which invokes
	 * guc_exec_queue_timedout_job.
	 */
	set_exec_queue_reset(q);
	if (!exec_queue_banned(q))
		xe_guc_exec_queue_trigger_cleanup(q);

	return 0;
}

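/**
 * xe_guc_exec_queue_memory_cat_error_handler - Handle a memory CAT error G2H
 * @guc: the GuC object
 * @msg: G2H message payload, msg[0] is the guc_id
 * @len: length of @msg in dwords
 *
 * Treated the same as an engine reset: mark the queue as reset and trigger
 * cleanup unless it is already banned.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */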
int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					       u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
	trace_xe_exec_queue_memory_cat_error(q);

	/* Treat the same as engine reset */
	set_exec_queue_reset(q);
	if (!exec_queue_banned(q))
		xe_guc_exec_queue_trigger_cleanup(q);

	return 0;
}

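/**
 * xe_guc_exec_queue_reset_failure_handler - Handle an engine reset failure G2H
 * @guc: the GuC object
 * @msg: G2H message payload: guc_class, instance, reason
 * @len: length of @msg in dwords, must be 3
 *
 * A failed engine reset is an unexpected hardware-feature failure, so log an
 * error and fall back to a full GT reset.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */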
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	u8 guc_class, instance;
	u32 reason;

	if (unlikely(len != 3)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	guc_class = msg[0];
	instance = msg[1];
	reason = msg[2];

	/* Unexpected failure of a hardware feature, log an actual error */
	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
		guc_class, instance, reason);

	xe_gt_reset_async(guc_to_gt(guc));

	return 0;
}

static void
guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	int i;

	snapshot->guc.wqi_head = q->guc->wqi_head;
	snapshot->guc.wqi_tail = q->guc->wqi_tail;
	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
							  wq_desc.wq_status);

	if (snapshot->parallel.wq_desc.head !=
	    snapshot->parallel.wq_desc.tail) {
		for (i = snapshot->parallel.wq_desc.head;
		     i != snapshot->parallel.wq_desc.tail;
		     i = (i + sizeof(u32)) % WQ_SIZE)
			snapshot->parallel.wq[i / sizeof(u32)] =
				parallel_read(xe, map, wq[i / sizeof(u32)]);
	}
}

static void
guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);

	if (snapshot->parallel.wq_desc.head !=
	    snapshot->parallel.wq_desc.tail) {
		for (i = snapshot->parallel.wq_desc.head;
		     i != snapshot->parallel.wq_desc.tail;
		     i = (i + sizeof(u32)) % WQ_SIZE)
			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
				   snapshot->parallel.wq[i / sizeof(u32)]);
	}
}

/**
 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
 * @q: faulty exec queue
 *
 * This can be printed out at a later stage, for example during devcoredump
 * analysis.
 *
 * Returns: a GuC Submit Engine snapshot object that must be freed by the
 * caller, using `xe_guc_exec_queue_snapshot_free`.
 */
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc_submit_exec_queue_snapshot *snapshot;
	int i;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
	if (!snapshot)
		return NULL;

	snapshot->guc.id = q->guc->id;
	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
	snapshot->class = q->class;
	snapshot->logical_mask = q->logical_mask;
	snapshot->width = q->width;
	snapshot->refcount = kref_read(&q->refcount);
	snapshot->sched_timeout = sched->base.timeout;
	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
	snapshot->sched_props.preempt_timeout_us =
		q->sched_props.preempt_timeout_us;

	snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
				      GFP_ATOMIC);
	if (snapshot->lrc) {
		for (i = 0; i < q->width; ++i) {
			struct xe_lrc *lrc = q->lrc + i;

			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
		}
	}

	snapshot->schedule_state = atomic_read(&q->guc->state);
	snapshot->exec_queue_flags = q->flags;

	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_capture(q, snapshot);

	spin_lock(&sched->base.job_list_lock);
	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
					       sizeof(struct pending_list_snapshot),
					       GFP_ATOMIC);
	if (snapshot->pending_list) {
		struct xe_sched_job *job_iter;

		i = 0;
		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
			snapshot->pending_list[i].seqno =
				xe_sched_job_seqno(job_iter);
			snapshot->pending_list[i].fence =
				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
			snapshot->pending_list[i].finished =
				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
				? 1 : 0;
			i++;
		}
	}

	spin_unlock(&sched->base.job_list_lock);

	return snapshot;
}

/**
 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
 * @snapshot: Previously captured snapshot of job.
 *
 * This captures some data that requires taking some locks, so it cannot be
 * done in the signaling path.
 */
void
xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int i;

	if (!snapshot || !snapshot->lrc)
		return;

	for (i = 0; i < snapshot->width; ++i)
		xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
}

/**
 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given GuC Submit Engine snapshot object.
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
		xe_lrc_snapshot_print(snapshot->lrc[i], p);

	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_print(snapshot, p);

	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
	     i++)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   snapshot->pending_list[i].seqno,
			   snapshot->pending_list[i].fence,
			   snapshot->pending_list[i].finished);
}

/**
 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int i;

	if (!snapshot)
		return;

	if (snapshot->lrc) {
		for (i = 0; i < snapshot->width; i++)
			xe_lrc_snapshot_free(snapshot->lrc[i]);
		kfree(snapshot->lrc);
	}
	kfree(snapshot->pending_list);
	kfree(snapshot);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;

	snapshot = xe_guc_exec_queue_snapshot_capture(q);
	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_exec_queue *q;
	unsigned long index;

	if (!xe_device_uc_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_print(q, p);
	mutex_unlock(&guc->submission_state.lock);
}