drivers/gpu/drm/i915/intel_guc_submission.c at v5.3

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / drivers / gpu / drm / i915 / intel_guc_submission.c
at v5.3 1458 lines 44 kB view raw
wrap content
   1/*
   2 * Copyright © 2014 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 */
  24
  25#include <linux/circ_buf.h>
  26
  27#include "gt/intel_engine_pm.h"
  28#include "gt/intel_lrc_reg.h"
  29#include "gt/intel_context.h"
  30#include "gem/i915_gem_context.h"
  31
  32#include "intel_guc_submission.h"
  33#include "i915_drv.h"
  34
  35#define GUC_PREEMPT_FINISHED		0x1
  36#define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
  37#define GUC_PREEMPT_BREADCRUMB_BYTES	\
  38	(sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
  39
  40/**
  41 * DOC: GuC-based command submission
  42 *
  43 * GuC client:
  44 * A intel_guc_client refers to a submission path through GuC. Currently, there
  45 * are two clients. One of them (the execbuf_client) is charged with all
  46 * submissions to the GuC, the other one (preempt_client) is responsible for
  47 * preempting the execbuf_client. This struct is the owner of a doorbell, a
  48 * process descriptor and a workqueue (all of them inside a single gem object
  49 * that contains all required pages for these elements).
  50 *
  51 * GuC stage descriptor:
  52 * During initialization, the driver allocates a static pool of 1024 such
  53 * descriptors, and shares them with the GuC.
  54 * Currently, there exists a 1:1 mapping between a intel_guc_client and a
  55 * guc_stage_desc (via the client's stage_id), so effectively only one
  56 * gets used. This stage descriptor lets the GuC know about the doorbell,
  57 * workqueue and process descriptor. Theoretically, it also lets the GuC
  58 * know about our HW contexts (context ID, etc...), but we actually
  59 * employ a kind of submission where the GuC uses the LRCA sent via the work
  60 * item instead (the single guc_stage_desc associated to execbuf client
  61 * contains information about the default kernel context only, but this is
  62 * essentially unused). This is called a "proxy" submission.
  63 *
  64 * The Scratch registers:
  65 * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
  66 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
  67 * triggers an interrupt on the GuC via another register write (0xC4C8).
  68 * Firmware writes a success/fail code back to the action register after
  69 * processes the request. The kernel driver polls waiting for this update and
  70 * then proceeds.
  71 * See intel_guc_send()
  72 *
  73 * Doorbells:
  74 * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
  75 * mapped into process space.
  76 *
  77 * Work Items:
  78 * There are several types of work items that the host may place into a
  79 * workqueue, each with its own requirements and limitations. Currently only
  80 * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
  81 * represents in-order queue. The kernel driver packs ring tail pointer and an
  82 * ELSP context descriptor dword into Work Item.
  83 * See guc_add_request()
  84 *
  85 */
  86
  87static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
  88{
  89	return (i915_ggtt_offset(engine->status_page.vma) +
  90		I915_GEM_HWS_PREEMPT_ADDR);
  91}
  92
  93static inline struct i915_priolist *to_priolist(struct rb_node *rb)
  94{
  95	return rb_entry(rb, struct i915_priolist, node);
  96}
  97
  98static inline bool is_high_priority(struct intel_guc_client *client)
  99{
 100	return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
 101		client->priority == GUC_CLIENT_PRIORITY_HIGH);
 102}
 103
 104static int reserve_doorbell(struct intel_guc_client *client)
 105{
 106	unsigned long offset;
 107	unsigned long end;
 108	u16 id;
 109
 110	GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
 111
 112	/*
 113	 * The bitmap tracks which doorbell registers are currently in use.
 114	 * It is split into two halves; the first half is used for normal
 115	 * priority contexts, the second half for high-priority ones.
 116	 */
 117	offset = 0;
 118	end = GUC_NUM_DOORBELLS / 2;
 119	if (is_high_priority(client)) {
 120		offset = end;
 121		end += offset;
 122	}
 123
 124	id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset);
 125	if (id == end)
 126		return -ENOSPC;
 127
 128	__set_bit(id, client->guc->doorbell_bitmap);
 129	client->doorbell_id = id;
 130	DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n",
 131			 client->stage_id, yesno(is_high_priority(client)),
 132			 id);
 133	return 0;
 134}
 135
 136static bool has_doorbell(struct intel_guc_client *client)
 137{
 138	if (client->doorbell_id == GUC_DOORBELL_INVALID)
 139		return false;
 140
 141	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
 142}
 143
 144static void unreserve_doorbell(struct intel_guc_client *client)
 145{
 146	GEM_BUG_ON(!has_doorbell(client));
 147
 148	__clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
 149	client->doorbell_id = GUC_DOORBELL_INVALID;
 150}
 151
 152/*
 153 * Tell the GuC to allocate or deallocate a specific doorbell
 154 */
 155
 156static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id)
 157{
 158	u32 action[] = {
 159		INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
 160		stage_id
 161	};
 162
 163	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 164}
 165
 166static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id)
 167{
 168	u32 action[] = {
 169		INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
 170		stage_id
 171	};
 172
 173	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 174}
 175
 176static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client)
 177{
 178	struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr;
 179
 180	return &base[client->stage_id];
 181}
 182
 183/*
 184 * Initialise, update, or clear doorbell data shared with the GuC
 185 *
 186 * These functions modify shared data and so need access to the mapped
 187 * client object which contains the page being used for the doorbell
 188 */
 189
 190static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id)
 191{
 192	struct guc_stage_desc *desc;
 193
 194	/* Update the GuC's idea of the doorbell ID */
 195	desc = __get_stage_desc(client);
 196	desc->db_id = new_id;
 197}
 198
 199static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client)
 200{
 201	return client->vaddr + client->doorbell_offset;
 202}
 203
 204static bool __doorbell_valid(struct intel_guc *guc, u16 db_id)
 205{
 206	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 207
 208	GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS);
 209	return I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID;
 210}
 211
 212static void __init_doorbell(struct intel_guc_client *client)
 213{
 214	struct guc_doorbell_info *doorbell;
 215
 216	doorbell = __get_doorbell(client);
 217	doorbell->db_status = GUC_DOORBELL_ENABLED;
 218	doorbell->cookie = 0;
 219}
 220
 221static void __fini_doorbell(struct intel_guc_client *client)
 222{
 223	struct guc_doorbell_info *doorbell;
 224	u16 db_id = client->doorbell_id;
 225
 226	doorbell = __get_doorbell(client);
 227	doorbell->db_status = GUC_DOORBELL_DISABLED;
 228
 229	/* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit
 230	 * to go to zero after updating db_status before we call the GuC to
 231	 * release the doorbell
 232	 */
 233	if (wait_for_us(!__doorbell_valid(client->guc, db_id), 10))
 234		WARN_ONCE(true, "Doorbell never became invalid after disable\n");
 235}
 236
 237static int create_doorbell(struct intel_guc_client *client)
 238{
 239	int ret;
 240
 241	if (WARN_ON(!has_doorbell(client)))
 242		return -ENODEV; /* internal setup error, should never happen */
 243
 244	__update_doorbell_desc(client, client->doorbell_id);
 245	__init_doorbell(client);
 246
 247	ret = __guc_allocate_doorbell(client->guc, client->stage_id);
 248	if (ret) {
 249		__fini_doorbell(client);
 250		__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
 251		DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n",
 252				 client->stage_id, ret);
 253		return ret;
 254	}
 255
 256	return 0;
 257}
 258
 259static int destroy_doorbell(struct intel_guc_client *client)
 260{
 261	int ret;
 262
 263	GEM_BUG_ON(!has_doorbell(client));
 264
 265	__fini_doorbell(client);
 266	ret = __guc_deallocate_doorbell(client->guc, client->stage_id);
 267	if (ret)
 268		DRM_ERROR("Couldn't destroy client %u doorbell: %d\n",
 269			  client->stage_id, ret);
 270
 271	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
 272
 273	return ret;
 274}
 275
 276static unsigned long __select_cacheline(struct intel_guc *guc)
 277{
 278	unsigned long offset;
 279
 280	/* Doorbell uses a single cache line within a page */
 281	offset = offset_in_page(guc->db_cacheline);
 282
 283	/* Moving to next cache line to reduce contention */
 284	guc->db_cacheline += cache_line_size();
 285
 286	DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
 287			 offset, guc->db_cacheline, cache_line_size());
 288	return offset;
 289}
 290
 291static inline struct guc_process_desc *
 292__get_process_desc(struct intel_guc_client *client)
 293{
 294	return client->vaddr + client->proc_desc_offset;
 295}
 296
 297/*
 298 * Initialise the process descriptor shared with the GuC firmware.
 299 */
 300static void guc_proc_desc_init(struct intel_guc_client *client)
 301{
 302	struct guc_process_desc *desc;
 303
 304	desc = memset(__get_process_desc(client), 0, sizeof(*desc));
 305
 306	/*
 307	 * XXX: pDoorbell and WQVBaseAddress are pointers in process address
 308	 * space for ring3 clients (set them as in mmap_ioctl) or kernel
 309	 * space for kernel clients (map on demand instead? May make debug
 310	 * easier to have it mapped).
 311	 */
 312	desc->wq_base_addr = 0;
 313	desc->db_base_addr = 0;
 314
 315	desc->stage_id = client->stage_id;
 316	desc->wq_size_bytes = GUC_WQ_SIZE;
 317	desc->wq_status = WQ_STATUS_ACTIVE;
 318	desc->priority = client->priority;
 319}
 320
 321static void guc_proc_desc_fini(struct intel_guc_client *client)
 322{
 323	struct guc_process_desc *desc;
 324
 325	desc = __get_process_desc(client);
 326	memset(desc, 0, sizeof(*desc));
 327}
 328
 329static int guc_stage_desc_pool_create(struct intel_guc *guc)
 330{
 331	struct i915_vma *vma;
 332	void *vaddr;
 333
 334	vma = intel_guc_allocate_vma(guc,
 335				     PAGE_ALIGN(sizeof(struct guc_stage_desc) *
 336				     GUC_MAX_STAGE_DESCRIPTORS));
 337	if (IS_ERR(vma))
 338		return PTR_ERR(vma);
 339
 340	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
 341	if (IS_ERR(vaddr)) {
 342		i915_vma_unpin_and_release(&vma, 0);
 343		return PTR_ERR(vaddr);
 344	}
 345
 346	guc->stage_desc_pool = vma;
 347	guc->stage_desc_pool_vaddr = vaddr;
 348	ida_init(&guc->stage_ids);
 349
 350	return 0;
 351}
 352
 353static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
 354{
 355	ida_destroy(&guc->stage_ids);
 356	i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
 357}
 358
 359/*
 360 * Initialise/clear the stage descriptor shared with the GuC firmware.
 361 *
 362 * This descriptor tells the GuC where (in GGTT space) to find the important
 363 * data structures relating to this client (doorbell, process descriptor,
 364 * write queue, etc).
 365 */
 366static void guc_stage_desc_init(struct intel_guc_client *client)
 367{
 368	struct intel_guc *guc = client->guc;
 369	struct i915_gem_context *ctx = client->owner;
 370	struct i915_gem_engines_iter it;
 371	struct guc_stage_desc *desc;
 372	struct intel_context *ce;
 373	u32 gfx_addr;
 374
 375	desc = __get_stage_desc(client);
 376	memset(desc, 0, sizeof(*desc));
 377
 378	desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
 379			  GUC_STAGE_DESC_ATTR_KERNEL;
 380	if (is_high_priority(client))
 381		desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT;
 382	desc->stage_id = client->stage_id;
 383	desc->priority = client->priority;
 384	desc->db_id = client->doorbell_id;
 385
 386	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 387		struct guc_execlist_context *lrc;
 388
 389		if (!(ce->engine->mask & client->engines))
 390			continue;
 391
 392		/* TODO: We have a design issue to be solved here. Only when we
 393		 * receive the first batch, we know which engine is used by the
 394		 * user. But here GuC expects the lrc and ring to be pinned. It
 395		 * is not an issue for default context, which is the only one
 396		 * for now who owns a GuC client. But for future owner of GuC
 397		 * client, need to make sure lrc is pinned prior to enter here.
 398		 */
 399		if (!ce->state)
 400			break;	/* XXX: continue? */
 401
 402		/*
 403		 * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy
 404		 * submission or, in other words, not using a direct submission
 405		 * model) the KMD's LRCA is not used for any work submission.
 406		 * Instead, the GuC uses the LRCA of the user mode context (see
 407		 * guc_add_request below).
 408		 */
 409		lrc = &desc->lrc[ce->engine->guc_id];
 410		lrc->context_desc = lower_32_bits(ce->lrc_desc);
 411
 412		/* The state page is after PPHWSP */
 413		lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) +
 414				 LRC_STATE_PN * PAGE_SIZE;
 415
 416		/* XXX: In direct submission, the GuC wants the HW context id
 417		 * here. In proxy submission, it wants the stage id
 418		 */
 419		lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
 420				(ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET);
 421
 422		lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
 423		lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
 424		lrc->ring_next_free_location = lrc->ring_begin;
 425		lrc->ring_current_tail_pointer_value = 0;
 426
 427		desc->engines_used |= BIT(ce->engine->guc_id);
 428	}
 429	i915_gem_context_unlock_engines(ctx);
 430
 431	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
 432			 client->engines, desc->engines_used);
 433	WARN_ON(desc->engines_used == 0);
 434
 435	/*
 436	 * The doorbell, process descriptor, and workqueue are all parts
 437	 * of the client object, which the GuC will reference via the GGTT
 438	 */
 439	gfx_addr = intel_guc_ggtt_offset(guc, client->vma);
 440	desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
 441				client->doorbell_offset;
 442	desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client));
 443	desc->db_trigger_uk = gfx_addr + client->doorbell_offset;
 444	desc->process_desc = gfx_addr + client->proc_desc_offset;
 445	desc->wq_addr = gfx_addr + GUC_DB_SIZE;
 446	desc->wq_size = GUC_WQ_SIZE;
 447
 448	desc->desc_private = ptr_to_u64(client);
 449}
 450
 451static void guc_stage_desc_fini(struct intel_guc_client *client)
 452{
 453	struct guc_stage_desc *desc;
 454
 455	desc = __get_stage_desc(client);
 456	memset(desc, 0, sizeof(*desc));
 457}
 458
 459/* Construct a Work Item and append it to the GuC's Work Queue */
 460static void guc_wq_item_append(struct intel_guc_client *client,
 461			       u32 target_engine, u32 context_desc,
 462			       u32 ring_tail, u32 fence_id)
 463{
 464	/* wqi_len is in DWords, and does not include the one-word header */
 465	const size_t wqi_size = sizeof(struct guc_wq_item);
 466	const u32 wqi_len = wqi_size / sizeof(u32) - 1;
 467	struct guc_process_desc *desc = __get_process_desc(client);
 468	struct guc_wq_item *wqi;
 469	u32 wq_off;
 470
 471	lockdep_assert_held(&client->wq_lock);
 472
 473	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
 474	 * should not have the case where structure wqi is across page, neither
 475	 * wrapped to the beginning. This simplifies the implementation below.
 476	 *
 477	 * XXX: if not the case, we need save data to a temp wqi and copy it to
 478	 * workqueue buffer dw by dw.
 479	 */
 480	BUILD_BUG_ON(wqi_size != 16);
 481
 482	/* We expect the WQ to be active if we're appending items to it */
 483	GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE);
 484
 485	/* Free space is guaranteed. */
 486	wq_off = READ_ONCE(desc->tail);
 487	GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head),
 488			      GUC_WQ_SIZE) < wqi_size);
 489	GEM_BUG_ON(wq_off & (wqi_size - 1));
 490
 491	/* WQ starts from the page after doorbell / process_desc */
 492	wqi = client->vaddr + wq_off + GUC_DB_SIZE;
 493
 494	if (I915_SELFTEST_ONLY(client->use_nop_wqi)) {
 495		wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT);
 496	} else {
 497		/* Now fill in the 4-word work queue item */
 498		wqi->header = WQ_TYPE_INORDER |
 499			      (wqi_len << WQ_LEN_SHIFT) |
 500			      (target_engine << WQ_TARGET_SHIFT) |
 501			      WQ_NO_WCFLUSH_WAIT;
 502		wqi->context_desc = context_desc;
 503		wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
 504		GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
 505		wqi->fence_id = fence_id;
 506	}
 507
 508	/* Make the update visible to GuC */
 509	WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
 510}
 511
 512static void guc_ring_doorbell(struct intel_guc_client *client)
 513{
 514	struct guc_doorbell_info *db;
 515	u32 cookie;
 516
 517	lockdep_assert_held(&client->wq_lock);
 518
 519	/* pointer of current doorbell cacheline */
 520	db = __get_doorbell(client);
 521
 522	/*
 523	 * We're not expecting the doorbell cookie to change behind our back,
 524	 * we also need to treat 0 as a reserved value.
 525	 */
 526	cookie = READ_ONCE(db->cookie);
 527	WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie);
 528
 529	/* XXX: doorbell was lost and need to acquire it again */
 530	GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED);
 531}
 532
 533static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 534{
 535	struct intel_guc_client *client = guc->execbuf_client;
 536	struct intel_engine_cs *engine = rq->engine;
 537	u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
 538	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 539
 540	spin_lock(&client->wq_lock);
 541
 542	guc_wq_item_append(client, engine->guc_id, ctx_desc,
 543			   ring_tail, rq->fence.seqno);
 544	guc_ring_doorbell(client);
 545
 546	client->submissions[engine->id] += 1;
 547
 548	spin_unlock(&client->wq_lock);
 549}
 550
 551/*
 552 * When we're doing submissions using regular execlists backend, writing to
 553 * ELSP from CPU side is enough to make sure that writes to ringbuffer pages
 554 * pinned in mappable aperture portion of GGTT are visible to command streamer.
 555 * Writes done by GuC on our behalf are not guaranteeing such ordering,
 556 * therefore, to ensure the flush, we're issuing a POSTING READ.
 557 */
 558static void flush_ggtt_writes(struct i915_vma *vma)
 559{
 560	struct drm_i915_private *i915 = vma->vm->i915;
 561
 562	if (i915_vma_is_map_and_fenceable(vma))
 563		intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS);
 564}
 565
 566static void inject_preempt_context(struct work_struct *work)
 567{
 568	struct guc_preempt_work *preempt_work =
 569		container_of(work, typeof(*preempt_work), work);
 570	struct intel_engine_cs *engine = preempt_work->engine;
 571	struct intel_guc *guc = container_of(preempt_work, typeof(*guc),
 572					     preempt_work[engine->id]);
 573	struct intel_guc_client *client = guc->preempt_client;
 574	struct guc_stage_desc *stage_desc = __get_stage_desc(client);
 575	struct intel_context *ce = engine->preempt_context;
 576	u32 data[7];
 577
 578	if (!ce->ring->emit) { /* recreate upon load/resume */
 579		u32 addr = intel_hws_preempt_done_address(engine);
 580		u32 *cs;
 581
 582		cs = ce->ring->vaddr;
 583		if (engine->class == RENDER_CLASS) {
 584			cs = gen8_emit_ggtt_write_rcs(cs,
 585						      GUC_PREEMPT_FINISHED,
 586						      addr,
 587						      PIPE_CONTROL_CS_STALL);
 588		} else {
 589			cs = gen8_emit_ggtt_write(cs,
 590						  GUC_PREEMPT_FINISHED,
 591						  addr,
 592						  0);
 593			*cs++ = MI_NOOP;
 594			*cs++ = MI_NOOP;
 595		}
 596		*cs++ = MI_USER_INTERRUPT;
 597		*cs++ = MI_NOOP;
 598
 599		ce->ring->emit = GUC_PREEMPT_BREADCRUMB_BYTES;
 600		GEM_BUG_ON((void *)cs - ce->ring->vaddr != ce->ring->emit);
 601
 602		flush_ggtt_writes(ce->ring->vma);
 603	}
 604
 605	spin_lock_irq(&client->wq_lock);
 606	guc_wq_item_append(client, engine->guc_id, lower_32_bits(ce->lrc_desc),
 607			   GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0);
 608	spin_unlock_irq(&client->wq_lock);
 609
 610	/*
 611	 * If GuC firmware performs an engine reset while that engine had
 612	 * a preemption pending, it will set the terminated attribute bit
 613	 * on our preemption stage descriptor. GuC firmware retains all
 614	 * pending work items for a high-priority GuC client, unlike the
 615	 * normal-priority GuC client where work items are dropped. It
 616	 * wants to make sure the preempt-to-idle work doesn't run when
 617	 * scheduling resumes, and uses this bit to inform its scheduler
 618	 * and presumably us as well. Our job is to clear it for the next
 619	 * preemption after reset, otherwise that and future preemptions
 620	 * will never complete. We'll just clear it every time.
 621	 */
 622	stage_desc->attribute &= ~GUC_STAGE_DESC_ATTR_TERMINATED;
 623
 624	data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION;
 625	data[1] = client->stage_id;
 626	data[2] = INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q |
 627		  INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q;
 628	data[3] = engine->guc_id;
 629	data[4] = guc->execbuf_client->priority;
 630	data[5] = guc->execbuf_client->stage_id;
 631	data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
 632
 633	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
 634		execlists_clear_active(&engine->execlists,
 635				       EXECLISTS_ACTIVE_PREEMPT);
 636		tasklet_schedule(&engine->execlists.tasklet);
 637	}
 638
 639	(void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++);
 640}
 641
 642/*
 643 * We're using user interrupt and HWSP value to mark that preemption has
 644 * finished and GPU is idle. Normally, we could unwind and continue similar to
 645 * execlists submission path. Unfortunately, with GuC we also need to wait for
 646 * it to finish its own postprocessing, before attempting to submit. Otherwise
 647 * GuC may silently ignore our submissions, and thus we risk losing request at
 648 * best, executing out-of-order and causing kernel panic at worst.
 649 */
 650#define GUC_PREEMPT_POSTPROCESS_DELAY_MS 10
 651static void wait_for_guc_preempt_report(struct intel_engine_cs *engine)
 652{
 653	struct intel_guc *guc = &engine->i915->guc;
 654	struct guc_shared_ctx_data *data = guc->shared_data_vaddr;
 655	struct guc_ctx_report *report =
 656		&data->preempt_ctx_report[engine->guc_id];
 657
 658	if (wait_for_atomic(report->report_return_status ==
 659			    INTEL_GUC_REPORT_STATUS_COMPLETE,
 660			    GUC_PREEMPT_POSTPROCESS_DELAY_MS))
 661		DRM_ERROR("Timed out waiting for GuC preemption report\n");
 662	/*
 663	 * GuC is expecting that we're also going to clear the affected context
 664	 * counter, let's also reset the return status to not depend on GuC
 665	 * resetting it after recieving another preempt action
 666	 */
 667	report->affected_count = 0;
 668	report->report_return_status = INTEL_GUC_REPORT_STATUS_UNKNOWN;
 669}
 670
 671static void complete_preempt_context(struct intel_engine_cs *engine)
 672{
 673	struct intel_engine_execlists *execlists = &engine->execlists;
 674
 675	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
 676
 677	if (inject_preempt_hang(execlists))
 678		return;
 679
 680	execlists_cancel_port_requests(execlists);
 681	execlists_unwind_incomplete_requests(execlists);
 682
 683	wait_for_guc_preempt_report(engine);
 684	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
 685}
 686
 687/**
 688 * guc_submit() - Submit commands through GuC
 689 * @engine: engine associated with the commands
 690 *
 691 * The only error here arises if the doorbell hardware isn't functioning
 692 * as expected, which really shouln't happen.
 693 */
 694static void guc_submit(struct intel_engine_cs *engine)
 695{
 696	struct intel_guc *guc = &engine->i915->guc;
 697	struct intel_engine_execlists * const execlists = &engine->execlists;
 698	struct execlist_port *port = execlists->port;
 699	unsigned int n;
 700
 701	for (n = 0; n < execlists_num_ports(execlists); n++) {
 702		struct i915_request *rq;
 703		unsigned int count;
 704
 705		rq = port_unpack(&port[n], &count);
 706		if (rq && count == 0) {
 707			port_set(&port[n], port_pack(rq, ++count));
 708
 709			flush_ggtt_writes(rq->ring->vma);
 710
 711			guc_add_request(guc, rq);
 712		}
 713	}
 714}
 715
 716static void port_assign(struct execlist_port *port, struct i915_request *rq)
 717{
 718	GEM_BUG_ON(port_isset(port));
 719
 720	port_set(port, i915_request_get(rq));
 721}
 722
 723static inline int rq_prio(const struct i915_request *rq)
 724{
 725	return rq->sched.attr.priority;
 726}
 727
 728static inline int port_prio(const struct execlist_port *port)
 729{
 730	return rq_prio(port_request(port)) | __NO_PREEMPTION;
 731}
 732
 733static bool __guc_dequeue(struct intel_engine_cs *engine)
 734{
 735	struct intel_engine_execlists * const execlists = &engine->execlists;
 736	struct execlist_port *port = execlists->port;
 737	struct i915_request *last = NULL;
 738	const struct execlist_port * const last_port =
 739		&execlists->port[execlists->port_mask];
 740	bool submit = false;
 741	struct rb_node *rb;
 742
 743	lockdep_assert_held(&engine->active.lock);
 744
 745	if (port_isset(port)) {
 746		if (intel_engine_has_preemption(engine)) {
 747			struct guc_preempt_work *preempt_work =
 748				&engine->i915->guc.preempt_work[engine->id];
 749			int prio = execlists->queue_priority_hint;
 750
 751			if (i915_scheduler_need_preempt(prio,
 752							port_prio(port))) {
 753				execlists_set_active(execlists,
 754						     EXECLISTS_ACTIVE_PREEMPT);
 755				queue_work(engine->i915->guc.preempt_wq,
 756					   &preempt_work->work);
 757				return false;
 758			}
 759		}
 760
 761		port++;
 762		if (port_isset(port))
 763			return false;
 764	}
 765	GEM_BUG_ON(port_isset(port));
 766
 767	while ((rb = rb_first_cached(&execlists->queue))) {
 768		struct i915_priolist *p = to_priolist(rb);
 769		struct i915_request *rq, *rn;
 770		int i;
 771
 772		priolist_for_each_request_consume(rq, rn, p, i) {
 773			if (last && rq->hw_context != last->hw_context) {
 774				if (port == last_port)
 775					goto done;
 776
 777				if (submit)
 778					port_assign(port, last);
 779				port++;
 780			}
 781
 782			list_del_init(&rq->sched.link);
 783
 784			__i915_request_submit(rq);
 785			trace_i915_request_in(rq, port_index(port, execlists));
 786
 787			last = rq;
 788			submit = true;
 789		}
 790
 791		rb_erase_cached(&p->node, &execlists->queue);
 792		i915_priolist_free(p);
 793	}
 794done:
 795	execlists->queue_priority_hint =
 796		rb ? to_priolist(rb)->priority : INT_MIN;
 797	if (submit)
 798		port_assign(port, last);
 799	if (last)
 800		execlists_user_begin(execlists, execlists->port);
 801
 802	/* We must always keep the beast fed if we have work piled up */
 803	GEM_BUG_ON(port_isset(execlists->port) &&
 804		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
 805	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
 806		   !port_isset(execlists->port));
 807
 808	return submit;
 809}
 810
 811static void guc_dequeue(struct intel_engine_cs *engine)
 812{
 813	if (__guc_dequeue(engine))
 814		guc_submit(engine);
 815}
 816
 817static void guc_submission_tasklet(unsigned long data)
 818{
 819	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 820	struct intel_engine_execlists * const execlists = &engine->execlists;
 821	struct execlist_port *port = execlists->port;
 822	struct i915_request *rq;
 823	unsigned long flags;
 824
 825	spin_lock_irqsave(&engine->active.lock, flags);
 826
 827	rq = port_request(port);
 828	while (rq && i915_request_completed(rq)) {
 829		trace_i915_request_out(rq);
 830		i915_request_put(rq);
 831
 832		port = execlists_port_complete(execlists, port);
 833		if (port_isset(port)) {
 834			execlists_user_begin(execlists, port);
 835			rq = port_request(port);
 836		} else {
 837			execlists_user_end(execlists);
 838			rq = NULL;
 839		}
 840	}
 841
 842	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
 843	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
 844	    GUC_PREEMPT_FINISHED)
 845		complete_preempt_context(engine);
 846
 847	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 848		guc_dequeue(engine);
 849
 850	spin_unlock_irqrestore(&engine->active.lock, flags);
 851}
 852
 853static void guc_reset_prepare(struct intel_engine_cs *engine)
 854{
 855	struct intel_engine_execlists * const execlists = &engine->execlists;
 856
 857	GEM_TRACE("%s\n", engine->name);
 858
 859	/*
 860	 * Prevent request submission to the hardware until we have
 861	 * completed the reset in i915_gem_reset_finish(). If a request
 862	 * is completed by one engine, it may then queue a request
 863	 * to a second via its execlists->tasklet *just* as we are
 864	 * calling engine->init_hw() and also writing the ELSP.
 865	 * Turning off the execlists->tasklet until the reset is over
 866	 * prevents the race.
 867	 */
 868	__tasklet_disable_sync_once(&execlists->tasklet);
 869
 870	/*
 871	 * We're using worker to queue preemption requests from the tasklet in
 872	 * GuC submission mode.
 873	 * Even though tasklet was disabled, we may still have a worker queued.
 874	 * Let's make sure that all workers scheduled before disabling the
 875	 * tasklet are completed before continuing with the reset.
 876	 */
 877	if (engine->i915->guc.preempt_wq)
 878		flush_workqueue(engine->i915->guc.preempt_wq);
 879}
 880
 881static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 882{
 883	struct intel_engine_execlists * const execlists = &engine->execlists;
 884	struct i915_request *rq;
 885	unsigned long flags;
 886
 887	spin_lock_irqsave(&engine->active.lock, flags);
 888
 889	execlists_cancel_port_requests(execlists);
 890
 891	/* Push back any incomplete requests for replay after the reset. */
 892	rq = execlists_unwind_incomplete_requests(execlists);
 893	if (!rq)
 894		goto out_unlock;
 895
 896	if (!i915_request_started(rq))
 897		stalled = false;
 898
 899	i915_reset_request(rq, stalled);
 900	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
 901
 902out_unlock:
 903	spin_unlock_irqrestore(&engine->active.lock, flags);
 904}
 905
 906static void guc_cancel_requests(struct intel_engine_cs *engine)
 907{
 908	struct intel_engine_execlists * const execlists = &engine->execlists;
 909	struct i915_request *rq, *rn;
 910	struct rb_node *rb;
 911	unsigned long flags;
 912
 913	GEM_TRACE("%s\n", engine->name);
 914
 915	/*
 916	 * Before we call engine->cancel_requests(), we should have exclusive
 917	 * access to the submission state. This is arranged for us by the
 918	 * caller disabling the interrupt generation, the tasklet and other
 919	 * threads that may then access the same state, giving us a free hand
 920	 * to reset state. However, we still need to let lockdep be aware that
 921	 * we know this state may be accessed in hardirq context, so we
 922	 * disable the irq around this manipulation and we want to keep
 923	 * the spinlock focused on its duties and not accidentally conflate
 924	 * coverage to the submission's irq state. (Similarly, although we
 925	 * shouldn't need to disable irq around the manipulation of the
 926	 * submission's irq state, we also wish to remind ourselves that
 927	 * it is irq state.)
 928	 */
 929	spin_lock_irqsave(&engine->active.lock, flags);
 930
 931	/* Cancel the requests on the HW and clear the ELSP tracker. */
 932	execlists_cancel_port_requests(execlists);
 933
 934	/* Mark all executing requests as skipped. */
 935	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 936		if (!i915_request_signaled(rq))
 937			dma_fence_set_error(&rq->fence, -EIO);
 938
 939		i915_request_mark_complete(rq);
 940	}
 941
 942	/* Flush the queued requests to the timeline list (for retiring). */
 943	while ((rb = rb_first_cached(&execlists->queue))) {
 944		struct i915_priolist *p = to_priolist(rb);
 945		int i;
 946
 947		priolist_for_each_request_consume(rq, rn, p, i) {
 948			list_del_init(&rq->sched.link);
 949			__i915_request_submit(rq);
 950			dma_fence_set_error(&rq->fence, -EIO);
 951			i915_request_mark_complete(rq);
 952		}
 953
 954		rb_erase_cached(&p->node, &execlists->queue);
 955		i915_priolist_free(p);
 956	}
 957
 958	/* Remaining _unready_ requests will be nop'ed when submitted */
 959
 960	execlists->queue_priority_hint = INT_MIN;
 961	execlists->queue = RB_ROOT_CACHED;
 962	GEM_BUG_ON(port_isset(execlists->port));
 963
 964	spin_unlock_irqrestore(&engine->active.lock, flags);
 965}
 966
 967static void guc_reset_finish(struct intel_engine_cs *engine)
 968{
 969	struct intel_engine_execlists * const execlists = &engine->execlists;
 970
 971	if (__tasklet_enable(&execlists->tasklet))
 972		/* And kick in case we missed a new request submission. */
 973		tasklet_hi_schedule(&execlists->tasklet);
 974
 975	GEM_TRACE("%s: depth->%d\n", engine->name,
 976		  atomic_read(&execlists->tasklet.count));
 977}
 978
 979/*
 980 * Everything below here is concerned with setup & teardown, and is
 981 * therefore not part of the somewhat time-critical batch-submission
 982 * path of guc_submit() above.
 983 */
 984
 985/* Check that a doorbell register is in the expected state */
 986static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
 987{
 988	bool valid;
 989
 990	GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS);
 991
 992	valid = __doorbell_valid(guc, db_id);
 993
 994	if (test_bit(db_id, guc->doorbell_bitmap) == valid)
 995		return true;
 996
 997	DRM_DEBUG_DRIVER("Doorbell %u has unexpected state: valid=%s\n",
 998			 db_id, yesno(valid));
 999
1000	return false;
1001}
1002
1003static bool guc_verify_doorbells(struct intel_guc *guc)
1004{
1005	bool doorbells_ok = true;
1006	u16 db_id;
1007
1008	for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id)
1009		if (!doorbell_ok(guc, db_id))
1010			doorbells_ok = false;
1011
1012	return doorbells_ok;
1013}
1014
1015/**
1016 * guc_client_alloc() - Allocate an intel_guc_client
1017 * @dev_priv:	driver private data structure
1018 * @engines:	The set of engines to enable for this client
1019 * @priority:	four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW
1020 *		The kernel client to replace ExecList submission is created with
1021 *		NORMAL priority. Priority of a client for scheduler can be HIGH,
1022 *		while a preemption context can use CRITICAL.
1023 * @ctx:	the context that owns the client (we use the default render
1024 *		context)
1025 *
1026 * Return:	An intel_guc_client object if success, else NULL.
1027 */
1028static struct intel_guc_client *
1029guc_client_alloc(struct drm_i915_private *dev_priv,
1030		 u32 engines,
1031		 u32 priority,
1032		 struct i915_gem_context *ctx)
1033{
1034	struct intel_guc_client *client;
1035	struct intel_guc *guc = &dev_priv->guc;
1036	struct i915_vma *vma;
1037	void *vaddr;
1038	int ret;
1039
1040	client = kzalloc(sizeof(*client), GFP_KERNEL);
1041	if (!client)
1042		return ERR_PTR(-ENOMEM);
1043
1044	client->guc = guc;
1045	client->owner = ctx;
1046	client->engines = engines;
1047	client->priority = priority;
1048	client->doorbell_id = GUC_DOORBELL_INVALID;
1049	spin_lock_init(&client->wq_lock);
1050
1051	ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS,
1052			     GFP_KERNEL);
1053	if (ret < 0)
1054		goto err_client;
1055
1056	client->stage_id = ret;
1057
1058	/* The first page is doorbell/proc_desc. Two followed pages are wq. */
1059	vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
1060	if (IS_ERR(vma)) {
1061		ret = PTR_ERR(vma);
1062		goto err_id;
1063	}
1064
1065	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
1066	client->vma = vma;
1067
1068	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1069	if (IS_ERR(vaddr)) {
1070		ret = PTR_ERR(vaddr);
1071		goto err_vma;
1072	}
1073	client->vaddr = vaddr;
1074
1075	ret = reserve_doorbell(client);
1076	if (ret)
1077		goto err_vaddr;
1078
1079	client->doorbell_offset = __select_cacheline(guc);
1080
1081	/*
1082	 * Since the doorbell only requires a single cacheline, we can save
1083	 * space by putting the application process descriptor in the same
1084	 * page. Use the half of the page that doesn't include the doorbell.
1085	 */
1086	if (client->doorbell_offset >= (GUC_DB_SIZE / 2))
1087		client->proc_desc_offset = 0;
1088	else
1089		client->proc_desc_offset = (GUC_DB_SIZE / 2);
1090
1091	DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: stage_id %u\n",
1092			 priority, client, client->engines, client->stage_id);
1093	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n",
1094			 client->doorbell_id, client->doorbell_offset);
1095
1096	return client;
1097
1098err_vaddr:
1099	i915_gem_object_unpin_map(client->vma->obj);
1100err_vma:
1101	i915_vma_unpin_and_release(&client->vma, 0);
1102err_id:
1103	ida_simple_remove(&guc->stage_ids, client->stage_id);
1104err_client:
1105	kfree(client);
1106	return ERR_PTR(ret);
1107}
1108
1109static void guc_client_free(struct intel_guc_client *client)
1110{
1111	unreserve_doorbell(client);
1112	i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP);
1113	ida_simple_remove(&client->guc->stage_ids, client->stage_id);
1114	kfree(client);
1115}
1116
1117static inline bool ctx_save_restore_disabled(struct intel_context *ce)
1118{
1119	u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1];
1120
1121#define SR_DISABLED \
1122	_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \
1123			   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)
1124
1125	return (sr & SR_DISABLED) == SR_DISABLED;
1126
1127#undef SR_DISABLED
1128}
1129
1130static int guc_clients_create(struct intel_guc *guc)
1131{
1132	struct drm_i915_private *dev_priv = guc_to_i915(guc);
1133	struct intel_guc_client *client;
1134
1135	GEM_BUG_ON(guc->execbuf_client);
1136	GEM_BUG_ON(guc->preempt_client);
1137
1138	client = guc_client_alloc(dev_priv,
1139				  INTEL_INFO(dev_priv)->engine_mask,
1140				  GUC_CLIENT_PRIORITY_KMD_NORMAL,
1141				  dev_priv->kernel_context);
1142	if (IS_ERR(client)) {
1143		DRM_ERROR("Failed to create GuC client for submission!\n");
1144		return PTR_ERR(client);
1145	}
1146	guc->execbuf_client = client;
1147
1148	if (dev_priv->preempt_context) {
1149		client = guc_client_alloc(dev_priv,
1150					  INTEL_INFO(dev_priv)->engine_mask,
1151					  GUC_CLIENT_PRIORITY_KMD_HIGH,
1152					  dev_priv->preempt_context);
1153		if (IS_ERR(client)) {
1154			DRM_ERROR("Failed to create GuC client for preemption!\n");
1155			guc_client_free(guc->execbuf_client);
1156			guc->execbuf_client = NULL;
1157			return PTR_ERR(client);
1158		}
1159		guc->preempt_client = client;
1160	}
1161
1162	return 0;
1163}
1164
1165static void guc_clients_destroy(struct intel_guc *guc)
1166{
1167	struct intel_guc_client *client;
1168
1169	client = fetch_and_zero(&guc->preempt_client);
1170	if (client)
1171		guc_client_free(client);
1172
1173	client = fetch_and_zero(&guc->execbuf_client);
1174	if (client)
1175		guc_client_free(client);
1176}
1177
1178static int __guc_client_enable(struct intel_guc_client *client)
1179{
1180	int ret;
1181
1182	guc_proc_desc_init(client);
1183	guc_stage_desc_init(client);
1184
1185	ret = create_doorbell(client);
1186	if (ret)
1187		goto fail;
1188
1189	return 0;
1190
1191fail:
1192	guc_stage_desc_fini(client);
1193	guc_proc_desc_fini(client);
1194	return ret;
1195}
1196
1197static void __guc_client_disable(struct intel_guc_client *client)
1198{
1199	/*
1200	 * By the time we're here, GuC may have already been reset. if that is
1201	 * the case, instead of trying (in vain) to communicate with it, let's
1202	 * just cleanup the doorbell HW and our internal state.
1203	 */
1204	if (intel_guc_is_loaded(client->guc))
1205		destroy_doorbell(client);
1206	else
1207		__fini_doorbell(client);
1208
1209	guc_stage_desc_fini(client);
1210	guc_proc_desc_fini(client);
1211}
1212
1213static int guc_clients_enable(struct intel_guc *guc)
1214{
1215	int ret;
1216
1217	ret = __guc_client_enable(guc->execbuf_client);
1218	if (ret)
1219		return ret;
1220
1221	if (guc->preempt_client) {
1222		ret = __guc_client_enable(guc->preempt_client);
1223		if (ret) {
1224			__guc_client_disable(guc->execbuf_client);
1225			return ret;
1226		}
1227	}
1228
1229	return 0;
1230}
1231
1232static void guc_clients_disable(struct intel_guc *guc)
1233{
1234	if (guc->preempt_client)
1235		__guc_client_disable(guc->preempt_client);
1236
1237	if (guc->execbuf_client)
1238		__guc_client_disable(guc->execbuf_client);
1239}
1240
1241/*
1242 * Set up the memory resources to be shared with the GuC (via the GGTT)
1243 * at firmware loading time.
1244 */
1245int intel_guc_submission_init(struct intel_guc *guc)
1246{
1247	struct drm_i915_private *dev_priv = guc_to_i915(guc);
1248	struct intel_engine_cs *engine;
1249	enum intel_engine_id id;
1250	int ret;
1251
1252	if (guc->stage_desc_pool)
1253		return 0;
1254
1255	ret = guc_stage_desc_pool_create(guc);
1256	if (ret)
1257		return ret;
1258	/*
1259	 * Keep static analysers happy, let them know that we allocated the
1260	 * vma after testing that it didn't exist earlier.
1261	 */
1262	GEM_BUG_ON(!guc->stage_desc_pool);
1263
1264	WARN_ON(!guc_verify_doorbells(guc));
1265	ret = guc_clients_create(guc);
1266	if (ret)
1267		goto err_pool;
1268
1269	for_each_engine(engine, dev_priv, id) {
1270		guc->preempt_work[id].engine = engine;
1271		INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
1272	}
1273
1274	return 0;
1275
1276err_pool:
1277	guc_stage_desc_pool_destroy(guc);
1278	return ret;
1279}
1280
1281void intel_guc_submission_fini(struct intel_guc *guc)
1282{
1283	struct drm_i915_private *dev_priv = guc_to_i915(guc);
1284	struct intel_engine_cs *engine;
1285	enum intel_engine_id id;
1286
1287	for_each_engine(engine, dev_priv, id)
1288		cancel_work_sync(&guc->preempt_work[id].work);
1289
1290	guc_clients_destroy(guc);
1291	WARN_ON(!guc_verify_doorbells(guc));
1292
1293	if (guc->stage_desc_pool)
1294		guc_stage_desc_pool_destroy(guc);
1295}
1296
1297static void guc_interrupts_capture(struct drm_i915_private *dev_priv)
1298{
1299	struct intel_rps *rps = &dev_priv->gt_pm.rps;
1300	struct intel_engine_cs *engine;
1301	enum intel_engine_id id;
1302	int irqs;
1303
1304	/* tell all command streamers to forward interrupts (but not vblank)
1305	 * to GuC
1306	 */
1307	irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
1308	for_each_engine(engine, dev_priv, id)
1309		ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
1310
1311	/* route USER_INTERRUPT to Host, all others are sent to GuC. */
1312	irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
1313	       GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
1314	/* These three registers have the same bit definitions */
1315	I915_WRITE(GUC_BCS_RCS_IER, ~irqs);
1316	I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs);
1317	I915_WRITE(GUC_WD_VECS_IER, ~irqs);
1318
1319	/*
1320	 * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all
1321	 * (unmasked) PM interrupts to the GuC. All other bits of this
1322	 * register *disable* generation of a specific interrupt.
1323	 *
1324	 * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when
1325	 * writing to the PM interrupt mask register, i.e. interrupts
1326	 * that must not be disabled.
1327	 *
1328	 * If the GuC is handling these interrupts, then we must not let
1329	 * the PM code disable ANY interrupt that the GuC is expecting.
1330	 * So for each ENABLED (0) bit in this register, we must SET the
1331	 * bit in pm_intrmsk_mbz so that it's left enabled for the GuC.
1332	 * GuC needs ARAT expired interrupt unmasked hence it is set in
1333	 * pm_intrmsk_mbz.
1334	 *
1335	 * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will
1336	 * result in the register bit being left SET!
1337	 */
1338	rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
1339	rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
1340}
1341
1342static void guc_interrupts_release(struct drm_i915_private *dev_priv)
1343{
1344	struct intel_rps *rps = &dev_priv->gt_pm.rps;
1345	struct intel_engine_cs *engine;
1346	enum intel_engine_id id;
1347	int irqs;
1348
1349	/*
1350	 * tell all command streamers NOT to forward interrupts or vblank
1351	 * to GuC.
1352	 */
1353	irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
1354	irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
1355	for_each_engine(engine, dev_priv, id)
1356		ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
1357
1358	/* route all GT interrupts to the host */
1359	I915_WRITE(GUC_BCS_RCS_IER, 0);
1360	I915_WRITE(GUC_VCS2_VCS1_IER, 0);
1361	I915_WRITE(GUC_WD_VECS_IER, 0);
1362
1363	rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
1364	rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
1365}
1366
1367static void guc_submission_park(struct intel_engine_cs *engine)
1368{
1369	intel_engine_park(engine);
1370	intel_engine_unpin_breadcrumbs_irq(engine);
1371	engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
1372}
1373
1374static void guc_submission_unpark(struct intel_engine_cs *engine)
1375{
1376	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
1377	intel_engine_pin_breadcrumbs_irq(engine);
1378}
1379
1380static void guc_set_default_submission(struct intel_engine_cs *engine)
1381{
1382	/*
1383	 * We inherit a bunch of functions from execlists that we'd like
1384	 * to keep using:
1385	 *
1386	 *    engine->submit_request = execlists_submit_request;
1387	 *    engine->cancel_requests = execlists_cancel_requests;
1388	 *    engine->schedule = execlists_schedule;
1389	 *
1390	 * But we need to override the actual submission backend in order
1391	 * to talk to the GuC.
1392	 */
1393	intel_execlists_set_default_submission(engine);
1394
1395	engine->execlists.tasklet.func = guc_submission_tasklet;
1396
1397	engine->park = guc_submission_park;
1398	engine->unpark = guc_submission_unpark;
1399
1400	engine->reset.prepare = guc_reset_prepare;
1401	engine->reset.reset = guc_reset;
1402	engine->reset.finish = guc_reset_finish;
1403
1404	engine->cancel_requests = guc_cancel_requests;
1405
1406	engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
1407}
1408
1409int intel_guc_submission_enable(struct intel_guc *guc)
1410{
1411	struct drm_i915_private *dev_priv = guc_to_i915(guc);
1412	struct intel_engine_cs *engine;
1413	enum intel_engine_id id;
1414	int err;
1415
1416	/*
1417	 * We're using GuC work items for submitting work through GuC. Since
1418	 * we're coalescing multiple requests from a single context into a
1419	 * single work item prior to assigning it to execlist_port, we can
1420	 * never have more work items than the total number of ports (for all
1421	 * engines). The GuC firmware is controlling the HEAD of work queue,
1422	 * and it is guaranteed that it will remove the work item from the
1423	 * queue before our request is completed.
1424	 */
1425	BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) *
1426		     sizeof(struct guc_wq_item) *
1427		     I915_NUM_ENGINES > GUC_WQ_SIZE);
1428
1429	GEM_BUG_ON(!guc->execbuf_client);
1430
1431	err = guc_clients_enable(guc);
1432	if (err)
1433		return err;
1434
1435	/* Take over from manual control of ELSP (execlists) */
1436	guc_interrupts_capture(dev_priv);
1437
1438	for_each_engine(engine, dev_priv, id) {
1439		engine->set_default_submission = guc_set_default_submission;
1440		engine->set_default_submission(engine);
1441	}
1442
1443	return 0;
1444}
1445
1446void intel_guc_submission_disable(struct intel_guc *guc)
1447{
1448	struct drm_i915_private *dev_priv = guc_to_i915(guc);
1449
1450	GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */
1451
1452	guc_interrupts_release(dev_priv);
1453	guc_clients_disable(guc);
1454}
1455
1456#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1457#include "selftests/intel_guc.c"
1458#endif
Configure Feed

Configure Feed