Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-intel-testing-2017-04-03' of git://anongit.freedesktop.org/git/drm-intel into drm-next

Last 4.12 feature pile:

GVT updates:
- Add mdev attribute group for per-vgpu info
- Time slice based vGPU scheduling QoS support (Gao Ping)
- Initial KBL support for E3 server (Han Xu)
- other misc.

i915:
- lots and lots of small fixes and improvements all over
- refactor fw_domain code (Chris Wilson)
- improve guc code (Oscar Mateo)
- refactor cursor/sprite code, precompute more for less overhead in
the critical path (Ville)
- refactor guc/huc fw loading code a bit (Michal Wajdeczko)

* tag 'drm-intel-testing-2017-04-03' of git://anongit.freedesktop.org/git/drm-intel: (121 commits)
drm/i915: Update DRIVER_DATE to 20170403
drm/i915: Clear gt.active_requests before checking idle status
drm/i915/uc: Drop use of MISSING_CASE on trivial enums
drm/i915: make a few DDI functions static
drm/i915: Combine reset_all_global_seqno() loops into one
drm/i915: Remove redudant wait for each engine to idle from seqno wrap
drm/i915: Wait for all engines to be idle as part of i915_gem_wait_for_idle()
drm/i915: Move retire-requests into i915_gem_wait_for_idle()
drm/i915/uc: Move fw path check to fetch_uc_fw()
drm/i915/huc: Remove unused intel_huc_fini()
drm/i915/uc: Add intel_uc_fw_fini()
drm/i915/uc: Add intel_uc_fw_type_repr()
drm/i915/uc: Move intel_uc_fw_status_repr() to intel_uc.h
drivers: gpu: drm: i915: intel_lpe_audio: Fix kerneldoc comments
drm/i915: Suppress busy status for engines if wedged
drm/i915: Do request retirement before marking engines as wedged
drm/i915: Drop verbose and archaic "ring" from our internal engine names
drm/i915: Use a dummy timeline name for a signaled fence
drm/i915: Ironlake do_idle_maps w/a may be called w/o struct_mutex
drm/i915/guc: Take enable_guc_loading check out of GEM core code
...

+2633 -2029
+28 -14
drivers/gpu/drm/i915/gvt/cmd_parser.c
··· 1215 1215 if (!info->async_flip) 1216 1216 return 0; 1217 1217 1218 - if (IS_SKYLAKE(dev_priv)) { 1218 + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 1219 1219 stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0); 1220 1220 tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & 1221 1221 GENMASK(12, 10)) >> 10; ··· 1243 1243 1244 1244 set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12), 1245 1245 info->surf_val << 12); 1246 - if (IS_SKYLAKE(dev_priv)) { 1246 + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 1247 1247 set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0), 1248 1248 info->stride_val); 1249 1249 set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10), ··· 1267 1267 1268 1268 if (IS_BROADWELL(dev_priv)) 1269 1269 return gen8_decode_mi_display_flip(s, info); 1270 - if (IS_SKYLAKE(dev_priv)) 1270 + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) 1271 1271 return skl_decode_mi_display_flip(s, info); 1272 1272 1273 1273 return -ENODEV; ··· 1278 1278 { 1279 1279 struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; 1280 1280 1281 - if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv)) 1281 + if (IS_BROADWELL(dev_priv) 1282 + || IS_SKYLAKE(dev_priv) 1283 + || IS_KABYLAKE(dev_priv)) 1282 1284 return gen8_check_mi_display_flip(s, info); 1283 1285 return -ENODEV; 1284 1286 } ··· 1291 1289 { 1292 1290 struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; 1293 1291 1294 - if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv)) 1292 + if (IS_BROADWELL(dev_priv) 1293 + || IS_SKYLAKE(dev_priv) 1294 + || IS_KABYLAKE(dev_priv)) 1295 1295 return gen8_update_plane_mmio_from_mi_display_flip(s, info); 1296 1296 return -ENODEV; 1297 1297 } ··· 1573 1569 { 1574 1570 struct intel_gvt *gvt = s->vgpu->gvt; 1575 1571 1576 - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { 1572 + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) 1573 + || IS_KABYLAKE(gvt->dev_priv)) { 1577 1574 /* BDW decides 
privilege based on address space */ 1578 1575 if (cmd_val(s, 0) & (1 << 8)) 1579 1576 return 0; ··· 2609 2604 unsigned long gma_head, gma_tail, gma_bottom, ring_size, ring_tail; 2610 2605 struct parser_exec_state s; 2611 2606 int ret = 0; 2607 + struct intel_vgpu_workload *workload = container_of(wa_ctx, 2608 + struct intel_vgpu_workload, 2609 + wa_ctx); 2612 2610 2613 2611 /* ring base is page aligned */ 2614 2612 if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE))) ··· 2626 2618 2627 2619 s.buf_type = RING_BUFFER_INSTRUCTION; 2628 2620 s.buf_addr_type = GTT_BUFFER; 2629 - s.vgpu = wa_ctx->workload->vgpu; 2630 - s.ring_id = wa_ctx->workload->ring_id; 2621 + s.vgpu = workload->vgpu; 2622 + s.ring_id = workload->ring_id; 2631 2623 s.ring_start = wa_ctx->indirect_ctx.guest_gma; 2632 2624 s.ring_size = ring_size; 2633 2625 s.ring_head = gma_head; 2634 2626 s.ring_tail = gma_tail; 2635 2627 s.rb_va = wa_ctx->indirect_ctx.shadow_va; 2636 - s.workload = wa_ctx->workload; 2628 + s.workload = workload; 2637 2629 2638 2630 ret = ip_gma_set(&s, gma_head); 2639 2631 if (ret) ··· 2716 2708 { 2717 2709 int ctx_size = wa_ctx->indirect_ctx.size; 2718 2710 unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma; 2719 - struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; 2711 + struct intel_vgpu_workload *workload = container_of(wa_ctx, 2712 + struct intel_vgpu_workload, 2713 + wa_ctx); 2714 + struct intel_vgpu *vgpu = workload->vgpu; 2720 2715 struct drm_i915_gem_object *obj; 2721 2716 int ret = 0; 2722 2717 void *map; 2723 2718 2724 - obj = i915_gem_object_create(wa_ctx->workload->vgpu->gvt->dev_priv, 2719 + obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv, 2725 2720 roundup(ctx_size + CACHELINE_BYTES, 2726 2721 PAGE_SIZE)); 2727 2722 if (IS_ERR(obj)) ··· 2744 2733 goto unmap_src; 2745 2734 } 2746 2735 2747 - ret = copy_gma_to_hva(wa_ctx->workload->vgpu, 2748 - wa_ctx->workload->vgpu->gtt.ggtt_mm, 2736 + ret = copy_gma_to_hva(workload->vgpu, 2737 + 
workload->vgpu->gtt.ggtt_mm, 2749 2738 guest_gma, guest_gma + ctx_size, 2750 2739 map); 2751 2740 if (ret < 0) { ··· 2783 2772 int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) 2784 2773 { 2785 2774 int ret; 2786 - struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; 2775 + struct intel_vgpu_workload *workload = container_of(wa_ctx, 2776 + struct intel_vgpu_workload, 2777 + wa_ctx); 2778 + struct intel_vgpu *vgpu = workload->vgpu; 2787 2779 2788 2780 if (wa_ctx->indirect_ctx.size == 0) 2789 2781 return 0;
+17 -5
drivers/gpu/drm/i915/gvt/display.c
··· 161 161 162 162 #define DPCD_HEADER_SIZE 0xb 163 163 164 + /* let the virtual display supports DP1.2 */ 164 165 static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { 165 - 0x11, 0x0a, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 166 + 0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 166 167 }; 167 168 168 169 static void emulate_monitor_status_change(struct intel_vgpu *vgpu) ··· 173 172 SDE_PORTC_HOTPLUG_CPT | 174 173 SDE_PORTD_HOTPLUG_CPT); 175 174 176 - if (IS_SKYLAKE(dev_priv)) 175 + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 177 176 vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | 178 177 SDE_PORTE_HOTPLUG_SPT); 178 + vgpu_vreg(vgpu, SKL_FUSE_STATUS) |= 179 + SKL_FUSE_DOWNLOAD_STATUS | 180 + SKL_FUSE_PG0_DIST_STATUS | 181 + SKL_FUSE_PG1_DIST_STATUS | 182 + SKL_FUSE_PG2_DIST_STATUS; 183 + vgpu_vreg(vgpu, LCPLL1_CTL) |= 184 + LCPLL_PLL_ENABLE | 185 + LCPLL_PLL_LOCK; 186 + vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; 187 + 188 + } 179 189 180 190 if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { 181 191 vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; ··· 203 191 vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; 204 192 } 205 193 206 - if (IS_SKYLAKE(dev_priv) && 194 + if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && 207 195 intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { 208 196 vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; 209 197 } ··· 365 353 { 366 354 struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; 367 355 368 - if (IS_SKYLAKE(dev_priv)) 356 + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) 369 357 clean_virtual_dp_monitor(vgpu, PORT_D); 370 358 else 371 359 clean_virtual_dp_monitor(vgpu, PORT_B); ··· 387 375 388 376 intel_vgpu_init_i2c_edid(vgpu); 389 377 390 - if (IS_SKYLAKE(dev_priv)) 378 + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) 391 379 return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, 392 380 resolution); 393 381 else
+5 -4
drivers/gpu/drm/i915/gvt/execlist.c
··· 394 394 395 395 static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) 396 396 { 397 - int ring_id = wa_ctx->workload->ring_id; 398 - struct i915_gem_context *shadow_ctx = 399 - wa_ctx->workload->vgpu->shadow_ctx; 397 + struct intel_vgpu_workload *workload = container_of(wa_ctx, 398 + struct intel_vgpu_workload, 399 + wa_ctx); 400 + int ring_id = workload->ring_id; 401 + struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; 400 402 struct drm_i915_gem_object *ctx_obj = 401 403 shadow_ctx->engine[ring_id].state->obj; 402 404 struct execlist_ring_context *shadow_ring_context; ··· 682 680 CACHELINE_BYTES; 683 681 workload->wa_ctx.per_ctx.guest_gma = 684 682 per_ctx & PER_CTX_ADDR_MASK; 685 - workload->wa_ctx.workload = workload; 686 683 687 684 WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1)); 688 685 }
+2 -1
drivers/gpu/drm/i915/gvt/gtt.c
··· 2220 2220 2221 2221 gvt_dbg_core("init gtt\n"); 2222 2222 2223 - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { 2223 + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) 2224 + || IS_KABYLAKE(gvt->dev_priv)) { 2224 2225 gvt->gtt.pte_ops = &gen8_gtt_pte_ops; 2225 2226 gvt->gtt.gma_ops = &gen8_gtt_gma_ops; 2226 2227 gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
+18 -1
drivers/gpu/drm/i915/gvt/gvt.c
··· 106 106 struct intel_gvt_device_info *info = &gvt->device_info; 107 107 struct pci_dev *pdev = gvt->dev_priv->drm.pdev; 108 108 109 - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { 109 + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) 110 + || IS_KABYLAKE(gvt->dev_priv)) { 110 111 info->max_support_vgpus = 8; 111 112 info->cfg_space_size = 256; 112 113 info->mmio_size = 2 * 1024 * 1024; ··· 143 142 mutex_lock(&gvt->lock); 144 143 intel_gvt_emulate_vblank(gvt); 145 144 mutex_unlock(&gvt->lock); 145 + } 146 + 147 + if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, 148 + (void *)&gvt->service_request)) { 149 + intel_gvt_schedule(gvt); 146 150 } 147 151 } 148 152 ··· 202 196 203 197 idr_destroy(&gvt->vgpu_idr); 204 198 199 + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); 200 + 205 201 kfree(dev_priv->gvt); 206 202 dev_priv->gvt = NULL; 207 203 } ··· 222 214 int intel_gvt_init_device(struct drm_i915_private *dev_priv) 223 215 { 224 216 struct intel_gvt *gvt; 217 + struct intel_vgpu *vgpu; 225 218 int ret; 226 219 227 220 /* ··· 294 285 gvt_err("failed to register gvt-g host device: %d\n", ret); 295 286 goto out_clean_types; 296 287 } 288 + 289 + vgpu = intel_gvt_create_idle_vgpu(gvt); 290 + if (IS_ERR(vgpu)) { 291 + ret = PTR_ERR(vgpu); 292 + gvt_err("failed to create idle vgpu\n"); 293 + goto out_clean_types; 294 + } 295 + gvt->idle_vgpu = vgpu; 297 296 298 297 gvt_dbg_core("gvt device initialization is done\n"); 299 298 dev_priv->gvt = gvt;
+13
drivers/gpu/drm/i915/gvt/gvt.h
··· 138 138 struct intel_vgpu_sbi sbi; 139 139 }; 140 140 141 + struct vgpu_sched_ctl { 142 + int weight; 143 + }; 144 + 141 145 struct intel_vgpu { 142 146 struct intel_gvt *gvt; 143 147 int id; ··· 151 147 bool failsafe; 152 148 bool resetting; 153 149 void *sched_data; 150 + struct vgpu_sched_ctl sched_ctl; 154 151 155 152 struct intel_vgpu_fence fence; 156 153 struct intel_vgpu_gm gm; ··· 165 160 struct list_head workload_q_head[I915_NUM_ENGINES]; 166 161 struct kmem_cache *workloads; 167 162 atomic_t running_workload_num; 163 + ktime_t last_ctx_submit_time; 168 164 DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); 169 165 struct i915_gem_context *shadow_ctx; 170 166 ··· 221 215 unsigned int low_gm_size; 222 216 unsigned int high_gm_size; 223 217 unsigned int fence; 218 + unsigned int weight; 224 219 enum intel_vgpu_edid resolution; 225 220 }; 226 221 ··· 243 236 DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); 244 237 struct intel_vgpu_type *types; 245 238 unsigned int num_types; 239 + struct intel_vgpu *idle_vgpu; 246 240 247 241 struct task_struct *service_thread; 248 242 wait_queue_head_t service_thread_wq; ··· 257 249 258 250 enum { 259 251 INTEL_GVT_REQUEST_EMULATE_VBLANK = 0, 252 + INTEL_GVT_REQUEST_SCHED = 1, 260 253 }; 261 254 262 255 static inline void intel_gvt_request_service(struct intel_gvt *gvt, ··· 331 322 __u64 resolution; 332 323 __s32 primary; 333 324 __u64 vgpu_id; 325 + 326 + __u32 weight; 334 327 }; 335 328 336 329 int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu, ··· 387 376 int intel_gvt_init_vgpu_types(struct intel_gvt *gvt); 388 377 void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt); 389 378 379 + struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt); 380 + void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu); 390 381 struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, 391 382 struct intel_vgpu_type *type); 392 383 void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
+191 -170
drivers/gpu/drm/i915/gvt/handlers.c
··· 68 68 return D_BDW; 69 69 else if (IS_SKYLAKE(gvt->dev_priv)) 70 70 return D_SKL; 71 + else if (IS_KABYLAKE(gvt->dev_priv)) 72 + return D_KBL; 71 73 72 74 return 0; 73 75 } ··· 236 234 old = vgpu_vreg(vgpu, offset); 237 235 new = CALC_MODE_MASK_REG(old, *(u32 *)p_data); 238 236 239 - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { 237 + if (IS_SKYLAKE(vgpu->gvt->dev_priv) 238 + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { 240 239 switch (offset) { 241 240 case FORCEWAKE_RENDER_GEN9_REG: 242 241 ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG; ··· 826 823 write_vreg(vgpu, offset, p_data, bytes); 827 824 data = vgpu_vreg(vgpu, offset); 828 825 829 - if (IS_SKYLAKE(vgpu->gvt->dev_priv) && 830 - offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { 826 + if ((IS_SKYLAKE(vgpu->gvt->dev_priv) 827 + || IS_KABYLAKE(vgpu->gvt->dev_priv)) 828 + && offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { 831 829 /* SKL DPB/C/D aux ctl register changed */ 832 830 return 0; 833 831 } else if (IS_BROADWELL(vgpu->gvt->dev_priv) && ··· 1307 1303 1308 1304 switch (cmd) { 1309 1305 case GEN9_PCODE_READ_MEM_LATENCY: 1310 - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { 1306 + if (IS_SKYLAKE(vgpu->gvt->dev_priv) 1307 + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { 1311 1308 /** 1312 1309 * "Read memory latency" command on gen9. 
1313 1310 * Below memory latency values are read ··· 1321 1316 } 1322 1317 break; 1323 1318 case SKL_PCODE_CDCLK_CONTROL: 1324 - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) 1319 + if (IS_SKYLAKE(vgpu->gvt->dev_priv) 1320 + || IS_KABYLAKE(vgpu->gvt->dev_priv)) 1325 1321 *data0 = SKL_CDCLK_READY_FOR_CHANGE; 1326 1322 break; 1327 1323 case GEN6_PCODE_READ_RC6VIDS: ··· 1416 1410 1417 1411 execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data; 1418 1412 if (execlist->elsp_dwords.index == 3) { 1413 + vgpu->last_ctx_submit_time = ktime_get(); 1419 1414 ret = intel_vgpu_submit_execlist(vgpu, ring_id); 1420 1415 if(ret) 1421 1416 gvt_vgpu_err("fail submit workload on ring %d\n", ··· 2591 2584 MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); 2592 2585 MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); 2593 2586 2594 - MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); 2595 - MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); 2596 - MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); 2587 + MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, 2588 + dp_aux_ch_ctl_mmio_write); 2589 + MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, 2590 + dp_aux_ch_ctl_mmio_write); 2591 + MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, 2592 + dp_aux_ch_ctl_mmio_write); 2597 2593 2598 - MMIO_D(HSW_PWR_WELL_BIOS, D_SKL); 2599 - MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write); 2594 + MMIO_D(HSW_PWR_WELL_BIOS, D_SKL_PLUS); 2595 + MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL_PLUS, NULL, 2596 + skl_power_well_ctl_write); 2597 + MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL_PLUS, NULL, mailbox_write); 2600 2598 2601 2599 MMIO_D(0xa210, D_SKL_PLUS); 2602 2600 MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); 2603 2601 MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); 2604 2602 MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); 2605 - 
MMIO_DH(0x4ddc, D_SKL, NULL, skl_misc_ctl_write); 2606 - MMIO_DH(0x42080, D_SKL, NULL, skl_misc_ctl_write); 2607 - MMIO_D(0x45504, D_SKL); 2608 - MMIO_D(0x45520, D_SKL); 2609 - MMIO_D(0x46000, D_SKL); 2610 - MMIO_DH(0x46010, D_SKL, NULL, skl_lcpll_write); 2611 - MMIO_DH(0x46014, D_SKL, NULL, skl_lcpll_write); 2612 - MMIO_D(0x6C040, D_SKL); 2613 - MMIO_D(0x6C048, D_SKL); 2614 - MMIO_D(0x6C050, D_SKL); 2615 - MMIO_D(0x6C044, D_SKL); 2616 - MMIO_D(0x6C04C, D_SKL); 2617 - MMIO_D(0x6C054, D_SKL); 2618 - MMIO_D(0x6c058, D_SKL); 2619 - MMIO_D(0x6c05c, D_SKL); 2620 - MMIO_DH(0X6c060, D_SKL, dpll_status_read, NULL); 2603 + MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); 2604 + MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); 2605 + MMIO_D(0x45504, D_SKL_PLUS); 2606 + MMIO_D(0x45520, D_SKL_PLUS); 2607 + MMIO_D(0x46000, D_SKL_PLUS); 2608 + MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write); 2609 + MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write); 2610 + MMIO_D(0x6C040, D_SKL | D_KBL); 2611 + MMIO_D(0x6C048, D_SKL | D_KBL); 2612 + MMIO_D(0x6C050, D_SKL | D_KBL); 2613 + MMIO_D(0x6C044, D_SKL | D_KBL); 2614 + MMIO_D(0x6C04C, D_SKL | D_KBL); 2615 + MMIO_D(0x6C054, D_SKL | D_KBL); 2616 + MMIO_D(0x6c058, D_SKL | D_KBL); 2617 + MMIO_D(0x6c05c, D_SKL | D_KBL); 2618 + MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL); 2621 2619 2622 - MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL, NULL, pf_write); 2623 - MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL, NULL, pf_write); 2624 - MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL, NULL, pf_write); 2625 - MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL, NULL, pf_write); 2626 - MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL, NULL, pf_write); 2627 - MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL, NULL, pf_write); 2620 + MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); 2621 + MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); 2622 + MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); 2623 + 
MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); 2624 + MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); 2625 + MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); 2628 2626 2629 - MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL, NULL, pf_write); 2630 - MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL, NULL, pf_write); 2631 - MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL, NULL, pf_write); 2632 - MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL, NULL, pf_write); 2633 - MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL, NULL, pf_write); 2634 - MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL, NULL, pf_write); 2627 + MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); 2628 + MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); 2629 + MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); 2630 + MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); 2631 + MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); 2632 + MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); 2635 2633 2636 - MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL, NULL, pf_write); 2637 - MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL, NULL, pf_write); 2638 - MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL, NULL, pf_write); 2639 - MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL, NULL, pf_write); 2640 - MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL, NULL, pf_write); 2641 - MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL, NULL, pf_write); 2634 + MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); 2635 + MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); 2636 + MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); 2637 + MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); 2638 + MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); 2639 + MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); 2642 2640 2643 - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL); 2644 - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL); 2645 - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), 
D_SKL, NULL, NULL); 2646 - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL); 2641 + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); 2642 + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); 2643 + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); 2644 + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); 2647 2645 2648 - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL); 2649 - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL); 2650 - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL); 2651 - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL); 2646 + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); 2647 + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); 2648 + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); 2649 + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); 2652 2650 2653 - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL); 2654 - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL); 2655 - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL); 2656 - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL); 2651 + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); 2652 + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); 2653 + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); 2654 + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); 2657 2655 2658 - MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL, NULL, NULL); 2659 - MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL, NULL, NULL); 2660 - MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL, NULL, NULL); 2656 + MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL_PLUS, NULL, NULL); 2657 + MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL_PLUS, NULL, NULL); 2658 + MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL_PLUS, NULL, NULL); 2661 2659 2662 - MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2663 - MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2664 - MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2660 + 
MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2661 + MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2662 + MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2665 2663 2666 - MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2667 - MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2668 - MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2664 + MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2665 + MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2666 + MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2669 2667 2670 - MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2671 - MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2672 - MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2668 + MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2669 + MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2670 + MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2673 2671 2674 - MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2675 - MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2676 - MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); 2672 + MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2673 + MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2674 + MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2677 2675 2678 - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL, NULL, NULL); 2679 - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL, NULL, NULL); 2680 - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL, NULL, NULL); 2676 + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); 2677 + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); 2678 + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL_PLUS, NULL, 
NULL); 2681 2679 2682 - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL, NULL, NULL); 2683 - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL, NULL, NULL); 2684 - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL, NULL, NULL); 2680 + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); 2681 + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); 2682 + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); 2685 2683 2686 - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL, NULL, NULL); 2687 - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL, NULL, NULL); 2688 - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL, NULL, NULL); 2684 + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); 2685 + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); 2686 + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); 2689 2687 2690 - MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL, NULL, NULL); 2691 - MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL, NULL, NULL); 2692 - MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL, NULL, NULL); 2688 + MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL_PLUS, NULL, NULL); 2689 + MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL_PLUS, NULL, NULL); 2690 + MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL_PLUS, NULL, NULL); 2693 2691 2694 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL); 2695 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL); 2696 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL); 2697 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL); 2692 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); 2693 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); 2694 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); 2695 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); 2698 2696 2699 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL); 2700 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL); 2701 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL); 2702 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL); 
2697 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); 2698 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); 2699 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); 2700 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); 2703 2701 2704 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL); 2705 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL); 2706 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL); 2707 - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL); 2702 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); 2703 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); 2704 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); 2705 + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); 2708 2706 2709 - MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL, NULL, NULL); 2710 - MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL, NULL, NULL); 2711 - MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL, NULL, NULL); 2712 - MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL, NULL, NULL); 2707 + MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); 2708 + MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); 2709 + MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); 2710 + MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); 2713 2711 2714 - MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL, NULL, NULL); 2715 - MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL, NULL, NULL); 2716 - MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL, NULL, NULL); 2717 - MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL, NULL, NULL); 2712 + MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); 2713 + MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); 2714 + MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); 2715 + MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); 2718 2716 2719 - MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL, NULL, NULL); 2720 - MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL, NULL, NULL); 2721 - MMIO_DH(_REG_701C0(PIPE_C, 3), 
D_SKL, NULL, NULL); 2722 - MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL, NULL, NULL); 2717 + MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); 2718 + MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); 2719 + MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); 2720 + MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); 2723 2721 2724 - MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL, NULL, NULL); 2725 - MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL, NULL, NULL); 2726 - MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL, NULL, NULL); 2727 - MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL, NULL, NULL); 2722 + MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); 2723 + MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); 2724 + MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); 2725 + MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); 2728 2726 2729 - MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL, NULL, NULL); 2730 - MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL, NULL, NULL); 2731 - MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL, NULL, NULL); 2732 - MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL, NULL, NULL); 2727 + MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); 2728 + MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); 2729 + MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); 2730 + MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); 2733 2731 2734 - MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL, NULL, NULL); 2735 - MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL, NULL, NULL); 2736 - MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL, NULL, NULL); 2737 - MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL, NULL, NULL); 2732 + MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); 2733 + MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); 2734 + MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); 2735 + MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); 2738 2736 2739 - MMIO_D(0x70380, D_SKL); 2740 - MMIO_D(0x71380, D_SKL); 2741 - MMIO_D(0x72380, D_SKL); 2742 - MMIO_D(0x7039c, D_SKL); 2737 + MMIO_D(0x70380, D_SKL_PLUS); 2738 + 
MMIO_D(0x71380, D_SKL_PLUS); 2739 + MMIO_D(0x72380, D_SKL_PLUS); 2740 + MMIO_D(0x7039c, D_SKL_PLUS); 2743 2741 2744 - MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL, NULL, NULL); 2745 - MMIO_D(0x8f074, D_SKL); 2746 - MMIO_D(0x8f004, D_SKL); 2747 - MMIO_D(0x8f034, D_SKL); 2742 + MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL_PLUS, NULL, NULL); 2743 + MMIO_D(0x8f074, D_SKL | D_KBL); 2744 + MMIO_D(0x8f004, D_SKL | D_KBL); 2745 + MMIO_D(0x8f034, D_SKL | D_KBL); 2748 2746 2749 - MMIO_D(0xb11c, D_SKL); 2747 + MMIO_D(0xb11c, D_SKL | D_KBL); 2750 2748 2751 - MMIO_D(0x51000, D_SKL); 2752 - MMIO_D(0x6c00c, D_SKL); 2749 + MMIO_D(0x51000, D_SKL | D_KBL); 2750 + MMIO_D(0x6c00c, D_SKL_PLUS); 2753 2751 2754 - MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); 2755 - MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); 2752 + MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); 2753 + MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); 2756 2754 2757 - MMIO_D(0xd08, D_SKL); 2758 - MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL); 2759 - MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); 2755 + MMIO_D(0xd08, D_SKL_PLUS); 2756 + MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL); 2757 + MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); 2760 2758 2761 2759 /* TRTT */ 2762 - MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL); 2763 - MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL); 2764 - MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL); 2765 - MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL); 2766 - MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL); 2767 - MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write); 2768 - MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write); 2760 + MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); 2761 + MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); 2762 + MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); 2763 + MMIO_DFH(0x4dec, D_SKL | 
D_KBL, F_CMD_ACCESS, NULL, NULL); 2764 + MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); 2765 + MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); 2766 + MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); 2769 2767 2770 - MMIO_D(0x45008, D_SKL); 2768 + MMIO_D(0x45008, D_SKL | D_KBL); 2771 2769 2772 - MMIO_D(0x46430, D_SKL); 2770 + MMIO_D(0x46430, D_SKL | D_KBL); 2773 2771 2774 - MMIO_D(0x46520, D_SKL); 2772 + MMIO_D(0x46520, D_SKL | D_KBL); 2775 2773 2776 - MMIO_D(0xc403c, D_SKL); 2777 - MMIO_D(0xb004, D_SKL); 2774 + MMIO_D(0xc403c, D_SKL | D_KBL); 2775 + MMIO_D(0xb004, D_SKL_PLUS); 2778 2776 MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write); 2779 2777 2780 - MMIO_D(0x65900, D_SKL); 2781 - MMIO_D(0x1082c0, D_SKL); 2782 - MMIO_D(0x4068, D_SKL); 2783 - MMIO_D(0x67054, D_SKL); 2784 - MMIO_D(0x6e560, D_SKL); 2785 - MMIO_D(0x6e554, D_SKL); 2786 - MMIO_D(0x2b20, D_SKL); 2787 - MMIO_D(0x65f00, D_SKL); 2788 - MMIO_D(0x65f08, D_SKL); 2789 - MMIO_D(0x320f0, D_SKL); 2778 + MMIO_D(0x65900, D_SKL_PLUS); 2779 + MMIO_D(0x1082c0, D_SKL | D_KBL); 2780 + MMIO_D(0x4068, D_SKL | D_KBL); 2781 + MMIO_D(0x67054, D_SKL | D_KBL); 2782 + MMIO_D(0x6e560, D_SKL | D_KBL); 2783 + MMIO_D(0x6e554, D_SKL | D_KBL); 2784 + MMIO_D(0x2b20, D_SKL | D_KBL); 2785 + MMIO_D(0x65f00, D_SKL | D_KBL); 2786 + MMIO_D(0x65f08, D_SKL | D_KBL); 2787 + MMIO_D(0x320f0, D_SKL | D_KBL); 2790 2788 2791 - MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL); 2792 - MMIO_D(0x70034, D_SKL); 2793 - MMIO_D(0x71034, D_SKL); 2794 - MMIO_D(0x72034, D_SKL); 2789 + MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); 2790 + MMIO_DFH(_REG_VECS_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); 2791 + MMIO_D(0x70034, D_SKL_PLUS); 2792 + MMIO_D(0x71034, D_SKL_PLUS); 2793 + MMIO_D(0x72034, D_SKL_PLUS); 2795 2794 2796 - MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL); 2797 - MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL); 2798 - MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL); 2799 - 
MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL); 2800 - MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL); 2801 - MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); 2795 + MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS); 2796 + MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS); 2797 + MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS); 2798 + MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS); 2799 + MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS); 2800 + MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS); 2802 2801 2803 - MMIO_D(0x44500, D_SKL); 2802 + MMIO_D(0x44500, D_SKL_PLUS); 2804 2803 MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); 2805 - MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS, 2804 + MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS, 2806 2805 NULL, NULL); 2806 + 2807 + MMIO_D(0x4ab8, D_KBL); 2808 + MMIO_D(0x940c, D_SKL_PLUS); 2809 + MMIO_D(0x2248, D_SKL_PLUS | D_KBL); 2810 + MMIO_D(0x4ab0, D_SKL | D_KBL); 2811 + MMIO_D(0x20d4, D_SKL | D_KBL); 2812 + 2807 2813 return 0; 2808 2814 } 2809 2815 ··· 2893 2873 ret = init_broadwell_mmio_info(gvt); 2894 2874 if (ret) 2895 2875 goto err; 2896 - } else if (IS_SKYLAKE(dev_priv)) { 2876 + } else if (IS_SKYLAKE(dev_priv) 2877 + || IS_KABYLAKE(dev_priv)) { 2897 2878 ret = init_broadwell_mmio_info(gvt); 2898 2879 if (ret) 2899 2880 goto err;
+3 -2
drivers/gpu/drm/i915/gvt/interrupt.c
··· 580 580 581 581 SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); 582 582 SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); 583 - } else if (IS_SKYLAKE(gvt->dev_priv)) { 583 + } else if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { 584 584 SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT); 585 585 SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT); 586 586 SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT); ··· 690 690 691 691 gvt_dbg_core("init irq framework\n"); 692 692 693 - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { 693 + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) 694 + || IS_KABYLAKE(gvt->dev_priv)) { 694 695 irq->ops = &gen8_irq_ops; 695 696 irq->irq_map = gen8_irq_map; 696 697 } else {
+36 -9
drivers/gpu/drm/i915/gvt/kvmgt.c
··· 295 295 return 0; 296 296 297 297 return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" 298 - "fence: %d\nresolution: %s\n", 298 + "fence: %d\nresolution: %s\n" 299 + "weight: %d\n", 299 300 BYTES_TO_MB(type->low_gm_size), 300 301 BYTES_TO_MB(type->high_gm_size), 301 - type->fence, vgpu_edid_str(type->resolution)); 302 + type->fence, vgpu_edid_str(type->resolution), 303 + type->weight); 302 304 } 303 305 304 306 static MDEV_TYPE_ATTR_RO(available_instances); ··· 1148 1146 return 0; 1149 1147 } 1150 1148 1149 + static ssize_t 1150 + vgpu_id_show(struct device *dev, struct device_attribute *attr, 1151 + char *buf) 1152 + { 1153 + struct mdev_device *mdev = mdev_from_dev(dev); 1154 + 1155 + if (mdev) { 1156 + struct intel_vgpu *vgpu = (struct intel_vgpu *) 1157 + mdev_get_drvdata(mdev); 1158 + return sprintf(buf, "%d\n", vgpu->id); 1159 + } 1160 + return sprintf(buf, "\n"); 1161 + } 1162 + 1163 + static DEVICE_ATTR_RO(vgpu_id); 1164 + 1165 + static struct attribute *intel_vgpu_attrs[] = { 1166 + &dev_attr_vgpu_id.attr, 1167 + NULL 1168 + }; 1169 + 1170 + static const struct attribute_group intel_vgpu_group = { 1171 + .name = "intel_vgpu", 1172 + .attrs = intel_vgpu_attrs, 1173 + }; 1174 + 1175 + static const struct attribute_group *intel_vgpu_groups[] = { 1176 + &intel_vgpu_group, 1177 + NULL, 1178 + }; 1179 + 1151 1180 static const struct mdev_parent_ops intel_vgpu_ops = { 1152 1181 .supported_type_groups = intel_vgpu_type_groups, 1182 + .mdev_attr_groups = intel_vgpu_groups, 1153 1183 .create = intel_vgpu_create, 1154 1184 .remove = intel_vgpu_remove, 1155 1185 ··· 1373 1339 1374 1340 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) 1375 1341 { 1376 - struct intel_vgpu *vgpu = info->vgpu; 1377 - 1378 - if (!info) { 1379 - gvt_vgpu_err("kvmgt_guest_info invalid\n"); 1380 - return false; 1381 - } 1382 - 1383 1342 kvm_page_track_unregister_notifier(info->kvm, &info->track_node); 1384 1343 kvmgt_protect_table_destroy(info); 1385 1344 
gvt_cache_destroy(info->vgpu);
+10 -9
drivers/gpu/drm/i915/gvt/mmio.h
··· 44 44 #define D_HSW (1 << 2) 45 45 #define D_BDW (1 << 3) 46 46 #define D_SKL (1 << 4) 47 + #define D_KBL (1 << 5) 47 48 48 - #define D_GEN9PLUS (D_SKL) 49 - #define D_GEN8PLUS (D_BDW | D_SKL) 50 - #define D_GEN75PLUS (D_HSW | D_BDW | D_SKL) 51 - #define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL) 49 + #define D_GEN9PLUS (D_SKL | D_KBL) 50 + #define D_GEN8PLUS (D_BDW | D_SKL | D_KBL) 51 + #define D_GEN75PLUS (D_HSW | D_BDW | D_SKL | D_KBL) 52 + #define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) 52 53 53 - #define D_SKL_PLUS (D_SKL) 54 - #define D_BDW_PLUS (D_BDW | D_SKL) 55 - #define D_HSW_PLUS (D_HSW | D_BDW | D_SKL) 56 - #define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL) 54 + #define D_SKL_PLUS (D_SKL | D_KBL) 55 + #define D_BDW_PLUS (D_BDW | D_SKL | D_KBL) 56 + #define D_HSW_PLUS (D_HSW | D_BDW | D_SKL | D_KBL) 57 + #define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) 57 58 58 59 #define D_PRE_BDW (D_SNB | D_IVB | D_HSW) 59 60 #define D_PRE_SKL (D_SNB | D_IVB | D_HSW | D_BDW) 60 - #define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL) 61 + #define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) 61 62 62 63 struct intel_gvt_mmio_info { 63 64 u32 offset;
+18 -5
drivers/gpu/drm/i915/gvt/render.c
··· 126 126 {VCS2, _MMIO(0x1c028), 0xffff, false}, 127 127 128 128 {VECS, _MMIO(0x1a028), 0xffff, false}, 129 + 130 + {RCS, _MMIO(0x7304), 0xffff, true}, 131 + {RCS, _MMIO(0x2248), 0x0, false}, 132 + {RCS, _MMIO(0x940c), 0x0, false}, 133 + {RCS, _MMIO(0x4ab8), 0x0, false}, 134 + 135 + {RCS, _MMIO(0x4ab0), 0x0, false}, 136 + {RCS, _MMIO(0x20d4), 0x0, false}, 137 + 138 + {RCS, _MMIO(0xb004), 0x0, false}, 139 + {RCS, _MMIO(0x20a0), 0x0, false}, 140 + {RCS, _MMIO(0x20e4), 0xffff, false}, 129 141 }; 130 142 131 143 static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; ··· 171 159 */ 172 160 fw = intel_uncore_forcewake_for_reg(dev_priv, reg, 173 161 FW_REG_READ | FW_REG_WRITE); 174 - if (ring_id == RCS && IS_SKYLAKE(dev_priv)) 162 + if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) 175 163 fw |= FORCEWAKE_RENDER; 176 164 177 165 intel_uncore_forcewake_get(dev_priv, fw); ··· 204 192 if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) 205 193 return; 206 194 207 - if (!IS_SKYLAKE(dev_priv)) 195 + if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) 208 196 return; 209 197 210 198 offset.reg = regs[ring_id]; ··· 242 230 if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) 243 231 return; 244 232 245 - if (!IS_SKYLAKE(dev_priv)) 233 + if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) 246 234 return; 247 235 248 236 offset.reg = regs[ring_id]; ··· 277 265 u32 inhibit_mask = 278 266 _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); 279 267 280 - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { 268 + if (IS_SKYLAKE(vgpu->gvt->dev_priv) 269 + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { 281 270 mmio = gen9_render_mmio_list; 282 271 array_size = ARRAY_SIZE(gen9_render_mmio_list); 283 272 load_mocs(vgpu, ring_id); ··· 325 312 u32 v; 326 313 int i, array_size; 327 314 328 - if (IS_SKYLAKE(dev_priv)) { 315 + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 329 316 mmio = gen9_render_mmio_list; 330 317 array_size = ARRAY_SIZE(gen9_render_mmio_list); 331 318 restore_mocs(vgpu, ring_id);
+167 -62
drivers/gpu/drm/i915/gvt/sched_policy.c
··· 47 47 return false; 48 48 } 49 49 50 + struct vgpu_sched_data { 51 + struct list_head lru_list; 52 + struct intel_vgpu *vgpu; 53 + 54 + ktime_t sched_in_time; 55 + ktime_t sched_out_time; 56 + ktime_t sched_time; 57 + ktime_t left_ts; 58 + ktime_t allocated_ts; 59 + 60 + struct vgpu_sched_ctl sched_ctl; 61 + }; 62 + 63 + struct gvt_sched_data { 64 + struct intel_gvt *gvt; 65 + struct hrtimer timer; 66 + unsigned long period; 67 + struct list_head lru_runq_head; 68 + }; 69 + 70 + static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu) 71 + { 72 + ktime_t delta_ts; 73 + struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data; 74 + 75 + delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time; 76 + 77 + vgpu_data->sched_time += delta_ts; 78 + vgpu_data->left_ts -= delta_ts; 79 + } 80 + 81 + #define GVT_TS_BALANCE_PERIOD_MS 100 82 + #define GVT_TS_BALANCE_STAGE_NUM 10 83 + 84 + static void gvt_balance_timeslice(struct gvt_sched_data *sched_data) 85 + { 86 + struct vgpu_sched_data *vgpu_data; 87 + struct list_head *pos; 88 + static uint64_t stage_check; 89 + int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM; 90 + 91 + /* The timeslice accumulation reset at stage 0, which is 92 + * allocated again without adding previous debt. 
93 + */ 94 + if (stage == 0) { 95 + int total_weight = 0; 96 + ktime_t fair_timeslice; 97 + 98 + list_for_each(pos, &sched_data->lru_runq_head) { 99 + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); 100 + total_weight += vgpu_data->sched_ctl.weight; 101 + } 102 + 103 + list_for_each(pos, &sched_data->lru_runq_head) { 104 + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); 105 + fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) * 106 + vgpu_data->sched_ctl.weight / 107 + total_weight; 108 + 109 + vgpu_data->allocated_ts = fair_timeslice; 110 + vgpu_data->left_ts = vgpu_data->allocated_ts; 111 + } 112 + } else { 113 + list_for_each(pos, &sched_data->lru_runq_head) { 114 + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); 115 + 116 + /* timeslice for next 100ms should add the left/debt 117 + * slice of previous stages. 118 + */ 119 + vgpu_data->left_ts += vgpu_data->allocated_ts; 120 + } 121 + } 122 + } 123 + 50 124 static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) 51 125 { 52 126 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 53 127 enum intel_engine_id i; 54 128 struct intel_engine_cs *engine; 129 + struct vgpu_sched_data *vgpu_data; 130 + ktime_t cur_time; 55 131 56 132 /* no target to schedule */ 57 133 if (!scheduler->next_vgpu) ··· 153 77 gvt_dbg_sched("switch to next vgpu %d\n", 154 78 scheduler->next_vgpu->id); 155 79 80 + cur_time = ktime_get(); 81 + if (scheduler->current_vgpu) { 82 + vgpu_data = scheduler->current_vgpu->sched_data; 83 + vgpu_data->sched_out_time = cur_time; 84 + vgpu_update_timeslice(scheduler->current_vgpu); 85 + } 86 + vgpu_data = scheduler->next_vgpu->sched_data; 87 + vgpu_data->sched_in_time = cur_time; 88 + 156 89 /* switch current vgpu */ 157 90 scheduler->current_vgpu = scheduler->next_vgpu; 158 91 scheduler->next_vgpu = NULL; ··· 173 88 wake_up(&scheduler->waitq[i]); 174 89 } 175 90 176 - struct tbs_vgpu_data { 177 - struct list_head list; 178 - struct 
intel_vgpu *vgpu; 179 - /* put some per-vgpu sched stats here */ 180 - }; 181 - 182 - struct tbs_sched_data { 183 - struct intel_gvt *gvt; 184 - struct delayed_work work; 185 - unsigned long period; 186 - struct list_head runq_head; 187 - }; 188 - 189 - #define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1)) 190 - 191 - static void tbs_sched_func(struct work_struct *work) 91 + static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data) 192 92 { 193 - struct tbs_sched_data *sched_data = container_of(work, 194 - struct tbs_sched_data, work.work); 195 - struct tbs_vgpu_data *vgpu_data; 196 - 197 - struct intel_gvt *gvt = sched_data->gvt; 198 - struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 199 - 93 + struct vgpu_sched_data *vgpu_data; 200 94 struct intel_vgpu *vgpu = NULL; 201 - struct list_head *pos, *head; 202 - 203 - mutex_lock(&gvt->lock); 204 - 205 - /* no vgpu or has already had a target */ 206 - if (list_empty(&sched_data->runq_head) || scheduler->next_vgpu) 207 - goto out; 208 - 209 - if (scheduler->current_vgpu) { 210 - vgpu_data = scheduler->current_vgpu->sched_data; 211 - head = &vgpu_data->list; 212 - } else { 213 - head = &sched_data->runq_head; 214 - } 95 + struct list_head *head = &sched_data->lru_runq_head; 96 + struct list_head *pos; 215 97 216 98 /* search a vgpu with pending workload */ 217 99 list_for_each(pos, head) { 218 - if (pos == &sched_data->runq_head) 219 - continue; 220 100 221 - vgpu_data = container_of(pos, struct tbs_vgpu_data, list); 101 + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); 222 102 if (!vgpu_has_pending_workload(vgpu_data->vgpu)) 223 103 continue; 224 104 225 - vgpu = vgpu_data->vgpu; 226 - break; 105 + /* Return the vGPU only if it has time slice left */ 106 + if (vgpu_data->left_ts > 0) { 107 + vgpu = vgpu_data->vgpu; 108 + break; 109 + } 227 110 } 228 111 112 + return vgpu; 113 + } 114 + 115 + /* in nanosecond */ 116 + #define GVT_DEFAULT_TIME_SLICE 1000000 117 + 118 + 
static void tbs_sched_func(struct gvt_sched_data *sched_data) 119 + { 120 + struct intel_gvt *gvt = sched_data->gvt; 121 + struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 122 + struct vgpu_sched_data *vgpu_data; 123 + struct intel_vgpu *vgpu = NULL; 124 + static uint64_t timer_check; 125 + 126 + if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) 127 + gvt_balance_timeslice(sched_data); 128 + 129 + /* no active vgpu or has already had a target */ 130 + if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu) 131 + goto out; 132 + 133 + vgpu = find_busy_vgpu(sched_data); 229 134 if (vgpu) { 230 135 scheduler->next_vgpu = vgpu; 136 + 137 + /* Move the last used vGPU to the tail of lru_list */ 138 + vgpu_data = vgpu->sched_data; 139 + list_del_init(&vgpu_data->lru_list); 140 + list_add_tail(&vgpu_data->lru_list, 141 + &sched_data->lru_runq_head); 142 + 231 143 gvt_dbg_sched("pick next vgpu %d\n", vgpu->id); 144 + } else { 145 + scheduler->next_vgpu = gvt->idle_vgpu; 232 146 } 233 147 out: 234 148 if (scheduler->next_vgpu) { ··· 235 151 scheduler->next_vgpu->id); 236 152 try_to_schedule_next_vgpu(gvt); 237 153 } 154 + } 238 155 239 - /* 240 - * still have vgpu on runq 241 - * or last schedule haven't finished due to running workload 242 - */ 243 - if (!list_empty(&sched_data->runq_head) || scheduler->next_vgpu) 244 - schedule_delayed_work(&sched_data->work, sched_data->period); 156 + void intel_gvt_schedule(struct intel_gvt *gvt) 157 + { 158 + struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; 245 159 160 + mutex_lock(&gvt->lock); 161 + tbs_sched_func(sched_data); 246 162 mutex_unlock(&gvt->lock); 163 + } 164 + 165 + static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data) 166 + { 167 + struct gvt_sched_data *data; 168 + 169 + data = container_of(timer_data, struct gvt_sched_data, timer); 170 + 171 + intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED); 172 + 173 + hrtimer_add_expires_ns(&data->timer, 
data->period); 174 + 175 + return HRTIMER_RESTART; 247 176 } 248 177 249 178 static int tbs_sched_init(struct intel_gvt *gvt) ··· 264 167 struct intel_gvt_workload_scheduler *scheduler = 265 168 &gvt->scheduler; 266 169 267 - struct tbs_sched_data *data; 170 + struct gvt_sched_data *data; 268 171 269 172 data = kzalloc(sizeof(*data), GFP_KERNEL); 270 173 if (!data) 271 174 return -ENOMEM; 272 175 273 - INIT_LIST_HEAD(&data->runq_head); 274 - INIT_DELAYED_WORK(&data->work, tbs_sched_func); 176 + INIT_LIST_HEAD(&data->lru_runq_head); 177 + hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 178 + data->timer.function = tbs_timer_fn; 275 179 data->period = GVT_DEFAULT_TIME_SLICE; 276 180 data->gvt = gvt; 277 181 278 182 scheduler->sched_data = data; 183 + 279 184 return 0; 280 185 } 281 186 ··· 285 186 { 286 187 struct intel_gvt_workload_scheduler *scheduler = 287 188 &gvt->scheduler; 288 - struct tbs_sched_data *data = scheduler->sched_data; 189 + struct gvt_sched_data *data = scheduler->sched_data; 289 190 290 - cancel_delayed_work(&data->work); 191 + hrtimer_cancel(&data->timer); 192 + 291 193 kfree(data); 292 194 scheduler->sched_data = NULL; 293 195 } 294 196 295 197 static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) 296 198 { 297 - struct tbs_vgpu_data *data; 199 + struct vgpu_sched_data *data; 298 200 299 201 data = kzalloc(sizeof(*data), GFP_KERNEL); 300 202 if (!data) 301 203 return -ENOMEM; 302 204 205 + data->sched_ctl.weight = vgpu->sched_ctl.weight; 303 206 data->vgpu = vgpu; 304 - INIT_LIST_HEAD(&data->list); 207 + INIT_LIST_HEAD(&data->lru_list); 305 208 306 209 vgpu->sched_data = data; 210 + 307 211 return 0; 308 212 } 309 213 ··· 318 216 319 217 static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) 320 218 { 321 - struct tbs_sched_data *sched_data = vgpu->gvt->scheduler.sched_data; 322 - struct tbs_vgpu_data *vgpu_data = vgpu->sched_data; 219 + struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data; 220 + struct 
vgpu_sched_data *vgpu_data = vgpu->sched_data; 323 221 324 - if (!list_empty(&vgpu_data->list)) 222 + if (!list_empty(&vgpu_data->lru_list)) 325 223 return; 326 224 327 - list_add_tail(&vgpu_data->list, &sched_data->runq_head); 328 - schedule_delayed_work(&sched_data->work, 0); 225 + list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head); 226 + 227 + if (!hrtimer_active(&sched_data->timer)) 228 + hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(), 229 + sched_data->period), HRTIMER_MODE_ABS); 329 230 } 330 231 331 232 static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) 332 233 { 333 - struct tbs_vgpu_data *vgpu_data = vgpu->sched_data; 234 + struct vgpu_sched_data *vgpu_data = vgpu->sched_data; 334 235 335 - list_del_init(&vgpu_data->list); 236 + list_del_init(&vgpu_data->lru_list); 336 237 } 337 238 338 239 static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
+2
drivers/gpu/drm/i915/gvt/sched_policy.h
··· 43 43 void (*stop_schedule)(struct intel_vgpu *vgpu); 44 44 }; 45 45 46 + void intel_gvt_schedule(struct intel_gvt *gvt); 47 + 46 48 int intel_gvt_init_sched_policy(struct intel_gvt *gvt); 47 49 48 50 void intel_gvt_clean_sched_policy(struct intel_gvt *gvt);
+2 -1
drivers/gpu/drm/i915/gvt/scheduler.c
··· 448 448 struct intel_vgpu_workload *workload = NULL; 449 449 struct intel_vgpu *vgpu = NULL; 450 450 int ret; 451 - bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); 451 + bool need_force_wake = IS_SKYLAKE(gvt->dev_priv) 452 + || IS_KABYLAKE(gvt->dev_priv); 452 453 DEFINE_WAIT_FUNC(wait, woken_wake_function); 453 454 454 455 kfree(p);
-1
drivers/gpu/drm/i915/gvt/scheduler.h
··· 67 67 }; 68 68 69 69 struct intel_shadow_wa_ctx { 70 - struct intel_vgpu_workload *workload; 71 70 struct shadow_indirect_ctx indirect_ctx; 72 71 struct shadow_per_ctx per_ctx; 73 72
+79 -6
drivers/gpu/drm/i915/gvt/vgpu.c
··· 64 64 WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); 65 65 } 66 66 67 + #define VGPU_MAX_WEIGHT 16 68 + #define VGPU_WEIGHT(vgpu_num) \ 69 + (VGPU_MAX_WEIGHT / (vgpu_num)) 70 + 67 71 static struct { 68 72 unsigned int low_mm; 69 73 unsigned int high_mm; 70 74 unsigned int fence; 75 + 76 + /* A vGPU with a weight of 8 will get twice as much GPU as a vGPU 77 + * with a weight of 4 on a contended host, different vGPU type has 78 + * different weight set. Legal weights range from 1 to 16. 79 + */ 80 + unsigned int weight; 71 81 enum intel_vgpu_edid edid; 72 82 char *name; 73 83 } vgpu_types[] = { 74 84 /* Fixed vGPU type table */ 75 - { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" }, 76 - { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, 77 - { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, 78 - { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, 85 + { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, VGPU_WEIGHT(8), GVT_EDID_1024_768, "8" }, 86 + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, VGPU_WEIGHT(4), GVT_EDID_1920_1200, "4" }, 87 + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, VGPU_WEIGHT(2), GVT_EDID_1920_1200, "2" }, 88 + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, VGPU_WEIGHT(1), GVT_EDID_1920_1200, "1" }, 79 89 }; 80 90 81 91 /** ··· 130 120 gvt->types[i].low_gm_size = vgpu_types[i].low_mm; 131 121 gvt->types[i].high_gm_size = vgpu_types[i].high_mm; 132 122 gvt->types[i].fence = vgpu_types[i].fence; 123 + 124 + if (vgpu_types[i].weight < 1 || 125 + vgpu_types[i].weight > VGPU_MAX_WEIGHT) 126 + return -EINVAL; 127 + 128 + gvt->types[i].weight = vgpu_types[i].weight; 133 129 gvt->types[i].resolution = vgpu_types[i].edid; 134 130 gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, 135 131 high_avail / vgpu_types[i].high_mm); ··· 147 131 sprintf(gvt->types[i].name, "GVTg_V5_%s", 148 132 vgpu_types[i].name); 149 133 150 - gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res 
%s\n", 134 + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u weight %u res %s\n", 151 135 i, gvt->types[i].name, 152 136 gvt->types[i].avail_instance, 153 137 gvt->types[i].low_gm_size, 154 138 gvt->types[i].high_gm_size, gvt->types[i].fence, 139 + gvt->types[i].weight, 155 140 vgpu_edid_str(gvt->types[i].resolution)); 156 141 } 157 142 ··· 233 216 mutex_unlock(&gvt->lock); 234 217 } 235 218 219 + #define IDLE_VGPU_IDR 0 220 + 221 + /** 222 + * intel_gvt_create_idle_vgpu - create an idle virtual GPU 223 + * @gvt: GVT device 224 + * 225 + * This function is called when user wants to create an idle virtual GPU. 226 + * 227 + * Returns: 228 + * pointer to intel_vgpu, error pointer if failed. 229 + */ 230 + struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) 231 + { 232 + struct intel_vgpu *vgpu; 233 + enum intel_engine_id i; 234 + int ret; 235 + 236 + vgpu = vzalloc(sizeof(*vgpu)); 237 + if (!vgpu) 238 + return ERR_PTR(-ENOMEM); 239 + 240 + vgpu->id = IDLE_VGPU_IDR; 241 + vgpu->gvt = gvt; 242 + 243 + for (i = 0; i < I915_NUM_ENGINES; i++) 244 + INIT_LIST_HEAD(&vgpu->workload_q_head[i]); 245 + 246 + ret = intel_vgpu_init_sched_policy(vgpu); 247 + if (ret) 248 + goto out_free_vgpu; 249 + 250 + vgpu->active = false; 251 + 252 + return vgpu; 253 + 254 + out_free_vgpu: 255 + vfree(vgpu); 256 + return ERR_PTR(ret); 257 + } 258 + 259 + /** 260 + * intel_gvt_destroy_vgpu - destroy an idle virtual GPU 261 + * @vgpu: virtual GPU 262 + * 263 + * This function is called when user wants to destroy an idle virtual GPU. 
264 + * 265 + */ 266 + void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu) 267 + { 268 + intel_vgpu_clean_sched_policy(vgpu); 269 + vfree(vgpu); 270 + } 271 + 236 272 static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, 237 273 struct intel_vgpu_creation_params *param) 238 274 { ··· 302 232 303 233 mutex_lock(&gvt->lock); 304 234 305 - ret = idr_alloc(&gvt->vgpu_idr, vgpu, 1, GVT_MAX_VGPU, GFP_KERNEL); 235 + ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU, 236 + GFP_KERNEL); 306 237 if (ret < 0) 307 238 goto out_free_vgpu; 308 239 309 240 vgpu->id = ret; 310 241 vgpu->handle = param->handle; 311 242 vgpu->gvt = gvt; 243 + vgpu->sched_ctl.weight = param->weight; 312 244 bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES); 313 245 314 246 intel_vgpu_init_cfg_space(vgpu, param->primary); ··· 397 325 param.low_gm_sz = type->low_gm_size; 398 326 param.high_gm_sz = type->high_gm_size; 399 327 param.fence_sz = type->fence; 328 + param.weight = type->weight; 400 329 param.resolution = type->resolution; 401 330 402 331 /* XXX current param based on MB */
+26 -24
drivers/gpu/drm/i915/i915_debugfs.c
··· 1012 1012 1013 1013 static int i915_gpu_info_open(struct inode *inode, struct file *file) 1014 1014 { 1015 + struct drm_i915_private *i915 = inode->i_private; 1015 1016 struct i915_gpu_state *gpu; 1016 1017 1017 - gpu = i915_capture_gpu_state(inode->i_private); 1018 + intel_runtime_pm_get(i915); 1019 + gpu = i915_capture_gpu_state(i915); 1020 + intel_runtime_pm_put(i915); 1018 1021 if (!gpu) 1019 1022 return -ENOMEM; 1020 1023 ··· 1462 1459 1463 1460 static int i915_forcewake_domains(struct seq_file *m, void *data) 1464 1461 { 1465 - struct drm_i915_private *dev_priv = node_to_i915(m->private); 1462 + struct drm_i915_private *i915 = node_to_i915(m->private); 1466 1463 struct intel_uncore_forcewake_domain *fw_domain; 1464 + unsigned int tmp; 1467 1465 1468 - spin_lock_irq(&dev_priv->uncore.lock); 1469 - for_each_fw_domain(fw_domain, dev_priv) { 1466 + for_each_fw_domain(fw_domain, i915, tmp) 1470 1467 seq_printf(m, "%s.wake_count = %u\n", 1471 1468 intel_uncore_forcewake_domain_to_str(fw_domain->id), 1472 - fw_domain->wake_count); 1473 - } 1474 - spin_unlock_irq(&dev_priv->uncore.lock); 1469 + READ_ONCE(fw_domain->wake_count)); 1475 1470 1476 1471 return 0; 1477 1472 } ··· 1939 1938 1940 1939 static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) 1941 1940 { 1942 - seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", 1943 - ring->space, ring->head, ring->tail, 1944 - ring->last_retired_head); 1941 + seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)", 1942 + ring->space, ring->head, ring->tail); 1945 1943 } 1946 1944 1947 1945 static int i915_context_status(struct seq_file *m, void *unused) ··· 2474 2474 enum intel_engine_id id; 2475 2475 uint64_t tot = 0; 2476 2476 2477 - seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", 2478 - client->priority, client->ctx_index, client->proc_desc_offset); 2479 - seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n", 2477 + seq_printf(m, "\tPriority 
%d, GuC stage index: %u, PD offset 0x%x\n", 2478 + client->priority, client->stage_id, client->proc_desc_offset); 2479 + seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n", 2480 2480 client->doorbell_id, client->doorbell_offset, client->doorbell_cookie); 2481 2481 seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", 2482 2482 client->wq_size, client->wq_offset, client->wq_tail); ··· 2511 2511 } 2512 2512 2513 2513 seq_printf(m, "Doorbell map:\n"); 2514 - seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap); 2514 + seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); 2515 2515 seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); 2516 2516 2517 2517 seq_printf(m, "GuC total action count: %llu\n", guc->action_count); ··· 4129 4129 static int 4130 4130 i915_wedged_set(void *data, u64 val) 4131 4131 { 4132 - struct drm_i915_private *dev_priv = data; 4132 + struct drm_i915_private *i915 = data; 4133 + struct intel_engine_cs *engine; 4134 + unsigned int tmp; 4133 4135 4134 4136 /* 4135 4137 * There is no safeguard against this debugfs entry colliding ··· 4141 4139 * while it is writing to 'i915_wedged' 4142 4140 */ 4143 4141 4144 - if (i915_reset_backoff(&dev_priv->gpu_error)) 4142 + if (i915_reset_backoff(&i915->gpu_error)) 4145 4143 return -EAGAIN; 4146 4144 4147 - i915_handle_error(dev_priv, val, 4148 - "Manually setting wedged to %llu", val); 4145 + for_each_engine_masked(engine, i915, val, tmp) { 4146 + engine->hangcheck.seqno = intel_engine_get_seqno(engine); 4147 + engine->hangcheck.stalled = true; 4148 + } 4149 4149 4150 - wait_on_bit(&dev_priv->gpu_error.flags, 4150 + i915_handle_error(i915, val, "Manually setting wedged to %llu", val); 4151 + 4152 + wait_on_bit(&i915->gpu_error.flags, 4151 4153 I915_RESET_HANDOFF, 4152 4154 TASK_UNINTERRUPTIBLE); 4153 4155 ··· 4178 4172 I915_WAIT_INTERRUPTIBLE); 4179 4173 if (err) 4180 4174 goto err_unlock; 4181 - 4182 - /* Retire to kick idle work */ 4183 - 
i915_gem_retire_requests(i915); 4184 - GEM_BUG_ON(i915->gt.active_requests); 4185 4175 4186 4176 *irq = val; 4187 4177 mutex_unlock(&i915->drm.struct_mutex); ··· 4282 4280 goto unlock; 4283 4281 } 4284 4282 4285 - if (val & (DROP_RETIRE | DROP_ACTIVE)) 4283 + if (val & DROP_RETIRE) 4286 4284 i915_gem_retire_requests(dev_priv); 4287 4285 4288 4286 lockdep_set_current_reclaim_state(GFP_KERNEL);
+5 -5
drivers/gpu/drm/i915/i915_drv.c
··· 549 549 static void i915_gem_fini(struct drm_i915_private *dev_priv) 550 550 { 551 551 mutex_lock(&dev_priv->drm.struct_mutex); 552 + intel_uc_fini_hw(dev_priv); 552 553 i915_gem_cleanup_engines(dev_priv); 553 554 i915_gem_context_fini(dev_priv); 554 555 mutex_unlock(&dev_priv->drm.struct_mutex); ··· 610 609 611 610 ret = i915_gem_init(dev_priv); 612 611 if (ret) 613 - goto cleanup_irq; 612 + goto cleanup_uc; 614 613 615 614 intel_modeset_gem_init(dev); 616 615 ··· 632 631 if (i915_gem_suspend(dev_priv)) 633 632 DRM_ERROR("failed to idle hardware; continuing to unload!\n"); 634 633 i915_gem_fini(dev_priv); 634 + cleanup_uc: 635 + intel_uc_fini_fw(dev_priv); 635 636 cleanup_irq: 636 - intel_guc_fini(dev_priv); 637 - intel_huc_fini(dev_priv); 638 637 drm_irq_uninstall(dev); 639 638 intel_teardown_gmbus(dev_priv); 640 639 cleanup_csr: ··· 1352 1351 /* Flush any outstanding unpin_work. */ 1353 1352 drain_workqueue(dev_priv->wq); 1354 1353 1355 - intel_guc_fini(dev_priv); 1356 - intel_huc_fini(dev_priv); 1357 1354 i915_gem_fini(dev_priv); 1355 + intel_uc_fini_fw(dev_priv); 1358 1356 intel_fbc_cleanup_cfb(dev_priv); 1359 1357 1360 1358 intel_power_domains_fini(dev_priv);
+40 -56
drivers/gpu/drm/i915/i915_drv.h
··· 79 79 80 80 #define DRIVER_NAME "i915" 81 81 #define DRIVER_DESC "Intel Graphics" 82 - #define DRIVER_DATE "20170320" 83 - #define DRIVER_TIMESTAMP 1489994464 84 - 85 - #undef WARN_ON 86 - /* Many gcc seem to no see through this and fall over :( */ 87 - #if 0 88 - #define WARN_ON(x) ({ \ 89 - bool __i915_warn_cond = (x); \ 90 - if (__builtin_constant_p(__i915_warn_cond)) \ 91 - BUILD_BUG_ON(__i915_warn_cond); \ 92 - WARN(__i915_warn_cond, "WARN_ON(" #x ")"); }) 93 - #else 94 - #define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")") 95 - #endif 96 - 97 - #undef WARN_ON_ONCE 98 - #define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")") 99 - 100 - #define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ 101 - (long) (x), __func__); 82 + #define DRIVER_DATE "20170403" 83 + #define DRIVER_TIMESTAMP 1491198738 102 84 103 85 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and 104 86 * WARN_ON()) for hw state sanity checks to check for unexpected conditions ··· 685 703 }; 686 704 687 705 enum forcewake_domains { 688 - FORCEWAKE_RENDER = (1 << FW_DOMAIN_ID_RENDER), 689 - FORCEWAKE_BLITTER = (1 << FW_DOMAIN_ID_BLITTER), 690 - FORCEWAKE_MEDIA = (1 << FW_DOMAIN_ID_MEDIA), 706 + FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), 707 + FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), 708 + FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), 691 709 FORCEWAKE_ALL = (FORCEWAKE_RENDER | 692 710 FORCEWAKE_BLITTER | 693 711 FORCEWAKE_MEDIA) ··· 714 732 715 733 struct intel_uncore_funcs { 716 734 void (*force_wake_get)(struct drm_i915_private *dev_priv, 717 - enum forcewake_domains domains); 735 + enum forcewake_domains domains); 718 736 void (*force_wake_put)(struct drm_i915_private *dev_priv, 719 - enum forcewake_domains domains); 737 + enum forcewake_domains domains); 720 738 721 - uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); 722 - uint16_t (*mmio_readw)(struct drm_i915_private 
*dev_priv, i915_reg_t r, bool trace); 723 - uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); 724 - uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); 739 + uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, 740 + i915_reg_t r, bool trace); 741 + uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, 742 + i915_reg_t r, bool trace); 743 + uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, 744 + i915_reg_t r, bool trace); 745 + uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, 746 + i915_reg_t r, bool trace); 725 747 726 - void (*mmio_writeb)(struct drm_i915_private *dev_priv, i915_reg_t r, 727 - uint8_t val, bool trace); 728 - void (*mmio_writew)(struct drm_i915_private *dev_priv, i915_reg_t r, 729 - uint16_t val, bool trace); 730 - void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r, 731 - uint32_t val, bool trace); 748 + void (*mmio_writeb)(struct drm_i915_private *dev_priv, 749 + i915_reg_t r, uint8_t val, bool trace); 750 + void (*mmio_writew)(struct drm_i915_private *dev_priv, 751 + i915_reg_t r, uint16_t val, bool trace); 752 + void (*mmio_writel)(struct drm_i915_private *dev_priv, 753 + i915_reg_t r, uint32_t val, bool trace); 732 754 }; 733 755 734 756 struct intel_forcewake_range { ··· 756 770 enum forcewake_domains fw_domains; 757 771 enum forcewake_domains fw_domains_active; 758 772 773 + u32 fw_set; 774 + u32 fw_clear; 775 + u32 fw_reset; 776 + 759 777 struct intel_uncore_forcewake_domain { 760 - struct drm_i915_private *i915; 761 778 enum forcewake_domain_id id; 762 779 enum forcewake_domains mask; 763 780 unsigned wake_count; 764 781 struct hrtimer timer; 765 782 i915_reg_t reg_set; 766 - u32 val_set; 767 - u32 val_clear; 768 783 i915_reg_t reg_ack; 769 - i915_reg_t reg_post; 770 - u32 val_reset; 771 784 } fw_domain[FW_DOMAIN_ID_COUNT]; 772 785 773 786 int unclaimed_mmio_check; 774 787 }; 775 788 776 - /* Iterate over initialised fw domains */ 
777 - #define for_each_fw_domain_masked(domain__, mask__, dev_priv__) \ 778 - for ((domain__) = &(dev_priv__)->uncore.fw_domain[0]; \ 779 - (domain__) < &(dev_priv__)->uncore.fw_domain[FW_DOMAIN_ID_COUNT]; \ 780 - (domain__)++) \ 781 - for_each_if ((mask__) & (domain__)->mask) 789 + #define __mask_next_bit(mask) ({ \ 790 + int __idx = ffs(mask) - 1; \ 791 + mask &= ~BIT(__idx); \ 792 + __idx; \ 793 + }) 782 794 783 - #define for_each_fw_domain(domain__, dev_priv__) \ 784 - for_each_fw_domain_masked(domain__, FORCEWAKE_ALL, dev_priv__) 795 + /* Iterate over initialised fw domains */ 796 + #define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \ 797 + for (tmp__ = (mask__); \ 798 + tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;) 799 + 800 + #define for_each_fw_domain(domain__, dev_priv__, tmp__) \ 801 + for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__) 785 802 786 803 #define CSR_VERSION(major, minor) ((major) << 16 | (minor)) 787 804 #define CSR_VERSION_MAJOR(version) ((version) >> 16) ··· 835 846 func(has_resource_streamer); \ 836 847 func(has_runtime_pm); \ 837 848 func(has_snoop); \ 849 + func(unfenced_needs_alignment); \ 838 850 func(cursor_needs_physical); \ 839 851 func(hws_needs_physical); \ 840 852 func(overlay_needs_physical); \ ··· 2568 2578 (id__)++) \ 2569 2579 for_each_if ((engine__) = (dev_priv__)->engine[(id__)]) 2570 2580 2571 - #define __mask_next_bit(mask) ({ \ 2572 - int __idx = ffs(mask) - 1; \ 2573 - mask &= ~BIT(__idx); \ 2574 - __idx; \ 2575 - }) 2576 - 2577 2581 /* Iterator over subset of engines selected by mask */ 2578 2582 #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ 2579 2583 for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \ ··· 3940 3956 #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg) 3941 3957 3942 3958 #define __raw_read(x, s) \ 3943 - static inline uint##x##_t __raw_i915_read##x(struct 
drm_i915_private *dev_priv, \ 3959 + static inline uint##x##_t __raw_i915_read##x(const struct drm_i915_private *dev_priv, \ 3944 3960 i915_reg_t reg) \ 3945 3961 { \ 3946 3962 return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \ 3947 3963 } 3948 3964 3949 3965 #define __raw_write(x, s) \ 3950 - static inline void __raw_i915_write##x(struct drm_i915_private *dev_priv, \ 3966 + static inline void __raw_i915_write##x(const struct drm_i915_private *dev_priv, \ 3951 3967 i915_reg_t reg, uint##x##_t val) \ 3952 3968 { \ 3953 3969 write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \
+56 -15
drivers/gpu/drm/i915/i915_gem.c
··· 2321 2321 st->nents = 0; 2322 2322 for (i = 0; i < page_count; i++) { 2323 2323 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2324 - if (IS_ERR(page)) { 2324 + if (unlikely(IS_ERR(page))) { 2325 2325 i915_gem_shrink(dev_priv, 2326 2326 page_count, 2327 2327 I915_SHRINK_BOUND | ··· 2329 2329 I915_SHRINK_PURGEABLE); 2330 2330 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2331 2331 } 2332 - if (IS_ERR(page)) { 2332 + if (unlikely(IS_ERR(page))) { 2333 + gfp_t reclaim; 2334 + 2333 2335 /* We've tried hard to allocate the memory by reaping 2334 2336 * our own buffer, now let the real VM do its job and 2335 2337 * go down in flames if truly OOM. 2338 + * 2339 + * However, since graphics tend to be disposable, 2340 + * defer the oom here by reporting the ENOMEM back 2341 + * to userspace. 2336 2342 */ 2337 - page = shmem_read_mapping_page(mapping, i); 2343 + reclaim = mapping_gfp_constraint(mapping, 0); 2344 + reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ 2345 + 2346 + page = shmem_read_mapping_page_gfp(mapping, i, reclaim); 2338 2347 if (IS_ERR(page)) { 2339 2348 ret = PTR_ERR(page); 2340 2349 goto err_sg; ··· 2998 2989 lockdep_assert_held(&dev_priv->drm.struct_mutex); 2999 2990 set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); 3000 2991 2992 + /* Retire completed requests first so the list of inflight/incomplete 2993 + * requests is accurate and we don't try and mark successful requests 2994 + * as in error during __i915_gem_set_wedged_BKL(). 2995 + */ 2996 + i915_gem_retire_requests(dev_priv); 2997 + 3001 2998 stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); 3002 2999 3003 3000 i915_gem_context_lost(dev_priv); 3004 - i915_gem_retire_requests(dev_priv); 3005 3001 3006 3002 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); 3007 3003 } ··· 3112 3098 * Wait for last execlists context complete, but bail out in case a 3113 3099 * new request is submitted. 
3114 3100 */ 3115 - wait_for(READ_ONCE(dev_priv->gt.active_requests) || 3116 - intel_engines_are_idle(dev_priv), 3117 - 10); 3101 + wait_for(intel_engines_are_idle(dev_priv), 10); 3118 3102 if (READ_ONCE(dev_priv->gt.active_requests)) 3119 3103 return; 3120 3104 ··· 3271 3259 return 0; 3272 3260 } 3273 3261 3262 + static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms) 3263 + { 3264 + return wait_for(intel_engine_is_idle(engine), timeout_ms); 3265 + } 3266 + 3267 + static int wait_for_engines(struct drm_i915_private *i915) 3268 + { 3269 + struct intel_engine_cs *engine; 3270 + enum intel_engine_id id; 3271 + 3272 + for_each_engine(engine, i915, id) { 3273 + if (GEM_WARN_ON(wait_for_engine(engine, 50))) { 3274 + i915_gem_set_wedged(i915); 3275 + return -EIO; 3276 + } 3277 + 3278 + GEM_BUG_ON(intel_engine_get_seqno(engine) != 3279 + intel_engine_last_submit(engine)); 3280 + } 3281 + 3282 + return 0; 3283 + } 3284 + 3274 3285 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) 3275 3286 { 3276 3287 int ret; ··· 3308 3273 if (ret) 3309 3274 return ret; 3310 3275 } 3276 + 3277 + i915_gem_retire_requests(i915); 3278 + GEM_BUG_ON(i915->gt.active_requests); 3279 + 3280 + ret = wait_for_engines(i915); 3311 3281 } else { 3312 3282 ret = wait_for_timeline(&i915->gt.global_timeline, flags); 3313 - if (ret) 3314 - return ret; 3315 3283 } 3316 3284 3317 - return 0; 3285 + return ret; 3318 3286 } 3319 3287 3320 3288 /** Flushes the GTT write domain for the object if it's dirty. */ ··· 3345 3307 * system agents we cannot reproduce this behaviour). 
3346 3308 */ 3347 3309 wmb(); 3348 - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) 3349 - POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); 3310 + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { 3311 + if (intel_runtime_pm_get_if_in_use(dev_priv)) { 3312 + spin_lock_irq(&dev_priv->uncore.lock); 3313 + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); 3314 + spin_unlock_irq(&dev_priv->uncore.lock); 3315 + intel_runtime_pm_put(dev_priv); 3316 + } 3317 + } 3350 3318 3351 3319 intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); 3352 3320 ··· 4451 4407 I915_WAIT_LOCKED); 4452 4408 if (ret) 4453 4409 goto err_unlock; 4454 - 4455 - i915_gem_retire_requests(dev_priv); 4456 - GEM_BUG_ON(dev_priv->gt.active_requests); 4457 4410 4458 4411 assert_kernel_context_is_current(dev_priv); 4459 4412 i915_gem_context_lost(dev_priv);
+1 -1
drivers/gpu/drm/i915/i915_gem_clflush.c
··· 168 168 169 169 i915_sw_fence_await_reservation(&clflush->wait, 170 170 obj->resv, NULL, 171 - false, I915_FENCE_TIMEOUT, 171 + true, I915_FENCE_TIMEOUT, 172 172 GFP_KERNEL); 173 173 174 174 reservation_object_lock(obj->resv, NULL);
+9 -9
drivers/gpu/drm/i915/i915_gem_context.c
··· 576 576 } 577 577 578 578 static inline int 579 - mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) 579 + mi_set_context(struct drm_i915_gem_request *req, u32 flags) 580 580 { 581 581 struct drm_i915_private *dev_priv = req->i915; 582 582 struct intel_engine_cs *engine = req->engine; 583 583 enum intel_engine_id id; 584 - u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT; 585 584 const int num_rings = 586 - /* Use an extended w/a on ivb+ if signalling from other rings */ 587 - i915.semaphores ? 585 + /* Use an extended w/a on gen7 if signalling from other rings */ 586 + (i915.semaphores && INTEL_GEN(dev_priv) == 7) ? 588 587 INTEL_INFO(dev_priv)->num_rings - 1 : 589 588 0; 590 589 int len; 590 + u32 *cs; 591 591 592 - /* These flags are for resource streamer on HSW+ */ 592 + flags |= MI_MM_SPACE_GTT; 593 593 if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) 594 - flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); 595 - else if (INTEL_GEN(dev_priv) < 8) 596 - flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); 597 - 594 + /* These flags are for resource streamer on HSW+ */ 595 + flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; 596 + else 597 + flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; 598 598 599 599 len = 4; 600 600 if (INTEL_GEN(dev_priv) >= 7)
-2
drivers/gpu/drm/i915/i915_gem_evict.c
··· 196 196 if (ret) 197 197 return ret; 198 198 199 - i915_gem_retire_requests(dev_priv); 200 199 goto search_again; 201 200 202 201 found: ··· 382 383 if (ret) 383 384 return ret; 384 385 385 - i915_gem_retire_requests(dev_priv); 386 386 WARN_ON(!list_empty(&vm->active_list)); 387 387 } 388 388
+3 -1
drivers/gpu/drm/i915/i915_gem_execbuffer.c
··· 890 890 struct list_head ordered_vmas; 891 891 struct list_head pinned_vmas; 892 892 bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; 893 + bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment; 893 894 int retry; 894 895 895 896 vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; ··· 911 910 if (!has_fenced_gpu_access) 912 911 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; 913 912 need_fence = 914 - entry->flags & EXEC_OBJECT_NEEDS_FENCE && 913 + (entry->flags & EXEC_OBJECT_NEEDS_FENCE || 914 + needs_unfenced_map) && 915 915 i915_gem_object_is_tiled(obj); 916 916 need_mappable = need_fence || need_reloc_mappable(vma); 917 917
+1 -1
drivers/gpu/drm/i915/i915_gem_gtt.c
··· 2364 2364 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2365 2365 2366 2366 if (unlikely(ggtt->do_idle_maps)) { 2367 - if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { 2367 + if (i915_gem_wait_for_idle(dev_priv, 0)) { 2368 2368 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2369 2369 /* Wait a bit, in hopes it avoids the hang */ 2370 2370 udelay(10);
+17 -16
drivers/gpu/drm/i915/i915_gem_request.c
··· 37 37 38 38 static const char *i915_fence_get_timeline_name(struct dma_fence *fence) 39 39 { 40 + /* The timeline struct (as part of the ppgtt underneath a context) 41 + * may be freed when the request is no longer in use by the GPU. 42 + * We could extend the life of a context to beyond that of all 43 + * fences, possibly keeping the hw resource around indefinitely, 44 + * or we just give them a false name. Since 45 + * dma_fence_ops.get_timeline_name is a debug feature, the occasional 46 + * lie seems justifiable. 47 + */ 48 + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 49 + return "signaled"; 50 + 40 51 return to_request(fence)->timeline->common->name; 41 52 } 42 53 ··· 191 180 192 181 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) 193 182 { 194 - struct i915_gem_timeline *timeline = &i915->gt.global_timeline; 195 183 struct intel_engine_cs *engine; 196 184 enum intel_engine_id id; 197 185 int ret; ··· 202 192 if (ret) 203 193 return ret; 204 194 205 - i915_gem_retire_requests(i915); 206 - GEM_BUG_ON(i915->gt.active_requests > 1); 207 - 208 195 /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ 209 196 for_each_engine(engine, i915, id) { 210 - struct intel_timeline *tl = &timeline->engine[id]; 211 - 212 - if (wait_for(intel_engine_is_idle(engine), 50)) 213 - return -EBUSY; 197 + struct i915_gem_timeline *timeline; 198 + struct intel_timeline *tl = engine->timeline; 214 199 215 200 if (!i915_seqno_passed(seqno, tl->seqno)) { 216 201 /* spin until threads are complete */ ··· 216 211 /* Finally reset hw state */ 217 212 tl->seqno = seqno; 218 213 intel_engine_init_global_seqno(engine, seqno); 219 - } 220 214 221 - list_for_each_entry(timeline, &i915->gt.timelines, link) { 222 - for_each_engine(engine, i915, id) { 223 - struct intel_timeline *tl = &timeline->engine[id]; 224 - 225 - memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); 226 - } 215 + list_for_each_entry(timeline, &i915->gt.timelines, 
link) 216 + memset(timeline->engine[id].sync_seqno, 0, 217 + sizeof(timeline->engine[id].sync_seqno)); 227 218 } 228 219 229 220 return 0; ··· 296 295 * completion order. 297 296 */ 298 297 list_del(&request->ring_link); 299 - request->ring->last_retired_head = request->postfix; 298 + request->ring->head = request->postfix; 300 299 if (!--request->i915->gt.active_requests) { 301 300 GEM_BUG_ON(!request->i915->gt.awake); 302 301 mod_delayed_work(request->i915->wq,
+476 -374
drivers/gpu/drm/i915/i915_guc_submission.c
··· 30 30 /** 31 31 * DOC: GuC-based command submission 32 32 * 33 - * i915_guc_client: 34 - * We use the term client to avoid confusion with contexts. A i915_guc_client is 35 - * equivalent to GuC object guc_context_desc. This context descriptor is 36 - * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell 37 - * and workqueue for it. Also the process descriptor (guc_process_desc), which 38 - * is mapped to client space. So the client can write Work Item then ring the 39 - * doorbell. 33 + * GuC client: 34 + * A i915_guc_client refers to a submission path through GuC. Currently, there 35 + * is only one of these (the execbuf_client) and this one is charged with all 36 + * submissions to the GuC. This struct is the owner of a doorbell, a process 37 + * descriptor and a workqueue (all of them inside a single gem object that 38 + * contains all required pages for these elements). 40 39 * 41 - * To simplify the implementation, we allocate one gem object that contains all 42 - * pages for doorbell, process descriptor and workqueue. 40 + * GuC stage descriptor: 41 + * During initialization, the driver allocates a static pool of 1024 such 42 + * descriptors, and shares them with the GuC. 43 + * Currently, there exists a 1:1 mapping between a i915_guc_client and a 44 + * guc_stage_desc (via the client's stage_id), so effectively only one 45 + * gets used. This stage descriptor lets the GuC know about the doorbell, 46 + * workqueue and process descriptor. Theoretically, it also lets the GuC 47 + * know about our HW contexts (context ID, etc...), but we actually 48 + * employ a kind of submission where the GuC uses the LRCA sent via the work 49 + * item instead (the single guc_stage_desc associated to execbuf client 50 + * contains information about the default kernel context only, but this is 51 + * essentially unused). This is called a "proxy" submission. 
43 52 * 44 53 * The Scratch registers: 45 54 * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes ··· 71 62 * ELSP context descriptor dword into Work Item. 72 63 * See guc_wq_item_append() 73 64 * 65 + * ADS: 66 + * The Additional Data Struct (ADS) has pointers for different buffers used by 67 + * the GuC. One single gem object contains the ADS struct itself (guc_ads), the 68 + * scheduling policies (guc_policies), a structure describing a collection of 69 + * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save 70 + * its internal state for sleep. 71 + * 74 72 */ 73 + 74 + static inline bool is_high_priority(struct i915_guc_client* client) 75 + { 76 + return client->priority <= GUC_CLIENT_PRIORITY_HIGH; 77 + } 78 + 79 + static int __reserve_doorbell(struct i915_guc_client *client) 80 + { 81 + unsigned long offset; 82 + unsigned long end; 83 + u16 id; 84 + 85 + GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID); 86 + 87 + /* 88 + * The bitmap tracks which doorbell registers are currently in use. 89 + * It is split into two halves; the first half is used for normal 90 + * priority contexts, the second half for high-priority ones. 
91 + */ 92 + offset = 0; 93 + end = GUC_NUM_DOORBELLS/2; 94 + if (is_high_priority(client)) { 95 + offset = end; 96 + end += offset; 97 + } 98 + 99 + id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end); 100 + if (id == end) 101 + return -ENOSPC; 102 + 103 + __set_bit(id, client->guc->doorbell_bitmap); 104 + client->doorbell_id = id; 105 + DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n", 106 + client->stage_id, yesno(is_high_priority(client)), 107 + id); 108 + return 0; 109 + } 110 + 111 + static void __unreserve_doorbell(struct i915_guc_client *client) 112 + { 113 + GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); 114 + 115 + __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); 116 + client->doorbell_id = GUC_DOORBELL_INVALID; 117 + } 75 118 76 119 /* 77 120 * Tell the GuC to allocate or deallocate a specific doorbell 78 121 */ 79 122 80 - static int guc_allocate_doorbell(struct intel_guc *guc, 81 - struct i915_guc_client *client) 123 + static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id) 82 124 { 83 125 u32 action[] = { 84 126 INTEL_GUC_ACTION_ALLOCATE_DOORBELL, 85 - client->ctx_index 127 + stage_id 86 128 }; 87 129 88 130 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 89 131 } 90 132 91 - static int guc_release_doorbell(struct intel_guc *guc, 92 - struct i915_guc_client *client) 133 + static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) 93 134 { 94 135 u32 action[] = { 95 136 INTEL_GUC_ACTION_DEALLOCATE_DOORBELL, 96 - client->ctx_index 137 + stage_id 97 138 }; 98 139 99 140 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 141 + } 142 + 143 + static struct guc_stage_desc *__get_stage_desc(struct i915_guc_client *client) 144 + { 145 + struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; 146 + 147 + return &base[client->stage_id]; 100 148 } 101 149 102 150 /* ··· 163 97 * client object which contains the page being used for the doorbell 164 98 
*/ 165 99 166 - static int guc_update_doorbell_id(struct intel_guc *guc, 167 - struct i915_guc_client *client, 168 - u16 new_id) 100 + static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) 169 101 { 170 - struct sg_table *sg = guc->ctx_pool_vma->pages; 171 - void *doorbell_bitmap = guc->doorbell_bitmap; 172 - struct guc_doorbell_info *doorbell; 173 - struct guc_context_desc desc; 174 - size_t len; 175 - 176 - doorbell = client->vaddr + client->doorbell_offset; 177 - 178 - if (client->doorbell_id != GUC_INVALID_DOORBELL_ID && 179 - test_bit(client->doorbell_id, doorbell_bitmap)) { 180 - /* Deactivate the old doorbell */ 181 - doorbell->db_status = GUC_DOORBELL_DISABLED; 182 - (void)guc_release_doorbell(guc, client); 183 - __clear_bit(client->doorbell_id, doorbell_bitmap); 184 - } 102 + struct guc_stage_desc *desc; 185 103 186 104 /* Update the GuC's idea of the doorbell ID */ 187 - len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 188 - sizeof(desc) * client->ctx_index); 189 - if (len != sizeof(desc)) 190 - return -EFAULT; 191 - desc.db_id = new_id; 192 - len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 193 - sizeof(desc) * client->ctx_index); 194 - if (len != sizeof(desc)) 195 - return -EFAULT; 196 - 197 - client->doorbell_id = new_id; 198 - if (new_id == GUC_INVALID_DOORBELL_ID) 199 - return 0; 200 - 201 - /* Activate the new doorbell */ 202 - __set_bit(new_id, doorbell_bitmap); 203 - doorbell->db_status = GUC_DOORBELL_ENABLED; 204 - doorbell->cookie = client->doorbell_cookie; 205 - return guc_allocate_doorbell(guc, client); 105 + desc = __get_stage_desc(client); 106 + desc->db_id = new_id; 206 107 } 207 108 208 - static void guc_disable_doorbell(struct intel_guc *guc, 209 - struct i915_guc_client *client) 109 + static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client) 210 110 { 211 - (void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID); 111 + return client->vaddr + 
client->doorbell_offset; 112 + } 113 + 114 + static bool has_doorbell(struct i915_guc_client *client) 115 + { 116 + if (client->doorbell_id == GUC_DOORBELL_INVALID) 117 + return false; 118 + 119 + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); 120 + } 121 + 122 + static int __create_doorbell(struct i915_guc_client *client) 123 + { 124 + struct guc_doorbell_info *doorbell; 125 + int err; 126 + 127 + doorbell = __get_doorbell(client); 128 + doorbell->db_status = GUC_DOORBELL_ENABLED; 129 + doorbell->cookie = client->doorbell_cookie; 130 + 131 + err = __guc_allocate_doorbell(client->guc, client->stage_id); 132 + if (err) { 133 + doorbell->db_status = GUC_DOORBELL_DISABLED; 134 + doorbell->cookie = 0; 135 + } 136 + return err; 137 + } 138 + 139 + static int __destroy_doorbell(struct i915_guc_client *client) 140 + { 141 + struct drm_i915_private *dev_priv = guc_to_i915(client->guc); 142 + struct guc_doorbell_info *doorbell; 143 + u16 db_id = client->doorbell_id; 144 + 145 + GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); 146 + 147 + doorbell = __get_doorbell(client); 148 + doorbell->db_status = GUC_DOORBELL_DISABLED; 149 + doorbell->cookie = 0; 150 + 151 + /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit 152 + * to go to zero after updating db_status before we call the GuC to 153 + * release the doorbell */ 154 + if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10)) 155 + WARN_ONCE(true, "Doorbell never became invalid after disable\n"); 156 + 157 + return __guc_deallocate_doorbell(client->guc, client->stage_id); 158 + } 159 + 160 + static int create_doorbell(struct i915_guc_client *client) 161 + { 162 + int ret; 163 + 164 + ret = __reserve_doorbell(client); 165 + if (ret) 166 + return ret; 167 + 168 + __update_doorbell_desc(client, client->doorbell_id); 169 + 170 + ret = __create_doorbell(client); 171 + if (ret) 172 + goto err; 173 + 174 + return 0; 175 + 176 + err: 177 + __update_doorbell_desc(client, 
GUC_DOORBELL_INVALID); 178 + __unreserve_doorbell(client); 179 + return ret; 180 + } 181 + 182 + static int destroy_doorbell(struct i915_guc_client *client) 183 + { 184 + int err; 185 + 186 + GEM_BUG_ON(!has_doorbell(client)); 212 187 213 188 /* XXX: wait for any interrupts */ 214 189 /* XXX: wait for workqueue to drain */ 190 + 191 + err = __destroy_doorbell(client); 192 + if (err) 193 + return err; 194 + 195 + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); 196 + 197 + __unreserve_doorbell(client); 198 + 199 + return 0; 215 200 } 216 201 217 - static uint16_t 218 - select_doorbell_register(struct intel_guc *guc, uint32_t priority) 202 + static unsigned long __select_cacheline(struct intel_guc* guc) 219 203 { 220 - /* 221 - * The bitmap tracks which doorbell registers are currently in use. 222 - * It is split into two halves; the first half is used for normal 223 - * priority contexts, the second half for high-priority ones. 224 - * Note that logically higher priorities are numerically less than 225 - * normal ones, so the test below means "is it high-priority?" 226 - */ 227 - const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH); 228 - const uint16_t half = GUC_MAX_DOORBELLS / 2; 229 - const uint16_t start = hi_pri ? half : 0; 230 - const uint16_t end = start + half; 231 - uint16_t id; 232 - 233 - id = find_next_zero_bit(guc->doorbell_bitmap, end, start); 234 - if (id == end) 235 - id = GUC_INVALID_DOORBELL_ID; 236 - 237 - DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n", 238 - hi_pri ? "high" : "normal", id); 239 - 240 - return id; 241 - } 242 - 243 - /* 244 - * Select, assign and relase doorbell cachelines 245 - * 246 - * These functions track which doorbell cachelines are in use. 247 - * The data they manipulate is protected by the intel_guc_send lock. 
248 - */ 249 - 250 - static uint32_t select_doorbell_cacheline(struct intel_guc *guc) 251 - { 252 - const uint32_t cacheline_size = cache_line_size(); 253 - uint32_t offset; 204 + unsigned long offset; 254 205 255 206 /* Doorbell uses a single cache line within a page */ 256 207 offset = offset_in_page(guc->db_cacheline); 257 208 258 209 /* Moving to next cache line to reduce contention */ 259 - guc->db_cacheline += cacheline_size; 210 + guc->db_cacheline += cache_line_size(); 260 211 261 - DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n", 262 - offset, guc->db_cacheline, cacheline_size); 263 - 212 + DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", 213 + offset, guc->db_cacheline, cache_line_size()); 264 214 return offset; 215 + } 216 + 217 + static inline struct guc_process_desc * 218 + __get_process_desc(struct i915_guc_client *client) 219 + { 220 + return client->vaddr + client->proc_desc_offset; 265 221 } 266 222 267 223 /* ··· 294 206 { 295 207 struct guc_process_desc *desc; 296 208 297 - desc = client->vaddr + client->proc_desc_offset; 298 - 299 - memset(desc, 0, sizeof(*desc)); 209 + desc = memset(__get_process_desc(client), 0, sizeof(*desc)); 300 210 301 211 /* 302 212 * XXX: pDoorbell and WQVBaseAddress are pointers in process address ··· 305 219 desc->wq_base_addr = 0; 306 220 desc->db_base_addr = 0; 307 221 308 - desc->context_id = client->ctx_index; 222 + desc->stage_id = client->stage_id; 309 223 desc->wq_size_bytes = client->wq_size; 310 224 desc->wq_status = WQ_STATUS_ACTIVE; 311 225 desc->priority = client->priority; 312 226 } 313 227 314 228 /* 315 - * Initialise/clear the context descriptor shared with the GuC firmware. 229 + * Initialise/clear the stage descriptor shared with the GuC firmware. 316 230 * 317 231 * This descriptor tells the GuC where (in GGTT space) to find the important 318 232 * data structures relating to this client (doorbell, process descriptor, 319 233 * write queue, etc). 
320 234 */ 321 - 322 - static void guc_ctx_desc_init(struct intel_guc *guc, 323 - struct i915_guc_client *client) 235 + static void guc_stage_desc_init(struct intel_guc *guc, 236 + struct i915_guc_client *client) 324 237 { 325 238 struct drm_i915_private *dev_priv = guc_to_i915(guc); 326 239 struct intel_engine_cs *engine; 327 240 struct i915_gem_context *ctx = client->owner; 328 - struct guc_context_desc desc; 329 - struct sg_table *sg; 241 + struct guc_stage_desc *desc; 330 242 unsigned int tmp; 331 243 u32 gfx_addr; 332 244 333 - memset(&desc, 0, sizeof(desc)); 245 + desc = __get_stage_desc(client); 246 + memset(desc, 0, sizeof(*desc)); 334 247 335 - desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; 336 - desc.context_id = client->ctx_index; 337 - desc.priority = client->priority; 338 - desc.db_id = client->doorbell_id; 248 + desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; 249 + desc->stage_id = client->stage_id; 250 + desc->priority = client->priority; 251 + desc->db_id = client->doorbell_id; 339 252 340 253 for_each_engine_masked(engine, dev_priv, client->engines, tmp) { 341 254 struct intel_context *ce = &ctx->engine[engine->id]; 342 255 uint32_t guc_engine_id = engine->guc_id; 343 - struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; 256 + struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; 344 257 345 258 /* TODO: We have a design issue to be solved here. Only when we 346 259 * receive the first batch, we know which engine is used by the ··· 351 266 if (!ce->state) 352 267 break; /* XXX: continue? */ 353 268 269 + /* 270 + * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy 271 + * submission or, in other words, not using a direct submission 272 + * model) the KMD's LRCA is not used for any work submission. 273 + * Instead, the GuC uses the LRCA of the user mode context (see 274 + * guc_wq_item_append below). 
275 + */ 354 276 lrc->context_desc = lower_32_bits(ce->lrc_desc); 355 277 356 278 /* The state page is after PPHWSP */ 357 - lrc->ring_lcra = 279 + lrc->ring_lrca = 358 280 guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; 359 - lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | 281 + 282 + /* XXX: In direct submission, the GuC wants the HW context id 283 + * here. In proxy submission, it wants the stage id */ 284 + lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | 360 285 (guc_engine_id << GUC_ELC_ENGINE_OFFSET); 361 286 362 287 lrc->ring_begin = guc_ggtt_offset(ce->ring->vma); ··· 374 279 lrc->ring_next_free_location = lrc->ring_begin; 375 280 lrc->ring_current_tail_pointer_value = 0; 376 281 377 - desc.engines_used |= (1 << guc_engine_id); 282 + desc->engines_used |= (1 << guc_engine_id); 378 283 } 379 284 380 285 DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", 381 - client->engines, desc.engines_used); 382 - WARN_ON(desc.engines_used == 0); 286 + client->engines, desc->engines_used); 287 + WARN_ON(desc->engines_used == 0); 383 288 384 289 /* 385 290 * The doorbell, process descriptor, and workqueue are all parts 386 291 * of the client object, which the GuC will reference via the GGTT 387 292 */ 388 293 gfx_addr = guc_ggtt_offset(client->vma); 389 - desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + 294 + desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + 390 295 client->doorbell_offset; 391 - desc.db_trigger_cpu = 392 - (uintptr_t)client->vaddr + client->doorbell_offset; 393 - desc.db_trigger_uk = gfx_addr + client->doorbell_offset; 394 - desc.process_desc = gfx_addr + client->proc_desc_offset; 395 - desc.wq_addr = gfx_addr + client->wq_offset; 396 - desc.wq_size = client->wq_size; 296 + desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); 297 + desc->db_trigger_uk = gfx_addr + client->doorbell_offset; 298 + desc->process_desc = gfx_addr + client->proc_desc_offset; 299 + 
desc->wq_addr = gfx_addr + client->wq_offset; 300 + desc->wq_size = client->wq_size; 397 301 398 - /* 399 - * XXX: Take LRCs from an existing context if this is not an 400 - * IsKMDCreatedContext client 401 - */ 402 - desc.desc_private = (uintptr_t)client; 403 - 404 - /* Pool context is pinned already */ 405 - sg = guc->ctx_pool_vma->pages; 406 - sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 407 - sizeof(desc) * client->ctx_index); 302 + desc->desc_private = (uintptr_t)client; 408 303 } 409 304 410 - static void guc_ctx_desc_fini(struct intel_guc *guc, 411 - struct i915_guc_client *client) 305 + static void guc_stage_desc_fini(struct intel_guc *guc, 306 + struct i915_guc_client *client) 412 307 { 413 - struct guc_context_desc desc; 414 - struct sg_table *sg; 308 + struct guc_stage_desc *desc; 415 309 416 - memset(&desc, 0, sizeof(desc)); 417 - 418 - sg = guc->ctx_pool_vma->pages; 419 - sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), 420 - sizeof(desc) * client->ctx_index); 310 + desc = __get_stage_desc(client); 311 + memset(desc, 0, sizeof(*desc)); 421 312 } 422 313 423 314 /** ··· 426 345 { 427 346 const size_t wqi_size = sizeof(struct guc_wq_item); 428 347 struct i915_guc_client *client = request->i915->guc.execbuf_client; 429 - struct guc_process_desc *desc = client->vaddr + 430 - client->proc_desc_offset; 348 + struct guc_process_desc *desc = __get_process_desc(client); 431 349 u32 freespace; 432 350 int ret; 433 351 ··· 471 391 const size_t wqi_size = sizeof(struct guc_wq_item); 472 392 const u32 wqi_len = wqi_size/sizeof(u32) - 1; 473 393 struct intel_engine_cs *engine = rq->engine; 474 - struct guc_process_desc *desc; 394 + struct guc_process_desc *desc = __get_process_desc(client); 475 395 struct guc_wq_item *wqi; 476 396 u32 freespace, tail, wq_off; 477 - 478 - desc = client->vaddr + client->proc_desc_offset; 479 397 480 398 /* Free space is guaranteed, see i915_guc_wq_reserve() above */ 481 399 freespace = 
CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); ··· 481 403 482 404 /* The GuC firmware wants the tail index in QWords, not bytes */ 483 405 tail = rq->tail; 484 - GEM_BUG_ON(tail & 7); 406 + assert_ring_tail_valid(rq->ring, rq->tail); 485 407 tail >>= 3; 486 408 GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); 487 409 ··· 514 436 /* The GuC wants only the low-order word of the context descriptor */ 515 437 wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); 516 438 517 - wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; 439 + wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT; 518 440 wqi->fence_id = rq->global_seqno; 441 + } 442 + 443 + static void guc_reset_wq(struct i915_guc_client *client) 444 + { 445 + struct guc_process_desc *desc = __get_process_desc(client); 446 + 447 + desc->head = 0; 448 + desc->tail = 0; 449 + 450 + client->wq_tail = 0; 519 451 } 520 452 521 453 static int guc_ring_doorbell(struct i915_guc_client *client) 522 454 { 523 - struct guc_process_desc *desc; 455 + struct guc_process_desc *desc = __get_process_desc(client); 524 456 union guc_doorbell_qw db_cmp, db_exc, db_ret; 525 457 union guc_doorbell_qw *db; 526 458 int attempt = 2, ret = -EAGAIN; 527 - 528 - desc = client->vaddr + client->proc_desc_offset; 529 459 530 460 /* Update the tail so it is visible to GuC */ 531 461 desc->tail = client->wq_tail; ··· 549 463 db_exc.cookie = 1; 550 464 551 465 /* pointer of current doorbell cacheline */ 552 - db = client->vaddr + client->doorbell_offset; 466 + db = (union guc_doorbell_qw *)__get_doorbell(client); 553 467 554 468 while (attempt--) { 555 469 /* lets ring the doorbell */ ··· 659 573 { 660 574 struct execlist_port *port = engine->execlist_port; 661 575 struct drm_i915_gem_request *last = port[0].request; 662 - unsigned long flags; 663 576 struct rb_node *rb; 664 577 bool submit = false; 665 578 666 - /* After execlist_first is updated, the tasklet will be rescheduled. 
667 - * 668 - * If we are currently running (inside the tasklet) and a third 669 - * party queues a request and so updates engine->execlist_first under 670 - * the spinlock (which we have elided), it will atomically set the 671 - * TASKLET_SCHED flag causing the us to be re-executed and pick up 672 - * the change in state (the update to TASKLET_SCHED incurs a memory 673 - * barrier making this cross-cpu checking safe). 674 - */ 675 - if (!READ_ONCE(engine->execlist_first)) 676 - return false; 677 - 678 - spin_lock_irqsave(&engine->timeline->lock, flags); 579 + spin_lock_irq(&engine->timeline->lock); 679 580 rb = engine->execlist_first; 680 581 while (rb) { 681 582 struct drm_i915_gem_request *rq = ··· 682 609 RB_CLEAR_NODE(&rq->priotree.node); 683 610 rq->priotree.priority = INT_MAX; 684 611 685 - trace_i915_gem_request_in(rq, port - engine->execlist_port); 686 612 i915_guc_submit(rq); 613 + trace_i915_gem_request_in(rq, port - engine->execlist_port); 687 614 last = rq; 688 615 submit = true; 689 616 } ··· 692 619 nested_enable_signaling(last); 693 620 engine->execlist_first = rb; 694 621 } 695 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 622 + spin_unlock_irq(&engine->timeline->lock); 696 623 697 624 return submit; 698 625 } ··· 768 695 return vma; 769 696 } 770 697 771 - static void 772 - guc_client_free(struct drm_i915_private *dev_priv, 773 - struct i915_guc_client *client) 774 - { 775 - struct intel_guc *guc = &dev_priv->guc; 776 - 777 - if (!client) 778 - return; 779 - 780 - /* 781 - * XXX: wait for any outstanding submissions before freeing memory. 782 - * Be sure to drop any locks 783 - */ 784 - 785 - if (client->vaddr) { 786 - /* 787 - * If we got as far as setting up a doorbell, make sure we 788 - * shut it down before unmapping & deallocating the memory. 
789 - */ 790 - guc_disable_doorbell(guc, client); 791 - 792 - i915_gem_object_unpin_map(client->vma->obj); 793 - } 794 - 795 - i915_vma_unpin_and_release(&client->vma); 796 - 797 - if (client->ctx_index != GUC_INVALID_CTX_ID) { 798 - guc_ctx_desc_fini(guc, client); 799 - ida_simple_remove(&guc->ctx_ids, client->ctx_index); 800 - } 801 - 802 - kfree(client); 803 - } 804 - 805 698 /* Check that a doorbell register is in the expected state */ 806 - static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) 699 + static bool doorbell_ok(struct intel_guc *guc, u16 db_id) 807 700 { 808 701 struct drm_i915_private *dev_priv = guc_to_i915(guc); 809 - i915_reg_t drbreg = GEN8_DRBREGL(db_id); 810 - uint32_t value = I915_READ(drbreg); 811 - bool enabled = (value & GUC_DOORBELL_ENABLED) != 0; 812 - bool expected = test_bit(db_id, guc->doorbell_bitmap); 702 + u32 drbregl; 703 + bool valid; 813 704 814 - if (enabled == expected) 705 + GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); 706 + 707 + drbregl = I915_READ(GEN8_DRBREGL(db_id)); 708 + valid = drbregl & GEN8_DRB_VALID; 709 + 710 + if (test_bit(db_id, guc->doorbell_bitmap) == valid) 815 711 return true; 816 712 817 - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n", 818 - db_id, drbreg.reg, value, 819 - expected ? "active" : "inactive"); 713 + DRM_DEBUG_DRIVER("Doorbell %d has unexpected state (0x%x): valid=%s\n", 714 + db_id, drbregl, yesno(valid)); 820 715 821 716 return false; 822 717 } 823 718 824 719 /* 825 - * Borrow the first client to set up & tear down each unused doorbell 826 - * in turn, to ensure that all doorbell h/w is (re)initialised. 720 + * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and 721 + * reloaded the GuC FW) we can use this function to tell the GuC to reassign the 722 + * doorbell to the rightful owner. 
827 723 */ 828 - static void guc_init_doorbell_hw(struct intel_guc *guc) 724 + static int __reset_doorbell(struct i915_guc_client* client, u16 db_id) 725 + { 726 + int err; 727 + 728 + __update_doorbell_desc(client, db_id); 729 + err = __create_doorbell(client); 730 + if (!err) 731 + err = __destroy_doorbell(client); 732 + 733 + return err; 734 + } 735 + 736 + /* 737 + * Set up & tear down each unused doorbell in turn, to ensure that all doorbell 738 + * HW is (re)initialised. For that end, we might have to borrow the first 739 + * client. Also, tell GuC about all the doorbells in use by all clients. 740 + * We do this because the KMD, the GuC and the doorbell HW can easily go out of 741 + * sync (e.g. we can reset the GuC, but not the doorbel HW). 742 + */ 743 + static int guc_init_doorbell_hw(struct intel_guc *guc) 829 744 { 830 745 struct i915_guc_client *client = guc->execbuf_client; 831 - uint16_t db_id; 832 - int i, err; 746 + bool recreate_first_client = false; 747 + u16 db_id; 748 + int ret; 833 749 834 - guc_disable_doorbell(guc, client); 835 - 836 - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { 837 - /* Skip if doorbell is OK */ 838 - if (guc_doorbell_check(guc, i)) 750 + /* For unused doorbells, make sure they are disabled */ 751 + for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) { 752 + if (doorbell_ok(guc, db_id)) 839 753 continue; 840 754 841 - err = guc_update_doorbell_id(guc, client, i); 842 - if (err) 843 - DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n", 844 - i, err); 755 + if (has_doorbell(client)) { 756 + /* Borrow execbuf_client (we will recreate it later) */ 757 + destroy_doorbell(client); 758 + recreate_first_client = true; 759 + } 760 + 761 + ret = __reset_doorbell(client, db_id); 762 + WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret); 845 763 } 846 764 847 - db_id = select_doorbell_register(guc, client->priority); 848 - WARN_ON(db_id == GUC_INVALID_DOORBELL_ID); 765 + if (recreate_first_client) { 766 + ret = 
__reserve_doorbell(client); 767 + if (unlikely(ret)) { 768 + DRM_ERROR("Couldn't re-reserve first client db: %d\n", ret); 769 + return ret; 770 + } 849 771 850 - err = guc_update_doorbell_id(guc, client, db_id); 851 - if (err) 852 - DRM_WARN("Failed to restore doorbell to %d, err %d\n", 853 - db_id, err); 772 + __update_doorbell_desc(client, client->doorbell_id); 773 + } 854 774 855 - /* Read back & verify all doorbell registers */ 856 - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) 857 - (void)guc_doorbell_check(guc, i); 775 + /* Now for every client (and not only execbuf_client) make sure their 776 + * doorbells are known by the GuC */ 777 + //for (client = client_list; client != NULL; client = client->next) 778 + { 779 + ret = __create_doorbell(client); 780 + if (ret) { 781 + DRM_ERROR("Couldn't recreate client %u doorbell: %d\n", 782 + client->stage_id, ret); 783 + return ret; 784 + } 785 + } 786 + 787 + /* Read back & verify all (used & unused) doorbell registers */ 788 + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) 789 + WARN_ON(!doorbell_ok(guc, db_id)); 790 + 791 + return 0; 858 792 } 859 793 860 794 /** ··· 887 807 struct intel_guc *guc = &dev_priv->guc; 888 808 struct i915_vma *vma; 889 809 void *vaddr; 890 - uint16_t db_id; 810 + int ret; 891 811 892 812 client = kzalloc(sizeof(*client), GFP_KERNEL); 893 813 if (!client) 894 - return NULL; 814 + return ERR_PTR(-ENOMEM); 895 815 896 - client->owner = ctx; 897 816 client->guc = guc; 817 + client->owner = ctx; 898 818 client->engines = engines; 899 819 client->priority = priority; 900 - client->doorbell_id = GUC_INVALID_DOORBELL_ID; 820 + client->doorbell_id = GUC_DOORBELL_INVALID; 821 + client->wq_offset = GUC_DB_SIZE; 822 + client->wq_size = GUC_WQ_SIZE; 823 + spin_lock_init(&client->wq_lock); 901 824 902 - client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, 903 - GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); 904 - if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) { 905 - client->ctx_index = 
GUC_INVALID_CTX_ID; 906 - goto err; 907 - } 825 + ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, 826 + GFP_KERNEL); 827 + if (ret < 0) 828 + goto err_client; 829 + 830 + client->stage_id = ret; 908 831 909 832 /* The first page is doorbell/proc_desc. Two followed pages are wq. */ 910 833 vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); 911 - if (IS_ERR(vma)) 912 - goto err; 834 + if (IS_ERR(vma)) { 835 + ret = PTR_ERR(vma); 836 + goto err_id; 837 + } 913 838 914 839 /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ 915 840 client->vma = vma; 916 841 917 842 vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); 918 - if (IS_ERR(vaddr)) 919 - goto err; 920 - 843 + if (IS_ERR(vaddr)) { 844 + ret = PTR_ERR(vaddr); 845 + goto err_vma; 846 + } 921 847 client->vaddr = vaddr; 922 848 923 - spin_lock_init(&client->wq_lock); 924 - client->wq_offset = GUC_DB_SIZE; 925 - client->wq_size = GUC_WQ_SIZE; 926 - 927 - db_id = select_doorbell_register(guc, client->priority); 928 - if (db_id == GUC_INVALID_DOORBELL_ID) 929 - /* XXX: evict a doorbell instead? */ 930 - goto err; 931 - 932 - client->doorbell_offset = select_doorbell_cacheline(guc); 849 + client->doorbell_offset = __select_cacheline(guc); 933 850 934 851 /* 935 852 * Since the doorbell only requires a single cacheline, we can save ··· 939 862 client->proc_desc_offset = (GUC_DB_SIZE / 2); 940 863 941 864 guc_proc_desc_init(guc, client); 942 - guc_ctx_desc_init(guc, client); 865 + guc_stage_desc_init(guc, client); 943 866 944 - /* For runtime client allocation we need to enable the doorbell. Not 945 - * required yet for the static execbuf_client as this special kernel 946 - * client is enabled from i915_guc_submission_enable(). 
947 - * 948 - * guc_update_doorbell_id(guc, client, db_id); 949 - */ 867 + ret = create_doorbell(client); 868 + if (ret) 869 + goto err_vaddr; 950 870 951 - DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", 952 - priority, client, client->engines, client->ctx_index); 953 - DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", 954 - client->doorbell_id, client->doorbell_offset); 871 + DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: stage_id %u\n", 872 + priority, client, client->engines, client->stage_id); 873 + DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n", 874 + client->doorbell_id, client->doorbell_offset); 955 875 956 876 return client; 957 877 958 - err: 959 - guc_client_free(dev_priv, client); 960 - return NULL; 878 + err_vaddr: 879 + i915_gem_object_unpin_map(client->vma->obj); 880 + err_vma: 881 + i915_vma_unpin_and_release(&client->vma); 882 + err_id: 883 + ida_simple_remove(&guc->stage_ids, client->stage_id); 884 + err_client: 885 + kfree(client); 886 + return ERR_PTR(ret); 961 887 } 962 888 889 + static void guc_client_free(struct i915_guc_client *client) 890 + { 891 + /* 892 + * XXX: wait for any outstanding submissions before freeing memory. 893 + * Be sure to drop any locks 894 + */ 963 895 896 + /* FIXME: in many cases, by the time we get here the GuC has been 897 + * reset, so we cannot destroy the doorbell properly. 
Ignore the 898 + * error message for now */ 899 + destroy_doorbell(client); 900 + guc_stage_desc_fini(client->guc, client); 901 + i915_gem_object_unpin_map(client->vma->obj); 902 + i915_vma_unpin_and_release(&client->vma); 903 + ida_simple_remove(&client->guc->stage_ids, client->stage_id); 904 + kfree(client); 905 + } 964 906 965 907 static void guc_policies_init(struct guc_policies *policies) 966 908 { ··· 989 893 policies->dpc_promote_time = 500000; 990 894 policies->max_num_work_items = POLICY_MAX_NUM_WI; 991 895 992 - for (p = 0; p < GUC_CTX_PRIORITY_NUM; p++) { 896 + for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { 993 897 for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { 994 898 policy = &policies->policy[p][i]; 995 899 ··· 1003 907 policies->is_valid = 1; 1004 908 } 1005 909 1006 - static void guc_addon_create(struct intel_guc *guc) 910 + static int guc_ads_create(struct intel_guc *guc) 1007 911 { 1008 912 struct drm_i915_private *dev_priv = guc_to_i915(guc); 1009 913 struct i915_vma *vma; ··· 1019 923 enum intel_engine_id id; 1020 924 u32 base; 1021 925 1022 - vma = guc->ads_vma; 1023 - if (!vma) { 1024 - vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); 1025 - if (IS_ERR(vma)) 1026 - return; 926 + GEM_BUG_ON(guc->ads_vma); 1027 927 1028 - guc->ads_vma = vma; 1029 - } 928 + vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); 929 + if (IS_ERR(vma)) 930 + return PTR_ERR(vma); 931 + 932 + guc->ads_vma = vma; 1030 933 1031 934 page = i915_vma_first_page(vma); 1032 935 blob = kmap(page); ··· 1035 940 1036 941 /* MMIO reg state */ 1037 942 for_each_engine(engine, dev_priv, id) { 1038 - blob->reg_state.mmio_white_list[engine->guc_id].mmio_start = 943 + blob->reg_state.white_list[engine->guc_id].mmio_start = 1039 944 engine->mmio_base + GUC_MMIO_WHITE_LIST_START; 1040 945 1041 946 /* Nothing to be saved or restored for now. 
*/ 1042 - blob->reg_state.mmio_white_list[engine->guc_id].count = 0; 947 + blob->reg_state.white_list[engine->guc_id].count = 0; 1043 948 } 1044 949 1045 950 /* ··· 1062 967 blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); 1063 968 1064 969 kunmap(page); 970 + 971 + return 0; 972 + } 973 + 974 + static void guc_ads_destroy(struct intel_guc *guc) 975 + { 976 + i915_vma_unpin_and_release(&guc->ads_vma); 1065 977 } 1066 978 1067 979 /* 1068 - * Set up the memory resources to be shared with the GuC. At this point, 1069 - * we require just one object that can be mapped through the GGTT. 980 + * Set up the memory resources to be shared with the GuC (via the GGTT) 981 + * at firmware loading time. 1070 982 */ 1071 983 int i915_guc_submission_init(struct drm_i915_private *dev_priv) 1072 984 { 1073 - const size_t ctxsize = sizeof(struct guc_context_desc); 1074 - const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize; 1075 - const size_t gemsize = round_up(poolsize, PAGE_SIZE); 1076 985 struct intel_guc *guc = &dev_priv->guc; 1077 986 struct i915_vma *vma; 987 + void *vaddr; 988 + int ret; 1078 989 1079 - if (!HAS_GUC_SCHED(dev_priv)) 990 + if (guc->stage_desc_pool) 1080 991 return 0; 1081 992 1082 - /* Wipe bitmap & delete client in case of reinitialisation */ 1083 - bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); 1084 - i915_guc_submission_disable(dev_priv); 1085 - 1086 - if (!i915.enable_guc_submission) 1087 - return 0; /* not enabled */ 1088 - 1089 - if (guc->ctx_pool_vma) 1090 - return 0; /* already allocated */ 1091 - 1092 - vma = intel_guc_allocate_vma(guc, gemsize); 993 + vma = intel_guc_allocate_vma(guc, 994 + PAGE_ALIGN(sizeof(struct guc_stage_desc) * 995 + GUC_MAX_STAGE_DESCRIPTORS)); 1093 996 if (IS_ERR(vma)) 1094 997 return PTR_ERR(vma); 1095 998 1096 - guc->ctx_pool_vma = vma; 1097 - ida_init(&guc->ctx_ids); 1098 - intel_guc_log_create(guc); 1099 - guc_addon_create(guc); 999 + guc->stage_desc_pool = vma; 1100 1000 1101 - 
guc->execbuf_client = guc_client_alloc(dev_priv, 1102 - INTEL_INFO(dev_priv)->ring_mask, 1103 - GUC_CTX_PRIORITY_KMD_NORMAL, 1104 - dev_priv->kernel_context); 1105 - if (!guc->execbuf_client) { 1106 - DRM_ERROR("Failed to create GuC client for execbuf!\n"); 1107 - goto err; 1001 + vaddr = i915_gem_object_pin_map(guc->stage_desc_pool->obj, I915_MAP_WB); 1002 + if (IS_ERR(vaddr)) { 1003 + ret = PTR_ERR(vaddr); 1004 + goto err_vma; 1108 1005 } 1006 + 1007 + guc->stage_desc_pool_vaddr = vaddr; 1008 + 1009 + ret = intel_guc_log_create(guc); 1010 + if (ret < 0) 1011 + goto err_vaddr; 1012 + 1013 + ret = guc_ads_create(guc); 1014 + if (ret < 0) 1015 + goto err_log; 1016 + 1017 + ida_init(&guc->stage_ids); 1109 1018 1110 1019 return 0; 1111 1020 1112 - err: 1113 - i915_guc_submission_fini(dev_priv); 1114 - return -ENOMEM; 1021 + err_log: 1022 + intel_guc_log_destroy(guc); 1023 + err_vaddr: 1024 + i915_gem_object_unpin_map(guc->stage_desc_pool->obj); 1025 + err_vma: 1026 + i915_vma_unpin_and_release(&guc->stage_desc_pool); 1027 + return ret; 1115 1028 } 1116 1029 1117 - static void guc_reset_wq(struct i915_guc_client *client) 1030 + void i915_guc_submission_fini(struct drm_i915_private *dev_priv) 1118 1031 { 1119 - struct guc_process_desc *desc = client->vaddr + 1120 - client->proc_desc_offset; 1032 + struct intel_guc *guc = &dev_priv->guc; 1121 1033 1122 - desc->head = 0; 1123 - desc->tail = 0; 1124 - 1125 - client->wq_tail = 0; 1034 + ida_destroy(&guc->stage_ids); 1035 + guc_ads_destroy(guc); 1036 + intel_guc_log_destroy(guc); 1037 + i915_gem_object_unpin_map(guc->stage_desc_pool->obj); 1038 + i915_vma_unpin_and_release(&guc->stage_desc_pool); 1126 1039 } 1127 1040 1128 1041 static void guc_interrupts_capture(struct drm_i915_private *dev_priv) ··· 1175 1072 dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 1176 1073 } 1177 1074 1075 + static void guc_interrupts_release(struct drm_i915_private *dev_priv) 1076 + { 1077 + struct intel_engine_cs *engine; 
1078 + enum intel_engine_id id; 1079 + int irqs; 1080 + 1081 + /* 1082 + * tell all command streamers NOT to forward interrupts or vblank 1083 + * to GuC. 1084 + */ 1085 + irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); 1086 + irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); 1087 + for_each_engine(engine, dev_priv, id) 1088 + I915_WRITE(RING_MODE_GEN7(engine), irqs); 1089 + 1090 + /* route all GT interrupts to the host */ 1091 + I915_WRITE(GUC_BCS_RCS_IER, 0); 1092 + I915_WRITE(GUC_VCS2_VCS1_IER, 0); 1093 + I915_WRITE(GUC_WD_VECS_IER, 0); 1094 + 1095 + dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 1096 + dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; 1097 + } 1098 + 1178 1099 int i915_guc_submission_enable(struct drm_i915_private *dev_priv) 1179 1100 { 1180 1101 struct intel_guc *guc = &dev_priv->guc; 1181 1102 struct i915_guc_client *client = guc->execbuf_client; 1182 1103 struct intel_engine_cs *engine; 1183 1104 enum intel_engine_id id; 1105 + int err; 1184 1106 1185 - if (!client) 1186 - return -ENODEV; 1107 + if (!client) { 1108 + client = guc_client_alloc(dev_priv, 1109 + INTEL_INFO(dev_priv)->ring_mask, 1110 + GUC_CLIENT_PRIORITY_KMD_NORMAL, 1111 + dev_priv->kernel_context); 1112 + if (IS_ERR(client)) { 1113 + DRM_ERROR("Failed to create GuC client for execbuf!\n"); 1114 + return PTR_ERR(client); 1115 + } 1187 1116 1188 - intel_guc_sample_forcewake(guc); 1117 + guc->execbuf_client = client; 1118 + } 1119 + 1120 + err = intel_guc_sample_forcewake(guc); 1121 + if (err) 1122 + goto err_execbuf_client; 1189 1123 1190 1124 guc_reset_wq(client); 1191 - guc_init_doorbell_hw(guc); 1125 + 1126 + err = guc_init_doorbell_hw(guc); 1127 + if (err) 1128 + goto err_execbuf_client; 1192 1129 1193 1130 /* Take over from manual control of ELSP (execlists) */ 1194 1131 guc_interrupts_capture(dev_priv); ··· 1255 1112 } 1256 1113 1257 1114 return 0; 1258 - } 1259 1115 1260 - static void guc_interrupts_release(struct 
drm_i915_private *dev_priv) 1261 - { 1262 - struct intel_engine_cs *engine; 1263 - enum intel_engine_id id; 1264 - int irqs; 1265 - 1266 - /* 1267 - * tell all command streamers NOT to forward interrupts or vblank 1268 - * to GuC. 1269 - */ 1270 - irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); 1271 - irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); 1272 - for_each_engine(engine, dev_priv, id) 1273 - I915_WRITE(RING_MODE_GEN7(engine), irqs); 1274 - 1275 - /* route all GT interrupts to the host */ 1276 - I915_WRITE(GUC_BCS_RCS_IER, 0); 1277 - I915_WRITE(GUC_VCS2_VCS1_IER, 0); 1278 - I915_WRITE(GUC_WD_VECS_IER, 0); 1279 - 1280 - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 1281 - dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; 1116 + err_execbuf_client: 1117 + guc_client_free(guc->execbuf_client); 1118 + guc->execbuf_client = NULL; 1119 + return err; 1282 1120 } 1283 1121 1284 1122 void i915_guc_submission_disable(struct drm_i915_private *dev_priv) ··· 1268 1144 1269 1145 guc_interrupts_release(dev_priv); 1270 1146 1271 - if (!guc->execbuf_client) 1272 - return; 1273 - 1274 1147 /* Revert back to manual ELSP submission */ 1275 1148 intel_engines_reset_default_submission(dev_priv); 1276 - } 1277 1149 1278 - void i915_guc_submission_fini(struct drm_i915_private *dev_priv) 1279 - { 1280 - struct intel_guc *guc = &dev_priv->guc; 1281 - struct i915_guc_client *client; 1282 - 1283 - client = fetch_and_zero(&guc->execbuf_client); 1284 - if (!client) 1285 - return; 1286 - 1287 - guc_client_free(dev_priv, client); 1288 - 1289 - i915_vma_unpin_and_release(&guc->ads_vma); 1290 - i915_vma_unpin_and_release(&guc->log.vma); 1291 - 1292 - if (guc->ctx_pool_vma) 1293 - ida_destroy(&guc->ctx_ids); 1294 - i915_vma_unpin_and_release(&guc->ctx_pool_vma); 1150 + guc_client_free(guc->execbuf_client); 1151 + guc->execbuf_client = NULL; 1295 1152 } 1296 1153 1297 1154 /** ··· 1301 1196 return intel_guc_send(guc, data, 
ARRAY_SIZE(data)); 1302 1197 } 1303 1198 1304 - 1305 1199 /** 1306 1200 * intel_guc_resume() - notify GuC resuming from suspend state 1307 1201 * @dev_priv: i915 device private ··· 1326 1222 1327 1223 return intel_guc_send(guc, data, ARRAY_SIZE(data)); 1328 1224 } 1329 - 1330 -
+2 -2
drivers/gpu/drm/i915/i915_irq.c
··· 1742 1742 I915_WRITE(SOFT_SCRATCH(15), msg & ~flush); 1743 1743 1744 1744 /* Handle flush interrupt in bottom half */ 1745 - queue_work(dev_priv->guc.log.flush_wq, 1746 - &dev_priv->guc.log.flush_work); 1745 + queue_work(dev_priv->guc.log.runtime.flush_wq, 1746 + &dev_priv->guc.log.runtime.flush_work); 1747 1747 1748 1748 dev_priv->guc.log.flush_interrupt_count++; 1749 1749 } else {
+5
drivers/gpu/drm/i915/i915_pci.c
··· 61 61 .has_overlay = 1, .overlay_needs_physical = 1, \ 62 62 .has_gmch_display = 1, \ 63 63 .hws_needs_physical = 1, \ 64 + .unfenced_needs_alignment = 1, \ 64 65 .ring_mask = RENDER_RING, \ 65 66 GEN_DEFAULT_PIPEOFFSETS, \ 66 67 CURSOR_OFFSETS ··· 103 102 .platform = INTEL_I915G, .cursor_needs_physical = 1, 104 103 .has_overlay = 1, .overlay_needs_physical = 1, 105 104 .hws_needs_physical = 1, 105 + .unfenced_needs_alignment = 1, 106 106 }; 107 107 108 108 static const struct intel_device_info intel_i915gm_info = { ··· 115 113 .supports_tv = 1, 116 114 .has_fbc = 1, 117 115 .hws_needs_physical = 1, 116 + .unfenced_needs_alignment = 1, 118 117 }; 119 118 120 119 static const struct intel_device_info intel_i945g_info = { ··· 124 121 .has_hotplug = 1, .cursor_needs_physical = 1, 125 122 .has_overlay = 1, .overlay_needs_physical = 1, 126 123 .hws_needs_physical = 1, 124 + .unfenced_needs_alignment = 1, 127 125 }; 128 126 129 127 static const struct intel_device_info intel_i945gm_info = { ··· 135 131 .supports_tv = 1, 136 132 .has_fbc = 1, 137 133 .hws_needs_physical = 1, 134 + .unfenced_needs_alignment = 1, 138 135 }; 139 136 140 137 static const struct intel_device_info intel_g33_info = {
+8 -3
drivers/gpu/drm/i915/i915_perf.c
··· 1705 1705 */ 1706 1706 if (WARN_ON(stream->sample_flags != props->sample_flags)) { 1707 1707 ret = -ENODEV; 1708 - goto err_alloc; 1708 + goto err_flags; 1709 1709 } 1710 1710 1711 1711 list_add(&stream->link, &dev_priv->perf.streams); ··· 1728 1728 1729 1729 err_open: 1730 1730 list_del(&stream->link); 1731 + err_flags: 1731 1732 if (stream->ops->destroy) 1732 1733 stream->ops->destroy(stream); 1733 1734 err_alloc: ··· 1793 1792 ret = get_user(value, uprop + 1); 1794 1793 if (ret) 1795 1794 return ret; 1795 + 1796 + if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { 1797 + DRM_DEBUG("Unknown i915 perf property ID\n"); 1798 + return -EINVAL; 1799 + } 1796 1800 1797 1801 switch ((enum drm_i915_perf_property_id)id) { 1798 1802 case DRM_I915_PERF_PROP_CTX_HANDLE: ··· 1868 1862 props->oa_periodic = true; 1869 1863 props->oa_period_exponent = value; 1870 1864 break; 1871 - default: 1865 + case DRM_I915_PERF_PROP_MAX: 1872 1866 MISSING_CASE(id); 1873 - DRM_DEBUG("Unknown i915 perf property ID\n"); 1874 1867 return -EINVAL; 1875 1868 } 1876 1869
+7
drivers/gpu/drm/i915/i915_reg.h
··· 7829 7829 #define TRANS_DDI_EDP_INPUT_B_ONOFF (5<<12) 7830 7830 #define TRANS_DDI_EDP_INPUT_C_ONOFF (6<<12) 7831 7831 #define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1<<8) 7832 + #define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1<<7) 7833 + #define TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ (1<<6) 7832 7834 #define TRANS_DDI_BFI_ENABLE (1<<4) 7835 + #define TRANS_DDI_HIGH_TMDS_CHAR_RATE (1<<4) 7836 + #define TRANS_DDI_HDMI_SCRAMBLING (1<<0) 7837 + #define TRANS_DDI_HDMI_SCRAMBLING_MASK (TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE \ 7838 + | TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ \ 7839 + | TRANS_DDI_HDMI_SCRAMBLING) 7833 7840 7834 7841 /* DisplayPort Transport Control */ 7835 7842 #define _DP_TP_CTL_A 0x64040
+18
drivers/gpu/drm/i915/i915_utils.h
··· 25 25 #ifndef __I915_UTILS_H 26 26 #define __I915_UTILS_H 27 27 28 + #undef WARN_ON 29 + /* Many gcc seem to no see through this and fall over :( */ 30 + #if 0 31 + #define WARN_ON(x) ({ \ 32 + bool __i915_warn_cond = (x); \ 33 + if (__builtin_constant_p(__i915_warn_cond)) \ 34 + BUILD_BUG_ON(__i915_warn_cond); \ 35 + WARN(__i915_warn_cond, "WARN_ON(" #x ")"); }) 36 + #else 37 + #define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")") 38 + #endif 39 + 40 + #undef WARN_ON_ONCE 41 + #define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")") 42 + 43 + #define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ 44 + (long)(x), __func__) 45 + 28 46 #if GCC_VERSION >= 70000 29 47 #define add_overflows(A, B) \ 30 48 __builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0)
+3 -2
drivers/gpu/drm/i915/intel_breadcrumbs.c
··· 47 47 unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) 48 48 { 49 49 struct intel_breadcrumbs *b = &engine->breadcrumbs; 50 + unsigned long flags; 50 51 unsigned int result; 51 52 52 - spin_lock_irq(&b->irq_lock); 53 + spin_lock_irqsave(&b->irq_lock, flags); 53 54 result = __intel_breadcrumbs_wakeup(b); 54 - spin_unlock_irq(&b->irq_lock); 55 + spin_unlock_irqrestore(&b->irq_lock, flags); 55 56 56 57 return result; 57 58 }
+23 -6
drivers/gpu/drm/i915/intel_cdclk.c
··· 1442 1442 if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) 1443 1443 pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); 1444 1444 1445 - /* BSpec says "Do not use DisplayPort with CDCLK less than 1446 - * 432 MHz, audio enabled, port width x4, and link rate 1447 - * HBR2 (5.4 GHz), or else there may be audio corruption or 1448 - * screen corruption." 1445 + /* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz, 1446 + * audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else 1447 + * there may be audio corruption or screen corruption." This cdclk 1448 + * restriction for GLK is 316.8 MHz and since GLK can output two 1449 + * pixels per clock, the pixel rate becomes 2 * 316.8 MHz. 1449 1450 */ 1450 1451 if (intel_crtc_has_dp_encoder(crtc_state) && 1451 1452 crtc_state->has_audio && 1452 1453 crtc_state->port_clock >= 540000 && 1453 - crtc_state->lane_count == 4) 1454 - pixel_rate = max(432000, pixel_rate); 1454 + crtc_state->lane_count == 4) { 1455 + if (IS_GEMINILAKE(dev_priv)) 1456 + pixel_rate = max(2 * 316800, pixel_rate); 1457 + else 1458 + pixel_rate = max(432000, pixel_rate); 1459 + } 1460 + 1461 + /* According to BSpec, "The CD clock frequency must be at least twice 1462 + * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. 1463 + * The check for GLK has to be adjusted as the platform can output 1464 + * two pixels per clock. 1465 + */ 1466 + if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) { 1467 + if (IS_GEMINILAKE(dev_priv)) 1468 + pixel_rate = max(2 * 2 * 96000, pixel_rate); 1469 + else 1470 + pixel_rate = max(2 * 96000, pixel_rate); 1471 + } 1455 1472 1456 1473 return pixel_rate; 1457 1474 }
+1 -1
drivers/gpu/drm/i915/intel_csr.c
··· 49 49 MODULE_FIRMWARE(I915_CSR_BXT); 50 50 #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) 51 51 52 - #define FIRMWARE_URL "https://01.org/linuxgraphics/intel-linux-graphics-firmwares" 52 + #define FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" 53 53 54 54 55 55
+29 -71
drivers/gpu/drm/i915/intel_ddi.c
··· 539 539 * values in advance. This function programs the correct values for 540 540 * DP/eDP/FDI use cases. 541 541 */ 542 - void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) 542 + static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) 543 543 { 544 544 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 545 545 u32 iboost_bit = 0; ··· 806 806 DP_TP_CTL_ENABLE); 807 807 } 808 808 809 - void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) 809 + static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) 810 810 { 811 811 struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); 812 812 struct intel_digital_port *intel_dig_port = ··· 837 837 return ret; 838 838 } 839 839 840 - static struct intel_encoder * 840 + /* Finds the only possible encoder associated with the given CRTC. */ 841 + struct intel_encoder * 841 842 intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state) 842 843 { 843 844 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); ··· 1128 1127 bxt_ddi_clock_get(encoder, pipe_config); 1129 1128 } 1130 1129 1131 - static bool 1132 - hsw_ddi_pll_select(struct intel_crtc *intel_crtc, 1133 - struct intel_crtc_state *crtc_state, 1134 - struct intel_encoder *encoder) 1135 - { 1136 - struct intel_shared_dpll *pll; 1137 - 1138 - pll = intel_get_shared_dpll(intel_crtc, crtc_state, 1139 - encoder); 1140 - if (!pll) 1141 - DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", 1142 - pipe_name(intel_crtc->pipe)); 1143 - 1144 - return pll; 1145 - } 1146 - 1147 - static bool 1148 - skl_ddi_pll_select(struct intel_crtc *intel_crtc, 1149 - struct intel_crtc_state *crtc_state, 1150 - struct intel_encoder *encoder) 1151 - { 1152 - struct intel_shared_dpll *pll; 1153 - 1154 - pll = intel_get_shared_dpll(intel_crtc, crtc_state, encoder); 1155 - if (pll == NULL) { 1156 - DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", 1157 - pipe_name(intel_crtc->pipe)); 1158 - return false; 1159 - } 
1160 - 1161 - return true; 1162 - } 1163 - 1164 - static bool 1165 - bxt_ddi_pll_select(struct intel_crtc *intel_crtc, 1166 - struct intel_crtc_state *crtc_state, 1167 - struct intel_encoder *encoder) 1168 - { 1169 - return !!intel_get_shared_dpll(intel_crtc, crtc_state, encoder); 1170 - } 1171 - 1172 - /* 1173 - * Tries to find a *shared* PLL for the CRTC and store it in 1174 - * intel_crtc->ddi_pll_sel. 1175 - * 1176 - * For private DPLLs, compute_config() should do the selection for us. This 1177 - * function should be folded into compute_config() eventually. 1178 - */ 1179 - bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, 1180 - struct intel_crtc_state *crtc_state) 1181 - { 1182 - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); 1183 - struct intel_encoder *encoder = 1184 - intel_ddi_get_crtc_new_encoder(crtc_state); 1185 - 1186 - if (IS_GEN9_BC(dev_priv)) 1187 - return skl_ddi_pll_select(intel_crtc, crtc_state, 1188 - encoder); 1189 - else if (IS_GEN9_LP(dev_priv)) 1190 - return bxt_ddi_pll_select(intel_crtc, crtc_state, 1191 - encoder); 1192 - else 1193 - return hsw_ddi_pll_select(intel_crtc, crtc_state, 1194 - encoder); 1195 - } 1196 - 1197 1130 void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) 1198 1131 { 1199 1132 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); ··· 1244 1309 temp |= TRANS_DDI_MODE_SELECT_HDMI; 1245 1310 else 1246 1311 temp |= TRANS_DDI_MODE_SELECT_DVI; 1312 + 1313 + if (crtc_state->hdmi_scrambling) 1314 + temp |= TRANS_DDI_HDMI_SCRAMBLING_MASK; 1315 + if (crtc_state->hdmi_high_tmds_clock_ratio) 1316 + temp |= TRANS_DDI_HIGH_TMDS_CHAR_RATE; 1247 1317 } else if (type == INTEL_OUTPUT_ANALOG) { 1248 1318 temp |= TRANS_DDI_MODE_SELECT_FDI; 1249 1319 temp |= (crtc_state->fdi_lanes - 1) << 1; ··· 1616 1676 return DDI_BUF_TRANS_SELECT(level); 1617 1677 } 1618 1678 1619 - void intel_ddi_clk_select(struct intel_encoder *encoder, 1620 - struct intel_shared_dpll *pll) 1679 + static 
void intel_ddi_clk_select(struct intel_encoder *encoder, 1680 + struct intel_shared_dpll *pll) 1621 1681 { 1622 1682 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 1623 1683 enum port port = intel_ddi_get_encoder_port(encoder); ··· 1821 1881 if (type == INTEL_OUTPUT_HDMI) { 1822 1882 struct intel_digital_port *intel_dig_port = 1823 1883 enc_to_dig_port(encoder); 1884 + bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio; 1885 + bool scrambling = pipe_config->hdmi_scrambling; 1886 + 1887 + intel_hdmi_handle_sink_scrambling(intel_encoder, 1888 + conn_state->connector, 1889 + clock_ratio, scrambling); 1824 1890 1825 1891 /* In HDMI/DVI mode, the port width, and swing/emphasis values 1826 1892 * are ignored so nothing special needs to be done besides ··· 1859 1913 1860 1914 if (old_crtc_state->has_audio) 1861 1915 intel_audio_codec_disable(intel_encoder); 1916 + 1917 + if (type == INTEL_OUTPUT_HDMI) { 1918 + intel_hdmi_handle_sink_scrambling(intel_encoder, 1919 + old_conn_state->connector, 1920 + false, false); 1921 + } 1862 1922 1863 1923 if (type == INTEL_OUTPUT_EDP) { 1864 1924 struct intel_dp *intel_dp = enc_to_intel_dp(encoder); ··· 1992 2040 1993 2041 if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config)) 1994 2042 pipe_config->has_infoframe = true; 2043 + 2044 + if ((temp & TRANS_DDI_HDMI_SCRAMBLING_MASK) == 2045 + TRANS_DDI_HDMI_SCRAMBLING_MASK) 2046 + pipe_config->hdmi_scrambling = true; 2047 + if (temp & TRANS_DDI_HIGH_TMDS_CHAR_RATE) 2048 + pipe_config->hdmi_high_tmds_clock_ratio = true; 1995 2049 /* fall through */ 1996 2050 case TRANS_DDI_MODE_SELECT_DVI: 1997 2051 pipe_config->lane_count = 4;
+234 -211
drivers/gpu/drm/i915/intel_display.c
··· 1997 1997 unsigned int cpp = fb->format->cpp[plane]; 1998 1998 1999 1999 switch (fb->modifier) { 2000 - case DRM_FORMAT_MOD_NONE: 2000 + case DRM_FORMAT_MOD_LINEAR: 2001 2001 return cpp; 2002 2002 case I915_FORMAT_MOD_X_TILED: 2003 2003 if (IS_GEN2(dev_priv)) ··· 2033 2033 static unsigned int 2034 2034 intel_tile_height(const struct drm_framebuffer *fb, int plane) 2035 2035 { 2036 - if (fb->modifier == DRM_FORMAT_MOD_NONE) 2036 + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) 2037 2037 return 1; 2038 2038 else 2039 2039 return intel_tile_size(to_i915(fb->dev)) / ··· 2107 2107 return 4096; 2108 2108 2109 2109 switch (fb->modifier) { 2110 - case DRM_FORMAT_MOD_NONE: 2110 + case DRM_FORMAT_MOD_LINEAR: 2111 2111 return intel_linear_alignment(dev_priv); 2112 2112 case I915_FORMAT_MOD_X_TILED: 2113 2113 if (INTEL_GEN(dev_priv) >= 9) ··· 2290 2290 2291 2291 WARN_ON(new_offset > old_offset); 2292 2292 2293 - if (fb->modifier != DRM_FORMAT_MOD_NONE) { 2293 + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { 2294 2294 unsigned int tile_size, tile_width, tile_height; 2295 2295 unsigned int pitch_tiles; 2296 2296 ··· 2345 2345 if (alignment) 2346 2346 alignment--; 2347 2347 2348 - if (fb_modifier != DRM_FORMAT_MOD_NONE) { 2348 + if (fb_modifier != DRM_FORMAT_MOD_LINEAR) { 2349 2349 unsigned int tile_size, tile_width, tile_height; 2350 2350 unsigned int tile_rows, tiles, pitch_tiles; 2351 2351 ··· 2471 2471 DRM_ROTATE_0, tile_size); 2472 2472 offset /= tile_size; 2473 2473 2474 - if (fb->modifier != DRM_FORMAT_MOD_NONE) { 2474 + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { 2475 2475 unsigned int tile_width, tile_height; 2476 2476 unsigned int pitch_tiles; 2477 2477 struct drm_rect r; ··· 2803 2803 int cpp = fb->format->cpp[plane]; 2804 2804 2805 2805 switch (fb->modifier) { 2806 - case DRM_FORMAT_MOD_NONE: 2806 + case DRM_FORMAT_MOD_LINEAR: 2807 2807 case I915_FORMAT_MOD_X_TILED: 2808 2808 switch (cpp) { 2809 2809 case 8: ··· 2962 2962 return 0; 2963 2963 } 2964 2964 2965 - static 
void i9xx_update_primary_plane(struct drm_plane *primary, 2966 - const struct intel_crtc_state *crtc_state, 2967 - const struct intel_plane_state *plane_state) 2965 + static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, 2966 + const struct intel_plane_state *plane_state) 2968 2967 { 2969 - struct drm_i915_private *dev_priv = to_i915(primary->dev); 2970 - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); 2971 - struct drm_framebuffer *fb = plane_state->base.fb; 2972 - int plane = intel_crtc->plane; 2973 - u32 linear_offset; 2974 - u32 dspcntr; 2975 - i915_reg_t reg = DSPCNTR(plane); 2968 + struct drm_i915_private *dev_priv = 2969 + to_i915(plane_state->base.plane->dev); 2970 + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); 2971 + const struct drm_framebuffer *fb = plane_state->base.fb; 2976 2972 unsigned int rotation = plane_state->base.rotation; 2977 - int x = plane_state->base.src.x1 >> 16; 2978 - int y = plane_state->base.src.y1 >> 16; 2979 - unsigned long irqflags; 2973 + u32 dspcntr; 2980 2974 2981 - dspcntr = DISPPLANE_GAMMA_ENABLE; 2975 + dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE; 2982 2976 2983 - dspcntr |= DISPLAY_PLANE_ENABLE; 2977 + if (IS_G4X(dev_priv) || IS_GEN5(dev_priv) || 2978 + IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) 2979 + dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; 2980 + 2981 + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 2982 + dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; 2984 2983 2985 2984 if (INTEL_GEN(dev_priv) < 4) { 2986 - if (intel_crtc->pipe == PIPE_B) 2985 + if (crtc->pipe == PIPE_B) 2987 2986 dspcntr |= DISPPLANE_SEL_PIPE_B; 2988 2987 } 2989 2988 ··· 3009 3010 dspcntr |= DISPPLANE_RGBX101010; 3010 3011 break; 3011 3012 default: 3012 - BUG(); 3013 + MISSING_CASE(fb->format->format); 3014 + return 0; 3013 3015 } 3014 3016 3015 3017 if (INTEL_GEN(dev_priv) >= 4 && ··· 3023 3023 if (rotation & DRM_REFLECT_X) 3024 3024 dspcntr |= DISPPLANE_MIRROR; 3025 3025 3026 - if 
(IS_G4X(dev_priv)) 3027 - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; 3026 + return dspcntr; 3027 + } 3028 3028 3029 - intel_add_fb_offsets(&x, &y, plane_state, 0); 3029 + int i9xx_check_plane_surface(struct intel_plane_state *plane_state) 3030 + { 3031 + struct drm_i915_private *dev_priv = 3032 + to_i915(plane_state->base.plane->dev); 3033 + int src_x = plane_state->base.src.x1 >> 16; 3034 + int src_y = plane_state->base.src.y1 >> 16; 3035 + u32 offset; 3036 + 3037 + intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); 3030 3038 3031 3039 if (INTEL_GEN(dev_priv) >= 4) 3032 - intel_crtc->dspaddr_offset = 3033 - intel_compute_tile_offset(&x, &y, plane_state, 0); 3040 + offset = intel_compute_tile_offset(&src_x, &src_y, 3041 + plane_state, 0); 3042 + else 3043 + offset = 0; 3034 3044 3035 - if (rotation & DRM_ROTATE_180) { 3036 - x += crtc_state->pipe_src_w - 1; 3037 - y += crtc_state->pipe_src_h - 1; 3038 - } else if (rotation & DRM_REFLECT_X) { 3039 - x += crtc_state->pipe_src_w - 1; 3045 + /* HSW/BDW do this automagically in hardware */ 3046 + if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { 3047 + unsigned int rotation = plane_state->base.rotation; 3048 + int src_w = drm_rect_width(&plane_state->base.src) >> 16; 3049 + int src_h = drm_rect_height(&plane_state->base.src) >> 16; 3050 + 3051 + if (rotation & DRM_ROTATE_180) { 3052 + src_x += src_w - 1; 3053 + src_y += src_h - 1; 3054 + } else if (rotation & DRM_REFLECT_X) { 3055 + src_x += src_w - 1; 3056 + } 3040 3057 } 3058 + 3059 + plane_state->main.offset = offset; 3060 + plane_state->main.x = src_x; 3061 + plane_state->main.y = src_y; 3062 + 3063 + return 0; 3064 + } 3065 + 3066 + static void i9xx_update_primary_plane(struct drm_plane *primary, 3067 + const struct intel_crtc_state *crtc_state, 3068 + const struct intel_plane_state *plane_state) 3069 + { 3070 + struct drm_i915_private *dev_priv = to_i915(primary->dev); 3071 + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); 3072 + 
struct drm_framebuffer *fb = plane_state->base.fb; 3073 + int plane = intel_crtc->plane; 3074 + u32 linear_offset; 3075 + u32 dspcntr = plane_state->ctl; 3076 + i915_reg_t reg = DSPCNTR(plane); 3077 + int x = plane_state->main.x; 3078 + int y = plane_state->main.y; 3079 + unsigned long irqflags; 3041 3080 3042 3081 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); 3043 3082 3044 - if (INTEL_GEN(dev_priv) < 4) 3083 + if (INTEL_GEN(dev_priv) >= 4) 3084 + intel_crtc->dspaddr_offset = plane_state->main.offset; 3085 + else 3045 3086 intel_crtc->dspaddr_offset = linear_offset; 3046 3087 3047 3088 intel_crtc->adjusted_x = x; ··· 3109 3068 I915_WRITE_FW(reg, dspcntr); 3110 3069 3111 3070 I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]); 3112 - if (INTEL_GEN(dev_priv) >= 4) { 3071 + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 3072 + I915_WRITE_FW(DSPSURF(plane), 3073 + intel_plane_ggtt_offset(plane_state) + 3074 + intel_crtc->dspaddr_offset); 3075 + I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); 3076 + } else if (INTEL_GEN(dev_priv) >= 4) { 3113 3077 I915_WRITE_FW(DSPSURF(plane), 3114 3078 intel_plane_ggtt_offset(plane_state) + 3115 3079 intel_crtc->dspaddr_offset); ··· 3151 3105 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); 3152 3106 } 3153 3107 3154 - static void ironlake_update_primary_plane(struct drm_plane *primary, 3155 - const struct intel_crtc_state *crtc_state, 3156 - const struct intel_plane_state *plane_state) 3157 - { 3158 - struct drm_device *dev = primary->dev; 3159 - struct drm_i915_private *dev_priv = to_i915(dev); 3160 - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); 3161 - struct drm_framebuffer *fb = plane_state->base.fb; 3162 - int plane = intel_crtc->plane; 3163 - u32 linear_offset; 3164 - u32 dspcntr; 3165 - i915_reg_t reg = DSPCNTR(plane); 3166 - unsigned int rotation = plane_state->base.rotation; 3167 - int x = plane_state->base.src.x1 >> 16; 3168 - int y = plane_state->base.src.y1 >> 16; 3169 - 
unsigned long irqflags; 3170 - 3171 - dspcntr = DISPPLANE_GAMMA_ENABLE; 3172 - dspcntr |= DISPLAY_PLANE_ENABLE; 3173 - 3174 - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 3175 - dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; 3176 - 3177 - switch (fb->format->format) { 3178 - case DRM_FORMAT_C8: 3179 - dspcntr |= DISPPLANE_8BPP; 3180 - break; 3181 - case DRM_FORMAT_RGB565: 3182 - dspcntr |= DISPPLANE_BGRX565; 3183 - break; 3184 - case DRM_FORMAT_XRGB8888: 3185 - dspcntr |= DISPPLANE_BGRX888; 3186 - break; 3187 - case DRM_FORMAT_XBGR8888: 3188 - dspcntr |= DISPPLANE_RGBX888; 3189 - break; 3190 - case DRM_FORMAT_XRGB2101010: 3191 - dspcntr |= DISPPLANE_BGRX101010; 3192 - break; 3193 - case DRM_FORMAT_XBGR2101010: 3194 - dspcntr |= DISPPLANE_RGBX101010; 3195 - break; 3196 - default: 3197 - BUG(); 3198 - } 3199 - 3200 - if (fb->modifier == I915_FORMAT_MOD_X_TILED) 3201 - dspcntr |= DISPPLANE_TILED; 3202 - 3203 - if (rotation & DRM_ROTATE_180) 3204 - dspcntr |= DISPPLANE_ROTATE_180; 3205 - 3206 - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) 3207 - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; 3208 - 3209 - intel_add_fb_offsets(&x, &y, plane_state, 0); 3210 - 3211 - intel_crtc->dspaddr_offset = 3212 - intel_compute_tile_offset(&x, &y, plane_state, 0); 3213 - 3214 - /* HSW+ does this automagically in hardware */ 3215 - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) && 3216 - rotation & DRM_ROTATE_180) { 3217 - x += crtc_state->pipe_src_w - 1; 3218 - y += crtc_state->pipe_src_h - 1; 3219 - } 3220 - 3221 - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); 3222 - 3223 - intel_crtc->adjusted_x = x; 3224 - intel_crtc->adjusted_y = y; 3225 - 3226 - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 3227 - 3228 - I915_WRITE_FW(reg, dspcntr); 3229 - 3230 - I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]); 3231 - I915_WRITE_FW(DSPSURF(plane), 3232 - intel_plane_ggtt_offset(plane_state) + 3233 - intel_crtc->dspaddr_offset); 3234 - if (IS_HASWELL(dev_priv) || 
IS_BROADWELL(dev_priv)) { 3235 - I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); 3236 - } else { 3237 - I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); 3238 - I915_WRITE_FW(DSPLINOFF(plane), linear_offset); 3239 - } 3240 - POSTING_READ_FW(reg); 3241 - 3242 - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); 3243 - } 3244 - 3245 3108 static u32 3246 3109 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) 3247 3110 { 3248 - if (fb->modifier == DRM_FORMAT_MOD_NONE) 3111 + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) 3249 3112 return 64; 3250 3113 else 3251 3114 return intel_tile_width_bytes(fb, plane); ··· 3209 3254 return stride; 3210 3255 } 3211 3256 3212 - u32 skl_plane_ctl_format(uint32_t pixel_format) 3257 + static u32 skl_plane_ctl_format(uint32_t pixel_format) 3213 3258 { 3214 3259 switch (pixel_format) { 3215 3260 case DRM_FORMAT_C8: ··· 3250 3295 return 0; 3251 3296 } 3252 3297 3253 - u32 skl_plane_ctl_tiling(uint64_t fb_modifier) 3298 + static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) 3254 3299 { 3255 3300 switch (fb_modifier) { 3256 - case DRM_FORMAT_MOD_NONE: 3301 + case DRM_FORMAT_MOD_LINEAR: 3257 3302 break; 3258 3303 case I915_FORMAT_MOD_X_TILED: 3259 3304 return PLANE_CTL_TILED_X; ··· 3268 3313 return 0; 3269 3314 } 3270 3315 3271 - u32 skl_plane_ctl_rotation(unsigned int rotation) 3316 + static u32 skl_plane_ctl_rotation(unsigned int rotation) 3272 3317 { 3273 3318 switch (rotation) { 3274 3319 case DRM_ROTATE_0: ··· 3290 3335 return 0; 3291 3336 } 3292 3337 3293 - static void skylake_update_primary_plane(struct drm_plane *plane, 3294 - const struct intel_crtc_state *crtc_state, 3295 - const struct intel_plane_state *plane_state) 3338 + u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, 3339 + const struct intel_plane_state *plane_state) 3296 3340 { 3297 - struct drm_device *dev = plane->dev; 3298 - struct drm_i915_private *dev_priv = to_i915(dev); 3299 - struct intel_crtc *intel_crtc = 
to_intel_crtc(crtc_state->base.crtc); 3300 - struct drm_framebuffer *fb = plane_state->base.fb; 3301 - enum plane_id plane_id = to_intel_plane(plane)->id; 3302 - enum pipe pipe = to_intel_plane(plane)->pipe; 3303 - u32 plane_ctl; 3341 + struct drm_i915_private *dev_priv = 3342 + to_i915(plane_state->base.plane->dev); 3343 + const struct drm_framebuffer *fb = plane_state->base.fb; 3304 3344 unsigned int rotation = plane_state->base.rotation; 3305 - u32 stride = skl_plane_stride(fb, 0, rotation); 3306 - u32 surf_addr = plane_state->main.offset; 3307 - int scaler_id = plane_state->scaler_id; 3308 - int src_x = plane_state->main.x; 3309 - int src_y = plane_state->main.y; 3310 - int src_w = drm_rect_width(&plane_state->base.src) >> 16; 3311 - int src_h = drm_rect_height(&plane_state->base.src) >> 16; 3312 - int dst_x = plane_state->base.dst.x1; 3313 - int dst_y = plane_state->base.dst.y1; 3314 - int dst_w = drm_rect_width(&plane_state->base.dst); 3315 - int dst_h = drm_rect_height(&plane_state->base.dst); 3316 - unsigned long irqflags; 3345 + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 3346 + u32 plane_ctl; 3317 3347 3318 3348 plane_ctl = PLANE_CTL_ENABLE; 3319 3349 ··· 3312 3372 plane_ctl |= skl_plane_ctl_format(fb->format->format); 3313 3373 plane_ctl |= skl_plane_ctl_tiling(fb->modifier); 3314 3374 plane_ctl |= skl_plane_ctl_rotation(rotation); 3375 + 3376 + if (key->flags & I915_SET_COLORKEY_DESTINATION) 3377 + plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION; 3378 + else if (key->flags & I915_SET_COLORKEY_SOURCE) 3379 + plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; 3380 + 3381 + return plane_ctl; 3382 + } 3383 + 3384 + static void skylake_update_primary_plane(struct drm_plane *plane, 3385 + const struct intel_crtc_state *crtc_state, 3386 + const struct intel_plane_state *plane_state) 3387 + { 3388 + struct drm_device *dev = plane->dev; 3389 + struct drm_i915_private *dev_priv = to_i915(dev); 3390 + struct intel_crtc *intel_crtc = 
to_intel_crtc(crtc_state->base.crtc); 3391 + struct drm_framebuffer *fb = plane_state->base.fb; 3392 + enum plane_id plane_id = to_intel_plane(plane)->id; 3393 + enum pipe pipe = to_intel_plane(plane)->pipe; 3394 + u32 plane_ctl = plane_state->ctl; 3395 + unsigned int rotation = plane_state->base.rotation; 3396 + u32 stride = skl_plane_stride(fb, 0, rotation); 3397 + u32 surf_addr = plane_state->main.offset; 3398 + int scaler_id = plane_state->scaler_id; 3399 + int src_x = plane_state->main.x; 3400 + int src_y = plane_state->main.y; 3401 + int src_w = drm_rect_width(&plane_state->base.src) >> 16; 3402 + int src_h = drm_rect_height(&plane_state->base.src) >> 16; 3403 + int dst_x = plane_state->base.dst.x1; 3404 + int dst_y = plane_state->base.dst.y1; 3405 + int dst_w = drm_rect_width(&plane_state->base.dst); 3406 + int dst_h = drm_rect_height(&plane_state->base.dst); 3407 + unsigned long irqflags; 3315 3408 3316 3409 /* Sizes are 0 based */ 3317 3410 src_w--; ··· 6290 6317 static void compute_m_n(unsigned int m, unsigned int n, 6291 6318 uint32_t *ret_m, uint32_t *ret_n) 6292 6319 { 6320 + /* 6321 + * Reduce M/N as much as possible without loss in precision. Several DP 6322 + * dongles in particular seem to be fussy about too large *link* M/N 6323 + * values. The passed in values are more likely to have the least 6324 + * significant bits zero than M after rounding below, so do this first. 
6325 + */ 6326 + while ((m & 1) == 0 && (n & 1) == 0) { 6327 + m >>= 1; 6328 + n >>= 1; 6329 + } 6330 + 6293 6331 *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); 6294 6332 *ret_m = div_u64((uint64_t) m * *ret_n, n); 6295 6333 intel_reduce_m_n_ratio(ret_m, ret_n); ··· 8390 8406 tiling = val & PLANE_CTL_TILED_MASK; 8391 8407 switch (tiling) { 8392 8408 case PLANE_CTL_TILED_LINEAR: 8393 - fb->modifier = DRM_FORMAT_MOD_NONE; 8409 + fb->modifier = DRM_FORMAT_MOD_LINEAR; 8394 8410 break; 8395 8411 case PLANE_CTL_TILED_X: 8396 8412 plane_config->tiling = I915_TILING_X; ··· 8846 8862 struct intel_crtc_state *crtc_state) 8847 8863 { 8848 8864 if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) { 8849 - if (!intel_ddi_pll_select(crtc, crtc_state)) 8865 + struct intel_encoder *encoder = 8866 + intel_ddi_get_crtc_new_encoder(crtc_state); 8867 + 8868 + if (!intel_get_shared_dpll(crtc, crtc_state, encoder)) { 8869 + DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", 8870 + pipe_name(crtc->pipe)); 8850 8871 return -EINVAL; 8872 + } 8851 8873 } 8852 8874 8853 8875 crtc->lowfreq_avail = false; ··· 9149 9159 return active; 9150 9160 } 9151 9161 9162 + static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, 9163 + const struct intel_plane_state *plane_state) 9164 + { 9165 + unsigned int width = plane_state->base.crtc_w; 9166 + unsigned int stride = roundup_pow_of_two(width) * 4; 9167 + 9168 + switch (stride) { 9169 + default: 9170 + WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", 9171 + width, stride); 9172 + stride = 256; 9173 + /* fallthrough */ 9174 + case 256: 9175 + case 512: 9176 + case 1024: 9177 + case 2048: 9178 + break; 9179 + } 9180 + 9181 + return CURSOR_ENABLE | 9182 + CURSOR_GAMMA_ENABLE | 9183 + CURSOR_FORMAT_ARGB | 9184 + CURSOR_STRIDE(stride); 9185 + } 9186 + 9152 9187 static void i845_update_cursor(struct drm_crtc *crtc, u32 base, 9153 9188 const struct intel_plane_state *plane_state) 9154 9189 { ··· 9185 
9170 if (plane_state && plane_state->base.visible) { 9186 9171 unsigned int width = plane_state->base.crtc_w; 9187 9172 unsigned int height = plane_state->base.crtc_h; 9188 - unsigned int stride = roundup_pow_of_two(width) * 4; 9189 9173 9190 - switch (stride) { 9191 - default: 9192 - WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", 9193 - width, stride); 9194 - stride = 256; 9195 - /* fallthrough */ 9196 - case 256: 9197 - case 512: 9198 - case 1024: 9199 - case 2048: 9200 - break; 9201 - } 9202 - 9203 - cntl |= CURSOR_ENABLE | 9204 - CURSOR_GAMMA_ENABLE | 9205 - CURSOR_FORMAT_ARGB | 9206 - CURSOR_STRIDE(stride); 9207 - 9174 + cntl = plane_state->ctl; 9208 9175 size = (height << 12) | width; 9209 9176 } 9210 9177 ··· 9219 9222 } 9220 9223 } 9221 9224 9225 + static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, 9226 + const struct intel_plane_state *plane_state) 9227 + { 9228 + struct drm_i915_private *dev_priv = 9229 + to_i915(plane_state->base.plane->dev); 9230 + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); 9231 + enum pipe pipe = crtc->pipe; 9232 + u32 cntl; 9233 + 9234 + cntl = MCURSOR_GAMMA_ENABLE; 9235 + 9236 + if (HAS_DDI(dev_priv)) 9237 + cntl |= CURSOR_PIPE_CSC_ENABLE; 9238 + 9239 + cntl |= pipe << 28; /* Connect to correct pipe */ 9240 + 9241 + switch (plane_state->base.crtc_w) { 9242 + case 64: 9243 + cntl |= CURSOR_MODE_64_ARGB_AX; 9244 + break; 9245 + case 128: 9246 + cntl |= CURSOR_MODE_128_ARGB_AX; 9247 + break; 9248 + case 256: 9249 + cntl |= CURSOR_MODE_256_ARGB_AX; 9250 + break; 9251 + default: 9252 + MISSING_CASE(plane_state->base.crtc_w); 9253 + return 0; 9254 + } 9255 + 9256 + if (plane_state->base.rotation & DRM_ROTATE_180) 9257 + cntl |= CURSOR_ROTATE_180; 9258 + 9259 + return cntl; 9260 + } 9261 + 9222 9262 static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, 9223 9263 const struct intel_plane_state *plane_state) 9224 9264 { ··· 9265 9231 int pipe = intel_crtc->pipe; 9266 9232 
uint32_t cntl = 0; 9267 9233 9268 - if (plane_state && plane_state->base.visible) { 9269 - cntl = MCURSOR_GAMMA_ENABLE; 9270 - switch (plane_state->base.crtc_w) { 9271 - case 64: 9272 - cntl |= CURSOR_MODE_64_ARGB_AX; 9273 - break; 9274 - case 128: 9275 - cntl |= CURSOR_MODE_128_ARGB_AX; 9276 - break; 9277 - case 256: 9278 - cntl |= CURSOR_MODE_256_ARGB_AX; 9279 - break; 9280 - default: 9281 - MISSING_CASE(plane_state->base.crtc_w); 9282 - return; 9283 - } 9284 - cntl |= pipe << 28; /* Connect to correct pipe */ 9285 - 9286 - if (HAS_DDI(dev_priv)) 9287 - cntl |= CURSOR_PIPE_CSC_ENABLE; 9288 - 9289 - if (plane_state->base.rotation & DRM_ROTATE_180) 9290 - cntl |= CURSOR_ROTATE_180; 9291 - } 9234 + if (plane_state && plane_state->base.visible) 9235 + cntl = plane_state->ctl; 9292 9236 9293 9237 if (intel_crtc->cursor_cntl != cntl) { 9294 9238 I915_WRITE_FW(CURCNTR(pipe), cntl); ··· 10366 10354 ctl = I915_READ(PLANE_CTL(pipe, 0)); 10367 10355 ctl &= ~PLANE_CTL_TILED_MASK; 10368 10356 switch (fb->modifier) { 10369 - case DRM_FORMAT_MOD_NONE: 10357 + case DRM_FORMAT_MOD_LINEAR: 10370 10358 break; 10371 10359 case I915_FORMAT_MOD_X_TILED: 10372 10360 ctl |= PLANE_CTL_TILED_X; ··· 11721 11709 if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || 11722 11710 IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 11723 11711 PIPE_CONF_CHECK_I(limited_color_range); 11712 + 11713 + PIPE_CONF_CHECK_I(hdmi_scrambling); 11714 + PIPE_CONF_CHECK_I(hdmi_high_tmds_clock_ratio); 11724 11715 PIPE_CONF_CHECK_I(has_infoframe); 11725 11716 11726 11717 PIPE_CONF_CHECK_I(has_audio); ··· 13359 13344 ret = skl_check_plane_surface(state); 13360 13345 if (ret) 13361 13346 return ret; 13347 + 13348 + state->ctl = skl_plane_ctl(crtc_state, state); 13349 + } else { 13350 + ret = i9xx_check_plane_surface(state); 13351 + if (ret) 13352 + return ret; 13353 + 13354 + state->ctl = i9xx_plane_ctl(crtc_state, state); 13362 13355 } 13363 13356 13364 13357 return 0; ··· 13626 13603 13627 13604 
primary->update_plane = skylake_update_primary_plane; 13628 13605 primary->disable_plane = skylake_disable_primary_plane; 13629 - } else if (HAS_PCH_SPLIT(dev_priv)) { 13630 - intel_primary_formats = i965_primary_formats; 13631 - num_formats = ARRAY_SIZE(i965_primary_formats); 13632 - 13633 - primary->update_plane = ironlake_update_primary_plane; 13634 - primary->disable_plane = i9xx_disable_primary_plane; 13635 13606 } else if (INTEL_GEN(dev_priv) >= 4) { 13636 13607 intel_primary_formats = i965_primary_formats; 13637 13608 num_formats = ARRAY_SIZE(i965_primary_formats); ··· 13697 13680 struct intel_crtc_state *crtc_state, 13698 13681 struct intel_plane_state *state) 13699 13682 { 13683 + struct drm_i915_private *dev_priv = to_i915(plane->dev); 13700 13684 struct drm_framebuffer *fb = state->base.fb; 13701 13685 struct drm_i915_gem_object *obj = intel_fb_obj(fb); 13702 13686 enum pipe pipe = to_intel_plane(plane)->pipe; ··· 13717 13699 return 0; 13718 13700 13719 13701 /* Check for which cursor types we support */ 13720 - if (!cursor_size_ok(to_i915(plane->dev), state->base.crtc_w, 13702 + if (!cursor_size_ok(dev_priv, state->base.crtc_w, 13721 13703 state->base.crtc_h)) { 13722 13704 DRM_DEBUG("Cursor dimension %dx%d not supported\n", 13723 13705 state->base.crtc_w, state->base.crtc_h); ··· 13730 13712 return -ENOMEM; 13731 13713 } 13732 13714 13733 - if (fb->modifier != DRM_FORMAT_MOD_NONE) { 13715 + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { 13734 13716 DRM_DEBUG_KMS("cursor cannot be tiled\n"); 13735 13717 return -EINVAL; 13736 13718 } ··· 13745 13727 * display power well must be turned off and on again. 13746 13728 * Refuse the put the cursor into that compromised position. 
13747 13729 */ 13748 - if (IS_CHERRYVIEW(to_i915(plane->dev)) && pipe == PIPE_C && 13730 + if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && 13749 13731 state->base.visible && state->base.crtc_x < 0) { 13750 13732 DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); 13751 13733 return -EINVAL; 13752 13734 } 13735 + 13736 + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) 13737 + state->ctl = i845_cursor_ctl(crtc_state, state); 13738 + else 13739 + state->ctl = i9xx_cursor_ctl(crtc_state, state); 13753 13740 13754 13741 return 0; 13755 13742 } ··· 14391 14368 mode_cmd->modifier[0]); 14392 14369 goto err; 14393 14370 } 14394 - case DRM_FORMAT_MOD_NONE: 14371 + case DRM_FORMAT_MOD_LINEAR: 14395 14372 case I915_FORMAT_MOD_X_TILED: 14396 14373 break; 14397 14374 default: ··· 14414 14391 mode_cmd->pixel_format); 14415 14392 if (mode_cmd->pitches[0] > pitch_limit) { 14416 14393 DRM_DEBUG_KMS("%s pitch (%u) must be at most %d\n", 14417 - mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ? 14394 + mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? 14418 14395 "tiled" : "linear", 14419 14396 mode_cmd->pitches[0], pitch_limit); 14420 14397 goto err;
+18 -9
drivers/gpu/drm/i915/intel_drv.h
··· 398 398 int x, y; 399 399 } aux; 400 400 401 + /* plane control register */ 402 + u32 ctl; 403 + 401 404 /* 402 405 * scaler_id 403 406 * = -1 : not using a scaler ··· 732 729 733 730 /* bitmask of visible planes (enum plane_id) */ 734 731 u8 active_planes; 732 + 733 + /* HDMI scrambling status */ 734 + bool hdmi_scrambling; 735 + 736 + /* HDMI High TMDS char rate ratio */ 737 + bool hdmi_high_tmds_clock_ratio; 735 738 }; 736 739 737 740 struct intel_crtc { ··· 1229 1220 void intel_crt_reset(struct drm_encoder *encoder); 1230 1221 1231 1222 /* intel_ddi.c */ 1232 - void intel_ddi_clk_select(struct intel_encoder *encoder, 1233 - struct intel_shared_dpll *pll); 1234 1223 void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, 1235 1224 struct intel_crtc_state *old_crtc_state, 1236 1225 struct drm_connector_state *old_conn_state); 1237 - void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); 1238 1226 void hsw_fdi_link_train(struct intel_crtc *crtc, 1239 1227 const struct intel_crtc_state *crtc_state); 1240 1228 void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); ··· 1242 1236 enum transcoder cpu_transcoder); 1243 1237 void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); 1244 1238 void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); 1245 - bool intel_ddi_pll_select(struct intel_crtc *crtc, 1246 - struct intel_crtc_state *crtc_state); 1239 + struct intel_encoder * 1240 + intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); 1247 1241 void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); 1248 1242 void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); 1249 1243 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); ··· 1252 1246 void intel_ddi_get_config(struct intel_encoder *encoder, 1253 1247 struct intel_crtc_state *pipe_config); 1254 1248 1255 - void intel_ddi_init_dp_buf_reg(struct intel_encoder 
*encoder); 1256 1249 void intel_ddi_clock_get(struct intel_encoder *encoder, 1257 1250 struct intel_crtc_state *pipe_config); 1258 1251 void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, ··· 1450 1445 return i915_ggtt_offset(state->vma); 1451 1446 } 1452 1447 1453 - u32 skl_plane_ctl_format(uint32_t pixel_format); 1454 - u32 skl_plane_ctl_tiling(uint64_t fb_modifier); 1455 - u32 skl_plane_ctl_rotation(unsigned int rotation); 1448 + u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, 1449 + const struct intel_plane_state *plane_state); 1456 1450 u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, 1457 1451 unsigned int rotation); 1458 1452 int skl_check_plane_surface(struct intel_plane_state *plane_state); 1453 + int i9xx_check_plane_surface(struct intel_plane_state *plane_state); 1459 1454 1460 1455 /* intel_csr.c */ 1461 1456 void intel_csr_ucode_init(struct drm_i915_private *); ··· 1625 1620 bool intel_hdmi_compute_config(struct intel_encoder *encoder, 1626 1621 struct intel_crtc_state *pipe_config, 1627 1622 struct drm_connector_state *conn_state); 1623 + void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder, 1624 + struct drm_connector *connector, 1625 + bool high_tmds_clock_ratio, 1626 + bool scrambling); 1628 1627 void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable); 1629 1628 1630 1629
+21 -12
drivers/gpu/drm/i915/intel_engine_cs.c
··· 36 36 int (*init_execlists)(struct intel_engine_cs *engine); 37 37 } intel_engines[] = { 38 38 [RCS] = { 39 - .name = "render ring", 40 - .exec_id = I915_EXEC_RENDER, 39 + .name = "rcs", 41 40 .hw_id = RCS_HW, 41 + .exec_id = I915_EXEC_RENDER, 42 42 .mmio_base = RENDER_RING_BASE, 43 43 .irq_shift = GEN8_RCS_IRQ_SHIFT, 44 44 .init_execlists = logical_render_ring_init, 45 45 .init_legacy = intel_init_render_ring_buffer, 46 46 }, 47 47 [BCS] = { 48 - .name = "blitter ring", 49 - .exec_id = I915_EXEC_BLT, 48 + .name = "bcs", 50 49 .hw_id = BCS_HW, 50 + .exec_id = I915_EXEC_BLT, 51 51 .mmio_base = BLT_RING_BASE, 52 52 .irq_shift = GEN8_BCS_IRQ_SHIFT, 53 53 .init_execlists = logical_xcs_ring_init, 54 54 .init_legacy = intel_init_blt_ring_buffer, 55 55 }, 56 56 [VCS] = { 57 - .name = "bsd ring", 58 - .exec_id = I915_EXEC_BSD, 57 + .name = "vcs", 59 58 .hw_id = VCS_HW, 59 + .exec_id = I915_EXEC_BSD, 60 60 .mmio_base = GEN6_BSD_RING_BASE, 61 61 .irq_shift = GEN8_VCS1_IRQ_SHIFT, 62 62 .init_execlists = logical_xcs_ring_init, 63 63 .init_legacy = intel_init_bsd_ring_buffer, 64 64 }, 65 65 [VCS2] = { 66 - .name = "bsd2 ring", 67 - .exec_id = I915_EXEC_BSD, 66 + .name = "vcs2", 68 67 .hw_id = VCS2_HW, 68 + .exec_id = I915_EXEC_BSD, 69 69 .mmio_base = GEN8_BSD2_RING_BASE, 70 70 .irq_shift = GEN8_VCS2_IRQ_SHIFT, 71 71 .init_execlists = logical_xcs_ring_init, 72 72 .init_legacy = intel_init_bsd2_ring_buffer, 73 73 }, 74 74 [VECS] = { 75 - .name = "video enhancement ring", 76 - .exec_id = I915_EXEC_VEBOX, 75 + .name = "vecs", 77 76 .hw_id = VECS_HW, 77 + .exec_id = I915_EXEC_VEBOX, 78 78 .mmio_base = VEBOX_RING_BASE, 79 79 .irq_shift = GEN8_VECS_IRQ_SHIFT, 80 80 .init_execlists = logical_xcs_ring_init, ··· 242 242 void *semaphores; 243 243 244 244 /* Semaphores are in noncoherent memory, flush to be safe */ 245 - semaphores = kmap(page); 245 + semaphores = kmap_atomic(page); 246 246 memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), 247 247 0, I915_NUM_ENGINES * 
gen8_semaphore_seqno_size); 248 248 drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), 249 249 I915_NUM_ENGINES * gen8_semaphore_seqno_size); 250 - kunmap(page); 250 + kunmap_atomic(semaphores); 251 251 } 252 252 253 253 intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); ··· 1110 1110 { 1111 1111 struct intel_engine_cs *engine; 1112 1112 enum intel_engine_id id; 1113 + 1114 + if (READ_ONCE(dev_priv->gt.active_requests)) 1115 + return false; 1116 + 1117 + /* If the driver is wedged, HW state may be very inconsistent and 1118 + * report that it is still busy, even though we have stopped using it. 1119 + */ 1120 + if (i915_terminally_wedged(&dev_priv->gpu_error)) 1121 + return true; 1113 1122 1114 1123 for_each_engine(engine, dev_priv, id) { 1115 1124 if (!intel_engine_is_idle(engine))
+40 -31
drivers/gpu/drm/i915/intel_guc_fwif.h
··· 26 26 #define GFXCORE_FAMILY_GEN9 12 27 27 #define GFXCORE_FAMILY_UNKNOWN 0x7fffffff 28 28 29 - #define GUC_CTX_PRIORITY_KMD_HIGH 0 30 - #define GUC_CTX_PRIORITY_HIGH 1 31 - #define GUC_CTX_PRIORITY_KMD_NORMAL 2 32 - #define GUC_CTX_PRIORITY_NORMAL 3 33 - #define GUC_CTX_PRIORITY_NUM 4 29 + #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 30 + #define GUC_CLIENT_PRIORITY_HIGH 1 31 + #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 32 + #define GUC_CLIENT_PRIORITY_NORMAL 3 33 + #define GUC_CLIENT_PRIORITY_NUM 4 34 34 35 - #define GUC_MAX_GPU_CONTEXTS 1024 36 - #define GUC_INVALID_CTX_ID GUC_MAX_GPU_CONTEXTS 35 + #define GUC_MAX_STAGE_DESCRIPTORS 1024 36 + #define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS 37 37 38 38 #define GUC_RENDER_ENGINE 0 39 39 #define GUC_VIDEO_ENGINE 1 ··· 68 68 #define GUC_DOORBELL_ENABLED 1 69 69 #define GUC_DOORBELL_DISABLED 0 70 70 71 - #define GUC_CTX_DESC_ATTR_ACTIVE (1 << 0) 72 - #define GUC_CTX_DESC_ATTR_PENDING_DB (1 << 1) 73 - #define GUC_CTX_DESC_ATTR_KERNEL (1 << 2) 74 - #define GUC_CTX_DESC_ATTR_PREEMPT (1 << 3) 75 - #define GUC_CTX_DESC_ATTR_RESET (1 << 4) 76 - #define GUC_CTX_DESC_ATTR_WQLOCKED (1 << 5) 77 - #define GUC_CTX_DESC_ATTR_PCH (1 << 6) 78 - #define GUC_CTX_DESC_ATTR_TERMINATED (1 << 7) 71 + #define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0) 72 + #define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1) 73 + #define GUC_STAGE_DESC_ATTR_KERNEL BIT(2) 74 + #define GUC_STAGE_DESC_ATTR_PREEMPT BIT(3) 75 + #define GUC_STAGE_DESC_ATTR_RESET BIT(4) 76 + #define GUC_STAGE_DESC_ATTR_WQLOCKED BIT(5) 77 + #define GUC_STAGE_DESC_ATTR_PCH BIT(6) 78 + #define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) 79 79 80 80 /* The guc control data is 10 DWORDs */ 81 81 #define GUC_CTL_CTXINFO 0 ··· 241 241 u64 value_qw; 242 242 } __packed; 243 243 244 - #define GUC_MAX_DOORBELLS 256 245 - #define GUC_INVALID_DOORBELL_ID (GUC_MAX_DOORBELLS) 244 + #define GUC_NUM_DOORBELLS 256 245 + #define GUC_DOORBELL_INVALID (GUC_NUM_DOORBELLS) 246 246 247 247 #define GUC_DB_SIZE (PAGE_SIZE) 
248 248 #define GUC_WQ_SIZE (PAGE_SIZE * 2) ··· 251 251 struct guc_wq_item { 252 252 u32 header; 253 253 u32 context_desc; 254 - u32 ring_tail; 254 + u32 submit_element_info; 255 255 u32 fence_id; 256 256 } __packed; 257 257 258 258 struct guc_process_desc { 259 - u32 context_id; 259 + u32 stage_id; 260 260 u64 db_base_addr; 261 261 u32 head; 262 262 u32 tail; ··· 278 278 u32 context_desc; 279 279 u32 context_id; 280 280 u32 ring_status; 281 - u32 ring_lcra; 281 + u32 ring_lrca; 282 282 u32 ring_begin; 283 283 u32 ring_end; 284 284 u32 ring_next_free_location; ··· 289 289 u16 engine_submit_queue_count; 290 290 } __packed; 291 291 292 - /*Context descriptor for communicating between uKernel and Driver*/ 293 - struct guc_context_desc { 292 + /* 293 + * This structure describes a stage set arranged for a particular communication 294 + * between uKernel (GuC) and Driver (KMD). Technically, this is known as a 295 + * "GuC Context descriptor" in the specs, but we use the term "stage descriptor" 296 + * to avoid confusion with all the other things already named "context" in the 297 + * driver. A static pool of these descriptors are stored inside a GEM object 298 + * (stage_desc_pool) which is held for the entire lifetime of our interaction 299 + * with the GuC, being allocated before the GuC is loaded with its firmware. 300 + */ 301 + struct guc_stage_desc { 294 302 u32 sched_common_area; 295 - u32 context_id; 303 + u32 stage_id; 296 304 u32 pas_id; 297 305 u8 engines_used; 298 306 u64 db_trigger_cpu; ··· 367 359 } __packed; 368 360 369 361 struct guc_policies { 370 - struct guc_policy policy[GUC_CTX_PRIORITY_NUM][GUC_MAX_ENGINES_NUM]; 362 + struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINES_NUM]; 371 363 372 364 /* In micro seconds. How much time to allow before DPC processing is 373 365 * called back via interrupt (to prevent DPC queue drain starving). 
··· 409 401 u32 number_of_registers; 410 402 } __packed; 411 403 404 + /* MMIO registers that are set as non privileged */ 405 + struct mmio_white_list { 406 + u32 mmio_start; 407 + u32 offsets[GUC_MMIO_WHITE_LIST_MAX]; 408 + u32 count; 409 + } __packed; 410 + 412 411 struct guc_mmio_reg_state { 413 412 struct guc_mmio_regset global_reg; 414 413 struct guc_mmio_regset engine_reg[GUC_MAX_ENGINES_NUM]; 415 - 416 - /* MMIO registers that are set as non privileged */ 417 - struct __packed { 418 - u32 mmio_start; 419 - u32 offsets[GUC_MMIO_WHITE_LIST_MAX]; 420 - u32 count; 421 - } mmio_white_list[GUC_MAX_ENGINES_NUM]; 414 + struct mmio_white_list white_list[GUC_MAX_ENGINES_NUM]; 422 415 } __packed; 423 416 424 417 /* GuC Additional Data Struct */
+6 -45
drivers/gpu/drm/i915/intel_guc_loader.c
··· 73 73 #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) 74 74 MODULE_FIRMWARE(I915_KBL_GUC_UCODE); 75 75 76 - /* User-friendly representation of an enum */ 77 - const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) 78 - { 79 - switch (status) { 80 - case INTEL_UC_FIRMWARE_FAIL: 81 - return "FAIL"; 82 - case INTEL_UC_FIRMWARE_NONE: 83 - return "NONE"; 84 - case INTEL_UC_FIRMWARE_PENDING: 85 - return "PENDING"; 86 - case INTEL_UC_FIRMWARE_SUCCESS: 87 - return "SUCCESS"; 88 - default: 89 - return "UNKNOWN!"; 90 - } 91 - }; 92 76 93 77 static u32 get_gttype(struct drm_i915_private *dev_priv) 94 78 { ··· 132 148 } else 133 149 params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; 134 150 135 - if (guc->ads_vma) { 136 - u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; 137 - params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; 138 - params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; 139 - } 140 - 141 151 /* If GuC submission is enabled, set up additional parameters here */ 142 152 if (i915.enable_guc_submission) { 143 - u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool_vma); 144 - u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; 153 + u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; 154 + u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); 155 + u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; 156 + 157 + params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; 158 + params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; 145 159 146 160 pgs >>= PAGE_SHIFT; 147 161 params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | ··· 411 429 } 412 430 413 431 return 0; 414 - } 415 - 416 - /** 417 - * intel_guc_fini() - clean up all allocated resources 418 - * @dev_priv: i915 device private 419 - */ 420 - void intel_guc_fini(struct drm_i915_private *dev_priv) 421 - { 422 - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; 423 - struct drm_i915_gem_object *obj; 424 - 425 - mutex_lock(&dev_priv->drm.struct_mutex); 426 - i915_guc_submission_disable(dev_priv); 427 - 
i915_guc_submission_fini(dev_priv); 428 - mutex_unlock(&dev_priv->drm.struct_mutex); 429 - 430 - obj = fetch_and_zero(&guc_fw->obj); 431 - if (obj) 432 - i915_gem_object_put(obj); 433 - 434 - guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; 435 432 }
+192 -192
drivers/gpu/drm/i915/intel_guc_log.c
··· 66 66 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 67 67 } 68 68 69 - 70 69 /* 71 70 * Sub buffer switch callback. Called whenever relay has to switch to a new 72 71 * sub buffer, relay stays on the same sub buffer if 0 is returned. ··· 138 139 .remove_buf_file = remove_buf_file_callback, 139 140 }; 140 141 141 - static void guc_log_remove_relay_file(struct intel_guc *guc) 142 - { 143 - relay_close(guc->log.relay_chan); 144 - } 145 - 146 - static int guc_log_create_relay_channel(struct intel_guc *guc) 147 - { 148 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 149 - struct rchan *guc_log_relay_chan; 150 - size_t n_subbufs, subbuf_size; 151 - 152 - /* Keep the size of sub buffers same as shared log buffer */ 153 - subbuf_size = guc->log.vma->obj->base.size; 154 - 155 - /* Store up to 8 snapshots, which is large enough to buffer sufficient 156 - * boot time logs and provides enough leeway to User, in terms of 157 - * latency, for consuming the logs from relay. Also doesn't take 158 - * up too much memory. 
159 - */ 160 - n_subbufs = 8; 161 - 162 - guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, 163 - n_subbufs, &relay_callbacks, dev_priv); 164 - if (!guc_log_relay_chan) { 165 - DRM_ERROR("Couldn't create relay chan for GuC logging\n"); 166 - return -ENOMEM; 167 - } 168 - 169 - GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); 170 - guc->log.relay_chan = guc_log_relay_chan; 171 - return 0; 172 - } 173 - 174 - static int guc_log_create_relay_file(struct intel_guc *guc) 142 + static int guc_log_relay_file_create(struct intel_guc *guc) 175 143 { 176 144 struct drm_i915_private *dev_priv = guc_to_i915(guc); 177 145 struct dentry *log_dir; 178 146 int ret; 147 + 148 + if (i915.guc_log_level < 0) 149 + return 0; 179 150 180 151 /* For now create the log file in /sys/kernel/debug/dri/0 dir */ 181 152 log_dir = dev_priv->drm.primary->debugfs_root; ··· 166 197 return -ENODEV; 167 198 } 168 199 169 - ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir); 170 - if (ret) { 200 + ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir); 201 + if (ret < 0 && ret != -EEXIST) { 171 202 DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); 172 203 return ret; 173 204 } ··· 183 214 smp_wmb(); 184 215 185 216 /* All data has been written, so now move the offset of sub buffer. */ 186 - relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size); 217 + relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size); 187 218 188 219 /* Switch to the next sub buffer */ 189 - relay_flush(guc->log.relay_chan); 220 + relay_flush(guc->log.runtime.relay_chan); 190 221 } 191 222 192 223 static void *guc_get_write_buffer(struct intel_guc *guc) 193 224 { 194 - if (!guc->log.relay_chan) 225 + if (!guc->log.runtime.relay_chan) 195 226 return NULL; 196 227 197 228 /* Just get the base address of a new sub buffer and copy data into it ··· 202 233 * done without using relay_reserve() along with relay_write(). 
So its 203 234 * better to use relay_reserve() alone. 204 235 */ 205 - return relay_reserve(guc->log.relay_chan, 0); 236 + return relay_reserve(guc->log.runtime.relay_chan, 0); 206 237 } 207 238 208 239 static bool guc_check_log_buf_overflow(struct intel_guc *guc, ··· 253 284 void *src_data, *dst_data; 254 285 bool new_overflow; 255 286 256 - if (WARN_ON(!guc->log.buf_addr)) 287 + if (WARN_ON(!guc->log.runtime.buf_addr)) 257 288 return; 258 289 259 290 /* Get the pointer to shared GuC log buffer */ 260 - log_buf_state = src_data = guc->log.buf_addr; 291 + log_buf_state = src_data = guc->log.runtime.buf_addr; 261 292 262 293 /* Get the pointer to local buffer to store the logs */ 263 294 log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); ··· 340 371 } 341 372 } 342 373 343 - static void guc_log_cleanup(struct intel_guc *guc) 344 - { 345 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 346 - 347 - lockdep_assert_held(&dev_priv->drm.struct_mutex); 348 - 349 - /* First disable the flush interrupt */ 350 - gen9_disable_guc_interrupts(dev_priv); 351 - 352 - if (guc->log.flush_wq) 353 - destroy_workqueue(guc->log.flush_wq); 354 - 355 - guc->log.flush_wq = NULL; 356 - 357 - if (guc->log.relay_chan) 358 - guc_log_remove_relay_file(guc); 359 - 360 - guc->log.relay_chan = NULL; 361 - 362 - if (guc->log.buf_addr) 363 - i915_gem_object_unpin_map(guc->log.vma->obj); 364 - 365 - guc->log.buf_addr = NULL; 366 - } 367 - 368 374 static void capture_logs_work(struct work_struct *work) 369 375 { 370 376 struct intel_guc *guc = 371 - container_of(work, struct intel_guc, log.flush_work); 377 + container_of(work, struct intel_guc, log.runtime.flush_work); 372 378 373 379 guc_log_capture_logs(guc); 374 380 } 375 381 376 - static int guc_log_create_extras(struct intel_guc *guc) 382 + static bool guc_log_has_runtime(struct intel_guc *guc) 383 + { 384 + return guc->log.runtime.buf_addr != NULL; 385 + } 386 + 387 + static int guc_log_runtime_create(struct intel_guc *guc) 
377 388 { 378 389 struct drm_i915_private *dev_priv = guc_to_i915(guc); 379 390 void *vaddr; 380 - int ret; 391 + struct rchan *guc_log_relay_chan; 392 + size_t n_subbufs, subbuf_size; 393 + int ret = 0; 381 394 382 395 lockdep_assert_held(&dev_priv->drm.struct_mutex); 383 396 384 - /* Nothing to do */ 385 - if (i915.guc_log_level < 0) 386 - return 0; 397 + GEM_BUG_ON(guc_log_has_runtime(guc)); 387 398 388 - if (!guc->log.buf_addr) { 389 - /* Create a WC (Uncached for read) vmalloc mapping of log 390 - * buffer pages, so that we can directly get the data 391 - * (up-to-date) from memory. 392 - */ 393 - vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); 394 - if (IS_ERR(vaddr)) { 395 - ret = PTR_ERR(vaddr); 396 - DRM_ERROR("Couldn't map log buffer pages %d\n", ret); 397 - return ret; 398 - } 399 - 400 - guc->log.buf_addr = vaddr; 399 + /* Create a WC (Uncached for read) vmalloc mapping of log 400 + * buffer pages, so that we can directly get the data 401 + * (up-to-date) from memory. 402 + */ 403 + vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); 404 + if (IS_ERR(vaddr)) { 405 + DRM_ERROR("Couldn't map log buffer pages %d\n", ret); 406 + return PTR_ERR(vaddr); 401 407 } 402 408 403 - if (!guc->log.relay_chan) { 404 - /* Create a relay channel, so that we have buffers for storing 405 - * the GuC firmware logs, the channel will be linked with a file 406 - * later on when debugfs is registered. 407 - */ 408 - ret = guc_log_create_relay_channel(guc); 409 - if (ret) 410 - return ret; 409 + guc->log.runtime.buf_addr = vaddr; 410 + 411 + /* Keep the size of sub buffers same as shared log buffer */ 412 + subbuf_size = guc->log.vma->obj->base.size; 413 + 414 + /* Store up to 8 snapshots, which is large enough to buffer sufficient 415 + * boot time logs and provides enough leeway to User, in terms of 416 + * latency, for consuming the logs from relay. Also doesn't take 417 + * up too much memory. 
418 + */ 419 + n_subbufs = 8; 420 + 421 + /* Create a relay channel, so that we have buffers for storing 422 + * the GuC firmware logs, the channel will be linked with a file 423 + * later on when debugfs is registered. 424 + */ 425 + guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, 426 + n_subbufs, &relay_callbacks, dev_priv); 427 + if (!guc_log_relay_chan) { 428 + DRM_ERROR("Couldn't create relay chan for GuC logging\n"); 429 + 430 + ret = -ENOMEM; 431 + goto err_vaddr; 411 432 } 412 433 413 - if (!guc->log.flush_wq) { 414 - INIT_WORK(&guc->log.flush_work, capture_logs_work); 434 + GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); 435 + guc->log.runtime.relay_chan = guc_log_relay_chan; 415 436 416 - /* 417 - * GuC log buffer flush work item has to do register access to 418 - * send the ack to GuC and this work item, if not synced before 419 - * suspend, can potentially get executed after the GFX device is 420 - * suspended. 421 - * By marking the WQ as freezable, we don't have to bother about 422 - * flushing of this work item from the suspend hooks, the pending 423 - * work item if any will be either executed before the suspend 424 - * or scheduled later on resume. This way the handling of work 425 - * item can be kept same between system suspend & rpm suspend. 426 - */ 427 - guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", 428 - WQ_HIGHPRI | WQ_FREEZABLE); 429 - if (guc->log.flush_wq == NULL) { 430 - DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); 431 - return -ENOMEM; 432 - } 437 + INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); 438 + 439 + /* 440 + * GuC log buffer flush work item has to do register access to 441 + * send the ack to GuC and this work item, if not synced before 442 + * suspend, can potentially get executed after the GFX device is 443 + * suspended. 
444 + * By marking the WQ as freezable, we don't have to bother about 445 + * flushing of this work item from the suspend hooks, the pending 446 + * work item if any will be either executed before the suspend 447 + * or scheduled later on resume. This way the handling of work 448 + * item can be kept same between system suspend & rpm suspend. 449 + */ 450 + guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", 451 + WQ_HIGHPRI | WQ_FREEZABLE); 452 + if (!guc->log.runtime.flush_wq) { 453 + DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); 454 + ret = -ENOMEM; 455 + goto err_relaychan; 433 456 } 434 457 435 458 return 0; 459 + 460 + err_relaychan: 461 + relay_close(guc->log.runtime.relay_chan); 462 + err_vaddr: 463 + i915_gem_object_unpin_map(guc->log.vma->obj); 464 + guc->log.runtime.buf_addr = NULL; 465 + return ret; 436 466 } 437 467 438 - void intel_guc_log_create(struct intel_guc *guc) 468 + static void guc_log_runtime_destroy(struct intel_guc *guc) 439 469 { 440 - struct i915_vma *vma; 441 - unsigned long offset; 442 - uint32_t size, flags; 470 + /* 471 + * It's possible that the runtime stuff was never allocated because 472 + * guc_log_level was < 0 at the time 473 + **/ 474 + if (!guc_log_has_runtime(guc)) 475 + return; 443 476 444 - if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) 445 - i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; 446 - 447 - /* The first page is to save log buffer state. Allocate one 448 - * extra page for others in case for overlap */ 449 - size = (1 + GUC_LOG_DPC_PAGES + 1 + 450 - GUC_LOG_ISR_PAGES + 1 + 451 - GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; 452 - 453 - vma = guc->log.vma; 454 - if (!vma) { 455 - /* We require SSE 4.1 for fast reads from the GuC log buffer and 456 - * it should be present on the chipsets supporting GuC based 457 - * submisssions. 
458 - */ 459 - if (WARN_ON(!i915_has_memcpy_from_wc())) { 460 - /* logging will not be enabled */ 461 - i915.guc_log_level = -1; 462 - return; 463 - } 464 - 465 - vma = intel_guc_allocate_vma(guc, size); 466 - if (IS_ERR(vma)) { 467 - /* logging will be off */ 468 - i915.guc_log_level = -1; 469 - return; 470 - } 471 - 472 - guc->log.vma = vma; 473 - 474 - if (guc_log_create_extras(guc)) { 475 - guc_log_cleanup(guc); 476 - i915_vma_unpin_and_release(&guc->log.vma); 477 - i915.guc_log_level = -1; 478 - return; 479 - } 480 - } 481 - 482 - /* each allocated unit is a page */ 483 - flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | 484 - (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | 485 - (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | 486 - (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); 487 - 488 - offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ 489 - guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; 477 + destroy_workqueue(guc->log.runtime.flush_wq); 478 + relay_close(guc->log.runtime.relay_chan); 479 + i915_gem_object_unpin_map(guc->log.vma->obj); 480 + guc->log.runtime.buf_addr = NULL; 490 481 } 491 482 492 483 static int guc_log_late_setup(struct intel_guc *guc) ··· 456 527 457 528 lockdep_assert_held(&dev_priv->drm.struct_mutex); 458 529 459 - if (i915.guc_log_level < 0) 460 - return -EINVAL; 530 + if (!guc_log_has_runtime(guc)) { 531 + /* If log_level was set as -1 at boot time, then setup needed to 532 + * handle log buffer flush interrupts would not have been done yet, 533 + * so do that now. 534 + */ 535 + ret = guc_log_runtime_create(guc); 536 + if (ret) 537 + goto err; 538 + } 461 539 462 - /* If log_level was set as -1 at boot time, then setup needed to 463 - * handle log buffer flush interrupts would not have been done yet, 464 - * so do that now. 
465 - */ 466 - ret = guc_log_create_extras(guc); 540 + ret = guc_log_relay_file_create(guc); 467 541 if (ret) 468 - goto err; 469 - 470 - ret = guc_log_create_relay_file(guc); 471 - if (ret) 472 - goto err; 542 + goto err_runtime; 473 543 474 544 return 0; 545 + 546 + err_runtime: 547 + guc_log_runtime_destroy(guc); 475 548 err: 476 - guc_log_cleanup(guc); 477 549 /* logging will remain off */ 478 550 i915.guc_log_level = -1; 479 551 return ret; ··· 507 577 /* Before initiating the forceful flush, wait for any pending/ongoing 508 578 * flush to complete otherwise forceful flush may not actually happen. 509 579 */ 510 - flush_work(&guc->log.flush_work); 580 + flush_work(&guc->log.runtime.flush_work); 511 581 512 582 /* Ask GuC to update the log buffer state */ 513 583 guc_log_flush(guc); 514 584 515 585 /* GuC would have updated log buffer by now, so capture it */ 516 586 guc_log_capture_logs(guc); 587 + } 588 + 589 + int intel_guc_log_create(struct intel_guc *guc) 590 + { 591 + struct i915_vma *vma; 592 + unsigned long offset; 593 + uint32_t size, flags; 594 + int ret; 595 + 596 + GEM_BUG_ON(guc->log.vma); 597 + 598 + if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) 599 + i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; 600 + 601 + /* The first page is to save log buffer state. Allocate one 602 + * extra page for others in case for overlap */ 603 + size = (1 + GUC_LOG_DPC_PAGES + 1 + 604 + GUC_LOG_ISR_PAGES + 1 + 605 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; 606 + 607 + /* We require SSE 4.1 for fast reads from the GuC log buffer and 608 + * it should be present on the chipsets supporting GuC based 609 + * submisssions. 
610 + */ 611 + if (WARN_ON(!i915_has_memcpy_from_wc())) { 612 + ret = -EINVAL; 613 + goto err; 614 + } 615 + 616 + vma = intel_guc_allocate_vma(guc, size); 617 + if (IS_ERR(vma)) { 618 + ret = PTR_ERR(vma); 619 + goto err; 620 + } 621 + 622 + guc->log.vma = vma; 623 + 624 + if (i915.guc_log_level >= 0) { 625 + ret = guc_log_runtime_create(guc); 626 + if (ret < 0) 627 + goto err_vma; 628 + } 629 + 630 + /* each allocated unit is a page */ 631 + flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | 632 + (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | 633 + (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | 634 + (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); 635 + 636 + offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ 637 + guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; 638 + 639 + return 0; 640 + 641 + err_vma: 642 + i915_vma_unpin_and_release(&guc->log.vma); 643 + err: 644 + /* logging will be off */ 645 + i915.guc_log_level = -1; 646 + return ret; 647 + } 648 + 649 + void intel_guc_log_destroy(struct intel_guc *guc) 650 + { 651 + guc_log_runtime_destroy(guc); 652 + i915_vma_unpin_and_release(&guc->log.vma); 517 653 } 518 654 519 655 int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) ··· 605 609 return ret; 606 610 } 607 611 608 - i915.guc_log_level = log_param.verbosity; 612 + if (log_param.logging_enabled) { 613 + i915.guc_log_level = log_param.verbosity; 609 614 610 - /* If log_level was set as -1 at boot time, then the relay channel file 611 - * wouldn't have been created by now and interrupts also would not have 612 - * been enabled. 613 - */ 614 - if (!dev_priv->guc.log.relay_chan) { 615 + /* If log_level was set as -1 at boot time, then the relay channel file 616 + * wouldn't have been created by now and interrupts also would not have 617 + * been enabled. Try again now, just in case. 
618 + */ 615 619 ret = guc_log_late_setup(guc); 616 - if (!ret) 617 - gen9_enable_guc_interrupts(dev_priv); 618 - } else if (!log_param.logging_enabled) { 620 + if (ret < 0) { 621 + DRM_DEBUG_DRIVER("GuC log late setup failed %d\n", ret); 622 + return ret; 623 + } 624 + 625 + /* GuC logging is currently the only user of Guc2Host interrupts */ 626 + gen9_enable_guc_interrupts(dev_priv); 627 + } else { 619 628 /* Once logging is disabled, GuC won't generate logs & send an 620 629 * interrupt. But there could be some data in the log buffer 621 630 * which is yet to be captured. So request GuC to update the log ··· 630 629 631 630 /* As logging is disabled, update log level to reflect that */ 632 631 i915.guc_log_level = -1; 633 - } else { 634 - /* In case interrupts were disabled, enable them now */ 635 - gen9_enable_guc_interrupts(dev_priv); 636 632 } 637 633 638 634 return ret; ··· 637 639 638 640 void i915_guc_log_register(struct drm_i915_private *dev_priv) 639 641 { 640 - if (!i915.enable_guc_submission) 642 + if (!i915.enable_guc_submission || i915.guc_log_level < 0) 641 643 return; 642 644 643 645 mutex_lock(&dev_priv->drm.struct_mutex); ··· 651 653 return; 652 654 653 655 mutex_lock(&dev_priv->drm.struct_mutex); 654 - guc_log_cleanup(&dev_priv->guc); 656 + /* GuC logging is currently the only user of Guc2Host interrupts */ 657 + gen9_disable_guc_interrupts(dev_priv); 658 + guc_log_runtime_destroy(&dev_priv->guc); 655 659 mutex_unlock(&dev_priv->drm.struct_mutex); 656 660 }
+2
drivers/gpu/drm/i915/intel_gvt.c
··· 45 45 return true; 46 46 if (IS_SKYLAKE(dev_priv)) 47 47 return true; 48 + if (IS_KABYLAKE(dev_priv) && INTEL_DEVID(dev_priv) == 0x591D) 49 + return true; 48 50 return false; 49 51 } 50 52
+65
drivers/gpu/drm/i915/intel_hdmi.c
··· 34 34 #include <drm/drm_atomic_helper.h> 35 35 #include <drm/drm_crtc.h> 36 36 #include <drm/drm_edid.h> 37 + #include <drm/drm_scdc_helper.h> 37 38 #include "intel_drv.h" 38 39 #include <drm/i915_drm.h> 39 40 #include <drm/intel_lpe_audio.h> ··· 1209 1208 { 1210 1209 if (IS_G4X(dev_priv)) 1211 1210 return 165000; 1211 + else if (IS_GEMINILAKE(dev_priv)) 1212 + return 594000; 1212 1213 else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8) 1213 1214 return 300000; 1214 1215 else ··· 1337 1334 struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); 1338 1335 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 1339 1336 struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; 1337 + struct drm_scdc *scdc = &conn_state->connector->display_info.hdmi.scdc; 1340 1338 int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock; 1341 1339 int clock_12bpc = clock_8bpc * 3 / 2; 1342 1340 int desired_bpp; ··· 1406 1402 adjusted_mode->picture_aspect_ratio = intel_hdmi->aspect_ratio; 1407 1403 1408 1404 pipe_config->lane_count = 4; 1405 + 1406 + if (scdc->scrambling.supported && IS_GEMINILAKE(dev_priv)) { 1407 + if (scdc->scrambling.low_rates) 1408 + pipe_config->hdmi_scrambling = true; 1409 + 1410 + if (pipe_config->port_clock > 340000) { 1411 + pipe_config->hdmi_scrambling = true; 1412 + pipe_config->hdmi_high_tmds_clock_ratio = true; 1413 + } 1414 + } 1409 1415 1410 1416 return true; 1411 1417 } ··· 1824 1810 intel_hdmi->color_range_auto = true; 1825 1811 intel_attach_aspect_ratio_property(connector); 1826 1812 intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE; 1813 + } 1814 + 1815 + /* 1816 + * intel_hdmi_handle_sink_scrambling: handle sink scrambling/clock ratio setup 1817 + * @encoder: intel_encoder 1818 + * @connector: drm_connector 1819 + * @high_tmds_clock_ratio = bool to indicate if the function needs to set 1820 + * or reset the high tmds clock ratio for scrambling 1821 + * @scrambling: bool to Indicate if the 
function needs to set or reset 1822 + * sink scrambling 1823 + * 1824 + * This function handles scrambling on HDMI 2.0 capable sinks. 1825 + * If required clock rate is > 340 Mhz && scrambling is supported by sink 1826 + * it enables scrambling. This should be called before enabling the HDMI 1827 + * 2.0 port, as the sink can choose to disable the scrambling if it doesn't 1828 + * detect a scrambled clock within 100 ms. 1829 + */ 1830 + void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, 1831 + struct drm_connector *connector, 1832 + bool high_tmds_clock_ratio, 1833 + bool scrambling) 1834 + { 1835 + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); 1836 + struct drm_i915_private *dev_priv = connector->dev->dev_private; 1837 + struct drm_scrambling *sink_scrambling = 1838 + &connector->display_info.hdmi.scdc.scrambling; 1839 + struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv, 1840 + intel_hdmi->ddc_bus); 1841 + bool ret; 1842 + 1843 + if (!sink_scrambling->supported) 1844 + return; 1845 + 1846 + DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n", 1847 + encoder->base.name, connector->name); 1848 + 1849 + /* Set TMDS bit clock ratio to 1/40 or 1/10 */ 1850 + ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio); 1851 + if (!ret) { 1852 + DRM_ERROR("Set TMDS ratio failed\n"); 1853 + return; 1854 + } 1855 + 1856 + /* Enable/disable sink scrambling */ 1857 + ret = drm_scdc_set_scrambling(adptr, scrambling); 1858 + if (!ret) { 1859 + DRM_ERROR("Set sink scrambling failed\n"); 1860 + return; 1861 + } 1862 + 1863 + DRM_DEBUG_KMS("sink scrambling handled\n"); 1827 1864 } 1828 1865 1829 1866 static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv,
-18
drivers/gpu/drm/i915/intel_huc.c
··· 251 251 } 252 252 253 253 /** 254 - * intel_huc_fini() - clean up resources allocated for HuC 255 - * @dev_priv: the drm_i915_private device 256 - * 257 - * Cleans up by releasing the huc firmware GEM obj. 258 - */ 259 - void intel_huc_fini(struct drm_i915_private *dev_priv) 260 - { 261 - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; 262 - struct drm_i915_gem_object *obj; 263 - 264 - obj = fetch_and_zero(&huc_fw->obj); 265 - if (obj) 266 - i915_gem_object_put(obj); 267 - 268 - huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; 269 - } 270 - 271 - /** 272 254 * intel_guc_auth_huc() - authenticate ucode 273 255 * @dev_priv: the drm_i915_device 274 256 *
+1
drivers/gpu/drm/i915/intel_lpe_audio.c
··· 331 331 * audio driver and i915 332 332 * @dev_priv: the i915 drm device private data 333 333 * @eld : ELD data 334 + * @pipe: pipe id 334 335 * @port: port id 335 336 * @tmds_clk_speed: tmds clock frequency in Hz 336 337 *
+62 -67
drivers/gpu/drm/i915/intel_lrc.c
··· 326 326 rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; 327 327 u32 *reg_state = ce->lrc_reg_state; 328 328 329 - GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8)); 329 + assert_ring_tail_valid(rq->ring, rq->tail); 330 330 reg_state[CTX_RING_TAIL+1] = rq->tail; 331 331 332 332 /* True 32b PPGTT with dynamic page allocation: update PDP ··· 399 399 { 400 400 struct drm_i915_gem_request *last; 401 401 struct execlist_port *port = engine->execlist_port; 402 - unsigned long flags; 403 402 struct rb_node *rb; 404 403 bool submit = false; 405 - 406 - /* After execlist_first is updated, the tasklet will be rescheduled. 407 - * 408 - * If we are currently running (inside the tasklet) and a third 409 - * party queues a request and so updates engine->execlist_first under 410 - * the spinlock (which we have elided), it will atomically set the 411 - * TASKLET_SCHED flag causing the us to be re-executed and pick up 412 - * the change in state (the update to TASKLET_SCHED incurs a memory 413 - * barrier making this cross-cpu checking safe). 414 - */ 415 - if (!READ_ONCE(engine->execlist_first)) 416 - return; 417 404 418 405 last = port->request; 419 406 if (last) ··· 435 448 * and context switches) submission. 
436 449 */ 437 450 438 - spin_lock_irqsave(&engine->timeline->lock, flags); 451 + spin_lock_irq(&engine->timeline->lock); 439 452 rb = engine->execlist_first; 440 453 while (rb) { 441 454 struct drm_i915_gem_request *cursor = ··· 487 500 i915_gem_request_assign(&port->request, last); 488 501 engine->execlist_first = rb; 489 502 } 490 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 503 + spin_unlock_irq(&engine->timeline->lock); 491 504 492 505 if (submit) 493 506 execlists_submit_ports(engine); ··· 517 530 518 531 intel_uncore_forcewake_get(dev_priv, engine->fw_domains); 519 532 520 - while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { 533 + /* Prefer doing test_and_clear_bit() as a two stage operation to avoid 534 + * imposing the cost of a locked atomic transaction when submitting a 535 + * new request (outside of the context-switch interrupt). 536 + */ 537 + while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { 521 538 u32 __iomem *csb_mmio = 522 539 dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); 523 540 u32 __iomem *buf = 524 541 dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); 525 - unsigned int csb, head, tail; 542 + unsigned int head, tail; 526 543 527 - csb = readl(csb_mmio); 528 - head = GEN8_CSB_READ_PTR(csb); 529 - tail = GEN8_CSB_WRITE_PTR(csb); 530 - if (head == tail) 531 - break; 544 + /* The write will be ordered by the uncached read (itself 545 + * a memory barrier), so we do not need another in the form 546 + * of a locked instruction. The race between the interrupt 547 + * handler and the split test/clear is harmless as we order 548 + * our clear before the CSB read. If the interrupt arrived 549 + * first between the test and the clear, we read the updated 550 + * CSB and clear the bit. If the interrupt arrives as we read 551 + * the CSB or later (i.e. after we had cleared the bit) the bit 552 + * is set and we do a new loop. 
553 + */ 554 + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); 555 + head = readl(csb_mmio); 556 + tail = GEN8_CSB_WRITE_PTR(head); 557 + head = GEN8_CSB_READ_PTR(head); 558 + while (head != tail) { 559 + unsigned int status; 532 560 533 - if (tail < head) 534 - tail += GEN8_CSB_ENTRIES; 535 - do { 536 - unsigned int idx = ++head % GEN8_CSB_ENTRIES; 537 - unsigned int status = readl(buf + 2 * idx); 561 + if (++head == GEN8_CSB_ENTRIES) 562 + head = 0; 538 563 539 564 /* We are flying near dragons again. 540 565 * ··· 565 566 * status notifier. 566 567 */ 567 568 569 + status = readl(buf + 2 * head); 568 570 if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) 569 571 continue; 570 572 571 573 /* Check the context/desc id for this event matches */ 572 - GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) != 574 + GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != 573 575 port[0].context_id); 574 576 575 577 GEM_BUG_ON(port[0].count == 0); ··· 588 588 589 589 GEM_BUG_ON(port[0].count == 0 && 590 590 !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); 591 - } while (head < tail); 591 + } 592 592 593 - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, 594 - GEN8_CSB_WRITE_PTR(csb) << 8), 593 + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), 595 594 csb_mmio); 596 595 } 597 596 ··· 646 647 static struct intel_engine_cs * 647 648 pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) 648 649 { 649 - struct intel_engine_cs *engine; 650 + struct intel_engine_cs *engine = 651 + container_of(pt, struct drm_i915_gem_request, priotree)->engine; 650 652 651 - engine = container_of(pt, 652 - struct drm_i915_gem_request, 653 - priotree)->engine; 653 + GEM_BUG_ON(!locked); 654 + 654 655 if (engine != locked) { 655 - if (locked) 656 - spin_unlock_irq(&locked->timeline->lock); 657 - spin_lock_irq(&engine->timeline->lock); 656 + spin_unlock(&locked->timeline->lock); 657 + spin_lock(&engine->timeline->lock); 658 658 } 659 659 660 660 return engine; ··· 661 663 662 664 static void 
execlists_schedule(struct drm_i915_gem_request *request, int prio) 663 665 { 664 - struct intel_engine_cs *engine = NULL; 666 + struct intel_engine_cs *engine; 665 667 struct i915_dependency *dep, *p; 666 668 struct i915_dependency stack; 667 669 LIST_HEAD(dfs); ··· 695 697 list_for_each_entry_safe(dep, p, &dfs, dfs_link) { 696 698 struct i915_priotree *pt = dep->signaler; 697 699 698 - list_for_each_entry(p, &pt->signalers_list, signal_link) 700 + /* Within an engine, there can be no cycle, but we may 701 + * refer to the same dependency chain multiple times 702 + * (redundant dependencies are not eliminated) and across 703 + * engines. 704 + */ 705 + list_for_each_entry(p, &pt->signalers_list, signal_link) { 706 + GEM_BUG_ON(p->signaler->priority < pt->priority); 699 707 if (prio > READ_ONCE(p->signaler->priority)) 700 708 list_move_tail(&p->dfs_link, &dfs); 709 + } 701 710 702 711 list_safe_reset_next(dep, p, dfs_link); 703 - if (!RB_EMPTY_NODE(&pt->node)) 704 - continue; 705 - 706 - engine = pt_lock_engine(pt, engine); 707 - 708 - /* If it is not already in the rbtree, we can update the 709 - * priority inplace and skip over it (and its dependencies) 710 - * if it is referenced *again* as we descend the dfs. 
711 - */ 712 - if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) { 713 - pt->priority = prio; 714 - list_del_init(&dep->dfs_link); 715 - } 716 712 } 713 + 714 + engine = request->engine; 715 + spin_lock_irq(&engine->timeline->lock); 717 716 718 717 /* Fifo and depth-first replacement ensure our deps execute before us */ 719 718 list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { ··· 723 728 if (prio <= pt->priority) 724 729 continue; 725 730 726 - GEM_BUG_ON(RB_EMPTY_NODE(&pt->node)); 727 - 728 731 pt->priority = prio; 729 - rb_erase(&pt->node, &engine->execlist_queue); 730 - if (insert_request(pt, &engine->execlist_queue)) 731 - engine->execlist_first = &pt->node; 732 + if (!RB_EMPTY_NODE(&pt->node)) { 733 + rb_erase(&pt->node, &engine->execlist_queue); 734 + if (insert_request(pt, &engine->execlist_queue)) 735 + engine->execlist_first = &pt->node; 736 + } 732 737 } 733 738 734 - if (engine) 735 - spin_unlock_irq(&engine->timeline->lock); 739 + spin_unlock_irq(&engine->timeline->lock); 736 740 737 741 /* XXX Do we need to preempt to make room for us and our deps? 
*/ 738 742 } ··· 1249 1255 ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; 1250 1256 1251 1257 request->ring->head = request->postfix; 1252 - request->ring->last_retired_head = -1; 1253 1258 intel_ring_update_space(request->ring); 1254 1259 1255 1260 /* Catch up with any missed context-switch interrupts */ ··· 1261 1268 GEM_BUG_ON(request->ctx != port[0].request->ctx); 1262 1269 1263 1270 /* Reset WaIdleLiteRestore:bdw,skl as well */ 1264 - request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); 1265 - GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); 1271 + request->tail = 1272 + intel_ring_wrap(request->ring, 1273 + request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); 1274 + assert_ring_tail_valid(request->ring, request->tail); 1266 1275 } 1267 1276 1268 1277 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) ··· 1475 1480 *cs++ = MI_USER_INTERRUPT; 1476 1481 *cs++ = MI_NOOP; 1477 1482 request->tail = intel_ring_offset(request, cs); 1478 - GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); 1483 + assert_ring_tail_valid(request->ring, request->tail); 1479 1484 1480 1485 gen8_emit_wa_tail(request, cs); 1481 1486 } ··· 1503 1508 *cs++ = MI_USER_INTERRUPT; 1504 1509 *cs++ = MI_NOOP; 1505 1510 request->tail = intel_ring_offset(request, cs); 1506 - GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); 1511 + assert_ring_tail_valid(request->ring, request->tail); 1507 1512 1508 1513 gen8_emit_wa_tail(request, cs); 1509 1514 } ··· 1570 1575 { 1571 1576 engine->submit_request = execlists_submit_request; 1572 1577 engine->schedule = execlists_schedule; 1578 + engine->irq_tasklet.func = intel_lrc_irq_handler; 1573 1579 } 1574 1580 1575 1581 static void ··· 2037 2041 i915_gem_object_unpin_map(ce->state->obj); 2038 2042 2039 2043 ce->ring->head = ce->ring->tail = 0; 2040 - ce->ring->last_retired_head = -1; 2041 2044 intel_ring_update_space(ce->ring); 2042 2045 } 2043 2046 }
+33 -30
drivers/gpu/drm/i915/intel_opregion.c
··· 920 920 char buf[sizeof(OPREGION_SIGNATURE)]; 921 921 int err = 0; 922 922 void *base; 923 + const void *vbt; 924 + u32 vbt_size; 923 925 924 926 BUILD_BUG_ON(sizeof(struct opregion_header) != 0x100); 925 927 BUILD_BUG_ON(sizeof(struct opregion_acpi) != 0x100); ··· 974 972 if (mboxes & MBOX_ASLE_EXT) 975 973 DRM_DEBUG_DRIVER("ASLE extension supported\n"); 976 974 977 - if (!dmi_check_system(intel_no_opregion_vbt)) { 978 - const void *vbt = NULL; 979 - u32 vbt_size = 0; 975 + if (dmi_check_system(intel_no_opregion_vbt)) 976 + goto out; 980 977 981 - if (opregion->header->opregion_ver >= 2 && opregion->asle && 982 - opregion->asle->rvda && opregion->asle->rvds) { 983 - opregion->rvda = memremap(opregion->asle->rvda, 984 - opregion->asle->rvds, 985 - MEMREMAP_WB); 986 - vbt = opregion->rvda; 987 - vbt_size = opregion->asle->rvds; 988 - } 989 - 978 + if (opregion->header->opregion_ver >= 2 && opregion->asle && 979 + opregion->asle->rvda && opregion->asle->rvds) { 980 + opregion->rvda = memremap(opregion->asle->rvda, 981 + opregion->asle->rvds, 982 + MEMREMAP_WB); 983 + vbt = opregion->rvda; 984 + vbt_size = opregion->asle->rvds; 990 985 if (intel_bios_is_valid_vbt(vbt, vbt_size)) { 991 986 DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (RVDA)\n"); 992 987 opregion->vbt = vbt; 993 988 opregion->vbt_size = vbt_size; 989 + goto out; 994 990 } else { 995 - vbt = base + OPREGION_VBT_OFFSET; 996 - /* 997 - * The VBT specification says that if the ASLE ext 998 - * mailbox is not used its area is reserved, but 999 - * on some CHT boards the VBT extends into the 1000 - * ASLE ext area. Allow this even though it is 1001 - * against the spec, so we do not end up rejecting 1002 - * the VBT on those boards (and end up not finding the 1003 - * LCD panel because of this). 1004 - */ 1005 - vbt_size = (mboxes & MBOX_ASLE_EXT) ? 
1006 - OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; 1007 - vbt_size -= OPREGION_VBT_OFFSET; 1008 - if (intel_bios_is_valid_vbt(vbt, vbt_size)) { 1009 - DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); 1010 - opregion->vbt = vbt; 1011 - opregion->vbt_size = vbt_size; 1012 - } 991 + DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n"); 1013 992 } 1014 993 } 1015 994 995 + vbt = base + OPREGION_VBT_OFFSET; 996 + /* 997 + * The VBT specification says that if the ASLE ext mailbox is not used 998 + * its area is reserved, but on some CHT boards the VBT extends into the 999 + * ASLE ext area. Allow this even though it is against the spec, so we 1000 + * do not end up rejecting the VBT on those boards (and end up not 1001 + * finding the LCD panel because of this). 1002 + */ 1003 + vbt_size = (mboxes & MBOX_ASLE_EXT) ? 1004 + OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; 1005 + vbt_size -= OPREGION_VBT_OFFSET; 1006 + if (intel_bios_is_valid_vbt(vbt, vbt_size)) { 1007 + DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); 1008 + opregion->vbt = vbt; 1009 + opregion->vbt_size = vbt_size; 1010 + } else { 1011 + DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (Mailbox #4)\n"); 1012 + } 1013 + 1014 + out: 1016 1015 return 0; 1017 1016 1018 1017 err_out:
+61 -27
drivers/gpu/drm/i915/intel_pm.c
··· 655 655 return wm_size; 656 656 } 657 657 658 + static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state, 659 + const struct intel_plane_state *plane_state) 660 + { 661 + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); 662 + 663 + /* FIXME check the 'enable' instead */ 664 + if (!crtc_state->base.active) 665 + return false; 666 + 667 + /* 668 + * Treat cursor with fb as always visible since cursor updates 669 + * can happen faster than the vrefresh rate, and the current 670 + * watermark code doesn't handle that correctly. Cursor updates 671 + * which set/clear the fb or change the cursor size are going 672 + * to get throttled by intel_legacy_cursor_update() to work 673 + * around this problem with the watermark code. 674 + */ 675 + if (plane->id == PLANE_CURSOR) 676 + return plane_state->base.fb != NULL; 677 + else 678 + return plane_state->base.visible; 679 + } 680 + 658 681 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv) 659 682 { 660 683 struct intel_crtc *crtc, *enabled = NULL; ··· 1984 1961 uint32_t method1, method2; 1985 1962 int cpp; 1986 1963 1987 - if (!cstate->base.active || !pstate->base.visible) 1964 + if (!intel_wm_plane_visible(cstate, pstate)) 1988 1965 return 0; 1989 1966 1990 1967 cpp = pstate->base.fb->format->cpp[0]; ··· 2013 1990 uint32_t method1, method2; 2014 1991 int cpp; 2015 1992 2016 - if (!cstate->base.active || !pstate->base.visible) 1993 + if (!intel_wm_plane_visible(cstate, pstate)) 2017 1994 return 0; 2018 1995 2019 1996 cpp = pstate->base.fb->format->cpp[0]; ··· 2036 2013 { 2037 2014 int cpp; 2038 2015 2039 - /* 2040 - * Treat cursor with fb as always visible since cursor updates 2041 - * can happen faster than the vrefresh rate, and the current 2042 - * watermark code doesn't handle that correctly. 
Cursor updates 2043 - * which set/clear the fb or change the cursor size are going 2044 - * to get throttled by intel_legacy_cursor_update() to work 2045 - * around this problem with the watermark code. 2046 - */ 2047 - if (!cstate->base.active || !pstate->base.fb) 2016 + if (!intel_wm_plane_visible(cstate, pstate)) 2048 2017 return 0; 2049 2018 2050 2019 cpp = pstate->base.fb->format->cpp[0]; ··· 2053 2038 { 2054 2039 int cpp; 2055 2040 2056 - if (!cstate->base.active || !pstate->base.visible) 2041 + if (!intel_wm_plane_visible(cstate, pstate)) 2057 2042 return 0; 2058 2043 2059 2044 cpp = pstate->base.fb->format->cpp[0]; ··· 3361 3346 * Caller should take care of dividing & rounding off the value. 3362 3347 */ 3363 3348 static uint32_t 3364 - skl_plane_downscale_amount(const struct intel_plane_state *pstate) 3349 + skl_plane_downscale_amount(const struct intel_crtc_state *cstate, 3350 + const struct intel_plane_state *pstate) 3365 3351 { 3352 + struct intel_plane *plane = to_intel_plane(pstate->base.plane); 3366 3353 uint32_t downscale_h, downscale_w; 3367 3354 uint32_t src_w, src_h, dst_w, dst_h; 3368 3355 3369 - if (WARN_ON(!pstate->base.visible)) 3356 + if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) 3370 3357 return DRM_PLANE_HELPER_NO_SCALING; 3371 3358 3372 3359 /* n.b., src is 16.16 fixed point, dst is whole integer */ 3373 - src_w = drm_rect_width(&pstate->base.src); 3374 - src_h = drm_rect_height(&pstate->base.src); 3375 - dst_w = drm_rect_width(&pstate->base.dst); 3376 - dst_h = drm_rect_height(&pstate->base.dst); 3360 + if (plane->id == PLANE_CURSOR) { 3361 + src_w = pstate->base.src_w; 3362 + src_h = pstate->base.src_h; 3363 + dst_w = pstate->base.crtc_w; 3364 + dst_h = pstate->base.crtc_h; 3365 + } else { 3366 + src_w = drm_rect_width(&pstate->base.src); 3367 + src_h = drm_rect_height(&pstate->base.src); 3368 + dst_w = drm_rect_width(&pstate->base.dst); 3369 + dst_h = drm_rect_height(&pstate->base.dst); 3370 + } 3371 + 3377 3372 if 
(drm_rotation_90_or_270(pstate->base.rotation)) 3378 3373 swap(dst_w, dst_h); 3379 3374 ··· 3399 3374 const struct drm_plane_state *pstate, 3400 3375 int y) 3401 3376 { 3377 + struct intel_plane *plane = to_intel_plane(pstate->plane); 3402 3378 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); 3403 3379 uint32_t down_scale_amount, data_rate; 3404 3380 uint32_t width = 0, height = 0; ··· 3412 3386 fb = pstate->fb; 3413 3387 format = fb->format->format; 3414 3388 3415 - if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR) 3389 + if (plane->id == PLANE_CURSOR) 3416 3390 return 0; 3417 3391 if (y && format != DRM_FORMAT_NV12) 3418 3392 return 0; ··· 3436 3410 data_rate = width * height * fb->format->cpp[0]; 3437 3411 } 3438 3412 3439 - down_scale_amount = skl_plane_downscale_amount(intel_pstate); 3413 + down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); 3440 3414 3441 3415 return (uint64_t)data_rate * down_scale_amount >> 16; 3442 3416 } ··· 3728 3702 uint64_t pixel_rate; 3729 3703 3730 3704 /* Shouldn't reach here on disabled planes... */ 3731 - if (WARN_ON(!pstate->base.visible)) 3705 + if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) 3732 3706 return 0; 3733 3707 3734 3708 /* ··· 3736 3710 * with additional adjustments for plane-specific scaling. 
3737 3711 */ 3738 3712 adjusted_pixel_rate = cstate->pixel_rate; 3739 - downscale_amount = skl_plane_downscale_amount(pstate); 3713 + downscale_amount = skl_plane_downscale_amount(cstate, pstate); 3740 3714 3741 3715 pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; 3742 3716 WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); ··· 3753 3727 uint8_t *out_lines, /* out */ 3754 3728 bool *enabled /* out */) 3755 3729 { 3730 + struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); 3756 3731 struct drm_plane_state *pstate = &intel_pstate->base; 3757 3732 struct drm_framebuffer *fb = pstate->fb; 3758 3733 uint32_t latency = dev_priv->wm.skl_latency[level]; ··· 3773 3746 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); 3774 3747 bool y_tiled, x_tiled; 3775 3748 3776 - if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { 3749 + if (latency == 0 || 3750 + !intel_wm_plane_visible(cstate, intel_pstate)) { 3777 3751 *enabled = false; 3778 3752 return 0; 3779 3753 } ··· 3790 3762 if (apply_memory_bw_wa && x_tiled) 3791 3763 latency += 15; 3792 3764 3793 - width = drm_rect_width(&intel_pstate->base.src) >> 16; 3794 - height = drm_rect_height(&intel_pstate->base.src) >> 16; 3765 + if (plane->id == PLANE_CURSOR) { 3766 + width = intel_pstate->base.crtc_w; 3767 + height = intel_pstate->base.crtc_h; 3768 + } else { 3769 + width = drm_rect_width(&intel_pstate->base.src) >> 16; 3770 + height = drm_rect_height(&intel_pstate->base.src) >> 16; 3771 + } 3795 3772 3796 3773 if (drm_rotation_90_or_270(pstate->rotation)) 3797 3774 swap(width, height); ··· 8088 8055 case GEN6_PCODE_TIMEOUT: 8089 8056 return -ETIMEDOUT; 8090 8057 default: 8091 - MISSING_CASE(flags) 8058 + MISSING_CASE(flags); 8092 8059 return 0; 8093 8060 } 8094 8061 } ··· 8388 8355 const i915_reg_t reg) 8389 8356 { 8390 8357 u32 lower, upper, tmp; 8358 + int loop = 2; 8391 8359 8392 8360 /* The register accessed do not need forcewake. 
We borrow 8393 8361 * uncore lock to prevent concurrent access to range reg. ··· 8417 8383 I915_WRITE_FW(VLV_COUNTER_CONTROL, 8418 8384 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); 8419 8385 upper = I915_READ_FW(reg); 8420 - } while (upper != tmp); 8386 + } while (upper != tmp && --loop); 8421 8387 8422 8388 /* Everywhere else we always use VLV_COUNTER_CONTROL with the 8423 8389 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
+10 -21
drivers/gpu/drm/i915/intel_ringbuffer.c
··· 49 49 50 50 void intel_ring_update_space(struct intel_ring *ring) 51 51 { 52 - if (ring->last_retired_head != -1) { 53 - ring->head = ring->last_retired_head; 54 - ring->last_retired_head = -1; 55 - } 56 - 57 - ring->space = __intel_ring_space(ring->head & HEAD_ADDR, 58 - ring->tail, ring->size); 52 + ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); 59 53 } 60 54 61 55 static int ··· 612 618 } 613 619 614 620 /* If the rq hung, jump to its breadcrumb and skip the batch */ 615 - if (request->fence.error == -EIO) { 616 - struct intel_ring *ring = request->ring; 617 - 618 - ring->head = request->postfix; 619 - ring->last_retired_head = -1; 620 - } 621 + if (request->fence.error == -EIO) 622 + request->ring->head = request->postfix; 621 623 } else { 622 624 engine->legacy_active_context = NULL; 623 625 } ··· 774 784 775 785 i915_gem_request_submit(request); 776 786 777 - GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); 787 + assert_ring_tail_valid(request->ring, request->tail); 778 788 I915_WRITE_TAIL(request->engine, request->tail); 779 789 } 780 790 ··· 786 796 *cs++ = MI_USER_INTERRUPT; 787 797 788 798 req->tail = intel_ring_offset(req, cs); 789 - GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); 799 + assert_ring_tail_valid(req->ring, req->tail); 790 800 } 791 801 792 802 static const int i9xx_emit_breadcrumb_sz = 4; ··· 825 835 *cs++ = MI_NOOP; 826 836 827 837 req->tail = intel_ring_offset(req, cs); 828 - GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); 838 + assert_ring_tail_valid(req->ring, req->tail); 829 839 } 830 840 831 841 static const int gen8_render_emit_breadcrumb_sz = 8; ··· 1382 1392 if (IS_I830(engine->i915) || IS_I845G(engine->i915)) 1383 1393 ring->effective_size -= 2 * CACHELINE_BYTES; 1384 1394 1385 - ring->last_retired_head = -1; 1386 1395 intel_ring_update_space(ring); 1387 1396 1388 1397 vma = intel_ring_create_vma(engine->i915, size); ··· 1440 1451 ret = context_pin(ctx); 1441 1452 if (ret) 1442 1453 goto error; 1454 + 1455 + 
ce->state->obj->mm.dirty = true; 1443 1456 } 1444 1457 1445 1458 /* The kernel context is only used as a placeholder for flushing the ··· 1562 1571 struct intel_engine_cs *engine; 1563 1572 enum intel_engine_id id; 1564 1573 1565 - for_each_engine(engine, dev_priv, id) { 1574 + for_each_engine(engine, dev_priv, id) 1566 1575 engine->buffer->head = engine->buffer->tail; 1567 - engine->buffer->last_retired_head = -1; 1568 - } 1569 1576 } 1570 1577 1571 1578 static int ring_request_alloc(struct drm_i915_gem_request *request) ··· 2117 2128 2118 2129 num_rings = 2119 2130 hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; 2120 - engine->emit_breadcrumb_sz += num_rings * 6; 2131 + engine->emit_breadcrumb_sz += num_rings * 8; 2121 2132 } 2122 2133 } else if (INTEL_GEN(dev_priv) >= 6) { 2123 2134 engine->init_context = intel_rcs_ctx_init;
+36 -29
drivers/gpu/drm/i915/intel_ringbuffer.h
··· 149 149 int space; 150 150 int size; 151 151 int effective_size; 152 - 153 - /** We track the position of the requests in the ring buffer, and 154 - * when each is retired we increment last_retired_head as the GPU 155 - * must have finished processing the request and so we know we 156 - * can advance the ringbuffer up to that position. 157 - * 158 - * last_retired_head is set to -1 after the value is consumed so 159 - * we can detect new retirements. 160 - */ 161 - u32 last_retired_head; 162 152 }; 163 153 164 154 struct i915_gem_context; ··· 432 442 u32 (*get_cmd_length_mask)(u32 cmd_header); 433 443 }; 434 444 435 - static inline unsigned 445 + static inline unsigned int 436 446 intel_engine_flag(const struct intel_engine_cs *engine) 437 447 { 438 - return 1 << engine->id; 439 - } 440 - 441 - static inline void 442 - intel_flush_status_page(struct intel_engine_cs *engine, int reg) 443 - { 444 - mb(); 445 - clflush(&engine->status_page.page_addr[reg]); 446 - mb(); 448 + return BIT(engine->id); 447 449 } 448 450 449 451 static inline u32 ··· 446 464 } 447 465 448 466 static inline void 449 - intel_write_status_page(struct intel_engine_cs *engine, 450 - int reg, u32 value) 467 + intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) 451 468 { 452 - mb(); 453 - clflush(&engine->status_page.page_addr[reg]); 454 - engine->status_page.page_addr[reg] = value; 455 - clflush(&engine->status_page.page_addr[reg]); 456 - mb(); 469 + /* Writing into the status page should be done sparingly. Since 470 + * we do when we are uncertain of the device state, we take a bit 471 + * of extra paranoia to try and ensure that the HWS takes the value 472 + * we give and that it doesn't end up trapped inside the CPU! 
473 + */ 474 + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { 475 + mb(); 476 + clflush(&engine->status_page.page_addr[reg]); 477 + engine->status_page.page_addr[reg] = value; 478 + clflush(&engine->status_page.page_addr[reg]); 479 + mb(); 480 + } else { 481 + WRITE_ONCE(engine->status_page.page_addr[reg], value); 482 + } 457 483 } 458 484 459 485 /* ··· 515 525 } 516 526 517 527 static inline u32 518 - intel_ring_offset(struct drm_i915_gem_request *req, void *addr) 528 + intel_ring_wrap(const struct intel_ring *ring, u32 pos) 529 + { 530 + return pos & (ring->size - 1); 531 + } 532 + 533 + static inline u32 534 + intel_ring_offset(const struct drm_i915_gem_request *req, void *addr) 519 535 { 520 536 /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ 521 537 u32 offset = addr - req->ring->vaddr; 522 538 GEM_BUG_ON(offset > req->ring->size); 523 - return offset & (req->ring->size - 1); 539 + return intel_ring_wrap(req->ring, offset); 540 + } 541 + 542 + static inline void 543 + assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) 544 + { 545 + /* We could combine these into a single tail operation, but keeping 546 + * them as seperate tests will help identify the cause should one 547 + * ever fire. 548 + */ 549 + GEM_BUG_ON(!IS_ALIGNED(tail, 8)); 550 + GEM_BUG_ON(tail >= ring->size); 524 551 } 525 552 526 553 void intel_ring_update_space(struct intel_ring *ring);
+9 -3
drivers/gpu/drm/i915/intel_runtime_pm.c
··· 2840 2840 { 2841 2841 struct pci_dev *pdev = dev_priv->drm.pdev; 2842 2842 struct device *kdev = &pdev->dev; 2843 + int ret; 2843 2844 2844 - pm_runtime_get_sync(kdev); 2845 + ret = pm_runtime_get_sync(kdev); 2846 + WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); 2845 2847 2846 2848 atomic_inc(&dev_priv->pm.wakeref_count); 2847 2849 assert_rpm_wakelock_held(dev_priv); ··· 2873 2871 * function, since the power state is undefined. This applies 2874 2872 * atm to the late/early system suspend/resume handlers. 2875 2873 */ 2876 - WARN_ON_ONCE(ret < 0); 2874 + WARN_ONCE(ret < 0, 2875 + "pm_runtime_get_if_in_use() failed: %d\n", ret); 2877 2876 if (ret <= 0) 2878 2877 return false; 2879 2878 } ··· 2958 2955 * platforms without RPM support. 2959 2956 */ 2960 2957 if (!HAS_RUNTIME_PM(dev_priv)) { 2958 + int ret; 2959 + 2961 2960 pm_runtime_dont_use_autosuspend(kdev); 2962 - pm_runtime_get_sync(kdev); 2961 + ret = pm_runtime_get_sync(kdev); 2962 + WARN(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); 2963 2963 } else { 2964 2964 pm_runtime_use_autosuspend(kdev); 2965 2965 }
+135 -151
drivers/gpu/drm/i915/intel_sprite.c
··· 217 217 struct drm_framebuffer *fb = plane_state->base.fb; 218 218 enum plane_id plane_id = intel_plane->id; 219 219 enum pipe pipe = intel_plane->pipe; 220 - u32 plane_ctl; 220 + u32 plane_ctl = plane_state->ctl; 221 221 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 222 222 u32 surf_addr = plane_state->main.offset; 223 223 unsigned int rotation = plane_state->base.rotation; ··· 231 231 uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; 232 232 uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; 233 233 unsigned long irqflags; 234 - 235 - plane_ctl = PLANE_CTL_ENABLE; 236 - 237 - if (!IS_GEMINILAKE(dev_priv)) { 238 - plane_ctl |= 239 - PLANE_CTL_PIPE_GAMMA_ENABLE | 240 - PLANE_CTL_PIPE_CSC_ENABLE | 241 - PLANE_CTL_PLANE_GAMMA_DISABLE; 242 - } 243 - 244 - plane_ctl |= skl_plane_ctl_format(fb->format->format); 245 - plane_ctl |= skl_plane_ctl_tiling(fb->modifier); 246 - plane_ctl |= skl_plane_ctl_rotation(rotation); 247 - 248 - if (key->flags & I915_SET_COLORKEY_DESTINATION) 249 - plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION; 250 - else if (key->flags & I915_SET_COLORKEY_SOURCE) 251 - plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; 252 234 253 235 /* Sizes are 0 based */ 254 236 src_w--; ··· 343 361 I915_WRITE_FW(SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); 344 362 } 345 363 346 - static void 347 - vlv_update_plane(struct drm_plane *dplane, 348 - const struct intel_crtc_state *crtc_state, 349 - const struct intel_plane_state *plane_state) 364 + static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, 365 + const struct intel_plane_state *plane_state) 350 366 { 351 - struct drm_device *dev = dplane->dev; 352 - struct drm_i915_private *dev_priv = to_i915(dev); 353 - struct intel_plane *intel_plane = to_intel_plane(dplane); 354 - struct drm_framebuffer *fb = plane_state->base.fb; 355 - enum pipe pipe = intel_plane->pipe; 356 - enum plane_id plane_id = intel_plane->id; 357 - u32 sprctl; 358 - u32 
sprsurf_offset, linear_offset; 367 + const struct drm_framebuffer *fb = plane_state->base.fb; 359 368 unsigned int rotation = plane_state->base.rotation; 360 369 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 361 - int crtc_x = plane_state->base.dst.x1; 362 - int crtc_y = plane_state->base.dst.y1; 363 - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); 364 - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); 365 - uint32_t x = plane_state->base.src.x1 >> 16; 366 - uint32_t y = plane_state->base.src.y1 >> 16; 367 - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; 368 - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; 369 - unsigned long irqflags; 370 + u32 sprctl; 370 371 371 - sprctl = SP_ENABLE; 372 + sprctl = SP_ENABLE | SP_GAMMA_ENABLE; 372 373 373 374 switch (fb->format->format) { 374 375 case DRM_FORMAT_YUYV: ··· 388 423 sprctl |= SP_FORMAT_RGBA8888; 389 424 break; 390 425 default: 391 - /* 392 - * If we get here one of the upper layers failed to filter 393 - * out the unsupported plane formats 394 - */ 395 - BUG(); 396 - break; 426 + MISSING_CASE(fb->format->format); 427 + return 0; 397 428 } 398 - 399 - /* 400 - * Enable gamma to match primary/cursor plane behaviour. 401 - * FIXME should be user controllable via propertiesa. 
402 - */ 403 - sprctl |= SP_GAMMA_ENABLE; 404 429 405 430 if (fb->modifier == I915_FORMAT_MOD_X_TILED) 406 431 sprctl |= SP_TILED; ··· 404 449 if (key->flags & I915_SET_COLORKEY_SOURCE) 405 450 sprctl |= SP_SOURCE_KEY; 406 451 452 + return sprctl; 453 + } 454 + 455 + static void 456 + vlv_update_plane(struct drm_plane *dplane, 457 + const struct intel_crtc_state *crtc_state, 458 + const struct intel_plane_state *plane_state) 459 + { 460 + struct drm_device *dev = dplane->dev; 461 + struct drm_i915_private *dev_priv = to_i915(dev); 462 + struct intel_plane *intel_plane = to_intel_plane(dplane); 463 + struct drm_framebuffer *fb = plane_state->base.fb; 464 + enum pipe pipe = intel_plane->pipe; 465 + enum plane_id plane_id = intel_plane->id; 466 + u32 sprctl = plane_state->ctl; 467 + u32 sprsurf_offset = plane_state->main.offset; 468 + u32 linear_offset; 469 + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 470 + int crtc_x = plane_state->base.dst.x1; 471 + int crtc_y = plane_state->base.dst.y1; 472 + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); 473 + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); 474 + uint32_t x = plane_state->main.x; 475 + uint32_t y = plane_state->main.y; 476 + unsigned long irqflags; 477 + 407 478 /* Sizes are 0 based */ 408 - src_w--; 409 - src_h--; 410 479 crtc_w--; 411 480 crtc_h--; 412 - 413 - intel_add_fb_offsets(&x, &y, plane_state, 0); 414 - sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); 415 - 416 - if (rotation & DRM_ROTATE_180) { 417 - x += src_w; 418 - y += src_h; 419 - } else if (rotation & DRM_REFLECT_X) { 420 - x += src_w; 421 - } 422 481 423 482 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); 424 483 ··· 485 516 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); 486 517 } 487 518 488 - static void 489 - ivb_update_plane(struct drm_plane *plane, 490 - const struct intel_crtc_state *crtc_state, 491 - const struct intel_plane_state *plane_state) 519 + 
static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, 520 + const struct intel_plane_state *plane_state) 492 521 { 493 - struct drm_device *dev = plane->dev; 494 - struct drm_i915_private *dev_priv = to_i915(dev); 495 - struct intel_plane *intel_plane = to_intel_plane(plane); 496 - struct drm_framebuffer *fb = plane_state->base.fb; 497 - enum pipe pipe = intel_plane->pipe; 498 - u32 sprctl, sprscale = 0; 499 - u32 sprsurf_offset, linear_offset; 522 + struct drm_i915_private *dev_priv = 523 + to_i915(plane_state->base.plane->dev); 524 + const struct drm_framebuffer *fb = plane_state->base.fb; 500 525 unsigned int rotation = plane_state->base.rotation; 501 526 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 502 - int crtc_x = plane_state->base.dst.x1; 503 - int crtc_y = plane_state->base.dst.y1; 504 - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); 505 - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); 506 - uint32_t x = plane_state->base.src.x1 >> 16; 507 - uint32_t y = plane_state->base.src.y1 >> 16; 508 - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; 509 - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; 510 - unsigned long irqflags; 527 + u32 sprctl; 511 528 512 - sprctl = SPRITE_ENABLE; 529 + sprctl = SPRITE_ENABLE | SPRITE_GAMMA_ENABLE; 530 + 531 + if (IS_IVYBRIDGE(dev_priv)) 532 + sprctl |= SPRITE_TRICKLE_FEED_DISABLE; 533 + 534 + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 535 + sprctl |= SPRITE_PIPE_CSC_ENABLE; 513 536 514 537 switch (fb->format->format) { 515 538 case DRM_FORMAT_XBGR8888: ··· 523 562 sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_VYUY; 524 563 break; 525 564 default: 526 - BUG(); 565 + MISSING_CASE(fb->format->format); 566 + return 0; 527 567 } 528 - 529 - /* 530 - * Enable gamma to match primary/cursor plane behaviour. 531 - * FIXME should be user controllable via propertiesa. 
532 - */ 533 - sprctl |= SPRITE_GAMMA_ENABLE; 534 568 535 569 if (fb->modifier == I915_FORMAT_MOD_X_TILED) 536 570 sprctl |= SPRITE_TILED; ··· 533 577 if (rotation & DRM_ROTATE_180) 534 578 sprctl |= SPRITE_ROTATE_180; 535 579 536 - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 537 - sprctl &= ~SPRITE_TRICKLE_FEED_DISABLE; 538 - else 539 - sprctl |= SPRITE_TRICKLE_FEED_DISABLE; 540 - 541 - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 542 - sprctl |= SPRITE_PIPE_CSC_ENABLE; 543 - 544 580 if (key->flags & I915_SET_COLORKEY_DESTINATION) 545 581 sprctl |= SPRITE_DEST_KEY; 546 582 else if (key->flags & I915_SET_COLORKEY_SOURCE) 547 583 sprctl |= SPRITE_SOURCE_KEY; 584 + 585 + return sprctl; 586 + } 587 + 588 + static void 589 + ivb_update_plane(struct drm_plane *plane, 590 + const struct intel_crtc_state *crtc_state, 591 + const struct intel_plane_state *plane_state) 592 + { 593 + struct drm_device *dev = plane->dev; 594 + struct drm_i915_private *dev_priv = to_i915(dev); 595 + struct intel_plane *intel_plane = to_intel_plane(plane); 596 + struct drm_framebuffer *fb = plane_state->base.fb; 597 + enum pipe pipe = intel_plane->pipe; 598 + u32 sprctl = plane_state->ctl, sprscale = 0; 599 + u32 sprsurf_offset = plane_state->main.offset; 600 + u32 linear_offset; 601 + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 602 + int crtc_x = plane_state->base.dst.x1; 603 + int crtc_y = plane_state->base.dst.y1; 604 + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); 605 + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); 606 + uint32_t x = plane_state->main.x; 607 + uint32_t y = plane_state->main.y; 608 + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; 609 + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; 610 + unsigned long irqflags; 548 611 549 612 /* Sizes are 0 based */ 550 613 src_w--; ··· 573 598 574 599 if (crtc_w != src_w || crtc_h != src_h) 575 600 sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | 
src_h; 576 - 577 - intel_add_fb_offsets(&x, &y, plane_state, 0); 578 - sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); 579 - 580 - /* HSW+ does this automagically in hardware */ 581 - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) && 582 - rotation & DRM_ROTATE_180) { 583 - x += src_w; 584 - y += src_h; 585 - } 586 601 587 602 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); 588 603 ··· 629 664 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); 630 665 } 631 666 632 - static void 633 - ilk_update_plane(struct drm_plane *plane, 634 - const struct intel_crtc_state *crtc_state, 635 - const struct intel_plane_state *plane_state) 667 + static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, 668 + const struct intel_plane_state *plane_state) 636 669 { 637 - struct drm_device *dev = plane->dev; 638 - struct drm_i915_private *dev_priv = to_i915(dev); 639 - struct intel_plane *intel_plane = to_intel_plane(plane); 640 - struct drm_framebuffer *fb = plane_state->base.fb; 641 - int pipe = intel_plane->pipe; 642 - u32 dvscntr, dvsscale; 643 - u32 dvssurf_offset, linear_offset; 670 + struct drm_i915_private *dev_priv = 671 + to_i915(plane_state->base.plane->dev); 672 + const struct drm_framebuffer *fb = plane_state->base.fb; 644 673 unsigned int rotation = plane_state->base.rotation; 645 674 const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 646 - int crtc_x = plane_state->base.dst.x1; 647 - int crtc_y = plane_state->base.dst.y1; 648 - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); 649 - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); 650 - uint32_t x = plane_state->base.src.x1 >> 16; 651 - uint32_t y = plane_state->base.src.y1 >> 16; 652 - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; 653 - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; 654 - unsigned long irqflags; 675 + u32 dvscntr; 655 676 656 - dvscntr = DVS_ENABLE; 677 + dvscntr = DVS_ENABLE | 
DVS_GAMMA_ENABLE; 678 + 679 + if (IS_GEN6(dev_priv)) 680 + dvscntr |= DVS_TRICKLE_FEED_DISABLE; 657 681 658 682 switch (fb->format->format) { 659 683 case DRM_FORMAT_XBGR8888: ··· 664 710 dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_VYUY; 665 711 break; 666 712 default: 667 - BUG(); 713 + MISSING_CASE(fb->format->format); 714 + return 0; 668 715 } 669 - 670 - /* 671 - * Enable gamma to match primary/cursor plane behaviour. 672 - * FIXME should be user controllable via propertiesa. 673 - */ 674 - dvscntr |= DVS_GAMMA_ENABLE; 675 716 676 717 if (fb->modifier == I915_FORMAT_MOD_X_TILED) 677 718 dvscntr |= DVS_TILED; ··· 674 725 if (rotation & DRM_ROTATE_180) 675 726 dvscntr |= DVS_ROTATE_180; 676 727 677 - if (IS_GEN6(dev_priv)) 678 - dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */ 679 - 680 728 if (key->flags & I915_SET_COLORKEY_DESTINATION) 681 729 dvscntr |= DVS_DEST_KEY; 682 730 else if (key->flags & I915_SET_COLORKEY_SOURCE) 683 731 dvscntr |= DVS_SOURCE_KEY; 732 + 733 + return dvscntr; 734 + } 735 + 736 + static void 737 + ilk_update_plane(struct drm_plane *plane, 738 + const struct intel_crtc_state *crtc_state, 739 + const struct intel_plane_state *plane_state) 740 + { 741 + struct drm_device *dev = plane->dev; 742 + struct drm_i915_private *dev_priv = to_i915(dev); 743 + struct intel_plane *intel_plane = to_intel_plane(plane); 744 + struct drm_framebuffer *fb = plane_state->base.fb; 745 + int pipe = intel_plane->pipe; 746 + u32 dvscntr = plane_state->ctl, dvsscale = 0; 747 + u32 dvssurf_offset = plane_state->main.offset; 748 + u32 linear_offset; 749 + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; 750 + int crtc_x = plane_state->base.dst.x1; 751 + int crtc_y = plane_state->base.dst.y1; 752 + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); 753 + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); 754 + uint32_t x = plane_state->main.x; 755 + uint32_t y = plane_state->main.y; 756 + uint32_t src_w = 
drm_rect_width(&plane_state->base.src) >> 16; 757 + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; 758 + unsigned long irqflags; 684 759 685 760 /* Sizes are 0 based */ 686 761 src_w--; ··· 712 739 crtc_w--; 713 740 crtc_h--; 714 741 715 - dvsscale = 0; 716 742 if (crtc_w != src_w || crtc_h != src_h) 717 743 dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h; 718 - 719 - intel_add_fb_offsets(&x, &y, plane_state, 0); 720 - dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); 721 - 722 - if (rotation & DRM_ROTATE_180) { 723 - x += src_w; 724 - y += src_h; 725 - } 726 744 727 745 linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); 728 746 ··· 945 981 ret = skl_check_plane_surface(state); 946 982 if (ret) 947 983 return ret; 984 + 985 + state->ctl = skl_plane_ctl(crtc_state, state); 986 + } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 987 + ret = i9xx_check_plane_surface(state); 988 + if (ret) 989 + return ret; 990 + 991 + state->ctl = vlv_sprite_ctl(crtc_state, state); 992 + } else if (INTEL_GEN(dev_priv) >= 7) { 993 + ret = i9xx_check_plane_surface(state); 994 + if (ret) 995 + return ret; 996 + 997 + state->ctl = ivb_sprite_ctl(crtc_state, state); 998 + } else { 999 + ret = i9xx_check_plane_surface(state); 1000 + if (ret) 1001 + return ret; 1002 + 1003 + state->ctl = ilk_sprite_ctl(crtc_state, state); 948 1004 } 949 1005 950 1006 return 0;
+240 -196
drivers/gpu/drm/i915/intel_uc.c
··· 26 26 #include "intel_uc.h" 27 27 #include <linux/firmware.h> 28 28 29 + /* Cleans up uC firmware by releasing the firmware GEM obj. 30 + */ 31 + static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw) 32 + { 33 + struct drm_i915_gem_object *obj; 34 + 35 + obj = fetch_and_zero(&uc_fw->obj); 36 + if (obj) 37 + i915_gem_object_put(obj); 38 + 39 + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; 40 + } 41 + 29 42 /* Reset GuC providing us with fresh state for both GuC and HuC. 30 43 */ 31 44 static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) ··· 96 83 97 84 void intel_uc_init_early(struct drm_i915_private *dev_priv) 98 85 { 99 - mutex_init(&dev_priv->guc.send_mutex); 86 + struct intel_guc *guc = &dev_priv->guc; 87 + 88 + mutex_init(&guc->send_mutex); 89 + guc->send = intel_guc_send_mmio; 100 90 } 101 91 102 - void intel_uc_init_fw(struct drm_i915_private *dev_priv) 103 - { 104 - if (dev_priv->huc.fw.path) 105 - intel_uc_prepare_fw(dev_priv, &dev_priv->huc.fw); 106 - 107 - if (dev_priv->guc.fw.path) 108 - intel_uc_prepare_fw(dev_priv, &dev_priv->guc.fw); 109 - } 110 - 111 - int intel_uc_init_hw(struct drm_i915_private *dev_priv) 112 - { 113 - int ret, attempts; 114 - 115 - /* GuC not enabled, nothing to do */ 116 - if (!i915.enable_guc_loading) 117 - return 0; 118 - 119 - gen9_reset_guc_interrupts(dev_priv); 120 - 121 - /* We need to notify the guc whenever we change the GGTT */ 122 - i915_ggtt_enable_guc(dev_priv); 123 - 124 - if (i915.enable_guc_submission) { 125 - ret = i915_guc_submission_init(dev_priv); 126 - if (ret) 127 - goto err; 128 - } 129 - 130 - /* WaEnableuKernelHeaderValidFix:skl */ 131 - /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ 132 - if (IS_GEN9(dev_priv)) 133 - attempts = 3; 134 - else 135 - attempts = 1; 136 - 137 - while (attempts--) { 138 - /* 139 - * Always reset the GuC just before (re)loading, so 140 - * that the state and timing are fairly predictable 141 - */ 142 - ret = __intel_uc_reset_hw(dev_priv); 143 - if (ret) 
144 - goto err_submission; 145 - 146 - intel_huc_init_hw(&dev_priv->huc); 147 - ret = intel_guc_init_hw(&dev_priv->guc); 148 - if (ret == 0 || ret != -EAGAIN) 149 - break; 150 - 151 - DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and " 152 - "retry %d more time(s)\n", ret, attempts); 153 - } 154 - 155 - /* Did we succeded or run out of retries? */ 156 - if (ret) 157 - goto err_submission; 158 - 159 - intel_guc_auth_huc(dev_priv); 160 - if (i915.enable_guc_submission) { 161 - if (i915.guc_log_level >= 0) 162 - gen9_enable_guc_interrupts(dev_priv); 163 - 164 - ret = i915_guc_submission_enable(dev_priv); 165 - if (ret) 166 - goto err_submission; 167 - } 168 - 169 - return 0; 170 - 171 - /* 172 - * We've failed to load the firmware :( 173 - * 174 - * Decide whether to disable GuC submission and fall back to 175 - * execlist mode, and whether to hide the error by returning 176 - * zero or to return -EIO, which the caller will treat as a 177 - * nonfatal error (i.e. it doesn't prevent driver load, but 178 - * marks the GPU as wedged until reset). 
179 - */ 180 - err_submission: 181 - if (i915.enable_guc_submission) 182 - i915_guc_submission_fini(dev_priv); 183 - 184 - err: 185 - i915_ggtt_disable_guc(dev_priv); 186 - 187 - DRM_ERROR("GuC init failed\n"); 188 - if (i915.enable_guc_loading > 1 || i915.enable_guc_submission > 1) 189 - ret = -EIO; 190 - else 191 - ret = 0; 192 - 193 - if (i915.enable_guc_submission) { 194 - i915.enable_guc_submission = 0; 195 - DRM_NOTE("Falling back from GuC submission to execlist mode\n"); 196 - } 197 - 198 - return ret; 199 - } 200 - 201 - /* 202 - * Read GuC command/status register (SOFT_SCRATCH_0) 203 - * Return true if it contains a response rather than a command 204 - */ 205 - static bool intel_guc_recv(struct intel_guc *guc, u32 *status) 206 - { 207 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 208 - 209 - u32 val = I915_READ(SOFT_SCRATCH(0)); 210 - *status = val; 211 - return INTEL_GUC_RECV_IS_RESPONSE(val); 212 - } 213 - 214 - int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) 215 - { 216 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 217 - u32 status; 218 - int i; 219 - int ret; 220 - 221 - if (WARN_ON(len < 1 || len > 15)) 222 - return -EINVAL; 223 - 224 - mutex_lock(&guc->send_mutex); 225 - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 226 - 227 - dev_priv->guc.action_count += 1; 228 - dev_priv->guc.action_cmd = action[0]; 229 - 230 - for (i = 0; i < len; i++) 231 - I915_WRITE(SOFT_SCRATCH(i), action[i]); 232 - 233 - POSTING_READ(SOFT_SCRATCH(i - 1)); 234 - 235 - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); 236 - 237 - /* 238 - * Fast commands should complete in less than 10us, so sample quickly 239 - * up to that length of time, then switch to a slower sleep-wait loop. 240 - * No inte_guc_send command should ever take longer than 10ms. 
241 - */ 242 - ret = wait_for_us(intel_guc_recv(guc, &status), 10); 243 - if (ret) 244 - ret = wait_for(intel_guc_recv(guc, &status), 10); 245 - if (status != INTEL_GUC_STATUS_SUCCESS) { 246 - /* 247 - * Either the GuC explicitly returned an error (which 248 - * we convert to -EIO here) or no response at all was 249 - * received within the timeout limit (-ETIMEDOUT) 250 - */ 251 - if (ret != -ETIMEDOUT) 252 - ret = -EIO; 253 - 254 - DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" 255 - " ret=%d status=0x%08X response=0x%08X\n", 256 - action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); 257 - 258 - dev_priv->guc.action_fail += 1; 259 - dev_priv->guc.action_err = ret; 260 - } 261 - dev_priv->guc.action_status = status; 262 - 263 - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 264 - mutex_unlock(&guc->send_mutex); 265 - 266 - return ret; 267 - } 268 - 269 - int intel_guc_sample_forcewake(struct intel_guc *guc) 270 - { 271 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 272 - u32 action[2]; 273 - 274 - action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; 275 - /* WaRsDisableCoarsePowerGating:skl,bxt */ 276 - if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) 277 - action[1] = 0; 278 - else 279 - /* bit 0 and 1 are for Render and Media domain separately */ 280 - action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; 281 - 282 - return intel_guc_send(guc, action, ARRAY_SIZE(action)); 283 - } 284 - 285 - void intel_uc_prepare_fw(struct drm_i915_private *dev_priv, 286 - struct intel_uc_fw *uc_fw) 92 + static void fetch_uc_fw(struct drm_i915_private *dev_priv, 93 + struct intel_uc_fw *uc_fw) 287 94 { 288 95 struct pci_dev *pdev = dev_priv->drm.pdev; 289 96 struct drm_i915_gem_object *obj; ··· 111 278 struct uc_css_header *css; 112 279 size_t size; 113 280 int err; 281 + 282 + if (!uc_fw->path) 283 + return; 114 284 115 285 uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; 116 286 ··· 127 291 goto fail; 128 292 129 293 DRM_DEBUG_DRIVER("fetch 
uC fw from %s succeeded, fw %p\n", 130 - uc_fw->path, fw); 294 + uc_fw->path, fw); 131 295 132 296 /* Check the size of the blob before examining buffer contents */ 133 297 if (fw->size < sizeof(struct uc_css_header)) { ··· 140 304 /* Firmware bits always start from header */ 141 305 uc_fw->header_offset = 0; 142 306 uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - 143 - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); 307 + css->key_size_dw - css->exponent_size_dw) * sizeof(u32); 144 308 145 309 if (uc_fw->header_size != sizeof(struct uc_css_header)) { 146 310 DRM_NOTE("CSS header definition mismatch\n"); ··· 198 362 } 199 363 200 364 if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { 201 - DRM_NOTE("Skipping uC firmware version check\n"); 365 + DRM_NOTE("Skipping %s firmware version check\n", 366 + intel_uc_fw_type_repr(uc_fw->type)); 202 367 } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || 203 368 uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { 204 - DRM_NOTE("uC firmware version %d.%d, required %d.%d\n", 205 - uc_fw->major_ver_found, uc_fw->minor_ver_found, 206 - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); 369 + DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", 370 + intel_uc_fw_type_repr(uc_fw->type), 371 + uc_fw->major_ver_found, uc_fw->minor_ver_found, 372 + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); 207 373 err = -ENOEXEC; 208 374 goto fail; 209 375 } 210 376 211 377 DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", 212 - uc_fw->major_ver_found, uc_fw->minor_ver_found, 213 - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); 378 + uc_fw->major_ver_found, uc_fw->minor_ver_found, 379 + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); 214 380 215 381 obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); 216 382 if (IS_ERR(obj)) { ··· 224 386 uc_fw->size = fw->size; 225 387 226 388 DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", 227 - 
uc_fw->obj); 389 + uc_fw->obj); 228 390 229 391 release_firmware(fw); 230 392 uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; ··· 234 396 DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", 235 397 uc_fw->path, err); 236 398 DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", 237 - err, fw, uc_fw->obj); 399 + err, fw, uc_fw->obj); 238 400 239 401 release_firmware(fw); /* OK even if fw is NULL */ 240 402 uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; 403 + } 404 + 405 + void intel_uc_init_fw(struct drm_i915_private *dev_priv) 406 + { 407 + fetch_uc_fw(dev_priv, &dev_priv->huc.fw); 408 + fetch_uc_fw(dev_priv, &dev_priv->guc.fw); 409 + } 410 + 411 + void intel_uc_fini_fw(struct drm_i915_private *dev_priv) 412 + { 413 + __intel_uc_fw_fini(&dev_priv->guc.fw); 414 + __intel_uc_fw_fini(&dev_priv->huc.fw); 415 + } 416 + 417 + int intel_uc_init_hw(struct drm_i915_private *dev_priv) 418 + { 419 + int ret, attempts; 420 + 421 + if (!i915.enable_guc_loading) 422 + return 0; 423 + 424 + gen9_reset_guc_interrupts(dev_priv); 425 + 426 + /* We need to notify the guc whenever we change the GGTT */ 427 + i915_ggtt_enable_guc(dev_priv); 428 + 429 + if (i915.enable_guc_submission) { 430 + /* 431 + * This is stuff we need to have available at fw load time 432 + * if we are planning to enable submission later 433 + */ 434 + ret = i915_guc_submission_init(dev_priv); 435 + if (ret) 436 + goto err_guc; 437 + } 438 + 439 + /* WaEnableuKernelHeaderValidFix:skl */ 440 + /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ 441 + if (IS_GEN9(dev_priv)) 442 + attempts = 3; 443 + else 444 + attempts = 1; 445 + 446 + while (attempts--) { 447 + /* 448 + * Always reset the GuC just before (re)loading, so 449 + * that the state and timing are fairly predictable 450 + */ 451 + ret = __intel_uc_reset_hw(dev_priv); 452 + if (ret) 453 + goto err_submission; 454 + 455 + intel_huc_init_hw(&dev_priv->huc); 456 + ret = intel_guc_init_hw(&dev_priv->guc); 457 + if (ret == 0 || ret 
!= -EAGAIN) 458 + break; 459 + 460 + DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and " 461 + "retry %d more time(s)\n", ret, attempts); 462 + } 463 + 464 + /* Did we succeded or run out of retries? */ 465 + if (ret) 466 + goto err_submission; 467 + 468 + intel_guc_auth_huc(dev_priv); 469 + if (i915.enable_guc_submission) { 470 + if (i915.guc_log_level >= 0) 471 + gen9_enable_guc_interrupts(dev_priv); 472 + 473 + ret = i915_guc_submission_enable(dev_priv); 474 + if (ret) 475 + goto err_interrupts; 476 + } 477 + 478 + return 0; 479 + 480 + /* 481 + * We've failed to load the firmware :( 482 + * 483 + * Decide whether to disable GuC submission and fall back to 484 + * execlist mode, and whether to hide the error by returning 485 + * zero or to return -EIO, which the caller will treat as a 486 + * nonfatal error (i.e. it doesn't prevent driver load, but 487 + * marks the GPU as wedged until reset). 488 + */ 489 + err_interrupts: 490 + gen9_disable_guc_interrupts(dev_priv); 491 + err_submission: 492 + if (i915.enable_guc_submission) 493 + i915_guc_submission_fini(dev_priv); 494 + err_guc: 495 + i915_ggtt_disable_guc(dev_priv); 496 + 497 + DRM_ERROR("GuC init failed\n"); 498 + if (i915.enable_guc_loading > 1 || i915.enable_guc_submission > 1) 499 + ret = -EIO; 500 + else 501 + ret = 0; 502 + 503 + if (i915.enable_guc_submission) { 504 + i915.enable_guc_submission = 0; 505 + DRM_NOTE("Falling back from GuC submission to execlist mode\n"); 506 + } 507 + 508 + return ret; 509 + } 510 + 511 + void intel_uc_fini_hw(struct drm_i915_private *dev_priv) 512 + { 513 + if (!i915.enable_guc_loading) 514 + return; 515 + 516 + if (i915.enable_guc_submission) { 517 + i915_guc_submission_disable(dev_priv); 518 + gen9_disable_guc_interrupts(dev_priv); 519 + i915_guc_submission_fini(dev_priv); 520 + } 521 + i915_ggtt_disable_guc(dev_priv); 522 + } 523 + 524 + /* 525 + * Read GuC command/status register (SOFT_SCRATCH_0) 526 + * Return true if it contains a response rather than a 
command 527 + */ 528 + static bool guc_recv(struct intel_guc *guc, u32 *status) 529 + { 530 + struct drm_i915_private *dev_priv = guc_to_i915(guc); 531 + 532 + u32 val = I915_READ(SOFT_SCRATCH(0)); 533 + *status = val; 534 + return INTEL_GUC_RECV_IS_RESPONSE(val); 535 + } 536 + 537 + /* 538 + * This function implements the MMIO based host to GuC interface. 539 + */ 540 + int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) 541 + { 542 + struct drm_i915_private *dev_priv = guc_to_i915(guc); 543 + u32 status; 544 + int i; 545 + int ret; 546 + 547 + if (WARN_ON(len < 1 || len > 15)) 548 + return -EINVAL; 549 + 550 + mutex_lock(&guc->send_mutex); 551 + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER); 552 + 553 + dev_priv->guc.action_count += 1; 554 + dev_priv->guc.action_cmd = action[0]; 555 + 556 + for (i = 0; i < len; i++) 557 + I915_WRITE(SOFT_SCRATCH(i), action[i]); 558 + 559 + POSTING_READ(SOFT_SCRATCH(i - 1)); 560 + 561 + I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); 562 + 563 + /* 564 + * Fast commands should complete in less than 10us, so sample quickly 565 + * up to that length of time, then switch to a slower sleep-wait loop. 566 + * No inte_guc_send command should ever take longer than 10ms. 
567 + */ 568 + ret = wait_for_us(guc_recv(guc, &status), 10); 569 + if (ret) 570 + ret = wait_for(guc_recv(guc, &status), 10); 571 + if (status != INTEL_GUC_STATUS_SUCCESS) { 572 + /* 573 + * Either the GuC explicitly returned an error (which 574 + * we convert to -EIO here) or no response at all was 575 + * received within the timeout limit (-ETIMEDOUT) 576 + */ 577 + if (ret != -ETIMEDOUT) 578 + ret = -EIO; 579 + 580 + DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" 581 + " ret=%d status=0x%08X response=0x%08X\n", 582 + action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); 583 + 584 + dev_priv->guc.action_fail += 1; 585 + dev_priv->guc.action_err = ret; 586 + } 587 + dev_priv->guc.action_status = status; 588 + 589 + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); 590 + mutex_unlock(&guc->send_mutex); 591 + 592 + return ret; 593 + } 594 + 595 + int intel_guc_sample_forcewake(struct intel_guc *guc) 596 + { 597 + struct drm_i915_private *dev_priv = guc_to_i915(guc); 598 + u32 action[2]; 599 + 600 + action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; 601 + /* WaRsDisableCoarsePowerGating:skl,bxt */ 602 + if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) 603 + action[1] = 0; 604 + else 605 + /* bit 0 and 1 are for Render and Media domain separately */ 606 + action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; 607 + 608 + return intel_guc_send(guc, action, ARRAY_SIZE(action)); 241 609 }
+61 -23
drivers/gpu/drm/i915/intel_uc.h
··· 34 34 35 35 /* 36 36 * This structure primarily describes the GEM object shared with the GuC. 37 - * The GEM object is held for the entire lifetime of our interaction with 37 + * The specs sometimes refer to this object as a "GuC context", but we use 38 + * the term "client" to avoid confusion with hardware contexts. This 39 + * GEM object is held for the entire lifetime of our interaction with 38 40 * the GuC, being allocated before the GuC is loaded with its firmware. 39 41 * Because there's no way to update the address used by the GuC after 40 42 * initialisation, the shared object must stay pinned into the GGTT as ··· 46 44 * 47 45 * The single GEM object described here is actually made up of several 48 46 * separate areas, as far as the GuC is concerned. The first page (kept 49 - * kmap'd) includes the "process decriptor" which holds sequence data for 47 + * kmap'd) includes the "process descriptor" which holds sequence data for 50 48 * the doorbell, and one cacheline which actually *is* the doorbell; a 51 49 * write to this will "ring the doorbell" (i.e. send an interrupt to the 52 50 * GuC). 
The subsequent pages of the client object constitute the work ··· 74 72 75 73 uint32_t engines; /* bitmap of (host) engine ids */ 76 74 uint32_t priority; 77 - uint32_t ctx_index; 75 + u32 stage_id; 78 76 uint32_t proc_desc_offset; 79 77 80 - uint32_t doorbell_offset; 81 - uint32_t doorbell_cookie; 82 - uint16_t doorbell_id; 83 - uint16_t padding[3]; /* Maintain alignment */ 78 + u16 doorbell_id; 79 + unsigned long doorbell_offset; 80 + u32 doorbell_cookie; 84 81 85 82 spinlock_t wq_lock; 86 83 uint32_t wq_offset; ··· 101 100 INTEL_UC_FIRMWARE_SUCCESS 102 101 }; 103 102 103 + /* User-friendly representation of an enum */ 104 + static inline 105 + const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) 106 + { 107 + switch (status) { 108 + case INTEL_UC_FIRMWARE_FAIL: 109 + return "FAIL"; 110 + case INTEL_UC_FIRMWARE_NONE: 111 + return "NONE"; 112 + case INTEL_UC_FIRMWARE_PENDING: 113 + return "PENDING"; 114 + case INTEL_UC_FIRMWARE_SUCCESS: 115 + return "SUCCESS"; 116 + } 117 + return "<invalid>"; 118 + } 119 + 104 120 enum intel_uc_fw_type { 105 121 INTEL_UC_FW_TYPE_GUC, 106 122 INTEL_UC_FW_TYPE_HUC 107 123 }; 124 + 125 + /* User-friendly representation of an enum */ 126 + static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) 127 + { 128 + switch (type) { 129 + case INTEL_UC_FW_TYPE_GUC: 130 + return "GuC"; 131 + case INTEL_UC_FW_TYPE_HUC: 132 + return "HuC"; 133 + } 134 + return "uC"; 135 + } 108 136 109 137 /* 110 138 * This structure encapsulates all the data needed during the process ··· 163 133 struct intel_guc_log { 164 134 uint32_t flags; 165 135 struct i915_vma *vma; 166 - void *buf_addr; 167 - struct workqueue_struct *flush_wq; 168 - struct work_struct flush_work; 169 - struct rchan *relay_chan; 170 - 136 + /* The runtime stuff gets created only when GuC logging gets enabled */ 137 + struct { 138 + void *buf_addr; 139 + struct workqueue_struct *flush_wq; 140 + struct work_struct flush_work; 141 + struct rchan *relay_chan; 
142 + } runtime; 171 143 /* logging related stats */ 172 144 u32 capture_miss_count; 173 145 u32 flush_interrupt_count; ··· 186 154 bool interrupts_enabled; 187 155 188 156 struct i915_vma *ads_vma; 189 - struct i915_vma *ctx_pool_vma; 190 - struct ida ctx_ids; 157 + struct i915_vma *stage_desc_pool; 158 + void *stage_desc_pool_vaddr; 159 + struct ida stage_ids; 191 160 192 161 struct i915_guc_client *execbuf_client; 193 162 194 - DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS); 163 + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); 195 164 uint32_t db_cacheline; /* Cyclic counter mod pagesize */ 196 165 197 166 /* Action status & statistics */ ··· 207 174 208 175 /* To serialize the intel_guc_send actions */ 209 176 struct mutex send_mutex; 177 + 178 + /* GuC's FW specific send function */ 179 + int (*send)(struct intel_guc *guc, const u32 *data, u32 len); 210 180 }; 211 181 212 182 struct intel_huc { ··· 223 187 void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); 224 188 void intel_uc_init_early(struct drm_i915_private *dev_priv); 225 189 void intel_uc_init_fw(struct drm_i915_private *dev_priv); 190 + void intel_uc_fini_fw(struct drm_i915_private *dev_priv); 226 191 int intel_uc_init_hw(struct drm_i915_private *dev_priv); 227 - void intel_uc_prepare_fw(struct drm_i915_private *dev_priv, 228 - struct intel_uc_fw *uc_fw); 229 - int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len); 192 + void intel_uc_fini_hw(struct drm_i915_private *dev_priv); 230 193 int intel_guc_sample_forcewake(struct intel_guc *guc); 194 + int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); 195 + static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) 196 + { 197 + return guc->send(guc, action, len); 198 + } 231 199 232 200 /* intel_guc_loader.c */ 233 201 int intel_guc_select_fw(struct intel_guc *guc); 234 202 int intel_guc_init_hw(struct intel_guc *guc); 235 - void intel_guc_fini(struct drm_i915_private 
*dev_priv); 236 - const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status); 237 203 int intel_guc_suspend(struct drm_i915_private *dev_priv); 238 204 int intel_guc_resume(struct drm_i915_private *dev_priv); 239 205 u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); ··· 250 212 struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); 251 213 252 214 /* intel_guc_log.c */ 253 - void intel_guc_log_create(struct intel_guc *guc); 215 + int intel_guc_log_create(struct intel_guc *guc); 216 + void intel_guc_log_destroy(struct intel_guc *guc); 217 + int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); 254 218 void i915_guc_log_register(struct drm_i915_private *dev_priv); 255 219 void i915_guc_log_unregister(struct drm_i915_private *dev_priv); 256 - int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); 257 220 258 221 static inline u32 guc_ggtt_offset(struct i915_vma *vma) 259 222 { ··· 266 227 267 228 /* intel_huc.c */ 268 229 void intel_huc_select_fw(struct intel_huc *huc); 269 - void intel_huc_fini(struct drm_i915_private *dev_priv); 270 230 int intel_huc_init_hw(struct intel_huc *huc); 271 231 void intel_guc_auth_huc(struct drm_i915_private *dev_priv); 272 232
+73 -79
drivers/gpu/drm/i915/intel_uncore.c
··· 52 52 } 53 53 54 54 static inline void 55 - fw_domain_reset(const struct intel_uncore_forcewake_domain *d) 55 + fw_domain_reset(struct drm_i915_private *i915, 56 + const struct intel_uncore_forcewake_domain *d) 56 57 { 57 - WARN_ON(!i915_mmio_reg_valid(d->reg_set)); 58 - __raw_i915_write32(d->i915, d->reg_set, d->val_reset); 58 + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_reset); 59 59 } 60 60 61 61 static inline void ··· 69 69 } 70 70 71 71 static inline void 72 - fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) 72 + fw_domain_wait_ack_clear(const struct drm_i915_private *i915, 73 + const struct intel_uncore_forcewake_domain *d) 73 74 { 74 - if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) & 75 + if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & 75 76 FORCEWAKE_KERNEL) == 0, 76 77 FORCEWAKE_ACK_TIMEOUT_MS)) 77 78 DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", ··· 80 79 } 81 80 82 81 static inline void 83 - fw_domain_get(const struct intel_uncore_forcewake_domain *d) 82 + fw_domain_get(struct drm_i915_private *i915, 83 + const struct intel_uncore_forcewake_domain *d) 84 84 { 85 - __raw_i915_write32(d->i915, d->reg_set, d->val_set); 85 + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_set); 86 86 } 87 87 88 88 static inline void 89 - fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d) 89 + fw_domain_wait_ack(const struct drm_i915_private *i915, 90 + const struct intel_uncore_forcewake_domain *d) 90 91 { 91 - if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) & 92 + if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & 92 93 FORCEWAKE_KERNEL), 93 94 FORCEWAKE_ACK_TIMEOUT_MS)) 94 95 DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", ··· 98 95 } 99 96 100 97 static inline void 101 - fw_domain_put(const struct intel_uncore_forcewake_domain *d) 98 + fw_domain_put(const struct drm_i915_private *i915, 99 + const struct intel_uncore_forcewake_domain *d) 
102 100 { 103 - __raw_i915_write32(d->i915, d->reg_set, d->val_clear); 104 - } 105 - 106 - static inline void 107 - fw_domain_posting_read(const struct intel_uncore_forcewake_domain *d) 108 - { 109 - /* something from same cacheline, but not from the set register */ 110 - if (i915_mmio_reg_valid(d->reg_post)) 111 - __raw_posting_read(d->i915, d->reg_post); 101 + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_clear); 112 102 } 113 103 114 104 static void 115 - fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) 105 + fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) 116 106 { 117 107 struct intel_uncore_forcewake_domain *d; 108 + unsigned int tmp; 118 109 119 - for_each_fw_domain_masked(d, fw_domains, dev_priv) { 120 - fw_domain_wait_ack_clear(d); 121 - fw_domain_get(d); 110 + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); 111 + 112 + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { 113 + fw_domain_wait_ack_clear(i915, d); 114 + fw_domain_get(i915, d); 122 115 } 123 116 124 - for_each_fw_domain_masked(d, fw_domains, dev_priv) 125 - fw_domain_wait_ack(d); 117 + for_each_fw_domain_masked(d, fw_domains, i915, tmp) 118 + fw_domain_wait_ack(i915, d); 126 119 127 - dev_priv->uncore.fw_domains_active |= fw_domains; 120 + i915->uncore.fw_domains_active |= fw_domains; 128 121 } 129 122 130 123 static void 131 - fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) 124 + fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) 132 125 { 133 126 struct intel_uncore_forcewake_domain *d; 127 + unsigned int tmp; 134 128 135 - for_each_fw_domain_masked(d, fw_domains, dev_priv) { 136 - fw_domain_put(d); 137 - fw_domain_posting_read(d); 138 - } 129 + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); 139 130 140 - dev_priv->uncore.fw_domains_active &= ~fw_domains; 131 + for_each_fw_domain_masked(d, fw_domains, i915, tmp) 132 + fw_domain_put(i915, d); 
133 + 134 + i915->uncore.fw_domains_active &= ~fw_domains; 141 135 } 142 136 143 137 static void 144 - fw_domains_posting_read(struct drm_i915_private *dev_priv) 138 + fw_domains_reset(struct drm_i915_private *i915, 139 + enum forcewake_domains fw_domains) 145 140 { 146 141 struct intel_uncore_forcewake_domain *d; 142 + unsigned int tmp; 147 143 148 - /* No need to do for all, just do for first found */ 149 - for_each_fw_domain(d, dev_priv) { 150 - fw_domain_posting_read(d); 151 - break; 152 - } 153 - } 154 - 155 - static void 156 - fw_domains_reset(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) 157 - { 158 - struct intel_uncore_forcewake_domain *d; 159 - 160 - if (dev_priv->uncore.fw_domains == 0) 144 + if (!fw_domains) 161 145 return; 162 146 163 - for_each_fw_domain_masked(d, fw_domains, dev_priv) 164 - fw_domain_reset(d); 147 + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); 165 148 166 - fw_domains_posting_read(dev_priv); 149 + for_each_fw_domain_masked(d, fw_domains, i915, tmp) 150 + fw_domain_reset(i915, d); 167 151 } 168 152 169 153 static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv) ··· 226 236 { 227 237 struct intel_uncore_forcewake_domain *domain = 228 238 container_of(timer, struct intel_uncore_forcewake_domain, timer); 229 - struct drm_i915_private *dev_priv = domain->i915; 239 + struct drm_i915_private *dev_priv = 240 + container_of(domain, struct drm_i915_private, uncore.fw_domain[domain->id]); 230 241 unsigned long irqflags; 231 242 232 243 assert_rpm_device_not_suspended(dev_priv); ··· 257 266 * timers are run before holding. 
258 267 */ 259 268 while (1) { 269 + unsigned int tmp; 270 + 260 271 active_domains = 0; 261 272 262 - for_each_fw_domain(domain, dev_priv) { 273 + for_each_fw_domain(domain, dev_priv, tmp) { 263 274 if (hrtimer_cancel(&domain->timer) == 0) 264 275 continue; 265 276 ··· 270 277 271 278 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); 272 279 273 - for_each_fw_domain(domain, dev_priv) { 280 + for_each_fw_domain(domain, dev_priv, tmp) { 274 281 if (hrtimer_active(&domain->timer)) 275 282 active_domains |= domain->mask; 276 283 } ··· 293 300 if (fw) 294 301 dev_priv->uncore.funcs.force_wake_put(dev_priv, fw); 295 302 296 - fw_domains_reset(dev_priv, FORCEWAKE_ALL); 303 + fw_domains_reset(dev_priv, dev_priv->uncore.fw_domains); 297 304 298 305 if (restore) { /* If reset with a user forcewake, try to restore */ 299 306 if (fw) ··· 450 457 enum forcewake_domains fw_domains) 451 458 { 452 459 struct intel_uncore_forcewake_domain *domain; 460 + unsigned int tmp; 453 461 454 462 fw_domains &= dev_priv->uncore.fw_domains; 455 463 456 - for_each_fw_domain_masked(domain, fw_domains, dev_priv) { 464 + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) 457 465 if (domain->wake_count++) 458 466 fw_domains &= ~domain->mask; 459 - } 460 467 461 468 if (fw_domains) 462 469 dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); ··· 513 520 enum forcewake_domains fw_domains) 514 521 { 515 522 struct intel_uncore_forcewake_domain *domain; 523 + unsigned int tmp; 516 524 517 525 fw_domains &= dev_priv->uncore.fw_domains; 518 526 519 - for_each_fw_domain_masked(domain, fw_domains, dev_priv) { 527 + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) { 520 528 if (WARN_ON(domain->wake_count == 0)) 521 529 continue; 522 530 ··· 922 928 enum forcewake_domains fw_domains) 923 929 { 924 930 struct intel_uncore_forcewake_domain *domain; 931 + unsigned int tmp; 925 932 926 - for_each_fw_domain_masked(domain, fw_domains, dev_priv) 933 + GEM_BUG_ON(fw_domains & 
~dev_priv->uncore.fw_domains); 934 + 935 + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) 927 936 fw_domain_arm_timer(domain); 928 937 929 938 dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); ··· 1138 1141 1139 1142 WARN_ON(d->wake_count); 1140 1143 1144 + WARN_ON(!i915_mmio_reg_valid(reg_set)); 1145 + WARN_ON(!i915_mmio_reg_valid(reg_ack)); 1146 + 1141 1147 d->wake_count = 0; 1142 1148 d->reg_set = reg_set; 1143 1149 d->reg_ack = reg_ack; 1144 1150 1145 - if (IS_GEN6(dev_priv)) { 1146 - d->val_reset = 0; 1147 - d->val_set = FORCEWAKE_KERNEL; 1148 - d->val_clear = 0; 1149 - } else { 1150 - /* WaRsClearFWBitsAtReset:bdw,skl */ 1151 - d->val_reset = _MASKED_BIT_DISABLE(0xffff); 1152 - d->val_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL); 1153 - d->val_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); 1154 - } 1155 - 1156 - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 1157 - d->reg_post = FORCEWAKE_ACK_VLV; 1158 - else if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv) || IS_GEN8(dev_priv)) 1159 - d->reg_post = ECOBUS; 1160 - 1161 - d->i915 = dev_priv; 1162 1151 d->id = domain_id; 1163 1152 1164 1153 BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); 1165 1154 BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER)); 1166 1155 BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA)); 1167 1156 1168 - d->mask = 1 << domain_id; 1157 + d->mask = BIT(domain_id); 1169 1158 1170 1159 hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 1171 1160 d->timer.function = intel_uncore_fw_release_timer; 1172 1161 1173 - dev_priv->uncore.fw_domains |= (1 << domain_id); 1162 + dev_priv->uncore.fw_domains |= BIT(domain_id); 1174 1163 1175 - fw_domain_reset(d); 1164 + fw_domain_reset(dev_priv, d); 1176 1165 } 1177 1166 1178 1167 static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) 1179 1168 { 1180 1169 if (INTEL_GEN(dev_priv) <= 5 || intel_vgpu_active(dev_priv)) 1181 1170 return; 1171 + 1172 + if 
(IS_GEN6(dev_priv)) { 1173 + dev_priv->uncore.fw_reset = 0; 1174 + dev_priv->uncore.fw_set = FORCEWAKE_KERNEL; 1175 + dev_priv->uncore.fw_clear = 0; 1176 + } else { 1177 + /* WaRsClearFWBitsAtReset:bdw,skl */ 1178 + dev_priv->uncore.fw_reset = _MASKED_BIT_DISABLE(0xffff); 1179 + dev_priv->uncore.fw_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL); 1180 + dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); 1181 + } 1182 1182 1183 1183 if (IS_GEN9(dev_priv)) { 1184 1184 dev_priv->uncore.funcs.force_wake_get = fw_domains_get; ··· 1240 1246 FORCEWAKE_MT, FORCEWAKE_MT_ACK); 1241 1247 1242 1248 spin_lock_irq(&dev_priv->uncore.lock); 1243 - fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_ALL); 1249 + fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER); 1244 1250 ecobus = __raw_i915_read32(dev_priv, ECOBUS); 1245 - fw_domains_put_with_fifo(dev_priv, FORCEWAKE_ALL); 1251 + fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER); 1246 1252 spin_unlock_irq(&dev_priv->uncore.lock); 1247 1253 1248 1254 if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
+3 -3
drivers/gpu/drm/i915/selftests/i915_gem_request.c
··· 291 291 return err; 292 292 } 293 293 294 - i915_gem_retire_requests(i915); 295 - 296 294 i915->gpu_error.missed_irq_rings = 0; 297 295 t->reset_count = i915_reset_count(&i915->gpu_error); 298 296 ··· 301 303 { 302 304 struct drm_i915_private *i915 = t->i915; 303 305 304 - if (wait_for(intel_engines_are_idle(i915), 1)) { 306 + i915_gem_retire_requests(i915); 307 + 308 + if (wait_for(intel_engines_are_idle(i915), 10)) { 305 309 pr_err("%s(%s): GPU not idle\n", t->func, t->name); 306 310 return -EIO; 307 311 }
-1
drivers/gpu/drm/i915/selftests/intel_hangcheck.c
··· 235 235 i915_gem_object_put(h->hws); 236 236 237 237 i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); 238 - i915_gem_retire_requests(h->i915); 239 238 } 240 239 241 240 static int igt_hang_sanitycheck(void *arg)
-1
drivers/gpu/drm/i915/selftests/mock_engine.c
··· 118 118 ring->vaddr = (void *)(ring + 1); 119 119 120 120 INIT_LIST_HEAD(&ring->request_list); 121 - ring->last_retired_head = -1; 122 121 intel_ring_update_space(ring); 123 122 124 123 return ring;
+10 -1
drivers/gpu/drm/i915/selftests/scatterlist.c
··· 189 189 return 1 + (prandom_u32_state(rnd) % 1024); 190 190 } 191 191 192 + static inline bool page_contiguous(struct page *first, 193 + struct page *last, 194 + unsigned long npages) 195 + { 196 + return first + npages == last; 197 + } 198 + 192 199 static int alloc_table(struct pfn_table *pt, 193 200 unsigned long count, unsigned long max, 194 201 npages_fn_t npages_fn, ··· 223 216 unsigned long npages = npages_fn(n, count, rnd); 224 217 225 218 /* Nobody expects the Sparse Memmap! */ 226 - if (pfn_to_page(pfn + npages) != pfn_to_page(pfn) + npages) { 219 + if (!page_contiguous(pfn_to_page(pfn), 220 + pfn_to_page(pfn + npages), 221 + npages)) { 227 222 sg_free_table(&pt->st); 228 223 return -ENOSPC; 229 224 }