Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-intel-next-2018-05-14' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

Last drm/i915 changes for v4.18:

- NV12 enabling (Chandra, Maarten)
- ICL workarounds (Oscar)
- ICL basic DPLL enabling (Paulo)
- GVT updates
- DP link config refactoring (Jani)
- Module parameter to override DMC firmware (Jani)
- PSR updates (José, DK, Daniel, Ville)
- ICL DP vswing programming (Manasi)
- ICL DBuf slice updates (Mahesh)
- Selftest fixes and updates (Chris, Matthew, Oscar)
- Execlist fixes and updates (Chris)
- Stolen memory first 4k fix (Hans de Goede)
- wait_for fixes (Mika)
- Tons of GEM improvements (Chris)
- Plenty of other fixes and improvements (Everyone)
- Crappy changelog (Me)

Signed-off-by: Dave Airlie <airlied@redhat.com>

# gpg: Signature made Mon 14 May 2018 11:04:24 PM AEST
# gpg: using RSA key D398079D26ABEE6F
# gpg: Good signature from "Jani Nikula <jani.nikula@intel.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1565 A65B 77B0 632E 1124 E59C D398 079D 26AB EE6F

# Conflicts:
# drivers/gpu/drm/i915/intel_lrc.c
# drivers/gpu/drm/i915/intel_sprite.c
Link: https://patchwork.freedesktop.org/patch/msgid/87k1s51bvw.fsf@intel.com

+4493 -1894
+7 -3
drivers/gpu/drm/drm_atomic.c
··· 1716 1716 } 1717 1717 } 1718 1718 1719 - if (config->funcs->atomic_check) 1719 + if (config->funcs->atomic_check) { 1720 1720 ret = config->funcs->atomic_check(state->dev, state); 1721 1721 1722 - if (ret) 1723 - return ret; 1722 + if (ret) { 1723 + DRM_DEBUG_ATOMIC("atomic driver check for %p failed: %d\n", 1724 + state, ret); 1725 + return ret; 1726 + } 1727 + } 1724 1728 1725 1729 if (!state->allow_modeset) { 1726 1730 for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
+7 -2
drivers/gpu/drm/i915/Makefile
··· 18 18 subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) 19 19 subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough) 20 20 subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) 21 + # clang warnings 22 + subdir-ccflags-y += $(call cc-disable-warning, sign-compare) 23 + subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) 24 + subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) 21 25 subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror 22 26 23 27 # Fine grained warnings disable ··· 71 67 i915_gem_shrinker.o \ 72 68 i915_gem_stolen.o \ 73 69 i915_gem_tiling.o \ 74 - i915_gem_timeline.o \ 75 70 i915_gem_userptr.o \ 76 71 i915_gemfs.o \ 77 72 i915_query.o \ 78 73 i915_request.o \ 74 + i915_timeline.o \ 79 75 i915_trace_points.o \ 80 76 i915_vma.o \ 81 77 intel_breadcrumbs.o \ ··· 158 154 i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o 159 155 i915-$(CONFIG_DRM_I915_SELFTEST) += \ 160 156 selftests/i915_random.o \ 161 - selftests/i915_selftest.o 157 + selftests/i915_selftest.o \ 158 + selftests/igt_flush_test.o 162 159 163 160 # virtual gpu code 164 161 i915-y += i915_vgpu.o
+63 -18
drivers/gpu/drm/i915/gvt/cmd_parser.c
··· 813 813 } 814 814 815 815 static int force_nonpriv_reg_handler(struct parser_exec_state *s, 816 - unsigned int offset, unsigned int index) 816 + unsigned int offset, unsigned int index, char *cmd) 817 817 { 818 818 struct intel_gvt *gvt = s->vgpu->gvt; 819 - unsigned int data = cmd_val(s, index + 1); 819 + unsigned int data; 820 + u32 ring_base; 821 + u32 nopid; 822 + struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; 820 823 821 - if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { 824 + if (!strcmp(cmd, "lri")) 825 + data = cmd_val(s, index + 1); 826 + else { 827 + gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n", 828 + offset, cmd); 829 + return -EINVAL; 830 + } 831 + 832 + ring_base = dev_priv->engine[s->ring_id]->mmio_base; 833 + nopid = i915_mmio_reg_offset(RING_NOPID(ring_base)); 834 + 835 + if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) && 836 + data != nopid) { 822 837 gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", 823 838 offset, data); 824 - return -EPERM; 839 + patch_value(s, cmd_ptr(s, index), nopid); 840 + return 0; 825 841 } 826 842 return 0; 827 843 } ··· 885 869 return -EINVAL; 886 870 887 871 if (is_force_nonpriv_mmio(offset) && 888 - force_nonpriv_reg_handler(s, offset, index)) 872 + force_nonpriv_reg_handler(s, offset, index, cmd)) 889 873 return -EPERM; 890 874 891 875 if (offset == i915_mmio_reg_offset(DERRMR) || ··· 1620 1604 if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) 1621 1605 || IS_KABYLAKE(gvt->dev_priv)) { 1622 1606 /* BDW decides privilege based on address space */ 1623 - if (cmd_val(s, 0) & (1 << 8)) 1607 + if (cmd_val(s, 0) & (1 << 8) && 1608 + !(s->vgpu->scan_nonprivbb & (1 << s->ring_id))) 1624 1609 return 0; 1625 1610 } 1626 1611 return 1; ··· 1635 1618 bool bb_end = false; 1636 1619 struct intel_vgpu *vgpu = s->vgpu; 1637 1620 u32 cmd; 1621 + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? 
1622 + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; 1638 1623 1639 1624 *bb_size = 0; 1640 1625 ··· 1648 1629 cmd = cmd_val(s, 0); 1649 1630 info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); 1650 1631 if (info == NULL) { 1651 - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", 1652 - cmd, get_opcode(cmd, s->ring_id)); 1632 + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", 1633 + cmd, get_opcode(cmd, s->ring_id), 1634 + (s->buf_addr_type == PPGTT_BUFFER) ? 1635 + "ppgtt" : "ggtt", s->ring_id, s->workload); 1653 1636 return -EBADRQC; 1654 1637 } 1655 1638 do { 1656 - if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, 1639 + if (copy_gma_to_hva(s->vgpu, mm, 1657 1640 gma, gma + 4, &cmd) < 0) 1658 1641 return -EFAULT; 1659 1642 info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); 1660 1643 if (info == NULL) { 1661 - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", 1662 - cmd, get_opcode(cmd, s->ring_id)); 1644 + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", 1645 + cmd, get_opcode(cmd, s->ring_id), 1646 + (s->buf_addr_type == PPGTT_BUFFER) ? 1647 + "ppgtt" : "ggtt", s->ring_id, s->workload); 1663 1648 return -EBADRQC; 1664 1649 } 1665 1650 ··· 1689 1666 unsigned long gma = 0; 1690 1667 unsigned long bb_size; 1691 1668 int ret = 0; 1669 + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? 1670 + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; 1671 + unsigned long gma_start_offset = 0; 1692 1672 1693 1673 /* get the start gm address of the batch buffer */ 1694 1674 gma = get_gma_bb_from_cmd(s, 1); ··· 1706 1680 if (!bb) 1707 1681 return -ENOMEM; 1708 1682 1683 + bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true; 1684 + 1685 + /* the gma_start_offset stores the batch buffer's start gma's 1686 + * offset relative to page boundary. so for non-privileged batch 1687 + * buffer, the shadowed gem object holds exactly the same page 1688 + * layout as original gem object. 
This is for the convenience of 1689 + replacing the whole non-privileged batch buffer page to this 1690 + shadowed one in PPGTT at the same gma address. (this replacing 1691 + action is not implemented yet now, but may be necessary in 1692 + future). 1693 + for privileged batch buffer, we just change start gma address to 1694 + that of shadowed page. 1695 + */ 1696 + if (bb->ppgtt) 1697 + gma_start_offset = gma & ~I915_GTT_PAGE_MASK; 1698 + 1709 1699 bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv, 1710 - roundup(bb_size, PAGE_SIZE)); 1700 + roundup(bb_size + gma_start_offset, PAGE_SIZE)); 1711 1701 if (IS_ERR(bb->obj)) { 1712 1702 ret = PTR_ERR(bb->obj); 1713 1703 goto err_free_bb; ··· 1744 1702 bb->clflush &= ~CLFLUSH_BEFORE; 1745 1703 } 1746 1704 1747 - ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, 1705 + ret = copy_gma_to_hva(s->vgpu, mm, 1748 1706 gma, gma + bb_size, 1749 - bb->va); 1707 + bb->va + gma_start_offset); 1750 1708 if (ret < 0) { 1751 1709 gvt_vgpu_err("fail to copy guest ring buffer\n"); 1752 1710 ret = -EFAULT; ··· 1772 1730 * buffer's gma in pair. After all, we don't want to pin the shadow 1773 1731 * buffer here (too early). 1774 1732 */ 1775 - s->ip_va = bb->va; 1733 + s->ip_va = bb->va + gma_start_offset; 1776 1734 s->ip_gma = gma; 1777 1735 return 0; 1778 1736 err_unmap: ··· 2511 2469 2512 2470 info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); 2513 2471 if (info == NULL) { 2514 - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", 2515 - cmd, get_opcode(cmd, s->ring_id)); 2472 + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", 2473 + cmd, get_opcode(cmd, s->ring_id), 2474 + (s->buf_addr_type == PPGTT_BUFFER) ? 
2475 + "ppgtt" : "ggtt", s->ring_id, s->workload); 2516 2476 return -EBADRQC; 2517 2477 } 2518 2478 2519 2479 s->info = info; 2520 2480 2521 2481 trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va, 2522 - cmd_length(s), s->buf_type); 2482 + cmd_length(s), s->buf_type, s->buf_addr_type, 2483 + s->workload, info->name); 2523 2484 2524 2485 if (info->handler) { 2525 2486 ret = info->handler(s);
+67
drivers/gpu/drm/i915/gvt/debugfs.c
··· 124 124 } 125 125 DEFINE_SHOW_ATTRIBUTE(vgpu_mmio_diff); 126 126 127 + static int 128 + vgpu_scan_nonprivbb_get(void *data, u64 *val) 129 + { 130 + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; 131 + *val = vgpu->scan_nonprivbb; 132 + return 0; 133 + } 134 + 135 + /* 136 + * set/unset bit engine_id of vgpu->scan_nonprivbb to turn on/off scanning 137 + * of non-privileged batch buffer. e.g. 138 + * if vgpu->scan_nonprivbb=3, then it will scan non-privileged batch buffer 139 + * on engine 0 and 1. 140 + */ 141 + static int 142 + vgpu_scan_nonprivbb_set(void *data, u64 val) 143 + { 144 + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; 145 + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; 146 + enum intel_engine_id id; 147 + char buf[128], *s; 148 + int len; 149 + 150 + val &= (1 << I915_NUM_ENGINES) - 1; 151 + 152 + if (vgpu->scan_nonprivbb == val) 153 + return 0; 154 + 155 + if (!val) 156 + goto done; 157 + 158 + len = sprintf(buf, 159 + "gvt: vgpu %d turns on non-privileged batch buffers scanning on Engines:", 160 + vgpu->id); 161 + 162 + s = buf + len; 163 + 164 + for (id = 0; id < I915_NUM_ENGINES; id++) { 165 + struct intel_engine_cs *engine; 166 + 167 + engine = dev_priv->engine[id]; 168 + if (engine && (val & (1 << id))) { 169 + len = snprintf(s, 4, "%d, ", engine->id); 170 + s += len; 171 + } else 172 + val &= ~(1 << id); 173 + } 174 + 175 + if (val) 176 + sprintf(s, "low performance expected."); 177 + 178 + pr_warn("%s\n", buf); 179 + 180 + done: 181 + vgpu->scan_nonprivbb = val; 182 + return 0; 183 + } 184 + 185 + DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, 186 + vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set, 187 + "0x%llx\n"); 188 + 127 189 /** 128 190 * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU 129 191 * @vgpu: a vGPU ··· 210 148 211 149 ent = debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, 212 150 vgpu, &vgpu_mmio_diff_fops); 151 + if (!ent) 152 + return -ENOMEM; 153 + 154 + ent = 
debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, 155 + vgpu, &vgpu_scan_nonprivbb_fops); 213 156 if (!ent) 214 157 return -ENOMEM; 215 158
+1 -1
drivers/gpu/drm/i915/gvt/gvt.h
··· 99 99 struct intel_vgpu_mmio { 100 100 void *vreg; 101 101 void *sreg; 102 - bool disable_warn_untrack; 103 102 }; 104 103 105 104 #define INTEL_GVT_MAX_BAR_NUM 4 ··· 225 226 226 227 struct completion vblank_done; 227 228 229 + u32 scan_nonprivbb; 228 230 }; 229 231 230 232 /* validating GM healthy status*/
+18 -17
drivers/gpu/drm/i915/gvt/handlers.c
··· 191 191 unsigned int max_fence = vgpu_fence_sz(vgpu); 192 192 193 193 if (fence_num >= max_fence) { 194 + gvt_vgpu_err("access oob fence reg %d/%d\n", 195 + fence_num, max_fence); 194 196 195 197 /* When guest access oob fence regs without access 196 198 * pv_info first, we treat guest not supporting GVT, ··· 202 200 enter_failsafe_mode(vgpu, 203 201 GVT_FAILSAFE_UNSUPPORTED_GUEST); 204 202 205 - if (!vgpu->mmio.disable_warn_untrack) { 206 - gvt_vgpu_err("found oob fence register access\n"); 207 - gvt_vgpu_err("total fence %d, access fence %d\n", 208 - max_fence, fence_num); 209 - } 210 203 memset(p_data, 0, bytes); 211 204 return -EINVAL; 212 205 } ··· 474 477 unsigned int offset, void *p_data, unsigned int bytes) 475 478 { 476 479 u32 reg_nonpriv = *(u32 *)p_data; 480 + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); 481 + u32 ring_base; 482 + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; 477 483 int ret = -EINVAL; 478 484 479 - if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { 480 - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", 481 - vgpu->id, offset, bytes); 485 + if ((bytes != 4) || ((offset & (bytes - 1)) != 0) || ring_id < 0) { 486 + gvt_err("vgpu(%d) ring %d Invalid FORCE_NONPRIV offset %x(%dB)\n", 487 + vgpu->id, ring_id, offset, bytes); 482 488 return ret; 483 489 } 484 490 485 - if (in_whitelist(reg_nonpriv)) { 491 + ring_base = dev_priv->engine[ring_id]->mmio_base; 492 + 493 + if (in_whitelist(reg_nonpriv) || 494 + reg_nonpriv == i915_mmio_reg_offset(RING_NOPID(ring_base))) { 486 495 ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, 487 496 bytes); 488 - } else { 489 - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", 490 - vgpu->id, reg_nonpriv); 491 - } 492 - return ret; 497 + } else 498 + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n", 499 + vgpu->id, reg_nonpriv, offset); 500 + 501 + return 0; 493 502 } 494 503 495 504 static int ddi_buf_ctl_mmio_write(struct intel_vgpu 
*vgpu, unsigned int offset, ··· 3095 3092 */ 3096 3093 mmio_info = find_mmio_info(gvt, offset); 3097 3094 if (!mmio_info) { 3098 - if (!vgpu->mmio.disable_warn_untrack) 3099 - gvt_vgpu_err("untracked MMIO %08x len %d\n", 3100 - offset, bytes); 3095 + gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes); 3101 3096 goto default_rw; 3102 3097 } 3103 3098
-2
drivers/gpu/drm/i915/gvt/mmio.c
··· 244 244 245 245 /* set the bit 0:2(Core C-State ) to C0 */ 246 246 vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; 247 - 248 - vgpu->mmio.disable_warn_untrack = false; 249 247 } else { 250 248 #define GVT_GEN8_MMIO_RESET_OFFSET (0x44200) 251 249 /* only reset the engine related, so starting with 0x44200
+1 -1
drivers/gpu/drm/i915/gvt/mmio_context.c
··· 448 448 449 449 bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) 450 450 { 451 - u32 *reg_state = ctx->engine[ring_id].lrc_reg_state; 451 + u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; 452 452 u32 inhibit_mask = 453 453 _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); 454 454
+18 -13
drivers/gpu/drm/i915/gvt/sched_policy.c
··· 53 53 bool active; 54 54 55 55 ktime_t sched_in_time; 56 - ktime_t sched_out_time; 57 56 ktime_t sched_time; 58 57 ktime_t left_ts; 59 58 ktime_t allocated_ts; ··· 65 66 struct hrtimer timer; 66 67 unsigned long period; 67 68 struct list_head lru_runq_head; 69 + ktime_t expire_time; 68 70 }; 69 71 70 - static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu) 72 + static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time) 71 73 { 72 74 ktime_t delta_ts; 73 - struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data; 75 + struct vgpu_sched_data *vgpu_data; 74 76 75 - delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time; 77 + if (!vgpu || vgpu == vgpu->gvt->idle_vgpu) 78 + return; 76 79 77 - vgpu_data->sched_time += delta_ts; 78 - vgpu_data->left_ts -= delta_ts; 80 + vgpu_data = vgpu->sched_data; 81 + delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time); 82 + vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts); 83 + vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts); 84 + vgpu_data->sched_in_time = cur_time; 79 85 } 80 86 81 87 #define GVT_TS_BALANCE_PERIOD_MS 100 ··· 154 150 } 155 151 156 152 cur_time = ktime_get(); 157 - if (scheduler->current_vgpu) { 158 - vgpu_data = scheduler->current_vgpu->sched_data; 159 - vgpu_data->sched_out_time = cur_time; 160 - vgpu_update_timeslice(scheduler->current_vgpu); 161 - } 153 + vgpu_update_timeslice(scheduler->current_vgpu, cur_time); 162 154 vgpu_data = scheduler->next_vgpu->sched_data; 163 155 vgpu_data->sched_in_time = cur_time; 164 156 ··· 226 226 void intel_gvt_schedule(struct intel_gvt *gvt) 227 227 { 228 228 struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; 229 - static uint64_t timer_check; 229 + ktime_t cur_time; 230 230 231 231 mutex_lock(&gvt->lock); 232 + cur_time = ktime_get(); 232 233 233 234 if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, 234 235 (void *)&gvt->service_request)) { 235 - if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) 
236 + if (cur_time >= sched_data->expire_time) { 236 237 gvt_balance_timeslice(sched_data); 238 + sched_data->expire_time = ktime_add_ms( 239 + cur_time, GVT_TS_BALANCE_PERIOD_MS); 240 + } 237 241 } 238 242 clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request); 239 243 244 + vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time); 240 245 tbs_sched_func(sched_data); 241 246 242 247 mutex_unlock(&gvt->lock);
+56 -37
drivers/gpu/drm/i915/gvt/scheduler.c
··· 58 58 int ring_id = workload->ring_id; 59 59 struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; 60 60 struct drm_i915_gem_object *ctx_obj = 61 - shadow_ctx->engine[ring_id].state->obj; 61 + shadow_ctx->__engine[ring_id].state->obj; 62 62 struct execlist_ring_context *shadow_ring_context; 63 63 struct page *page; 64 64 ··· 97 97 i915_mmio_reg_offset(EU_PERF_CNTL6), 98 98 }; 99 99 100 - if (!workload || !reg_state || workload->ring_id != RCS) 100 + if (workload->ring_id != RCS) 101 101 return; 102 102 103 103 if (save) { ··· 130 130 int ring_id = workload->ring_id; 131 131 struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; 132 132 struct drm_i915_gem_object *ctx_obj = 133 - shadow_ctx->engine[ring_id].state->obj; 133 + shadow_ctx->__engine[ring_id].state->obj; 134 134 struct execlist_ring_context *shadow_ring_context; 135 135 struct page *page; 136 136 void *dst; ··· 283 283 static void shadow_context_descriptor_update(struct i915_gem_context *ctx, 284 284 struct intel_engine_cs *engine) 285 285 { 286 - struct intel_context *ce = &ctx->engine[engine->id]; 286 + struct intel_context *ce = to_intel_context(ctx, engine); 287 287 u64 desc = 0; 288 288 289 289 desc = ce->lrc_desc; ··· 389 389 * shadow_ctx pages invalid. So gvt need to pin itself. After update 390 390 * the guest context, gvt can unpin the shadow_ctx safely. 
391 391 */ 392 - ring = engine->context_pin(engine, shadow_ctx); 392 + ring = intel_context_pin(shadow_ctx, engine); 393 393 if (IS_ERR(ring)) { 394 394 ret = PTR_ERR(ring); 395 395 gvt_vgpu_err("fail to pin shadow context\n"); ··· 403 403 return 0; 404 404 405 405 err_unpin: 406 - engine->context_unpin(engine, shadow_ctx); 406 + intel_context_unpin(shadow_ctx, engine); 407 407 err_shadow: 408 408 release_shadow_wa_ctx(&workload->wa_ctx); 409 409 err_scan: ··· 437 437 return 0; 438 438 439 439 err_unpin: 440 - engine->context_unpin(engine, shadow_ctx); 440 + intel_context_unpin(shadow_ctx, engine); 441 441 release_shadow_wa_ctx(&workload->wa_ctx); 442 442 return ret; 443 443 } ··· 452 452 int ret; 453 453 454 454 list_for_each_entry(bb, &workload->shadow_bb, list) { 455 - bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0); 456 - if (IS_ERR(bb->vma)) { 457 - ret = PTR_ERR(bb->vma); 458 - goto err; 459 - } 460 - 461 455 /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va 462 456 * is only updated into ring_scan_buffer, not real ring address 463 457 * allocated in later copy_workload_to_ring_buffer. pls be noted ··· 463 469 bb->bb_start_cmd_va = workload->shadow_ring_buffer_va 464 470 + bb->bb_offset; 465 471 466 - /* relocate shadow batch buffer */ 467 - bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); 468 - if (gmadr_bytes == 8) 469 - bb->bb_start_cmd_va[2] = 0; 472 + if (bb->ppgtt) { 473 + /* for non-priv bb, scan&shadow is only for 474 + * debugging purpose, so the content of shadow bb 475 + * is the same as original bb. 
Therefore, 476 + * here, rather than switch to shadow bb's gma 477 + * address, we directly use original batch buffer's 478 + * gma address, and send original bb to hardware 479 + * directly 480 + */ 481 + if (bb->clflush & CLFLUSH_AFTER) { 482 + drm_clflush_virt_range(bb->va, 483 + bb->obj->base.size); 484 + bb->clflush &= ~CLFLUSH_AFTER; 485 + } 486 + i915_gem_obj_finish_shmem_access(bb->obj); 487 + bb->accessing = false; 470 488 471 - /* No one is going to touch shadow bb from now on. */ 472 - if (bb->clflush & CLFLUSH_AFTER) { 473 - drm_clflush_virt_range(bb->va, bb->obj->base.size); 474 - bb->clflush &= ~CLFLUSH_AFTER; 489 + } else { 490 + bb->vma = i915_gem_object_ggtt_pin(bb->obj, 491 + NULL, 0, 0, 0); 492 + if (IS_ERR(bb->vma)) { 493 + ret = PTR_ERR(bb->vma); 494 + goto err; 495 + } 496 + 497 + /* relocate shadow batch buffer */ 498 + bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); 499 + if (gmadr_bytes == 8) 500 + bb->bb_start_cmd_va[2] = 0; 501 + 502 + /* No one is going to touch shadow bb from now on. 
*/ 503 + if (bb->clflush & CLFLUSH_AFTER) { 504 + drm_clflush_virt_range(bb->va, 505 + bb->obj->base.size); 506 + bb->clflush &= ~CLFLUSH_AFTER; 507 + } 508 + 509 + ret = i915_gem_object_set_to_gtt_domain(bb->obj, 510 + false); 511 + if (ret) 512 + goto err; 513 + 514 + i915_gem_obj_finish_shmem_access(bb->obj); 515 + bb->accessing = false; 516 + 517 + i915_vma_move_to_active(bb->vma, workload->req, 0); 475 518 } 476 - 477 - ret = i915_gem_object_set_to_gtt_domain(bb->obj, false); 478 - if (ret) 479 - goto err; 480 - 481 - i915_gem_obj_finish_shmem_access(bb->obj); 482 - bb->accessing = false; 483 - 484 - i915_vma_move_to_active(bb->vma, workload->req, 0); 485 519 } 486 520 return 0; 487 521 err: ··· 526 504 struct intel_vgpu_submission *s = &workload->vgpu->submission; 527 505 struct i915_gem_context *shadow_ctx = s->shadow_ctx; 528 506 struct drm_i915_gem_object *ctx_obj = 529 - shadow_ctx->engine[ring_id].state->obj; 507 + shadow_ctx->__engine[ring_id].state->obj; 530 508 struct execlist_ring_context *shadow_ring_context; 531 509 struct page *page; 532 510 ··· 688 666 689 667 ret = prepare_workload(workload); 690 668 if (ret) { 691 - engine->context_unpin(engine, shadow_ctx); 669 + intel_context_unpin(shadow_ctx, engine); 692 670 goto out; 693 671 } 694 672 ··· 771 749 struct i915_gem_context *shadow_ctx = s->shadow_ctx; 772 750 int ring_id = workload->ring_id; 773 751 struct drm_i915_gem_object *ctx_obj = 774 - shadow_ctx->engine[ring_id].state->obj; 752 + shadow_ctx->__engine[ring_id].state->obj; 775 753 struct execlist_ring_context *shadow_ring_context; 776 754 struct page *page; 777 755 void *src; ··· 898 876 } 899 877 mutex_lock(&dev_priv->drm.struct_mutex); 900 878 /* unpin shadow ctx as the shadow_ctx update is done */ 901 - engine->context_unpin(engine, s->shadow_ctx); 879 + intel_context_unpin(s->shadow_ctx, engine); 902 880 mutex_unlock(&dev_priv->drm.struct_mutex); 903 881 } 904 882 ··· 1155 1133 &vgpu->gvt->dev_priv->drm); 1156 1134 if 
(IS_ERR(s->shadow_ctx)) 1157 1135 return PTR_ERR(s->shadow_ctx); 1158 - 1159 - if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv)) 1160 - s->shadow_ctx->priority = INT_MAX; 1161 1136 1162 1137 bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); 1163 1138
+1
drivers/gpu/drm/i915/gvt/scheduler.h
··· 125 125 unsigned int clflush; 126 126 bool accessing; 127 127 unsigned long bb_offset; 128 + bool ppgtt; 128 129 }; 129 130 130 131 #define workload_q_head(vgpu, ring_id) \
+19 -5
drivers/gpu/drm/i915/gvt/trace.h
··· 224 224 TP_printk("%s", __entry->buf) 225 225 ); 226 226 227 + #define GVT_CMD_STR_LEN 40 227 228 TRACE_EVENT(gvt_command, 228 - TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, 229 - u32 buf_type), 229 + TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, 230 + u32 cmd_len, u32 buf_type, u32 buf_addr_type, 231 + void *workload, char *cmd_name), 230 232 231 - TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type), 233 + TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, 234 + buf_addr_type, workload, cmd_name), 232 235 233 236 TP_STRUCT__entry( 234 237 __field(u8, vgpu_id) 235 238 __field(u8, ring_id) 236 239 __field(u32, ip_gma) 237 240 __field(u32, buf_type) 241 + __field(u32, buf_addr_type) 238 242 __field(u32, cmd_len) 243 + __field(void*, workload) 239 244 __dynamic_array(u32, raw_cmd, cmd_len) 245 + __array(char, cmd_name, GVT_CMD_STR_LEN) 240 246 ), 241 247 242 248 TP_fast_assign( ··· 250 244 __entry->ring_id = ring_id; 251 245 __entry->ip_gma = ip_gma; 252 246 __entry->buf_type = buf_type; 247 + __entry->buf_addr_type = buf_addr_type; 253 248 __entry->cmd_len = cmd_len; 249 + __entry->workload = workload; 250 + snprintf(__entry->cmd_name, GVT_CMD_STR_LEN, "%s", cmd_name); 254 251 memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va)); 255 252 ), 256 253 257 254 258 - TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s", 255 + TP_printk("vgpu%d ring %d: address_type %u, buf_type %u, ip_gma %08x,cmd (name=%s,len=%u,raw cmd=%s), workload=%p\n", 259 256 __entry->vgpu_id, 260 257 __entry->ring_id, 258 + __entry->buf_addr_type, 261 259 __entry->buf_type, 262 260 __entry->ip_gma, 263 - __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4)) 261 + __entry->cmd_name, 262 + __entry->cmd_len, 263 + __print_array(__get_dynamic_array(raw_cmd), 264 + __entry->cmd_len, 4), 265 + __entry->workload) 264 266 ); 265 267 266 268 #define GVT_TEMP_STR_LEN 10
+108 -140
drivers/gpu/drm/i915/i915_debugfs.c
··· 377 377 print_file_stats(m, "[k]batch pool", stats); 378 378 } 379 379 380 - static int per_file_ctx_stats(int id, void *ptr, void *data) 380 + static int per_file_ctx_stats(int idx, void *ptr, void *data) 381 381 { 382 382 struct i915_gem_context *ctx = ptr; 383 - int n; 383 + struct intel_engine_cs *engine; 384 + enum intel_engine_id id; 384 385 385 - for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { 386 - if (ctx->engine[n].state) 387 - per_file_stats(0, ctx->engine[n].state->obj, data); 388 - if (ctx->engine[n].ring) 389 - per_file_stats(0, ctx->engine[n].ring->vma->obj, data); 386 + for_each_engine(engine, ctx->i915, id) { 387 + struct intel_context *ce = to_intel_context(ctx, engine); 388 + 389 + if (ce->state) 390 + per_file_stats(0, ce->state->obj, data); 391 + if (ce->ring) 392 + per_file_stats(0, ce->ring->vma->obj, data); 390 393 } 391 394 392 395 return 0; ··· 1343 1340 struct rb_node *rb; 1344 1341 1345 1342 seq_printf(m, "%s:\n", engine->name); 1346 - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", 1343 + seq_printf(m, "\tseqno = %x [current %x, last %x]\n", 1347 1344 engine->hangcheck.seqno, seqno[id], 1348 - intel_engine_last_submit(engine), 1349 - engine->timeline->inflight_seqnos); 1345 + intel_engine_last_submit(engine)); 1350 1346 seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? 
%s\n", 1351 1347 yesno(intel_engine_has_waiter(engine)), 1352 1348 yesno(test_bit(engine->id, ··· 1962 1960 seq_putc(m, '\n'); 1963 1961 1964 1962 for_each_engine(engine, dev_priv, id) { 1965 - struct intel_context *ce = &ctx->engine[engine->id]; 1963 + struct intel_context *ce = 1964 + to_intel_context(ctx, engine); 1966 1965 1967 1966 seq_printf(m, "%s: ", engine->name); 1968 1967 if (ce->state) ··· 2606 2603 return "unknown"; 2607 2604 } 2608 2605 2606 + static const char *psr_sink_status(u8 val) 2607 + { 2608 + static const char * const sink_status[] = { 2609 + "inactive", 2610 + "transition to active, capture and display", 2611 + "active, display from RFB", 2612 + "active, capture and display on sink device timings", 2613 + "transition to inactive, capture and display, timing re-sync", 2614 + "reserved", 2615 + "reserved", 2616 + "sink internal error" 2617 + }; 2618 + 2619 + val &= DP_PSR_SINK_STATE_MASK; 2620 + if (val < ARRAY_SIZE(sink_status)) 2621 + return sink_status[val]; 2622 + 2623 + return "unknown"; 2624 + } 2625 + 2609 2626 static int i915_edp_psr_status(struct seq_file *m, void *data) 2610 2627 { 2611 2628 struct drm_i915_private *dev_priv = node_to_i915(m->private); ··· 2707 2684 seq_printf(m, "EDP_PSR2_STATUS: %x [%s]\n", 2708 2685 psr2, psr2_live_status(psr2)); 2709 2686 } 2687 + 2688 + if (dev_priv->psr.enabled) { 2689 + struct drm_dp_aux *aux = &dev_priv->psr.enabled->aux; 2690 + u8 val; 2691 + 2692 + if (drm_dp_dpcd_readb(aux, DP_PSR_STATUS, &val) == 1) 2693 + seq_printf(m, "Sink PSR status: 0x%x [%s]\n", val, 2694 + psr_sink_status(val)); 2695 + } 2710 2696 mutex_unlock(&dev_priv->psr.lock); 2697 + 2698 + if (READ_ONCE(dev_priv->psr.debug)) { 2699 + seq_printf(m, "Last attempted entry at: %lld\n", 2700 + dev_priv->psr.last_entry_attempt); 2701 + seq_printf(m, "Last exit at: %lld\n", 2702 + dev_priv->psr.last_exit); 2703 + } 2711 2704 2712 2705 intel_runtime_pm_put(dev_priv); 2713 2706 return 0; 2714 2707 } 2708 + 2709 + static int 2710 + 
i915_edp_psr_debug_set(void *data, u64 val) 2711 + { 2712 + struct drm_i915_private *dev_priv = data; 2713 + 2714 + if (!CAN_PSR(dev_priv)) 2715 + return -ENODEV; 2716 + 2717 + DRM_DEBUG_KMS("PSR debug %s\n", enableddisabled(val)); 2718 + 2719 + intel_runtime_pm_get(dev_priv); 2720 + intel_psr_irq_control(dev_priv, !!val); 2721 + intel_runtime_pm_put(dev_priv); 2722 + 2723 + return 0; 2724 + } 2725 + 2726 + static int 2727 + i915_edp_psr_debug_get(void *data, u64 *val) 2728 + { 2729 + struct drm_i915_private *dev_priv = data; 2730 + 2731 + if (!CAN_PSR(dev_priv)) 2732 + return -ENODEV; 2733 + 2734 + *val = READ_ONCE(dev_priv->psr.debug); 2735 + return 0; 2736 + } 2737 + 2738 + DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops, 2739 + i915_edp_psr_debug_get, i915_edp_psr_debug_set, 2740 + "%llu\n"); 2715 2741 2716 2742 static int i915_sink_crc(struct seq_file *m, void *data) 2717 2743 { ··· 3368 3296 seq_printf(m, " fp0: 0x%08x\n", pll->state.hw_state.fp0); 3369 3297 seq_printf(m, " fp1: 0x%08x\n", pll->state.hw_state.fp1); 3370 3298 seq_printf(m, " wrpll: 0x%08x\n", pll->state.hw_state.wrpll); 3299 + seq_printf(m, " cfgcr0: 0x%08x\n", pll->state.hw_state.cfgcr0); 3300 + seq_printf(m, " cfgcr1: 0x%08x\n", pll->state.hw_state.cfgcr1); 3301 + seq_printf(m, " mg_refclkin_ctl: 0x%08x\n", 3302 + pll->state.hw_state.mg_refclkin_ctl); 3303 + seq_printf(m, " mg_clktop2_coreclkctl1: 0x%08x\n", 3304 + pll->state.hw_state.mg_clktop2_coreclkctl1); 3305 + seq_printf(m, " mg_clktop2_hsclkctl: 0x%08x\n", 3306 + pll->state.hw_state.mg_clktop2_hsclkctl); 3307 + seq_printf(m, " mg_pll_div0: 0x%08x\n", 3308 + pll->state.hw_state.mg_pll_div0); 3309 + seq_printf(m, " mg_pll_div1: 0x%08x\n", 3310 + pll->state.hw_state.mg_pll_div1); 3311 + seq_printf(m, " mg_pll_lf: 0x%08x\n", 3312 + pll->state.hw_state.mg_pll_lf); 3313 + seq_printf(m, " mg_pll_frac_lock: 0x%08x\n", 3314 + pll->state.hw_state.mg_pll_frac_lock); 3315 + seq_printf(m, " mg_pll_ssc: 0x%08x\n", 3316 + 
pll->state.hw_state.mg_pll_ssc); 3317 + seq_printf(m, " mg_pll_bias: 0x%08x\n", 3318 + pll->state.hw_state.mg_pll_bias); 3319 + seq_printf(m, " mg_pll_tdc_coldst_bias: 0x%08x\n", 3320 + pll->state.hw_state.mg_pll_tdc_coldst_bias); 3371 3321 } 3372 3322 drm_modeset_unlock_all(dev); 3373 3323 ··· 3398 3304 3399 3305 static int i915_wa_registers(struct seq_file *m, void *unused) 3400 3306 { 3401 - int i; 3402 - int ret; 3403 - struct intel_engine_cs *engine; 3404 3307 struct drm_i915_private *dev_priv = node_to_i915(m->private); 3405 - struct drm_device *dev = &dev_priv->drm; 3406 3308 struct i915_workarounds *workarounds = &dev_priv->workarounds; 3407 - enum intel_engine_id id; 3408 - 3409 - ret = mutex_lock_interruptible(&dev->struct_mutex); 3410 - if (ret) 3411 - return ret; 3309 + int i; 3412 3310 3413 3311 intel_runtime_pm_get(dev_priv); 3414 3312 3415 3313 seq_printf(m, "Workarounds applied: %d\n", workarounds->count); 3416 - for_each_engine(engine, dev_priv, id) 3417 - seq_printf(m, "HW whitelist count for %s: %d\n", 3418 - engine->name, workarounds->hw_whitelist_count[id]); 3419 3314 for (i = 0; i < workarounds->count; ++i) { 3420 3315 i915_reg_t addr; 3421 3316 u32 mask, value, read; ··· 3420 3337 } 3421 3338 3422 3339 intel_runtime_pm_put(dev_priv); 3423 - mutex_unlock(&dev->struct_mutex); 3424 3340 3425 3341 return 0; 3426 3342 } ··· 4259 4177 "0x%08llx\n"); 4260 4178 4261 4179 static int 4262 - i915_max_freq_get(void *data, u64 *val) 4263 - { 4264 - struct drm_i915_private *dev_priv = data; 4265 - 4266 - if (INTEL_GEN(dev_priv) < 6) 4267 - return -ENODEV; 4268 - 4269 - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); 4270 - return 0; 4271 - } 4272 - 4273 - static int 4274 - i915_max_freq_set(void *data, u64 val) 4275 - { 4276 - struct drm_i915_private *dev_priv = data; 4277 - struct intel_rps *rps = &dev_priv->gt_pm.rps; 4278 - u32 hw_max, hw_min; 4279 - int ret; 4280 - 4281 - if (INTEL_GEN(dev_priv) < 6) 4282 - return -ENODEV; 4283 
- 4284 - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); 4285 - 4286 - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); 4287 - if (ret) 4288 - return ret; 4289 - 4290 - /* 4291 - * Turbo will still be enabled, but won't go above the set value. 4292 - */ 4293 - val = intel_freq_opcode(dev_priv, val); 4294 - 4295 - hw_max = rps->max_freq; 4296 - hw_min = rps->min_freq; 4297 - 4298 - if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { 4299 - mutex_unlock(&dev_priv->pcu_lock); 4300 - return -EINVAL; 4301 - } 4302 - 4303 - rps->max_freq_softlimit = val; 4304 - 4305 - if (intel_set_rps(dev_priv, val)) 4306 - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); 4307 - 4308 - mutex_unlock(&dev_priv->pcu_lock); 4309 - 4310 - return 0; 4311 - } 4312 - 4313 - DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops, 4314 - i915_max_freq_get, i915_max_freq_set, 4315 - "%llu\n"); 4316 - 4317 - static int 4318 - i915_min_freq_get(void *data, u64 *val) 4319 - { 4320 - struct drm_i915_private *dev_priv = data; 4321 - 4322 - if (INTEL_GEN(dev_priv) < 6) 4323 - return -ENODEV; 4324 - 4325 - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); 4326 - return 0; 4327 - } 4328 - 4329 - static int 4330 - i915_min_freq_set(void *data, u64 val) 4331 - { 4332 - struct drm_i915_private *dev_priv = data; 4333 - struct intel_rps *rps = &dev_priv->gt_pm.rps; 4334 - u32 hw_max, hw_min; 4335 - int ret; 4336 - 4337 - if (INTEL_GEN(dev_priv) < 6) 4338 - return -ENODEV; 4339 - 4340 - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); 4341 - 4342 - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); 4343 - if (ret) 4344 - return ret; 4345 - 4346 - /* 4347 - * Turbo will still be enabled, but won't go below the set value. 
4348 - */ 4349 - val = intel_freq_opcode(dev_priv, val); 4350 - 4351 - hw_max = rps->max_freq; 4352 - hw_min = rps->min_freq; 4353 - 4354 - if (val < hw_min || 4355 - val > hw_max || val > rps->max_freq_softlimit) { 4356 - mutex_unlock(&dev_priv->pcu_lock); 4357 - return -EINVAL; 4358 - } 4359 - 4360 - rps->min_freq_softlimit = val; 4361 - 4362 - if (intel_set_rps(dev_priv, val)) 4363 - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); 4364 - 4365 - mutex_unlock(&dev_priv->pcu_lock); 4366 - 4367 - return 0; 4368 - } 4369 - 4370 - DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops, 4371 - i915_min_freq_get, i915_min_freq_set, 4372 - "%llu\n"); 4373 - 4374 - static int 4375 4180 i915_cache_sharing_get(void *data, u64 *val) 4376 4181 { 4377 4182 struct drm_i915_private *dev_priv = data; ··· 4819 4850 const struct file_operations *fops; 4820 4851 } i915_debugfs_files[] = { 4821 4852 {"i915_wedged", &i915_wedged_fops}, 4822 - {"i915_max_freq", &i915_max_freq_fops}, 4823 - {"i915_min_freq", &i915_min_freq_fops}, 4824 4853 {"i915_cache_sharing", &i915_cache_sharing_fops}, 4825 4854 {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, 4826 4855 {"i915_ring_test_irq", &i915_ring_test_irq_fops}, ··· 4841 4874 {"i915_guc_log_relay", &i915_guc_log_relay_fops}, 4842 4875 {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops}, 4843 4876 {"i915_ipc_status", &i915_ipc_status_fops}, 4844 - {"i915_drrs_ctl", &i915_drrs_ctl_fops} 4877 + {"i915_drrs_ctl", &i915_drrs_ctl_fops}, 4878 + {"i915_edp_psr_debug", &i915_edp_psr_debug_fops} 4845 4879 }; 4846 4880 4847 4881 int i915_debugfs_register(struct drm_i915_private *dev_priv)
+12 -3
drivers/gpu/drm/i915/i915_drv.c
··· 101 101 __builtin_return_address(0), &vaf); 102 102 103 103 if (is_error && !shown_bug_once) { 104 - dev_notice(kdev, "%s", FDO_BUG_MSG); 104 + /* 105 + * Ask the user to file a bug report for the error, except 106 + * if they may have caused the bug by fiddling with unsafe 107 + * module parameters. 108 + */ 109 + if (!test_taint(TAINT_USER)) 110 + dev_notice(kdev, "%s", FDO_BUG_MSG); 105 111 shown_bug_once = true; 106 112 } 107 113 ··· 2474 2468 /* 2475 2469 * RC6 transitioning can be delayed up to 2 msec (see 2476 2470 * valleyview_enable_rps), use 3 msec for safety. 2471 + * 2472 + * This can fail to turn off the rc6 if the GPU is stuck after a failed 2473 + * reset and we are trying to force the machine to sleep. 2477 2474 */ 2478 2475 if (vlv_wait_for_pw_status(dev_priv, mask, val)) 2479 - DRM_ERROR("timeout waiting for GT wells to go %s\n", 2480 - onoff(wait_for_on)); 2476 + DRM_DEBUG_DRIVER("timeout waiting for GT wells to go %s\n", 2477 + onoff(wait_for_on)); 2481 2478 } 2482 2479 2483 2480 static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv)
+23 -16
drivers/gpu/drm/i915/i915_drv.h
··· 72 72 #include "i915_gem_fence_reg.h" 73 73 #include "i915_gem_object.h" 74 74 #include "i915_gem_gtt.h" 75 - #include "i915_gem_timeline.h" 76 75 #include "i915_gpu_error.h" 77 76 #include "i915_request.h" 77 + #include "i915_scheduler.h" 78 + #include "i915_timeline.h" 78 79 #include "i915_vma.h" 79 80 80 81 #include "intel_gvt.h" ··· 85 84 86 85 #define DRIVER_NAME "i915" 87 86 #define DRIVER_DESC "Intel Graphics" 88 - #define DRIVER_DATE "20180413" 89 - #define DRIVER_TIMESTAMP 1523611258 87 + #define DRIVER_DATE "20180514" 88 + #define DRIVER_TIMESTAMP 1526300884 90 89 91 90 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and 92 91 * WARN_ON()) for hw state sanity checks to check for unexpected conditions ··· 610 609 bool has_hw_tracking; 611 610 bool psr2_enabled; 612 611 u8 sink_sync_latency; 612 + bool debug; 613 + ktime_t last_entry_attempt; 614 + ktime_t last_exit; 613 615 614 616 void (*enable_source)(struct intel_dp *, 615 617 const struct intel_crtc_state *); ··· 1073 1069 } edp; 1074 1070 1075 1071 struct { 1072 + bool enable; 1076 1073 bool full_link; 1077 1074 bool require_aux_wakeup; 1078 1075 int idle_frames; ··· 1190 1185 /* packed/y */ 1191 1186 struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; 1192 1187 struct skl_ddb_entry uv_plane[I915_MAX_PIPES][I915_MAX_PLANES]; 1188 + u8 enabled_slices; /* GEN11 has configurable 2 slices */ 1193 1189 }; 1194 1190 1195 1191 struct skl_ddb_values { ··· 1303 1297 struct i915_workarounds { 1304 1298 struct i915_wa_reg reg[I915_MAX_WA_REGS]; 1305 1299 u32 count; 1306 - u32 hw_whitelist_count[I915_NUM_ENGINES]; 1307 1300 }; 1308 1301 1309 1302 struct i915_virtual_gpu { ··· 2061 2056 void (*cleanup_engine)(struct intel_engine_cs *engine); 2062 2057 2063 2058 struct list_head timelines; 2064 - struct i915_gem_timeline global_timeline; 2059 + 2060 + struct list_head active_rings; 2061 + struct list_head closed_vma; 2065 2062 u32 active_requests; 2063 + u32 request_serial; 2066 
2064 2067 2065 /** 2068 2066 * Is the GPU currently considered idle, or busy executing ··· 2469 2461 2470 2462 #define IS_CNL_REVID(p, since, until) \ 2471 2463 (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) 2464 + 2465 + #define ICL_REVID_A0 0x0 2466 + #define ICL_REVID_A2 0x1 2467 + #define ICL_REVID_B0 0x3 2468 + #define ICL_REVID_B2 0x4 2469 + #define ICL_REVID_C0 0x5 2470 + 2471 + #define IS_ICL_REVID(p, since, until) \ 2472 + (IS_ICELAKE(p) && IS_REVID(p, since, until)) 2472 2473 2473 2474 /* 2474 2475 * The genX designation typically refers to the render engine, so render ··· 3176 3159 struct intel_rps_client *rps); 3177 3160 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 3178 3161 unsigned int flags, 3179 - int priority); 3162 + const struct i915_sched_attr *attr); 3180 3163 #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX 3181 3164 3182 3165 int __must_check ··· 3243 3226 rcu_read_unlock(); 3244 3227 3245 3228 return ctx; 3246 - } 3247 - 3248 - static inline struct intel_timeline * 3249 - i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, 3250 - struct intel_engine_cs *engine) 3251 - { 3252 - struct i915_address_space *vm; 3253 - 3254 - vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; 3255 - return &vm->timeline.engine[engine->id]; 3256 3229 } 3257 3230 3258 3231 int i915_perf_open_ioctl(struct drm_device *dev, void *data,
+78 -85
drivers/gpu/drm/i915/i915_gem.c
··· 141 141 { 142 142 lockdep_assert_held(&i915->drm.struct_mutex); 143 143 GEM_BUG_ON(i915->gt.active_requests); 144 + GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); 144 145 145 146 if (!i915->gt.awake) 146 147 return I915_EPOCH_INVALID; ··· 162 161 synchronize_irq(i915->drm.irq); 163 162 164 163 intel_engines_park(i915); 165 - i915_gem_timelines_park(i915); 164 + i915_timelines_park(i915); 166 165 167 166 i915_pmu_gt_parked(i915); 167 + i915_vma_parked(i915); 168 168 169 169 i915->gt.awake = false; 170 170 ··· 566 564 return timeout; 567 565 } 568 566 569 - static void __fence_set_priority(struct dma_fence *fence, int prio) 567 + static void __fence_set_priority(struct dma_fence *fence, 568 + const struct i915_sched_attr *attr) 570 569 { 571 570 struct i915_request *rq; 572 571 struct intel_engine_cs *engine; ··· 578 575 rq = to_request(fence); 579 576 engine = rq->engine; 580 577 581 - rcu_read_lock(); 578 + local_bh_disable(); 579 + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 582 580 if (engine->schedule) 583 - engine->schedule(rq, prio); 581 + engine->schedule(rq, attr); 584 582 rcu_read_unlock(); 583 + local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 585 584 } 586 585 587 - static void fence_set_priority(struct dma_fence *fence, int prio) 586 + static void fence_set_priority(struct dma_fence *fence, 587 + const struct i915_sched_attr *attr) 588 588 { 589 589 /* Recurse once into a fence-array */ 590 590 if (dma_fence_is_array(fence)) { ··· 595 589 int i; 596 590 597 591 for (i = 0; i < array->num_fences; i++) 598 - __fence_set_priority(array->fences[i], prio); 592 + __fence_set_priority(array->fences[i], attr); 599 593 } else { 600 - __fence_set_priority(fence, prio); 594 + __fence_set_priority(fence, attr); 601 595 } 602 596 } 603 597 604 598 int 605 599 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 606 600 unsigned int flags, 607 - int prio) 601 + const struct i915_sched_attr *attr) 608 602 { 
609 603 struct dma_fence *excl; 610 604 ··· 619 613 return ret; 620 614 621 615 for (i = 0; i < count; i++) { 622 - fence_set_priority(shared[i], prio); 616 + fence_set_priority(shared[i], attr); 623 617 dma_fence_put(shared[i]); 624 618 } 625 619 ··· 629 623 } 630 624 631 625 if (excl) { 632 - fence_set_priority(excl, prio); 626 + fence_set_priority(excl, attr); 633 627 dma_fence_put(excl); 634 628 } 635 629 return 0; ··· 2980 2974 * extra delay for a recent interrupt is pointless. Hence, we do 2981 2975 * not need an engine->irq_seqno_barrier() before the seqno reads. 2982 2976 */ 2983 - spin_lock_irqsave(&engine->timeline->lock, flags); 2984 - list_for_each_entry(request, &engine->timeline->requests, link) { 2977 + spin_lock_irqsave(&engine->timeline.lock, flags); 2978 + list_for_each_entry(request, &engine->timeline.requests, link) { 2985 2979 if (__i915_request_completed(request, request->global_seqno)) 2986 2980 continue; 2987 2981 ··· 2992 2986 active = request; 2993 2987 break; 2994 2988 } 2995 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 2989 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 2996 2990 2997 2991 return active; 2998 2992 } ··· 3113 3107 { 3114 3108 struct intel_engine_cs *engine = request->engine; 3115 3109 struct i915_gem_context *hung_ctx = request->ctx; 3116 - struct intel_timeline *timeline; 3110 + struct i915_timeline *timeline = request->timeline; 3117 3111 unsigned long flags; 3118 3112 3119 - timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); 3113 + GEM_BUG_ON(timeline == &engine->timeline); 3120 3114 3121 - spin_lock_irqsave(&engine->timeline->lock, flags); 3122 - spin_lock(&timeline->lock); 3115 + spin_lock_irqsave(&engine->timeline.lock, flags); 3116 + spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); 3123 3117 3124 - list_for_each_entry_continue(request, &engine->timeline->requests, link) 3118 + list_for_each_entry_continue(request, &engine->timeline.requests, link) 3125 3119 if 
(request->ctx == hung_ctx) 3126 3120 skip_request(request); 3127 3121 ··· 3129 3123 skip_request(request); 3130 3124 3131 3125 spin_unlock(&timeline->lock); 3132 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 3126 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 3133 3127 } 3134 3128 3135 3129 /* Returns the request if it was guilty of the hang */ ··· 3186 3180 dma_fence_set_error(&request->fence, -EAGAIN); 3187 3181 3188 3182 /* Rewind the engine to replay the incomplete rq */ 3189 - spin_lock_irq(&engine->timeline->lock); 3183 + spin_lock_irq(&engine->timeline.lock); 3190 3184 request = list_prev_entry(request, link); 3191 - if (&request->link == &engine->timeline->requests) 3185 + if (&request->link == &engine->timeline.requests) 3192 3186 request = NULL; 3193 - spin_unlock_irq(&engine->timeline->lock); 3187 + spin_unlock_irq(&engine->timeline.lock); 3194 3188 } 3195 3189 } 3196 3190 ··· 3238 3232 stalled_mask & ENGINE_MASK(id)); 3239 3233 ctx = fetch_and_zero(&engine->last_retired_context); 3240 3234 if (ctx) 3241 - engine->context_unpin(engine, ctx); 3235 + intel_context_unpin(ctx, engine); 3242 3236 3243 3237 /* 3244 3238 * Ostensibily, we always want a context loaded for powersaving, ··· 3303 3297 request->fence.context, request->fence.seqno); 3304 3298 dma_fence_set_error(&request->fence, -EIO); 3305 3299 3306 - spin_lock_irqsave(&request->engine->timeline->lock, flags); 3300 + spin_lock_irqsave(&request->engine->timeline.lock, flags); 3307 3301 __i915_request_submit(request); 3308 3302 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3309 - spin_unlock_irqrestore(&request->engine->timeline->lock, flags); 3303 + spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3310 3304 } 3311 3305 3312 3306 void i915_gem_set_wedged(struct drm_i915_private *i915) ··· 3316 3310 3317 3311 GEM_TRACE("start\n"); 3318 3312 3319 - if (drm_debug & DRM_UT_DRIVER) { 3313 + if (GEM_SHOW_DEBUG()) { 3320 3314 struct drm_printer p 
= drm_debug_printer(__func__); 3321 3315 3322 3316 for_each_engine(engine, i915, id) ··· 3375 3369 * (lockless) lookup doesn't try and wait upon the request as we 3376 3370 * reset it. 3377 3371 */ 3378 - spin_lock_irqsave(&engine->timeline->lock, flags); 3372 + spin_lock_irqsave(&engine->timeline.lock, flags); 3379 3373 intel_engine_init_global_seqno(engine, 3380 3374 intel_engine_last_submit(engine)); 3381 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 3375 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 3382 3376 3383 3377 i915_gem_reset_finish_engine(engine); 3384 3378 } ··· 3390 3384 3391 3385 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3392 3386 { 3393 - struct i915_gem_timeline *tl; 3394 - int i; 3387 + struct i915_timeline *tl; 3395 3388 3396 3389 lockdep_assert_held(&i915->drm.struct_mutex); 3397 3390 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) ··· 3409 3404 * No more can be submitted until we reset the wedged bit. 3410 3405 */ 3411 3406 list_for_each_entry(tl, &i915->gt.timelines, link) { 3412 - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3413 - struct i915_request *rq; 3407 + struct i915_request *rq; 3414 3408 3415 - rq = i915_gem_active_peek(&tl->engine[i].last_request, 3416 - &i915->drm.struct_mutex); 3417 - if (!rq) 3418 - continue; 3409 + rq = i915_gem_active_peek(&tl->last_request, 3410 + &i915->drm.struct_mutex); 3411 + if (!rq) 3412 + continue; 3419 3413 3420 - /* 3421 - * We can't use our normal waiter as we want to 3422 - * avoid recursively trying to handle the current 3423 - * reset. The basic dma_fence_default_wait() installs 3424 - * a callback for dma_fence_signal(), which is 3425 - * triggered by our nop handler (indirectly, the 3426 - * callback enables the signaler thread which is 3427 - * woken by the nop_submit_request() advancing the seqno 3428 - * and when the seqno passes the fence, the signaler 3429 - * then signals the fence waking us up). 
3430 - */ 3431 - if (dma_fence_default_wait(&rq->fence, true, 3432 - MAX_SCHEDULE_TIMEOUT) < 0) 3433 - return false; 3434 - } 3414 + /* 3415 + * We can't use our normal waiter as we want to 3416 + * avoid recursively trying to handle the current 3417 + * reset. The basic dma_fence_default_wait() installs 3418 + * a callback for dma_fence_signal(), which is 3419 + * triggered by our nop handler (indirectly, the 3420 + * callback enables the signaler thread which is 3421 + * woken by the nop_submit_request() advancing the seqno 3422 + * and when the seqno passes the fence, the signaler 3423 + * then signals the fence waking us up). 3424 + */ 3425 + if (dma_fence_default_wait(&rq->fence, true, 3426 + MAX_SCHEDULE_TIMEOUT) < 0) 3427 + return false; 3435 3428 } 3436 3429 i915_retire_requests(i915); 3437 3430 GEM_BUG_ON(i915->gt.active_requests); ··· 3734 3731 return ret; 3735 3732 } 3736 3733 3737 - static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) 3734 + static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) 3738 3735 { 3739 - int ret, i; 3740 - 3741 - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3742 - ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); 3743 - if (ret) 3744 - return ret; 3745 - } 3746 - 3747 - return 0; 3736 + return i915_gem_active_wait(&tl->last_request, flags); 3748 3737 } 3749 3738 3750 3739 static int wait_for_engines(struct drm_i915_private *i915) ··· 3754 3759 3755 3760 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) 3756 3761 { 3757 - int ret; 3758 - 3759 3762 /* If the device is asleep, we have no requests outstanding */ 3760 3763 if (!READ_ONCE(i915->gt.awake)) 3761 3764 return 0; 3762 3765 3763 3766 if (flags & I915_WAIT_LOCKED) { 3764 - struct i915_gem_timeline *tl; 3767 + struct i915_timeline *tl; 3768 + int err; 3765 3769 3766 3770 lockdep_assert_held(&i915->drm.struct_mutex); 3767 3771 3768 3772 list_for_each_entry(tl, &i915->gt.timelines, link) 
{ 3769 - ret = wait_for_timeline(tl, flags); 3770 - if (ret) 3771 - return ret; 3773 + err = wait_for_timeline(tl, flags); 3774 + if (err) 3775 + return err; 3772 3776 } 3773 3777 i915_retire_requests(i915); 3774 3778 3775 - ret = wait_for_engines(i915); 3779 + return wait_for_engines(i915); 3776 3780 } else { 3777 - ret = wait_for_timeline(&i915->gt.global_timeline, flags); 3778 - } 3781 + struct intel_engine_cs *engine; 3782 + enum intel_engine_id id; 3783 + int err; 3779 3784 3780 - return ret; 3785 + for_each_engine(engine, i915, id) { 3786 + err = wait_for_timeline(&engine->timeline, flags); 3787 + if (err) 3788 + return err; 3789 + } 3790 + 3791 + return 0; 3792 + } 3781 3793 } 3782 3794 3783 3795 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) ··· 4798 4796 &obj->vma_list, obj_link) { 4799 4797 GEM_BUG_ON(i915_vma_is_active(vma)); 4800 4798 vma->flags &= ~I915_VMA_PIN_MASK; 4801 - i915_vma_close(vma); 4799 + i915_vma_destroy(vma); 4802 4800 } 4803 4801 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4804 4802 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); ··· 4953 4951 enum intel_engine_id id; 4954 4952 4955 4953 for_each_engine(engine, i915, id) { 4956 - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); 4954 + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 4957 4955 GEM_BUG_ON(engine->last_retired_context != kernel_context); 4958 4956 } 4959 4957 } ··· 5291 5289 for_each_engine(engine, i915, id) { 5292 5290 struct i915_vma *state; 5293 5291 5294 - state = ctx->engine[id].state; 5292 + state = to_intel_context(ctx, engine)->state; 5295 5293 if (!state) 5296 5294 continue; 5297 5295 ··· 5599 5597 if (!dev_priv->priorities) 5600 5598 goto err_dependencies; 5601 5599 5602 - mutex_lock(&dev_priv->drm.struct_mutex); 5603 5600 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5604 - err = i915_gem_timeline_init__global(dev_priv); 5605 - mutex_unlock(&dev_priv->drm.struct_mutex); 5606 - if (err) 5607 - goto 
err_priorities; 5601 + INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5602 + INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 5608 5603 5609 5604 i915_gem_init__mm(dev_priv); 5610 5605 ··· 5622 5623 5623 5624 return 0; 5624 5625 5625 - err_priorities: 5626 - kmem_cache_destroy(dev_priv->priorities); 5627 5626 err_dependencies: 5628 5627 kmem_cache_destroy(dev_priv->dependencies); 5629 5628 err_requests: ··· 5642 5645 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 5643 5646 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 5644 5647 WARN_ON(dev_priv->mm.object_count); 5645 - 5646 - mutex_lock(&dev_priv->drm.struct_mutex); 5647 - i915_gem_timeline_fini(&dev_priv->gt.global_timeline); 5648 5648 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 5649 - mutex_unlock(&dev_priv->drm.struct_mutex); 5650 5649 5651 5650 kmem_cache_destroy(dev_priv->priorities); 5652 5651 kmem_cache_destroy(dev_priv->dependencies);
+6
drivers/gpu/drm/i915/i915_gem.h
··· 30 30 struct drm_i915_private; 31 31 32 32 #ifdef CONFIG_DRM_I915_DEBUG_GEM 33 + 34 + #define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) 35 + 33 36 #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ 34 37 pr_err("%s:%d GEM_BUG_ON(%s)\n", \ 35 38 __func__, __LINE__, __stringify(condition)); \ ··· 48 45 #define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) 49 46 50 47 #else 48 + 49 + #define GEM_SHOW_DEBUG() (0) 50 + 51 51 #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) 52 52 #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) 53 53
+29 -22
drivers/gpu/drm/i915/i915_gem_context.c
··· 117 117 118 118 static void i915_gem_context_free(struct i915_gem_context *ctx) 119 119 { 120 - int i; 120 + unsigned int n; 121 121 122 122 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 123 123 GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); 124 124 125 125 i915_ppgtt_put(ctx->ppgtt); 126 126 127 - for (i = 0; i < I915_NUM_ENGINES; i++) { 128 - struct intel_context *ce = &ctx->engine[i]; 127 + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { 128 + struct intel_context *ce = &ctx->__engine[n]; 129 129 130 130 if (!ce->state) 131 131 continue; ··· 281 281 kref_init(&ctx->ref); 282 282 list_add_tail(&ctx->link, &dev_priv->contexts.list); 283 283 ctx->i915 = dev_priv; 284 - ctx->priority = I915_PRIORITY_NORMAL; 284 + ctx->sched.priority = I915_PRIORITY_NORMAL; 285 285 286 286 INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); 287 287 INIT_LIST_HEAD(&ctx->handles_list); ··· 431 431 return ctx; 432 432 433 433 i915_gem_context_clear_bannable(ctx); 434 - ctx->priority = prio; 434 + ctx->sched.priority = prio; 435 435 ctx->ring_size = PAGE_SIZE; 436 436 437 437 GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); ··· 521 521 if (!engine->last_retired_context) 522 522 continue; 523 523 524 - engine->context_unpin(engine, engine->last_retired_context); 524 + intel_context_unpin(engine->last_retired_context, engine); 525 525 engine->last_retired_context = NULL; 526 526 } 527 527 } ··· 577 577 idr_destroy(&file_priv->context_idr); 578 578 } 579 579 580 + static struct i915_request * 581 + last_request_on_engine(struct i915_timeline *timeline, 582 + struct intel_engine_cs *engine) 583 + { 584 + struct i915_request *rq; 585 + 586 + if (timeline == &engine->timeline) 587 + return NULL; 588 + 589 + rq = i915_gem_active_raw(&timeline->last_request, 590 + &engine->i915->drm.struct_mutex); 591 + if (rq && rq->engine == engine) 592 + return rq; 593 + 594 + return NULL; 595 + } 596 + 580 597 static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) 581 598 { 582 - 
struct i915_gem_timeline *timeline; 599 + struct i915_timeline *timeline; 583 600 584 601 list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { 585 - struct intel_timeline *tl; 586 - 587 - if (timeline == &engine->i915->gt.global_timeline) 588 - continue; 589 - 590 - tl = &timeline->engine[engine->id]; 591 - if (i915_gem_active_peek(&tl->last_request, 592 - &engine->i915->drm.struct_mutex)) 602 + if (last_request_on_engine(timeline, engine)) 593 603 return false; 594 604 } 595 605 ··· 609 599 int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) 610 600 { 611 601 struct intel_engine_cs *engine; 612 - struct i915_gem_timeline *timeline; 602 + struct i915_timeline *timeline; 613 603 enum intel_engine_id id; 614 604 615 605 lockdep_assert_held(&dev_priv->drm.struct_mutex); ··· 629 619 /* Queue this switch after all other activity */ 630 620 list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { 631 621 struct i915_request *prev; 632 - struct intel_timeline *tl; 633 622 634 - tl = &timeline->engine[engine->id]; 635 - prev = i915_gem_active_raw(&tl->last_request, 636 - &dev_priv->drm.struct_mutex); 623 + prev = last_request_on_engine(timeline, engine); 637 624 if (prev) 638 625 i915_sw_fence_await_sw_fence_gfp(&rq->submit, 639 626 &prev->submit, ··· 760 753 args->value = i915_gem_context_is_bannable(ctx); 761 754 break; 762 755 case I915_CONTEXT_PARAM_PRIORITY: 763 - args->value = ctx->priority; 756 + args->value = ctx->sched.priority; 764 757 break; 765 758 default: 766 759 ret = -EINVAL; ··· 833 826 !capable(CAP_SYS_NICE)) 834 827 ret = -EPERM; 835 828 else 836 - ctx->priority = priority; 829 + ctx->sched.priority = priority; 837 830 } 838 831 break; 839 832
+30 -13
drivers/gpu/drm/i915/i915_gem_context.h
··· 137 137 */ 138 138 u32 user_handle; 139 139 140 - /** 141 - * @priority: execution and service priority 142 - * 143 - * All clients are equal, but some are more equal than others! 144 - * 145 - * Requests from a context with a greater (more positive) value of 146 - * @priority will be executed before those with a lower @priority 147 - * value, forming a simple QoS. 148 - * 149 - * The &drm_i915_private.kernel_context is assigned the lowest priority. 150 - */ 151 - int priority; 140 + struct i915_sched_attr sched; 152 141 153 142 /** ggtt_offset_bias: placement restriction for context objects */ 154 143 u32 ggtt_offset_bias; ··· 149 160 u32 *lrc_reg_state; 150 161 u64 lrc_desc; 151 162 int pin_count; 152 - } engine[I915_NUM_ENGINES]; 163 + } __engine[I915_NUM_ENGINES]; 153 164 154 165 /** ring_size: size for allocating the per-engine ring buffer */ 155 166 u32 ring_size; ··· 254 265 static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) 255 266 { 256 267 return !ctx->file_priv; 268 + } 269 + 270 + static inline struct intel_context * 271 + to_intel_context(struct i915_gem_context *ctx, 272 + const struct intel_engine_cs *engine) 273 + { 274 + return &ctx->__engine[engine->id]; 275 + } 276 + 277 + static inline struct intel_ring * 278 + intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) 279 + { 280 + return engine->context_pin(engine, ctx); 281 + } 282 + 283 + static inline void __intel_context_pin(struct i915_gem_context *ctx, 284 + const struct intel_engine_cs *engine) 285 + { 286 + struct intel_context *ce = to_intel_context(ctx, engine); 287 + 288 + GEM_BUG_ON(!ce->pin_count); 289 + ce->pin_count++; 290 + } 291 + 292 + static inline void intel_context_unpin(struct i915_gem_context *ctx, 293 + struct intel_engine_cs *engine) 294 + { 295 + engine->context_unpin(engine, ctx); 257 296 } 258 297 259 298 /* i915_gem_context.c */
+2 -1
drivers/gpu/drm/i915/i915_gem_execbuffer.c
··· 762 762 } 763 763 764 764 /* transfer ref to ctx */ 765 - vma->open_count++; 765 + if (!vma->open_count++) 766 + i915_vma_reopen(vma); 766 767 list_add(&lut->obj_link, &obj->lut_list); 767 768 list_add(&lut->ctx_link, &eb->ctx->handles_list); 768 769 lut->ctx = eb->ctx;
+43 -27
drivers/gpu/drm/i915/i915_gem_gtt.c
··· 110 110 111 111 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) 112 112 { 113 - /* Note that as an uncached mmio write, this should flush the 113 + /* 114 + * Note that as an uncached mmio write, this will flush the 114 115 * WCB of the writes into the GGTT before it triggers the invalidate. 115 116 */ 116 117 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); ··· 1162 1161 vaddr[idx.pde] |= GEN8_PDE_IPS_64K; 1163 1162 kunmap_atomic(vaddr); 1164 1163 page_size = I915_GTT_PAGE_SIZE_64K; 1164 + 1165 + /* 1166 + * We write all 4K page entries, even when using 64K 1167 + * pages. In order to verify that the HW isn't cheating 1168 + * by using the 4K PTE instead of the 64K PTE, we want 1169 + * to remove all the surplus entries. If the HW skipped 1170 + * the 64K PTE, it will read/write into the scratch page 1171 + * instead - which we detect as missing results during 1172 + * selftests. 1173 + */ 1174 + if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { 1175 + u16 i; 1176 + 1177 + encode = pte_encode | vma->vm->scratch_page.daddr; 1178 + vaddr = kmap_atomic_px(pd->page_table[idx.pde]); 1179 + 1180 + for (i = 1; i < index; i += 16) 1181 + memset64(vaddr + i, encode, 15); 1182 + 1183 + kunmap_atomic(vaddr); 1184 + } 1165 1185 } 1166 1186 1167 1187 vma->page_sizes.gtt |= page_size; ··· 2133 2111 struct drm_i915_private *dev_priv, 2134 2112 const char *name) 2135 2113 { 2136 - i915_gem_timeline_init(dev_priv, &vm->timeline, name); 2137 - 2138 2114 drm_mm_init(&vm->mm, 0, vm->total); 2139 2115 vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; 2140 2116 ··· 2149 2129 if (pagevec_count(&vm->free_pages)) 2150 2130 vm_free_pages_release(vm, true); 2151 2131 2152 - i915_gem_timeline_fini(&vm->timeline); 2153 2132 drm_mm_takedown(&vm->mm); 2154 2133 list_del(&vm->global_link); 2155 2134 } ··· 2159 2140 * called on driver load and after a GPU reset, so you can place 2160 2141 * workarounds here even if they get overwritten by GPU reset. 
2161 2142 */ 2162 - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */ 2143 + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ 2163 2144 if (IS_BROADWELL(dev_priv)) 2164 2145 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2165 2146 else if (IS_CHERRYVIEW(dev_priv)) 2166 2147 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2167 - else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv)) 2168 - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2169 2148 else if (IS_GEN9_LP(dev_priv)) 2170 2149 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2150 + else if (INTEL_GEN(dev_priv) >= 9) 2151 + I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2171 2152 2172 2153 /* 2173 2154 * To support 64K PTEs we need to first enable the use of the ··· 2241 2222 2242 2223 void i915_ppgtt_close(struct i915_address_space *vm) 2243 2224 { 2225 + GEM_BUG_ON(vm->closed); 2226 + vm->closed = true; 2227 + } 2228 + 2229 + static void ppgtt_destroy_vma(struct i915_address_space *vm) 2230 + { 2244 2231 struct list_head *phases[] = { 2245 2232 &vm->active_list, 2246 2233 &vm->inactive_list, ··· 2254 2229 NULL, 2255 2230 }, **phase; 2256 2231 2257 - GEM_BUG_ON(vm->closed); 2258 2232 vm->closed = true; 2259 - 2260 2233 for (phase = phases; *phase; phase++) { 2261 2234 struct i915_vma *vma, *vn; 2262 2235 2263 2236 list_for_each_entry_safe(vma, vn, *phase, vm_link) 2264 - if (!i915_vma_is_closed(vma)) 2265 - i915_vma_close(vma); 2237 + i915_vma_destroy(vma); 2266 2238 } 2267 2239 } 2268 2240 ··· 2270 2248 2271 2249 trace_i915_ppgtt_release(&ppgtt->base); 2272 2250 2273 - /* vmas should already be unbound and destroyed */ 2251 + ppgtt_destroy_vma(&ppgtt->base); 2252 + 2274 2253 GEM_BUG_ON(!list_empty(&ppgtt->base.active_list)); 2275 2254 GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list)); 2276 2255 GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list)); ··· 
2440 2417 for_each_sgt_dma(addr, sgt_iter, vma->pages) 2441 2418 gen8_set_pte(gtt_entries++, pte_encode | addr); 2442 2419 2443 - wmb(); 2444 - 2445 - /* This next bit makes the above posting read even more important. We 2446 - * want to flush the TLBs only after we're certain all the PTE updates 2447 - * have finished. 2420 + /* 2421 + * We want to flush the TLBs only after we're certain all the PTE 2422 + * updates have finished. 2448 2423 */ 2449 2424 ggtt->invalidate(vm->i915); 2450 2425 } ··· 2480 2459 dma_addr_t addr; 2481 2460 for_each_sgt_dma(addr, iter, vma->pages) 2482 2461 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); 2483 - wmb(); 2484 2462 2485 - /* This next bit makes the above posting read even more important. We 2486 - * want to flush the TLBs only after we're certain all the PTE updates 2487 - * have finished. 2463 + /* 2464 + * We want to flush the TLBs only after we're certain all the PTE 2465 + * updates have finished. 2488 2466 */ 2489 2467 ggtt->invalidate(vm->i915); 2490 2468 } ··· 3345 3325 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); 3346 3326 3347 3327 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3348 - 3349 - if (INTEL_GEN(dev_priv) >= 9) { 3350 - size = gen8_get_total_gtt_size(snb_gmch_ctl); 3351 - } else if (IS_CHERRYVIEW(dev_priv)) { 3328 + if (IS_CHERRYVIEW(dev_priv)) 3352 3329 size = chv_get_total_gtt_size(snb_gmch_ctl); 3353 - } else { 3330 + else 3354 3331 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3355 - } 3356 3332 3357 3333 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3358 3334 ggtt->base.cleanup = gen6_gmch_remove;
+2 -3
drivers/gpu/drm/i915/i915_gem_gtt.h
··· 38 38 #include <linux/mm.h> 39 39 #include <linux/pagevec.h> 40 40 41 - #include "i915_gem_timeline.h" 42 - 43 41 #include "i915_request.h" 44 42 #include "i915_selftest.h" 43 + #include "i915_timeline.h" 45 44 46 45 #define I915_GTT_PAGE_SIZE_4K BIT(12) 47 46 #define I915_GTT_PAGE_SIZE_64K BIT(16) ··· 256 257 257 258 struct i915_address_space { 258 259 struct drm_mm mm; 259 - struct i915_gem_timeline timeline; 260 260 struct drm_i915_private *i915; 261 261 struct device *dma; 262 262 /* Every address space belongs to a struct file - except for the global ··· 342 344 void (*clear_pages)(struct i915_vma *vma); 343 345 344 346 I915_SELFTEST_DECLARE(struct fault_attr fault_attr); 347 + I915_SELFTEST_DECLARE(bool scrub_64K); 345 348 }; 346 349 347 350 #define i915_is_ggtt(V) (!(V)->file)
+6 -9
drivers/gpu/drm/i915/i915_gem_stolen.c
··· 51 51 if (!drm_mm_initialized(&dev_priv->mm.stolen)) 52 52 return -ENODEV; 53 53 54 + /* WaSkipStolenMemoryFirstPage:bdw+ */ 55 + if (INTEL_GEN(dev_priv) >= 8 && start < 4096) 56 + start = 4096; 57 + 54 58 mutex_lock(&dev_priv->mm.stolen_lock); 55 59 ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, 56 60 size, alignment, 0, ··· 347 343 { 348 344 resource_size_t reserved_base, stolen_top; 349 345 resource_size_t reserved_total, reserved_size; 350 - resource_size_t stolen_usable_start; 351 346 352 347 mutex_init(&dev_priv->mm.stolen_lock); 353 348 ··· 438 435 (u64)resource_size(&dev_priv->dsm) >> 10, 439 436 ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); 440 437 441 - stolen_usable_start = 0; 442 - /* WaSkipStolenMemoryFirstPage:bdw+ */ 443 - if (INTEL_GEN(dev_priv) >= 8) 444 - stolen_usable_start = 4096; 445 - 446 438 dev_priv->stolen_usable_size = 447 - resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; 439 + resource_size(&dev_priv->dsm) - reserved_total; 448 440 449 441 /* Basic memrange allocator for stolen space. */ 450 - drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, 451 - dev_priv->stolen_usable_size); 442 + drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); 452 443 453 444 return 0; 454 445 }
-154
drivers/gpu/drm/i915/i915_gem_timeline.c
··· 1 - /* 2 - * Copyright © 2016 Intel Corporation 3 - * 4 - * Permission is hereby granted, free of charge, to any person obtaining a 5 - * copy of this software and associated documentation files (the "Software"), 6 - * to deal in the Software without restriction, including without limitation 7 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 - * and/or sell copies of the Software, and to permit persons to whom the 9 - * Software is furnished to do so, subject to the following conditions: 10 - * 11 - * The above copyright notice and this permission notice (including the next 12 - * paragraph) shall be included in all copies or substantial portions of the 13 - * Software. 14 - * 15 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 - * IN THE SOFTWARE. 
22 - * 23 - */ 24 - 25 - #include "i915_drv.h" 26 - #include "i915_syncmap.h" 27 - 28 - static void __intel_timeline_init(struct intel_timeline *tl, 29 - struct i915_gem_timeline *parent, 30 - u64 context, 31 - struct lock_class_key *lockclass, 32 - const char *lockname) 33 - { 34 - tl->fence_context = context; 35 - tl->common = parent; 36 - spin_lock_init(&tl->lock); 37 - lockdep_set_class_and_name(&tl->lock, lockclass, lockname); 38 - init_request_active(&tl->last_request, NULL); 39 - INIT_LIST_HEAD(&tl->requests); 40 - i915_syncmap_init(&tl->sync); 41 - } 42 - 43 - static void __intel_timeline_fini(struct intel_timeline *tl) 44 - { 45 - GEM_BUG_ON(!list_empty(&tl->requests)); 46 - 47 - i915_syncmap_free(&tl->sync); 48 - } 49 - 50 - static int __i915_gem_timeline_init(struct drm_i915_private *i915, 51 - struct i915_gem_timeline *timeline, 52 - const char *name, 53 - struct lock_class_key *lockclass, 54 - const char *lockname) 55 - { 56 - unsigned int i; 57 - u64 fences; 58 - 59 - lockdep_assert_held(&i915->drm.struct_mutex); 60 - 61 - /* 62 - * Ideally we want a set of engines on a single leaf as we expect 63 - * to mostly be tracking synchronisation between engines. It is not 64 - * a huge issue if this is not the case, but we may want to mitigate 65 - * any page crossing penalties if they become an issue. 
66 - */ 67 - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); 68 - 69 - timeline->i915 = i915; 70 - timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); 71 - if (!timeline->name) 72 - return -ENOMEM; 73 - 74 - list_add(&timeline->link, &i915->gt.timelines); 75 - 76 - /* Called during early_init before we know how many engines there are */ 77 - fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); 78 - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) 79 - __intel_timeline_init(&timeline->engine[i], 80 - timeline, fences++, 81 - lockclass, lockname); 82 - 83 - return 0; 84 - } 85 - 86 - int i915_gem_timeline_init(struct drm_i915_private *i915, 87 - struct i915_gem_timeline *timeline, 88 - const char *name) 89 - { 90 - static struct lock_class_key class; 91 - 92 - return __i915_gem_timeline_init(i915, timeline, name, 93 - &class, "&timeline->lock"); 94 - } 95 - 96 - int i915_gem_timeline_init__global(struct drm_i915_private *i915) 97 - { 98 - static struct lock_class_key class; 99 - 100 - return __i915_gem_timeline_init(i915, 101 - &i915->gt.global_timeline, 102 - "[execution]", 103 - &class, "&global_timeline->lock"); 104 - } 105 - 106 - /** 107 - * i915_gem_timelines_park - called when the driver idles 108 - * @i915: the drm_i915_private device 109 - * 110 - * When the driver is completely idle, we know that all of our sync points 111 - * have been signaled and our tracking is then entirely redundant. Any request 112 - * to wait upon an older sync point will be completed instantly as we know 113 - * the fence is signaled and therefore we will not even look them up in the 114 - * sync point map. 
115 - */ 116 - void i915_gem_timelines_park(struct drm_i915_private *i915) 117 - { 118 - struct i915_gem_timeline *timeline; 119 - int i; 120 - 121 - lockdep_assert_held(&i915->drm.struct_mutex); 122 - 123 - list_for_each_entry(timeline, &i915->gt.timelines, link) { 124 - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { 125 - struct intel_timeline *tl = &timeline->engine[i]; 126 - 127 - /* 128 - * All known fences are completed so we can scrap 129 - * the current sync point tracking and start afresh, 130 - * any attempt to wait upon a previous sync point 131 - * will be skipped as the fence was signaled. 132 - */ 133 - i915_syncmap_free(&tl->sync); 134 - } 135 - } 136 - } 137 - 138 - void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) 139 - { 140 - int i; 141 - 142 - lockdep_assert_held(&timeline->i915->drm.struct_mutex); 143 - 144 - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) 145 - __intel_timeline_fini(&timeline->engine[i]); 146 - 147 - list_del(&timeline->link); 148 - kfree(timeline->name); 149 - } 150 - 151 - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 152 - #include "selftests/mock_timeline.c" 153 - #include "selftests/i915_gem_timeline.c" 154 - #endif
+37 -34
drivers/gpu/drm/i915/i915_gem_timeline.h drivers/gpu/drm/i915/i915_timeline.h
··· 22 22 * 23 23 */ 24 24 25 - #ifndef I915_GEM_TIMELINE_H 26 - #define I915_GEM_TIMELINE_H 25 + #ifndef I915_TIMELINE_H 26 + #define I915_TIMELINE_H 27 27 28 28 #include <linux/list.h> 29 + #include <linux/kref.h> 29 30 30 31 #include "i915_request.h" 31 32 #include "i915_syncmap.h" 32 33 #include "i915_utils.h" 33 34 34 - struct i915_gem_timeline; 35 - 36 - struct intel_timeline { 35 + struct i915_timeline { 37 36 u64 fence_context; 38 37 u32 seqno; 39 - 40 - /** 41 - * Count of outstanding requests, from the time they are constructed 42 - * to the moment they are retired. Loosely coupled to hardware. 43 - */ 44 - u32 inflight_seqnos; 45 38 46 39 spinlock_t lock; 47 40 ··· 70 77 */ 71 78 u32 global_sync[I915_NUM_ENGINES]; 72 79 73 - struct i915_gem_timeline *common; 74 - }; 75 - 76 - struct i915_gem_timeline { 77 80 struct list_head link; 78 - 79 - struct drm_i915_private *i915; 80 81 const char *name; 81 82 82 - struct intel_timeline engine[I915_NUM_ENGINES]; 83 + struct kref kref; 83 84 }; 84 85 85 - int i915_gem_timeline_init(struct drm_i915_private *i915, 86 - struct i915_gem_timeline *tl, 87 - const char *name); 88 - int i915_gem_timeline_init__global(struct drm_i915_private *i915); 89 - void i915_gem_timelines_park(struct drm_i915_private *i915); 90 - void i915_gem_timeline_fini(struct i915_gem_timeline *tl); 86 + void i915_timeline_init(struct drm_i915_private *i915, 87 + struct i915_timeline *tl, 88 + const char *name); 89 + void i915_timeline_fini(struct i915_timeline *tl); 91 90 92 - static inline int __intel_timeline_sync_set(struct intel_timeline *tl, 93 - u64 context, u32 seqno) 91 + struct i915_timeline * 92 + i915_timeline_create(struct drm_i915_private *i915, const char *name); 93 + 94 + static inline struct i915_timeline * 95 + i915_timeline_get(struct i915_timeline *timeline) 96 + { 97 + kref_get(&timeline->kref); 98 + return timeline; 99 + } 100 + 101 + void __i915_timeline_free(struct kref *kref); 102 + static inline void 
i915_timeline_put(struct i915_timeline *timeline) 103 + { 104 + kref_put(&timeline->kref, __i915_timeline_free); 105 + } 106 + 107 + static inline int __i915_timeline_sync_set(struct i915_timeline *tl, 108 + u64 context, u32 seqno) 94 109 { 95 110 return i915_syncmap_set(&tl->sync, context, seqno); 96 111 } 97 112 98 - static inline int intel_timeline_sync_set(struct intel_timeline *tl, 99 - const struct dma_fence *fence) 113 + static inline int i915_timeline_sync_set(struct i915_timeline *tl, 114 + const struct dma_fence *fence) 100 115 { 101 - return __intel_timeline_sync_set(tl, fence->context, fence->seqno); 116 + return __i915_timeline_sync_set(tl, fence->context, fence->seqno); 102 117 } 103 118 104 - static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, 105 - u64 context, u32 seqno) 119 + static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, 120 + u64 context, u32 seqno) 106 121 { 107 122 return i915_syncmap_is_later(&tl->sync, context, seqno); 108 123 } 109 124 110 - static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, 111 - const struct dma_fence *fence) 125 + static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, 126 + const struct dma_fence *fence) 112 127 { 113 - return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); 128 + return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); 114 129 } 130 + 131 + void i915_timelines_park(struct drm_i915_private *i915); 115 132 116 133 #endif
+3
drivers/gpu/drm/i915/i915_gem_userptr.c
··· 778 778 I915_USERPTR_UNSYNCHRONIZED)) 779 779 return -EINVAL; 780 780 781 + if (!args->user_size) 782 + return -EINVAL; 783 + 781 784 if (offset_in_page(args->user_ptr | args->user_size)) 782 785 return -EINVAL; 783 786
+48 -17
drivers/gpu/drm/i915/i915_gpu_error.c
··· 404 404 405 405 static void error_print_request(struct drm_i915_error_state_buf *m, 406 406 const char *prefix, 407 - const struct drm_i915_error_request *erq) 407 + const struct drm_i915_error_request *erq, 408 + const unsigned long epoch) 408 409 { 409 410 if (!erq->seqno) 410 411 return; 411 412 412 - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", 413 + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", 413 414 prefix, erq->pid, erq->ban_score, 414 - erq->context, erq->seqno, erq->priority, 415 - jiffies_to_msecs(jiffies - erq->jiffies), 416 - erq->head, erq->tail); 415 + erq->context, erq->seqno, erq->sched_attr.priority, 416 + jiffies_to_msecs(erq->jiffies - epoch), 417 + erq->start, erq->head, erq->tail); 417 418 } 418 419 419 420 static void error_print_context(struct drm_i915_error_state_buf *m, ··· 423 422 { 424 423 err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", 425 424 header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, 426 - ctx->priority, ctx->ban_score, bannable(ctx), 425 + ctx->sched_attr.priority, ctx->ban_score, bannable(ctx), 427 426 ctx->guilty, ctx->active); 428 427 } 429 428 430 429 static void error_print_engine(struct drm_i915_error_state_buf *m, 431 - const struct drm_i915_error_engine *ee) 430 + const struct drm_i915_error_engine *ee, 431 + const unsigned long epoch) 432 432 { 433 433 int n; 434 434 ··· 499 497 err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); 500 498 err_printf(m, " hangcheck action: %s\n", 501 499 hangcheck_action_to_str(ee->hangcheck_action)); 502 - err_printf(m, " hangcheck action timestamp: %lu, %u ms ago\n", 500 + err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", 501 + jiffies_to_msecs(ee->hangcheck_timestamp - epoch), 503 502 ee->hangcheck_timestamp, 504 - jiffies_to_msecs(jiffies - 
ee->hangcheck_timestamp)); 503 + ee->hangcheck_timestamp == epoch ? "; epoch" : ""); 505 504 err_printf(m, " engine reset count: %u\n", ee->reset_count); 506 505 507 506 for (n = 0; n < ee->num_ports; n++) { 508 507 err_printf(m, " ELSP[%d]:", n); 509 - error_print_request(m, " ", &ee->execlist[n]); 508 + error_print_request(m, " ", &ee->execlist[n], epoch); 510 509 } 511 510 512 511 error_print_context(m, " Active context: ", &ee->context); ··· 653 650 ts = ktime_to_timespec64(error->uptime); 654 651 err_printf(m, "Uptime: %lld s %ld us\n", 655 652 (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); 653 + err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); 654 + err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", 655 + error->capture, 656 + jiffies_to_msecs(jiffies - error->capture), 657 + jiffies_to_msecs(error->capture - error->epoch)); 656 658 657 659 for (i = 0; i < ARRAY_SIZE(error->engine); i++) { 658 660 if (error->engine[i].hangcheck_stalled && ··· 718 710 719 711 for (i = 0; i < ARRAY_SIZE(error->engine); i++) { 720 712 if (error->engine[i].engine_id != -1) 721 - error_print_engine(m, &error->engine[i]); 713 + error_print_engine(m, &error->engine[i], error->epoch); 722 714 } 723 715 724 716 for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { ··· 777 769 dev_priv->engine[i]->name, 778 770 ee->num_requests); 779 771 for (j = 0; j < ee->num_requests; j++) 780 - error_print_request(m, " ", &ee->requests[j]); 772 + error_print_request(m, " ", 773 + &ee->requests[j], 774 + error->epoch); 781 775 } 782 776 783 777 if (IS_ERR(ee->waiters)) { ··· 1288 1278 struct drm_i915_error_request *erq) 1289 1279 { 1290 1280 erq->context = request->ctx->hw_id; 1291 - erq->priority = request->priotree.priority; 1281 + erq->sched_attr = request->sched.attr; 1292 1282 erq->ban_score = atomic_read(&request->ctx->ban_score); 1293 1283 erq->seqno = request->global_seqno; 1294 1284 erq->jiffies = request->emitted_jiffies; 1285 + erq->start = 
i915_ggtt_offset(request->ring->vma); 1295 1286 erq->head = request->head; 1296 1287 erq->tail = request->tail; 1297 1288 ··· 1310 1299 1311 1300 count = 0; 1312 1301 request = first; 1313 - list_for_each_entry_from(request, &engine->timeline->requests, link) 1302 + list_for_each_entry_from(request, &engine->timeline.requests, link) 1314 1303 count++; 1315 1304 if (!count) 1316 1305 return; ··· 1323 1312 1324 1313 count = 0; 1325 1314 request = first; 1326 - list_for_each_entry_from(request, &engine->timeline->requests, link) { 1315 + list_for_each_entry_from(request, &engine->timeline.requests, link) { 1327 1316 if (count >= ee->num_requests) { 1328 1317 /* 1329 1318 * If the ring request list was changed in ··· 1383 1372 1384 1373 e->handle = ctx->user_handle; 1385 1374 e->hw_id = ctx->hw_id; 1386 - e->priority = ctx->priority; 1375 + e->sched_attr = ctx->sched; 1387 1376 e->ban_score = atomic_read(&ctx->ban_score); 1388 1377 e->bannable = i915_gem_context_is_bannable(ctx); 1389 1378 e->guilty = atomic_read(&ctx->guilty_count); ··· 1483 1472 1484 1473 ee->ctx = 1485 1474 i915_error_object_create(i915, 1486 - request->ctx->engine[i].state); 1475 + to_intel_context(request->ctx, 1476 + engine)->state); 1487 1477 1488 1478 error->simulated |= 1489 1479 i915_gem_context_no_error_capture(request->ctx); ··· 1747 1735 #undef DUP 1748 1736 } 1749 1737 1738 + static unsigned long capture_find_epoch(const struct i915_gpu_state *error) 1739 + { 1740 + unsigned long epoch = error->capture; 1741 + int i; 1742 + 1743 + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { 1744 + const struct drm_i915_error_engine *ee = &error->engine[i]; 1745 + 1746 + if (ee->hangcheck_stalled && 1747 + time_before(ee->hangcheck_timestamp, epoch)) 1748 + epoch = ee->hangcheck_timestamp; 1749 + } 1750 + 1751 + return epoch; 1752 + } 1753 + 1750 1754 static int capture(void *data) 1751 1755 { 1752 1756 struct i915_gpu_state *error = data; ··· 1771 1743 error->boottime = ktime_get_boottime(); 1772 
1744 error->uptime = ktime_sub(ktime_get(), 1773 1745 error->i915->gt.last_init_time); 1746 + error->capture = jiffies; 1774 1747 1775 1748 capture_params(error); 1776 1749 capture_gen_state(error); ··· 1784 1755 1785 1756 error->overlay = intel_overlay_capture_error_state(error->i915); 1786 1757 error->display = intel_display_capture_error_state(error->i915); 1758 + 1759 + error->epoch = capture_find_epoch(error); 1787 1760 1788 1761 return 0; 1789 1762 }
+6 -2
drivers/gpu/drm/i915/i915_gpu_error.h
··· 20 20 #include "i915_gem.h" 21 21 #include "i915_gem_gtt.h" 22 22 #include "i915_params.h" 23 + #include "i915_scheduler.h" 23 24 24 25 struct drm_i915_private; 25 26 struct intel_overlay_error_state; ··· 31 30 ktime_t time; 32 31 ktime_t boottime; 33 32 ktime_t uptime; 33 + unsigned long capture; 34 + unsigned long epoch; 34 35 35 36 struct drm_i915_private *i915; 36 37 ··· 125 122 pid_t pid; 126 123 u32 handle; 127 124 u32 hw_id; 128 - int priority; 129 125 int ban_score; 130 126 int active; 131 127 int guilty; 132 128 bool bannable; 129 + struct i915_sched_attr sched_attr; 133 130 } context; 134 131 135 132 struct drm_i915_error_object { ··· 150 147 long jiffies; 151 148 pid_t pid; 152 149 u32 context; 153 - int priority; 154 150 int ban_score; 155 151 u32 seqno; 152 + u32 start; 156 153 u32 head; 157 154 u32 tail; 155 + struct i915_sched_attr sched_attr; 158 156 } *requests, execlist[EXECLIST_MAX_PORTS]; 159 157 unsigned int num_ports; 160 158
+44 -6
drivers/gpu/drm/i915/i915_irq.c
··· 247 247 gen11_gt_engine_identity(struct drm_i915_private * const i915, 248 248 const unsigned int bank, const unsigned int bit); 249 249 250 - static bool gen11_reset_one_iir(struct drm_i915_private * const i915, 251 - const unsigned int bank, 252 - const unsigned int bit) 250 + bool gen11_reset_one_iir(struct drm_i915_private * const i915, 251 + const unsigned int bank, 252 + const unsigned int bit) 253 253 { 254 254 void __iomem * const regs = i915->regs; 255 255 u32 dw; ··· 2464 2464 if (de_iir & DE_ERR_INT_IVB) 2465 2465 ivb_err_int_handler(dev_priv); 2466 2466 2467 + if (de_iir & DE_EDP_PSR_INT_HSW) { 2468 + u32 psr_iir = I915_READ(EDP_PSR_IIR); 2469 + 2470 + intel_psr_irq_handler(dev_priv, psr_iir); 2471 + I915_WRITE(EDP_PSR_IIR, psr_iir); 2472 + } 2473 + 2467 2474 if (de_iir & DE_AUX_CHANNEL_A_IVB) 2468 2475 dp_aux_irq_handler(dev_priv); 2469 2476 ··· 2600 2593 if (master_ctl & GEN8_DE_MISC_IRQ) { 2601 2594 iir = I915_READ(GEN8_DE_MISC_IIR); 2602 2595 if (iir) { 2596 + bool found = false; 2597 + 2603 2598 I915_WRITE(GEN8_DE_MISC_IIR, iir); 2604 2599 ret = IRQ_HANDLED; 2605 - if (iir & GEN8_DE_MISC_GSE) 2600 + 2601 + if (iir & GEN8_DE_MISC_GSE) { 2606 2602 intel_opregion_asle_intr(dev_priv); 2607 - else 2603 + found = true; 2604 + } 2605 + 2606 + if (iir & GEN8_DE_EDP_PSR) { 2607 + u32 psr_iir = I915_READ(EDP_PSR_IIR); 2608 + 2609 + intel_psr_irq_handler(dev_priv, psr_iir); 2610 + I915_WRITE(EDP_PSR_IIR, psr_iir); 2611 + found = true; 2612 + } 2613 + 2614 + if (!found) 2608 2615 DRM_ERROR("Unexpected DE Misc interrupt\n"); 2609 2616 } 2610 2617 else ··· 3369 3348 if (IS_GEN7(dev_priv)) 3370 3349 I915_WRITE(GEN7_ERR_INT, 0xffffffff); 3371 3350 3351 + if (IS_HASWELL(dev_priv)) { 3352 + I915_WRITE(EDP_PSR_IMR, 0xffffffff); 3353 + I915_WRITE(EDP_PSR_IIR, 0xffffffff); 3354 + } 3355 + 3372 3356 gen5_gt_irq_reset(dev_priv); 3373 3357 3374 3358 ibx_irq_reset(dev_priv); ··· 3411 3385 POSTING_READ(GEN8_MASTER_IRQ); 3412 3386 3413 3387 gen8_gt_irq_reset(dev_priv); 
3388 + 3389 + I915_WRITE(EDP_PSR_IMR, 0xffffffff); 3390 + I915_WRITE(EDP_PSR_IIR, 0xffffffff); 3414 3391 3415 3392 for_each_pipe(dev_priv, pipe) 3416 3393 if (intel_display_power_is_enabled(dev_priv, ··· 3791 3762 DE_DP_A_HOTPLUG); 3792 3763 } 3793 3764 3765 + if (IS_HASWELL(dev_priv)) { 3766 + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); 3767 + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); 3768 + display_mask |= DE_EDP_PSR_INT_HSW; 3769 + } 3770 + 3794 3771 dev_priv->irq_mask = ~display_mask; 3795 3772 3796 3773 ibx_irq_pre_postinstall(dev); ··· 3907 3872 uint32_t de_pipe_enables; 3908 3873 u32 de_port_masked = GEN8_AUX_CHANNEL_A; 3909 3874 u32 de_port_enables; 3910 - u32 de_misc_masked = GEN8_DE_MISC_GSE; 3875 + u32 de_misc_masked = GEN8_DE_MISC_GSE | GEN8_DE_EDP_PSR; 3911 3876 enum pipe pipe; 3912 3877 3913 3878 if (INTEL_GEN(dev_priv) >= 9) { ··· 3931 3896 de_port_enables |= BXT_DE_PORT_HOTPLUG_MASK; 3932 3897 else if (IS_BROADWELL(dev_priv)) 3933 3898 de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; 3899 + 3900 + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); 3901 + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); 3934 3902 3935 3903 for_each_pipe(dev_priv, pipe) { 3936 3904 dev_priv->de_irq_mask[pipe] = ~de_pipe_masked;
+3
drivers/gpu/drm/i915/i915_params.c
··· 164 164 i915_param_named_unsafe(huc_firmware_path, charp, 0400, 165 165 "HuC firmware path to use instead of the default one"); 166 166 167 + i915_param_named_unsafe(dmc_firmware_path, charp, 0400, 168 + "DMC firmware path to use instead of the default one"); 169 + 167 170 i915_param_named_unsafe(enable_dp_mst, bool, 0600, 168 171 "Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)"); 169 172
+1
drivers/gpu/drm/i915/i915_params.h
··· 51 51 param(int, guc_log_level, -1) \ 52 52 param(char *, guc_firmware_path, NULL) \ 53 53 param(char *, huc_firmware_path, NULL) \ 54 + param(char *, dmc_firmware_path, NULL) \ 54 55 param(int, mmio_debug, 0) \ 55 56 param(int, edp_vswing, 0) \ 56 57 param(int, reset, 2) \
+16 -11
drivers/gpu/drm/i915/i915_perf.c
··· 1234 1234 * 1235 1235 * NB: implied RCS engine... 1236 1236 */ 1237 - ring = engine->context_pin(engine, stream->ctx); 1237 + ring = intel_context_pin(stream->ctx, engine); 1238 1238 mutex_unlock(&dev_priv->drm.struct_mutex); 1239 1239 if (IS_ERR(ring)) 1240 1240 return PTR_ERR(ring); ··· 1246 1246 * with gen8+ and execlists 1247 1247 */ 1248 1248 dev_priv->perf.oa.specific_ctx_id = 1249 - i915_ggtt_offset(stream->ctx->engine[engine->id].state); 1249 + i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state); 1250 1250 } 1251 1251 1252 1252 return 0; ··· 1271 1271 mutex_lock(&dev_priv->drm.struct_mutex); 1272 1272 1273 1273 dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; 1274 - engine->context_unpin(engine, stream->ctx); 1274 + intel_context_unpin(stream->ctx, engine); 1275 1275 1276 1276 mutex_unlock(&dev_priv->drm.struct_mutex); 1277 1277 } ··· 1695 1695 const struct i915_oa_config *oa_config) 1696 1696 { 1697 1697 struct intel_engine_cs *engine = dev_priv->engine[RCS]; 1698 - struct i915_gem_timeline *timeline; 1698 + struct i915_timeline *timeline; 1699 1699 struct i915_request *rq; 1700 1700 int ret; 1701 1701 ··· 1716 1716 /* Queue this switch after all other activity */ 1717 1717 list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { 1718 1718 struct i915_request *prev; 1719 - struct intel_timeline *tl; 1720 1719 1721 - tl = &timeline->engine[engine->id]; 1722 - prev = i915_gem_active_raw(&tl->last_request, 1720 + prev = i915_gem_active_raw(&timeline->last_request, 1723 1721 &dev_priv->drm.struct_mutex); 1724 1722 if (prev) 1725 - i915_sw_fence_await_sw_fence_gfp(&rq->submit, 1726 - &prev->submit, 1727 - GFP_KERNEL); 1723 + i915_request_await_dma_fence(rq, &prev->fence); 1728 1724 } 1729 1725 1730 1726 i915_request_add(rq); ··· 1755 1759 static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, 1756 1760 const struct i915_oa_config *oa_config) 1757 1761 { 1762 + struct intel_engine_cs *engine = dev_priv->engine[RCS]; 
1758 1763 struct i915_gem_context *ctx; 1759 1764 int ret; 1760 1765 unsigned int wait_flags = I915_WAIT_LOCKED; ··· 1786 1789 1787 1790 /* Update all contexts now that we've stalled the submission. */ 1788 1791 list_for_each_entry(ctx, &dev_priv->contexts.list, link) { 1789 - struct intel_context *ce = &ctx->engine[RCS]; 1792 + struct intel_context *ce = to_intel_context(ctx, engine); 1790 1793 u32 *regs; 1791 1794 1792 1795 /* OA settings will be set upon first use */ ··· 1960 1963 static void gen7_oa_disable(struct drm_i915_private *dev_priv) 1961 1964 { 1962 1965 I915_WRITE(GEN7_OACONTROL, 0); 1966 + if (intel_wait_for_register(dev_priv, 1967 + GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, 1968 + 50)) 1969 + DRM_ERROR("wait for OA to be disabled timed out\n"); 1963 1970 } 1964 1971 1965 1972 static void gen8_oa_disable(struct drm_i915_private *dev_priv) 1966 1973 { 1967 1974 I915_WRITE(GEN8_OACONTROL, 0); 1975 + if (intel_wait_for_register(dev_priv, 1976 + GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, 1977 + 50)) 1978 + DRM_ERROR("wait for OA to be disabled timed out\n"); 1968 1979 } 1969 1980 1970 1981 /**
+231 -7
drivers/gpu/drm/i915/i915_reg.h
··· 3840 3840 #define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) 3841 3841 #define SARBUNIT_CLKGATE_DIS (1 << 5) 3842 3842 #define RCCUNIT_CLKGATE_DIS (1 << 7) 3843 + #define MSCUNIT_CLKGATE_DIS (1 << 10) 3843 3844 3844 3845 #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) 3845 3846 #define GWUNIT_CLKGATE_DIS (1 << 16) 3846 3847 3847 3848 #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) 3848 3849 #define VFUNIT_CLKGATE_DIS (1 << 20) 3850 + 3851 + #define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) 3852 + #define CGPSF_CLKGATE_DIS (1 << 3) 3849 3853 3850 3854 /* 3851 3855 * Display engine regs ··· 4030 4026 #define EDP_PSR_TP1_TIME_0us (3<<4) 4031 4027 #define EDP_PSR_IDLE_FRAME_SHIFT 0 4032 4028 4029 + /* Bspec claims those aren't shifted but stay at 0x64800 */ 4030 + #define EDP_PSR_IMR _MMIO(0x64834) 4031 + #define EDP_PSR_IIR _MMIO(0x64838) 4032 + #define EDP_PSR_ERROR(trans) (1 << (((trans) * 8 + 10) & 31)) 4033 + #define EDP_PSR_POST_EXIT(trans) (1 << (((trans) * 8 + 9) & 31)) 4034 + #define EDP_PSR_PRE_ENTRY(trans) (1 << (((trans) * 8 + 8) & 31)) 4035 + 4033 4036 #define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) 4034 4037 #define EDP_PSR_AUX_CTL_TIME_OUT_MASK (3 << 26) 4035 4038 #define EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK (0x1f << 20) ··· 4098 4087 #define EDP_PSR2_FRAME_BEFORE_SU(a) ((a)<<4) 4099 4088 #define EDP_PSR2_IDLE_FRAME_MASK 0xf 4100 4089 #define EDP_PSR2_IDLE_FRAME_SHIFT 0 4090 + 4091 + #define _PSR_EVENT_TRANS_A 0x60848 4092 + #define _PSR_EVENT_TRANS_B 0x61848 4093 + #define _PSR_EVENT_TRANS_C 0x62848 4094 + #define _PSR_EVENT_TRANS_D 0x63848 4095 + #define _PSR_EVENT_TRANS_EDP 0x6F848 4096 + #define PSR_EVENT(trans) _MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A) 4097 + #define PSR_EVENT_PSR2_WD_TIMER_EXPIRE (1 << 17) 4098 + #define PSR_EVENT_PSR2_DISABLED (1 << 16) 4099 + #define PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN (1 << 15) 4100 + #define PSR_EVENT_SU_CRC_FIFO_UNDERRUN (1 << 14) 4101 + #define PSR_EVENT_GRAPHICS_RESET (1 << 12) 4102 + #define 
PSR_EVENT_PCH_INTERRUPT (1 << 11) 4103 + #define PSR_EVENT_MEMORY_UP (1 << 10) 4104 + #define PSR_EVENT_FRONT_BUFFER_MODIFY (1 << 9) 4105 + #define PSR_EVENT_WD_TIMER_EXPIRE (1 << 8) 4106 + #define PSR_EVENT_PIPE_REGISTERS_UPDATE (1 << 6) 4107 + #define PSR_EVENT_REGISTER_UPDATE (1 << 5) 4108 + #define PSR_EVENT_HDCP_ENABLE (1 << 4) 4109 + #define PSR_EVENT_KVMR_SESSION_ENABLE (1 << 3) 4110 + #define PSR_EVENT_VBI_ENABLE (1 << 2) 4111 + #define PSR_EVENT_LPSP_MODE_EXIT (1 << 1) 4112 + #define PSR_EVENT_PSR_DISABLE (1 << 0) 4101 4113 4102 4114 #define EDP_PSR2_STATUS _MMIO(0x6f940) 4103 4115 #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) ··· 6411 6377 #define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ 6412 6378 #define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ 6413 6379 #define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ 6414 - #define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) 6380 + #define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-ICL */ 6415 6381 #define PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE (1 << 28) 6416 - #define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) 6382 + #define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) /* Pre-ICL */ 6417 6383 #define PLANE_COLOR_CSC_MODE_BYPASS (0 << 17) 6418 6384 #define PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709 (1 << 17) 6419 6385 #define PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709 (2 << 17) ··· 6508 6474 6509 6475 #define _PLANE_BUF_CFG_1_B 0x7127c 6510 6476 #define _PLANE_BUF_CFG_2_B 0x7137c 6477 + #define SKL_DDB_ENTRY_MASK 0x3FF 6478 + #define ICL_DDB_ENTRY_MASK 0x7FF 6479 + #define DDB_ENTRY_END_SHIFT 16 6511 6480 #define _PLANE_BUF_CFG_1(pipe) \ 6512 6481 _PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B) 6513 6482 #define _PLANE_BUF_CFG_2(pipe) \ ··· 6874 6837 #define DE_PCH_EVENT_IVB (1<<28) 6875 6838 #define DE_DP_A_HOTPLUG_IVB (1<<27) 6876 6839 #define DE_AUX_CHANNEL_A_IVB (1<<26) 6840 + #define DE_EDP_PSR_INT_HSW (1<<19) 6877 6841 #define DE_SPRITEC_FLIP_DONE_IVB (1<<14) 6878 6842 #define DE_PLANEC_FLIP_DONE_IVB (1<<13) 6879 6843 #define 
DE_PIPEC_VBLANK_IVB (1<<10) ··· 6999 6961 #define GEN8_DE_MISC_IIR _MMIO(0x44468) 7000 6962 #define GEN8_DE_MISC_IER _MMIO(0x4446c) 7001 6963 #define GEN8_DE_MISC_GSE (1 << 27) 6964 + #define GEN8_DE_EDP_PSR (1 << 19) 7002 6965 7003 6966 #define GEN8_PCU_ISR _MMIO(0x444e0) 7004 6967 #define GEN8_PCU_IMR _MMIO(0x444e4) ··· 7230 7191 #define GEN7_L3CNTLREG3 _MMIO(0xB024) 7231 7192 7232 7193 #define GEN7_L3_CHICKEN_MODE_REGISTER _MMIO(0xB030) 7233 - #define GEN7_WA_L3_CHICKEN_MODE 0x20000000 7194 + #define GEN7_WA_L3_CHICKEN_MODE 0x20000000 7195 + #define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xB114) 7196 + #define GEN11_I2M_WRITE_DISABLE (1 << 28) 7234 7197 7235 7198 #define GEN7_L3SQCREG4 _MMIO(0xb034) 7236 7199 #define L3SQ_URB_READ_CAM_MATCH_DISABLE (1<<27) 7237 7200 7238 7201 #define GEN8_L3SQCREG4 _MMIO(0xb118) 7239 - #define GEN8_LQSC_RO_PERF_DIS (1<<27) 7240 - #define GEN8_LQSC_FLUSH_COHERENT_LINES (1<<21) 7202 + #define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) 7203 + #define GEN8_LQSC_RO_PERF_DIS (1 << 27) 7204 + #define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) 7241 7205 7242 7206 /* GEN8 chicken */ 7243 7207 #define HDC_CHICKEN0 _MMIO(0x7300) 7244 7208 #define CNL_HDC_CHICKEN0 _MMIO(0xE5F0) 7209 + #define ICL_HDC_MODE _MMIO(0xE5F4) 7245 7210 #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15) 7246 7211 #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) 7247 7212 #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) ··· 7258 7215 /* GEN9 chicken */ 7259 7216 #define SLICE_ECO_CHICKEN0 _MMIO(0x7308) 7260 7217 #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) 7218 + 7219 + #define GEN9_WM_CHICKEN3 _MMIO(0x5588) 7220 + #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) 7261 7221 7262 7222 /* WaCatErrorRejectionIssue */ 7263 7223 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030) ··· 8260 8214 #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1<<4) 8261 8215 #define GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE (1<<6) 8262 8216 8263 - #define GEN8_GARBCNTL _MMIO(0xB004) 8264 - #define 
GEN9_GAPS_TSV_CREDIT_DISABLE (1<<7) 8217 + #define GEN8_GARBCNTL _MMIO(0xB004) 8218 + #define GEN9_GAPS_TSV_CREDIT_DISABLE (1 << 7) 8219 + #define GEN11_ARBITRATION_PRIO_ORDER_MASK (0x3f << 22) 8220 + #define GEN11_HASH_CTRL_EXCL_MASK (0x7f << 0) 8221 + #define GEN11_HASH_CTRL_EXCL_BIT0 (1 << 0) 8222 + 8223 + #define GEN11_GLBLINVL _MMIO(0xB404) 8224 + #define GEN11_BANK_HASH_ADDR_EXCL_MASK (0x7f << 5) 8225 + #define GEN11_BANK_HASH_ADDR_EXCL_BIT0 (1 << 5) 8226 + 8227 + #define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550) 8228 + #define DFR_DISABLE (1 << 9) 8229 + 8230 + #define GEN11_GACB_PERF_CTRL _MMIO(0x4B80) 8231 + #define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0) 8232 + #define GEN11_HASH_CTRL_BIT0 (1 << 0) 8233 + #define GEN11_HASH_CTRL_BIT4 (1 << 12) 8234 + 8235 + #define GEN11_LSN_UNSLCVC _MMIO(0xB43C) 8236 + #define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9) 8237 + #define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) 8238 + 8239 + #define GAMW_ECO_DEV_RW_IA_REG _MMIO(0x4080) 8240 + #define GAMW_ECO_DEV_CTX_RELOAD_DISABLE (1 << 7) 8265 8241 8266 8242 /* IVYBRIDGE DPF */ 8267 8243 #define GEN7_L3CDERRST1(slice) _MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */ ··· 8792 8724 #define PORT_CLK_SEL_NONE (7<<29) 8793 8725 #define PORT_CLK_SEL_MASK (7<<29) 8794 8726 8727 + /* On ICL+ this is the same as PORT_CLK_SEL, but all bits change. */ 8728 + #define DDI_CLK_SEL(port) PORT_CLK_SEL(port) 8729 + #define DDI_CLK_SEL_NONE (0x0 << 28) 8730 + #define DDI_CLK_SEL_MG (0x8 << 28) 8731 + #define DDI_CLK_SEL_MASK (0xF << 28) 8732 + 8795 8733 /* Transcoder clock selection */ 8796 8734 #define _TRANS_CLK_SEL_A 0x46140 8797 8735 #define _TRANS_CLK_SEL_B 0x46144 ··· 8928 8854 * CNL Clocks 8929 8855 */ 8930 8856 #define DPCLKA_CFGCR0 _MMIO(0x6C200) 8857 + #define DPCLKA_CFGCR0_ICL _MMIO(0x164280) 8931 8858 #define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 
23 : \ 8932 8859 (port)+10)) 8933 8860 #define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ ··· 8945 8870 #define PLL_POWER_STATE (1 << 26) 8946 8871 #define CNL_DPLL_ENABLE(pll) _MMIO_PLL(pll, DPLL0_ENABLE, DPLL1_ENABLE) 8947 8872 8873 + #define _MG_PLL1_ENABLE 0x46030 8874 + #define _MG_PLL2_ENABLE 0x46034 8875 + #define _MG_PLL3_ENABLE 0x46038 8876 + #define _MG_PLL4_ENABLE 0x4603C 8877 + /* Bits are the same as DPLL0_ENABLE */ 8878 + #define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ 8879 + _MG_PLL2_ENABLE) 8880 + 8881 + #define _MG_REFCLKIN_CTL_PORT1 0x16892C 8882 + #define _MG_REFCLKIN_CTL_PORT2 0x16992C 8883 + #define _MG_REFCLKIN_CTL_PORT3 0x16A92C 8884 + #define _MG_REFCLKIN_CTL_PORT4 0x16B92C 8885 + #define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) 8886 + #define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ 8887 + _MG_REFCLKIN_CTL_PORT1, \ 8888 + _MG_REFCLKIN_CTL_PORT2) 8889 + 8890 + #define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 8891 + #define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 8892 + #define _MG_CLKTOP2_CORECLKCTL1_PORT3 0x16A8D8 8893 + #define _MG_CLKTOP2_CORECLKCTL1_PORT4 0x16B8D8 8894 + #define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO(x) ((x) << 16) 8895 + #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) 8896 + #define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ 8897 + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ 8898 + _MG_CLKTOP2_CORECLKCTL1_PORT2) 8899 + 8900 + #define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 8901 + #define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 8902 + #define _MG_CLKTOP2_HSCLKCTL_PORT3 0x16A8D4 8903 + #define _MG_CLKTOP2_HSCLKCTL_PORT4 0x16B8D4 8904 + #define MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(x) ((x) << 16) 8905 + #define MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(x) ((x) << 14) 8906 + #define MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(x) ((x) << 12) 8907 + #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) 8908 + #define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ 8909 + 
_MG_CLKTOP2_HSCLKCTL_PORT1, \ 8910 + _MG_CLKTOP2_HSCLKCTL_PORT2) 8911 + 8912 + #define _MG_PLL_DIV0_PORT1 0x168A00 8913 + #define _MG_PLL_DIV0_PORT2 0x169A00 8914 + #define _MG_PLL_DIV0_PORT3 0x16AA00 8915 + #define _MG_PLL_DIV0_PORT4 0x16BA00 8916 + #define MG_PLL_DIV0_FRACNEN_H (1 << 30) 8917 + #define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) 8918 + #define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) 8919 + #define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ 8920 + _MG_PLL_DIV0_PORT2) 8921 + 8922 + #define _MG_PLL_DIV1_PORT1 0x168A04 8923 + #define _MG_PLL_DIV1_PORT2 0x169A04 8924 + #define _MG_PLL_DIV1_PORT3 0x16AA04 8925 + #define _MG_PLL_DIV1_PORT4 0x16BA04 8926 + #define MG_PLL_DIV1_IREF_NDIVRATIO(x) ((x) << 16) 8927 + #define MG_PLL_DIV1_DITHER_DIV_1 (0 << 12) 8928 + #define MG_PLL_DIV1_DITHER_DIV_2 (1 << 12) 8929 + #define MG_PLL_DIV1_DITHER_DIV_4 (2 << 12) 8930 + #define MG_PLL_DIV1_DITHER_DIV_8 (3 << 12) 8931 + #define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) 8932 + #define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) 8933 + #define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ 8934 + _MG_PLL_DIV1_PORT2) 8935 + 8936 + #define _MG_PLL_LF_PORT1 0x168A08 8937 + #define _MG_PLL_LF_PORT2 0x169A08 8938 + #define _MG_PLL_LF_PORT3 0x16AA08 8939 + #define _MG_PLL_LF_PORT4 0x16BA08 8940 + #define MG_PLL_LF_TDCTARGETCNT(x) ((x) << 24) 8941 + #define MG_PLL_LF_AFCCNTSEL_256 (0 << 20) 8942 + #define MG_PLL_LF_AFCCNTSEL_512 (1 << 20) 8943 + #define MG_PLL_LF_GAINCTRL(x) ((x) << 16) 8944 + #define MG_PLL_LF_INT_COEFF(x) ((x) << 8) 8945 + #define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) 8946 + #define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ 8947 + _MG_PLL_LF_PORT2) 8948 + 8949 + #define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C 8950 + #define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C 8951 + #define _MG_PLL_FRAC_LOCK_PORT3 0x16AA0C 8952 + #define _MG_PLL_FRAC_LOCK_PORT4 0x16BA0C 8953 + #define MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 (1 << 18) 8954 + #define 
MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 (1 << 16) 8955 + #define MG_PLL_FRAC_LOCK_LOCKTHRESH(x) ((x) << 11) 8956 + #define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) 8957 + #define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) 8958 + #define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) 8959 + #define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ 8960 + _MG_PLL_FRAC_LOCK_PORT1, \ 8961 + _MG_PLL_FRAC_LOCK_PORT2) 8962 + 8963 + #define _MG_PLL_SSC_PORT1 0x168A10 8964 + #define _MG_PLL_SSC_PORT2 0x169A10 8965 + #define _MG_PLL_SSC_PORT3 0x16AA10 8966 + #define _MG_PLL_SSC_PORT4 0x16BA10 8967 + #define MG_PLL_SSC_EN (1 << 28) 8968 + #define MG_PLL_SSC_TYPE(x) ((x) << 26) 8969 + #define MG_PLL_SSC_STEPLENGTH(x) ((x) << 16) 8970 + #define MG_PLL_SSC_STEPNUM(x) ((x) << 10) 8971 + #define MG_PLL_SSC_FLLEN (1 << 9) 8972 + #define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) 8973 + #define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ 8974 + _MG_PLL_SSC_PORT2) 8975 + 8976 + #define _MG_PLL_BIAS_PORT1 0x168A14 8977 + #define _MG_PLL_BIAS_PORT2 0x169A14 8978 + #define _MG_PLL_BIAS_PORT3 0x16AA14 8979 + #define _MG_PLL_BIAS_PORT4 0x16BA14 8980 + #define MG_PLL_BIAS_BIAS_GB_SEL(x) ((x) << 30) 8981 + #define MG_PLL_BIAS_INIT_DCOAMP(x) ((x) << 24) 8982 + #define MG_PLL_BIAS_BIAS_BONUS(x) ((x) << 16) 8983 + #define MG_PLL_BIAS_BIASCAL_EN (1 << 15) 8984 + #define MG_PLL_BIAS_CTRIM(x) ((x) << 8) 8985 + #define MG_PLL_BIAS_VREF_RDAC(x) ((x) << 5) 8986 + #define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) 8987 + #define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ 8988 + _MG_PLL_BIAS_PORT2) 8989 + 8990 + #define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 8991 + #define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 8992 + #define _MG_PLL_TDC_COLDST_BIAS_PORT3 0x16AA18 8993 + #define _MG_PLL_TDC_COLDST_BIAS_PORT4 0x16BA18 8994 + #define MG_PLL_TDC_COLDST_IREFINT_EN (1 << 27) 8995 + #define MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(x) ((x) << 17) 8996 + #define MG_PLL_TDC_COLDST_COLDSTART 
(1 << 16) 8997 + #define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) 8998 + #define MG_PLL_TDC_TDCSEL(x) ((x) << 0) 8999 + #define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ 9000 + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ 9001 + _MG_PLL_TDC_COLDST_BIAS_PORT2) 9002 + 8948 9003 #define _CNL_DPLL0_CFGCR0 0x6C000 8949 9004 #define _CNL_DPLL1_CFGCR0 0x6C080 8950 9005 #define DPLL_CFGCR0_HDMI_MODE (1 << 30) 8951 9006 #define DPLL_CFGCR0_SSC_ENABLE (1 << 29) 9007 + #define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) 8952 9008 #define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) 8953 9009 #define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) 8954 9010 #define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) ··· 9113 8907 #define DPLL_CFGCR1_PDIV_5 (4 << 2) 9114 8908 #define DPLL_CFGCR1_PDIV_7 (8 << 2) 9115 8909 #define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) 8910 + #define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) 9116 8911 #define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1) 8912 + 8913 + #define _ICL_DPLL0_CFGCR0 0x164000 8914 + #define _ICL_DPLL1_CFGCR0 0x164080 8915 + #define ICL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR0, \ 8916 + _ICL_DPLL1_CFGCR0) 8917 + 8918 + #define _ICL_DPLL0_CFGCR1 0x164004 8919 + #define _ICL_DPLL1_CFGCR1 0x164084 8920 + #define ICL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \ 8921 + _ICL_DPLL1_CFGCR1) 9117 8922 9118 8923 /* BXT display engine PLL */ 9119 8924 #define BXT_DE_PLL_CTL _MMIO(0x6d000) ··· 9897 9680 #define GEN9_MFX1_MOCS(i) _MMIO(0xca00 + (i) * 4) /* Media 1 MOCS registers */ 9898 9681 #define GEN9_VEBOX_MOCS(i) _MMIO(0xcb00 + (i) * 4) /* Video MOCS registers */ 9899 9682 #define GEN9_BLT_MOCS(i) _MMIO(0xcc00 + (i) * 4) /* Blitter MOCS registers */ 9683 + /* Media decoder 2 MOCS registers */ 9684 + #define GEN11_MFX2_MOCS(i) _MMIO(0x10000 + (i) * 4) 9685 + 9686 + #define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) 9687 + #define PMFLUSHDONE_LNICRSDROP (1 << 20) 9688 + #define PMFLUSH_GAPL3UNBLOCK (1 << 21) 9689 + #define PMFLUSHDONE_LNEBLK 
(1 << 22) 9900 9690 9901 9691 /* gamt regs */ 9902 9692 #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
+191 -150
drivers/gpu/drm/i915/i915_request.c
··· 49 49 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 50 50 return "signaled"; 51 51 52 - return to_request(fence)->timeline->common->name; 52 + return to_request(fence)->timeline->name; 53 53 } 54 54 55 55 static bool i915_fence_signaled(struct dma_fence *fence) ··· 125 125 } 126 126 127 127 static void 128 - __i915_priotree_add_dependency(struct i915_priotree *pt, 129 - struct i915_priotree *signal, 130 - struct i915_dependency *dep, 131 - unsigned long flags) 128 + __i915_sched_node_add_dependency(struct i915_sched_node *node, 129 + struct i915_sched_node *signal, 130 + struct i915_dependency *dep, 131 + unsigned long flags) 132 132 { 133 133 INIT_LIST_HEAD(&dep->dfs_link); 134 134 list_add(&dep->wait_link, &signal->waiters_list); 135 - list_add(&dep->signal_link, &pt->signalers_list); 135 + list_add(&dep->signal_link, &node->signalers_list); 136 136 dep->signaler = signal; 137 137 dep->flags = flags; 138 138 } 139 139 140 140 static int 141 - i915_priotree_add_dependency(struct drm_i915_private *i915, 142 - struct i915_priotree *pt, 143 - struct i915_priotree *signal) 141 + i915_sched_node_add_dependency(struct drm_i915_private *i915, 142 + struct i915_sched_node *node, 143 + struct i915_sched_node *signal) 144 144 { 145 145 struct i915_dependency *dep; 146 146 ··· 148 148 if (!dep) 149 149 return -ENOMEM; 150 150 151 - __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); 151 + __i915_sched_node_add_dependency(node, signal, dep, 152 + I915_DEPENDENCY_ALLOC); 152 153 return 0; 153 154 } 154 155 155 156 static void 156 - i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) 157 + i915_sched_node_fini(struct drm_i915_private *i915, 158 + struct i915_sched_node *node) 157 159 { 158 - struct i915_dependency *dep, *next; 160 + struct i915_dependency *dep, *tmp; 159 161 160 - GEM_BUG_ON(!list_empty(&pt->link)); 162 + GEM_BUG_ON(!list_empty(&node->link)); 161 163 162 164 /* 163 165 * Everyone we depended upon (the 
fences we wait to be signaled) ··· 167 165 * However, retirement is run independently on each timeline and 168 166 * so we may be called out-of-order. 169 167 */ 170 - list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { 171 - GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); 168 + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { 169 + GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); 172 170 GEM_BUG_ON(!list_empty(&dep->dfs_link)); 173 171 174 172 list_del(&dep->wait_link); ··· 177 175 } 178 176 179 177 /* Remove ourselves from everyone who depends upon us */ 180 - list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { 181 - GEM_BUG_ON(dep->signaler != pt); 178 + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { 179 + GEM_BUG_ON(dep->signaler != node); 182 180 GEM_BUG_ON(!list_empty(&dep->dfs_link)); 183 181 184 182 list_del(&dep->signal_link); ··· 188 186 } 189 187 190 188 static void 191 - i915_priotree_init(struct i915_priotree *pt) 189 + i915_sched_node_init(struct i915_sched_node *node) 192 190 { 193 - INIT_LIST_HEAD(&pt->signalers_list); 194 - INIT_LIST_HEAD(&pt->waiters_list); 195 - INIT_LIST_HEAD(&pt->link); 196 - pt->priority = I915_PRIORITY_INVALID; 191 + INIT_LIST_HEAD(&node->signalers_list); 192 + INIT_LIST_HEAD(&node->waiters_list); 193 + INIT_LIST_HEAD(&node->link); 194 + node->attr.priority = I915_PRIORITY_INVALID; 197 195 } 198 196 199 197 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) 200 198 { 201 199 struct intel_engine_cs *engine; 200 + struct i915_timeline *timeline; 202 201 enum intel_engine_id id; 203 202 int ret; 204 203 ··· 214 211 215 212 /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ 216 213 for_each_engine(engine, i915, id) { 217 - struct i915_gem_timeline *timeline; 218 - struct intel_timeline *tl = engine->timeline; 219 - 220 214 GEM_TRACE("%s seqno %d (current %d) -> %d\n", 221 215 engine->name, 222 - 
tl->seqno, 216 + engine->timeline.seqno, 223 217 intel_engine_get_seqno(engine), 224 218 seqno); 225 219 226 - if (!i915_seqno_passed(seqno, tl->seqno)) { 220 + if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { 227 221 /* Flush any waiters before we reuse the seqno */ 228 222 intel_engine_disarm_breadcrumbs(engine); 223 + intel_engine_init_hangcheck(engine); 229 224 GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); 230 225 } 231 226 232 227 /* Check we are idle before we fiddle with hw state! */ 233 228 GEM_BUG_ON(!intel_engine_is_idle(engine)); 234 - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); 229 + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); 235 230 236 231 /* Finally reset hw state */ 237 232 intel_engine_init_global_seqno(engine, seqno); 238 - tl->seqno = seqno; 239 - 240 - list_for_each_entry(timeline, &i915->gt.timelines, link) 241 - memset(timeline->engine[id].global_sync, 0, 242 - sizeof(timeline->engine[id].global_sync)); 233 + engine->timeline.seqno = seqno; 243 234 } 235 + 236 + list_for_each_entry(timeline, &i915->gt.timelines, link) 237 + memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); 238 + 239 + i915->gt.request_serial = seqno; 244 240 245 241 return 0; 246 242 } ··· 257 255 return reset_all_global_seqno(i915, seqno - 1); 258 256 } 259 257 260 - static int reserve_engine(struct intel_engine_cs *engine) 258 + static int reserve_gt(struct drm_i915_private *i915) 261 259 { 262 - struct drm_i915_private *i915 = engine->i915; 263 - u32 active = ++engine->timeline->inflight_seqnos; 264 - u32 seqno = engine->timeline->seqno; 265 260 int ret; 266 261 267 - /* Reservation is fine until we need to wrap around */ 268 - if (unlikely(add_overflows(seqno, active))) { 262 + /* 263 + * Reservation is fine until we may need to wrap around 264 + * 265 + * By incrementing the serial for every request, we know that no 266 + * individual engine may exceed that serial (as each is reset to 0 267 + 
* on any wrap). This protects even the most pessimistic of migrations 268 + * of every request from all engines onto just one. 269 + */ 270 + while (unlikely(++i915->gt.request_serial == 0)) { 269 271 ret = reset_all_global_seqno(i915, 0); 270 272 if (ret) { 271 - engine->timeline->inflight_seqnos--; 273 + i915->gt.request_serial--; 272 274 return ret; 273 275 } 274 276 } ··· 283 277 return 0; 284 278 } 285 279 286 - static void unreserve_engine(struct intel_engine_cs *engine) 280 + static void unreserve_gt(struct drm_i915_private *i915) 287 281 { 288 - struct drm_i915_private *i915 = engine->i915; 289 - 282 + GEM_BUG_ON(!i915->gt.active_requests); 290 283 if (!--i915->gt.active_requests) 291 284 i915_gem_park(i915); 292 - 293 - GEM_BUG_ON(!engine->timeline->inflight_seqnos); 294 - engine->timeline->inflight_seqnos--; 295 285 } 296 286 297 287 void i915_gem_retire_noop(struct i915_gem_active *active, ··· 298 296 299 297 static void advance_ring(struct i915_request *request) 300 298 { 299 + struct intel_ring *ring = request->ring; 301 300 unsigned int tail; 302 301 303 302 /* ··· 310 307 * Note this requires that we are always called in request 311 308 * completion order. 312 309 */ 313 - if (list_is_last(&request->ring_link, &request->ring->request_list)) { 310 + GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); 311 + if (list_is_last(&request->ring_link, &ring->request_list)) { 314 312 /* 315 313 * We may race here with execlists resubmitting this request 316 314 * as we retire it. The resubmission will move the ring->tail ··· 321 317 * noops - they are safe to be replayed on a reset. 
322 318 */ 323 319 tail = READ_ONCE(request->tail); 320 + list_del(&ring->active_link); 324 321 } else { 325 322 tail = request->postfix; 326 323 } 327 - list_del(&request->ring_link); 324 + list_del_init(&request->ring_link); 328 325 329 - request->ring->head = tail; 326 + ring->head = tail; 330 327 } 331 328 332 329 static void free_capture_list(struct i915_request *request) ··· 343 338 } 344 339 } 345 340 341 + static void __retire_engine_request(struct intel_engine_cs *engine, 342 + struct i915_request *rq) 343 + { 344 + GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n", 345 + __func__, engine->name, 346 + rq->fence.context, rq->fence.seqno, 347 + rq->global_seqno, 348 + intel_engine_get_seqno(engine)); 349 + 350 + GEM_BUG_ON(!i915_request_completed(rq)); 351 + 352 + local_irq_disable(); 353 + 354 + spin_lock(&engine->timeline.lock); 355 + GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); 356 + list_del_init(&rq->link); 357 + spin_unlock(&engine->timeline.lock); 358 + 359 + spin_lock(&rq->lock); 360 + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) 361 + dma_fence_signal_locked(&rq->fence); 362 + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) 363 + intel_engine_cancel_signaling(rq); 364 + if (rq->waitboost) { 365 + GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); 366 + atomic_dec(&rq->i915->gt_pm.rps.num_waiters); 367 + } 368 + spin_unlock(&rq->lock); 369 + 370 + local_irq_enable(); 371 + 372 + /* 373 + * The backing object for the context is done after switching to the 374 + * *next* context. Therefore we cannot retire the previous context until 375 + * the next context has already started running. However, since we 376 + * cannot take the required locks at i915_request_submit() we 377 + * defer the unpinning of the active context to now, retirement of 378 + * the subsequent request. 
379 + */ 380 + if (engine->last_retired_context) 381 + intel_context_unpin(engine->last_retired_context, engine); 382 + engine->last_retired_context = rq->ctx; 383 + } 384 + 385 + static void __retire_engine_upto(struct intel_engine_cs *engine, 386 + struct i915_request *rq) 387 + { 388 + struct i915_request *tmp; 389 + 390 + if (list_empty(&rq->link)) 391 + return; 392 + 393 + do { 394 + tmp = list_first_entry(&engine->timeline.requests, 395 + typeof(*tmp), link); 396 + 397 + GEM_BUG_ON(tmp->engine != engine); 398 + __retire_engine_request(engine, tmp); 399 + } while (tmp != rq); 400 + } 401 + 346 402 static void i915_request_retire(struct i915_request *request) 347 403 { 348 - struct intel_engine_cs *engine = request->engine; 349 404 struct i915_gem_active *active, *next; 350 405 351 406 GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", 352 - engine->name, 407 + request->engine->name, 353 408 request->fence.context, request->fence.seqno, 354 409 request->global_seqno, 355 - intel_engine_get_seqno(engine)); 410 + intel_engine_get_seqno(request->engine)); 356 411 357 412 lockdep_assert_held(&request->i915->drm.struct_mutex); 358 413 GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); 359 414 GEM_BUG_ON(!i915_request_completed(request)); 360 - GEM_BUG_ON(!request->i915->gt.active_requests); 361 415 362 416 trace_i915_request_retire(request); 363 417 364 - spin_lock_irq(&engine->timeline->lock); 365 - list_del_init(&request->link); 366 - spin_unlock_irq(&engine->timeline->lock); 367 - 368 - unreserve_engine(request->engine); 369 418 advance_ring(request); 370 - 371 419 free_capture_list(request); 372 420 373 421 /* ··· 456 398 457 399 /* Retirement decays the ban score as it is a sign of ctx progress */ 458 400 atomic_dec_if_positive(&request->ctx->ban_score); 401 + intel_context_unpin(request->ctx, request->engine); 459 402 460 - /* 461 - * The backing object for the context is done after switching to the 462 - * *next* context. 
Therefore we cannot retire the previous context until 463 - * the next context has already started running. However, since we 464 - * cannot take the required locks at i915_request_submit() we 465 - * defer the unpinning of the active context to now, retirement of 466 - * the subsequent request. 467 - */ 468 - if (engine->last_retired_context) 469 - engine->context_unpin(engine, engine->last_retired_context); 470 - engine->last_retired_context = request->ctx; 403 + __retire_engine_upto(request->engine, request); 471 404 472 - spin_lock_irq(&request->lock); 473 - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) 474 - dma_fence_signal_locked(&request->fence); 475 - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) 476 - intel_engine_cancel_signaling(request); 477 - if (request->waitboost) { 478 - GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); 479 - atomic_dec(&request->i915->gt_pm.rps.num_waiters); 480 - } 481 - spin_unlock_irq(&request->lock); 405 + unreserve_gt(request->i915); 482 406 483 - i915_priotree_fini(request->i915, &request->priotree); 407 + i915_sched_node_fini(request->i915, &request->sched); 484 408 i915_request_put(request); 485 409 } 486 410 487 411 void i915_request_retire_upto(struct i915_request *rq) 488 412 { 489 - struct intel_engine_cs *engine = rq->engine; 413 + struct intel_ring *ring = rq->ring; 490 414 struct i915_request *tmp; 415 + 416 + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", 417 + rq->engine->name, 418 + rq->fence.context, rq->fence.seqno, 419 + rq->global_seqno, 420 + intel_engine_get_seqno(rq->engine)); 491 421 492 422 lockdep_assert_held(&rq->i915->drm.struct_mutex); 493 423 GEM_BUG_ON(!i915_request_completed(rq)); 494 424 495 - if (list_empty(&rq->link)) 425 + if (list_empty(&rq->ring_link)) 496 426 return; 497 427 498 428 do { 499 - tmp = list_first_entry(&engine->timeline->requests, 500 - typeof(*tmp), link); 429 + tmp = list_first_entry(&ring->request_list, 
430 + typeof(*tmp), ring_link); 501 431 502 432 i915_request_retire(tmp); 503 433 } while (tmp != rq); 504 434 } 505 435 506 - static u32 timeline_get_seqno(struct intel_timeline *tl) 436 + static u32 timeline_get_seqno(struct i915_timeline *tl) 507 437 { 508 438 return ++tl->seqno; 509 439 } 510 440 511 441 static void move_to_timeline(struct i915_request *request, 512 - struct intel_timeline *timeline) 442 + struct i915_timeline *timeline) 513 443 { 514 - GEM_BUG_ON(request->timeline == request->engine->timeline); 515 - lockdep_assert_held(&request->engine->timeline->lock); 444 + GEM_BUG_ON(request->timeline == &request->engine->timeline); 445 + lockdep_assert_held(&request->engine->timeline.lock); 516 446 517 - spin_lock(&request->timeline->lock); 447 + spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); 518 448 list_move_tail(&request->link, &timeline->requests); 519 449 spin_unlock(&request->timeline->lock); 520 450 } ··· 515 469 GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", 516 470 engine->name, 517 471 request->fence.context, request->fence.seqno, 518 - engine->timeline->seqno + 1, 472 + engine->timeline.seqno + 1, 519 473 intel_engine_get_seqno(engine)); 520 474 521 475 GEM_BUG_ON(!irqs_disabled()); 522 - lockdep_assert_held(&engine->timeline->lock); 476 + lockdep_assert_held(&engine->timeline.lock); 523 477 524 478 GEM_BUG_ON(request->global_seqno); 525 479 526 - seqno = timeline_get_seqno(engine->timeline); 480 + seqno = timeline_get_seqno(&engine->timeline); 527 481 GEM_BUG_ON(!seqno); 528 482 GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); 529 483 ··· 538 492 request->ring->vaddr + request->postfix); 539 493 540 494 /* Transfer from per-context onto the global per-engine timeline */ 541 - move_to_timeline(request, engine->timeline); 495 + move_to_timeline(request, &engine->timeline); 542 496 543 497 trace_i915_request_execute(request); 544 498 ··· 551 505 unsigned long flags; 552 506 553 507 /* Will be 
called from irq-context when using foreign fences. */ 554 - spin_lock_irqsave(&engine->timeline->lock, flags); 508 + spin_lock_irqsave(&engine->timeline.lock, flags); 555 509 556 510 __i915_request_submit(request); 557 511 558 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 512 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 559 513 } 560 514 561 515 void __i915_request_unsubmit(struct i915_request *request) ··· 569 523 intel_engine_get_seqno(engine)); 570 524 571 525 GEM_BUG_ON(!irqs_disabled()); 572 - lockdep_assert_held(&engine->timeline->lock); 526 + lockdep_assert_held(&engine->timeline.lock); 573 527 574 528 /* 575 529 * Only unwind in reverse order, required so that the per-context list 576 530 * is kept in seqno/ring order. 577 531 */ 578 532 GEM_BUG_ON(!request->global_seqno); 579 - GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); 533 + GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); 580 534 GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), 581 535 request->global_seqno)); 582 - engine->timeline->seqno--; 536 + engine->timeline.seqno--; 583 537 584 538 /* We may be recursing from the signal callback of another i915 fence */ 585 539 spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); ··· 606 560 unsigned long flags; 607 561 608 562 /* Will be called from irq-context when using foreign fences. */ 609 - spin_lock_irqsave(&engine->timeline->lock, flags); 563 + spin_lock_irqsave(&engine->timeline.lock, flags); 610 564 611 565 __i915_request_unsubmit(request); 612 566 613 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 567 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 614 568 } 615 569 616 570 static int __i915_sw_fence_call ··· 681 635 * GGTT space, so do this first before we reserve a seqno for 682 636 * ourselves. 
683 637 */ 684 - ring = engine->context_pin(engine, ctx); 638 + ring = intel_context_pin(ctx, engine); 685 639 if (IS_ERR(ring)) 686 640 return ERR_CAST(ring); 687 641 GEM_BUG_ON(!ring); 688 642 689 - ret = reserve_engine(engine); 643 + ret = reserve_gt(i915); 690 644 if (ret) 691 645 goto err_unpin; 692 646 ··· 694 648 if (ret) 695 649 goto err_unreserve; 696 650 697 - /* Move the oldest request to the slab-cache (if not in use!) */ 698 - rq = list_first_entry_or_null(&engine->timeline->requests, 699 - typeof(*rq), link); 700 - if (rq && i915_request_completed(rq)) 651 + /* Move our oldest request to the slab-cache (if not in use!) */ 652 + rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); 653 + if (!list_is_last(&rq->ring_link, &ring->request_list) && 654 + i915_request_completed(rq)) 701 655 i915_request_retire(rq); 702 656 703 657 /* ··· 757 711 } 758 712 } 759 713 760 - rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); 761 - GEM_BUG_ON(rq->timeline == engine->timeline); 714 + INIT_LIST_HEAD(&rq->active_list); 715 + rq->i915 = i915; 716 + rq->engine = engine; 717 + rq->ctx = ctx; 718 + rq->ring = ring; 719 + rq->timeline = ring->timeline; 720 + GEM_BUG_ON(rq->timeline == &engine->timeline); 762 721 763 722 spin_lock_init(&rq->lock); 764 723 dma_fence_init(&rq->fence, ··· 776 725 i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); 777 726 init_waitqueue_head(&rq->execute); 778 727 779 - i915_priotree_init(&rq->priotree); 780 - 781 - INIT_LIST_HEAD(&rq->active_list); 782 - rq->i915 = i915; 783 - rq->engine = engine; 784 - rq->ctx = ctx; 785 - rq->ring = ring; 728 + i915_sched_node_init(&rq->sched); 786 729 787 730 /* No zalloc, must clear what we need by hand */ 788 731 rq->global_seqno = 0; ··· 813 768 if (ret) 814 769 goto err_unwind; 815 770 771 + /* Keep a second pin for the dual retirement along engine and ring */ 772 + __intel_context_pin(rq->ctx, engine); 773 + 816 774 /* Check that we didn't interrupt ourselves 
with a new request */ 817 775 GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); 818 776 return rq; ··· 825 777 826 778 /* Make sure we didn't add ourselves to external state before freeing */ 827 779 GEM_BUG_ON(!list_empty(&rq->active_list)); 828 - GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); 829 - GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); 780 + GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); 781 + GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); 830 782 831 783 kmem_cache_free(i915->requests, rq); 832 784 err_unreserve: 833 - unreserve_engine(engine); 785 + unreserve_gt(i915); 834 786 err_unpin: 835 - engine->context_unpin(engine, ctx); 787 + intel_context_unpin(ctx, engine); 836 788 return ERR_PTR(ret); 837 789 } 838 790 ··· 848 800 return 0; 849 801 850 802 if (to->engine->schedule) { 851 - ret = i915_priotree_add_dependency(to->i915, 852 - &to->priotree, 853 - &from->priotree); 803 + ret = i915_sched_node_add_dependency(to->i915, 804 + &to->sched, 805 + &from->sched); 854 806 if (ret < 0) 855 807 return ret; 856 808 } ··· 928 880 929 881 /* Squash repeated waits to the same timelines */ 930 882 if (fence->context != rq->i915->mm.unordered_timeline && 931 - intel_timeline_sync_is_later(rq->timeline, fence)) 883 + i915_timeline_sync_is_later(rq->timeline, fence)) 932 884 continue; 933 885 934 886 if (dma_fence_is_i915(fence)) ··· 942 894 943 895 /* Record the latest fence used against each timeline */ 944 896 if (fence->context != rq->i915->mm.unordered_timeline) 945 - intel_timeline_sync_set(rq->timeline, fence); 897 + i915_timeline_sync_set(rq->timeline, fence); 946 898 } while (--nchild); 947 899 948 900 return 0; ··· 1019 971 { 1020 972 struct intel_engine_cs *engine = request->engine; 1021 973 struct intel_ring *ring = request->ring; 1022 - struct intel_timeline *timeline = request->timeline; 974 + struct i915_timeline *timeline = request->timeline; 1023 975 struct i915_request *prev; 1024 976 u32 *cs; 1025 977 int err; ··· 1081 
1033 i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, 1082 1034 &request->submitq); 1083 1035 if (engine->schedule) 1084 - __i915_priotree_add_dependency(&request->priotree, 1085 - &prev->priotree, 1086 - &request->dep, 1087 - 0); 1036 + __i915_sched_node_add_dependency(&request->sched, 1037 + &prev->sched, 1038 + &request->dep, 1039 + 0); 1088 1040 } 1089 1041 1090 1042 spin_lock_irq(&timeline->lock); ··· 1095 1047 i915_gem_active_set(&timeline->last_request, request); 1096 1048 1097 1049 list_add_tail(&request->ring_link, &ring->request_list); 1050 + if (list_is_first(&request->ring_link, &ring->request_list)) 1051 + list_add(&ring->active_link, &request->i915->gt.active_rings); 1098 1052 request->emitted_jiffies = jiffies; 1099 1053 1100 1054 /* ··· 1110 1060 * decide whether to preempt the entire chain so that it is ready to 1111 1061 * run at the earliest possible convenience. 1112 1062 */ 1113 - rcu_read_lock(); 1114 - if (engine->schedule) 1115 - engine->schedule(request, request->ctx->priority); 1116 - rcu_read_unlock(); 1117 - 1118 1063 local_bh_disable(); 1064 + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 1065 + if (engine->schedule) 1066 + engine->schedule(request, &request->ctx->sched); 1067 + rcu_read_unlock(); 1119 1068 i915_sw_fence_commit(&request->submit); 1120 1069 local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ 1121 1070 ··· 1403 1354 return timeout; 1404 1355 } 1405 1356 1406 - static void engine_retire_requests(struct intel_engine_cs *engine) 1357 + static void ring_retire_requests(struct intel_ring *ring) 1407 1358 { 1408 1359 struct i915_request *request, *next; 1409 - u32 seqno = intel_engine_get_seqno(engine); 1410 - LIST_HEAD(retire); 1411 1360 1412 - spin_lock_irq(&engine->timeline->lock); 1413 1361 list_for_each_entry_safe(request, next, 1414 - &engine->timeline->requests, link) { 1415 - if (!i915_seqno_passed(seqno, request->global_seqno)) 1362 + &ring->request_list, ring_link) { 
1363 + if (!i915_request_completed(request)) 1416 1364 break; 1417 1365 1418 - list_move_tail(&request->link, &retire); 1419 - } 1420 - spin_unlock_irq(&engine->timeline->lock); 1421 - 1422 - list_for_each_entry_safe(request, next, &retire, link) 1423 1366 i915_request_retire(request); 1367 + } 1424 1368 } 1425 1369 1426 1370 void i915_retire_requests(struct drm_i915_private *i915) 1427 1371 { 1428 - struct intel_engine_cs *engine; 1429 - enum intel_engine_id id; 1372 + struct intel_ring *ring, *tmp; 1430 1373 1431 1374 lockdep_assert_held(&i915->drm.struct_mutex); 1432 1375 1433 1376 if (!i915->gt.active_requests) 1434 1377 return; 1435 1378 1436 - for_each_engine(engine, i915, id) 1437 - engine_retire_requests(engine); 1379 + list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) 1380 + ring_retire_requests(ring); 1438 1381 } 1439 1382 1440 1383 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+7 -42
drivers/gpu/drm/i915/i915_request.h
··· 28 28 #include <linux/dma-fence.h> 29 29 30 30 #include "i915_gem.h" 31 + #include "i915_scheduler.h" 31 32 #include "i915_sw_fence.h" 33 + #include "i915_scheduler.h" 32 34 33 35 #include <uapi/drm/i915_drm.h> 34 36 35 37 struct drm_file; 36 38 struct drm_i915_gem_object; 37 39 struct i915_request; 40 + struct i915_timeline; 38 41 39 42 struct intel_wait { 40 43 struct rb_node node; ··· 49 46 struct intel_signal_node { 50 47 struct intel_wait wait; 51 48 struct list_head link; 52 - }; 53 - 54 - struct i915_dependency { 55 - struct i915_priotree *signaler; 56 - struct list_head signal_link; 57 - struct list_head wait_link; 58 - struct list_head dfs_link; 59 - unsigned long flags; 60 - #define I915_DEPENDENCY_ALLOC BIT(0) 61 - }; 62 - 63 - /* 64 - * "People assume that time is a strict progression of cause to effect, but 65 - * actually, from a nonlinear, non-subjective viewpoint, it's more like a big 66 - * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 67 - * 68 - * Requests exist in a complex web of interdependencies. Each request 69 - * has to wait for some other request to complete before it is ready to be run 70 - * (e.g. we have to wait until the pixels have been rendering into a texture 71 - * before we can copy from it). We track the readiness of a request in terms 72 - * of fences, but we also need to keep the dependency tree for the lifetime 73 - * of the request (beyond the life of an individual fence). We use the tree 74 - * at various points to reorder the requests whilst keeping the requests 75 - * in order with respect to their various dependencies. 
76 - */ 77 - struct i915_priotree { 78 - struct list_head signalers_list; /* those before us, we depend upon */ 79 - struct list_head waiters_list; /* those after us, they depend upon us */ 80 - struct list_head link; 81 - int priority; 82 - }; 83 - 84 - enum { 85 - I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, 86 - I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, 87 - I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, 88 - 89 - I915_PRIORITY_INVALID = INT_MIN 90 49 }; 91 50 92 51 struct i915_capture_list { ··· 96 131 struct i915_gem_context *ctx; 97 132 struct intel_engine_cs *engine; 98 133 struct intel_ring *ring; 99 - struct intel_timeline *timeline; 134 + struct i915_timeline *timeline; 100 135 struct intel_signal_node signaling; 101 136 102 137 /* ··· 119 154 * to retirement), i.e. bidirectional dependency information for the 120 155 * request not tied to individual fences. 121 156 */ 122 - struct i915_priotree priotree; 157 + struct i915_sched_node sched; 123 158 struct i915_dependency dep; 124 159 125 160 /** ··· 308 343 seqno - 1); 309 344 } 310 345 311 - static inline bool i915_priotree_signaled(const struct i915_priotree *pt) 346 + static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) 312 347 { 313 348 const struct i915_request *rq = 314 - container_of(pt, const struct i915_request, priotree); 349 + container_of(node, const struct i915_request, sched); 315 350 316 351 return i915_request_completed(rq); 317 352 }
+72
drivers/gpu/drm/i915/i915_scheduler.h
··· 1 + /* 2 + * SPDX-License-Identifier: MIT 3 + * 4 + * Copyright © 2018 Intel Corporation 5 + */ 6 + 7 + #ifndef _I915_SCHEDULER_H_ 8 + #define _I915_SCHEDULER_H_ 9 + 10 + #include <linux/bitops.h> 11 + 12 + #include <uapi/drm/i915_drm.h> 13 + 14 + enum { 15 + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, 16 + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, 17 + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, 18 + 19 + I915_PRIORITY_INVALID = INT_MIN 20 + }; 21 + 22 + struct i915_sched_attr { 23 + /** 24 + * @priority: execution and service priority 25 + * 26 + * All clients are equal, but some are more equal than others! 27 + * 28 + * Requests from a context with a greater (more positive) value of 29 + * @priority will be executed before those with a lower @priority 30 + * value, forming a simple QoS. 31 + * 32 + * The &drm_i915_private.kernel_context is assigned the lowest priority. 33 + */ 34 + int priority; 35 + }; 36 + 37 + /* 38 + * "People assume that time is a strict progression of cause to effect, but 39 + * actually, from a nonlinear, non-subjective viewpoint, it's more like a big 40 + * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 41 + * 42 + * Requests exist in a complex web of interdependencies. Each request 43 + * has to wait for some other request to complete before it is ready to be run 44 + * (e.g. we have to wait until the pixels have been rendering into a texture 45 + * before we can copy from it). We track the readiness of a request in terms 46 + * of fences, but we also need to keep the dependency tree for the lifetime 47 + * of the request (beyond the life of an individual fence). We use the tree 48 + * at various points to reorder the requests whilst keeping the requests 49 + * in order with respect to their various dependencies. 50 + * 51 + * There is no active component to the "scheduler". 
As we know the dependency 52 + * DAG of each request, we are able to insert it into a sorted queue when it 53 + * is ready, and are able to reorder its portion of the graph to accommodate 54 + * dynamic priority changes. 55 + */ 56 + struct i915_sched_node { 57 + struct list_head signalers_list; /* those before us, we depend upon */ 58 + struct list_head waiters_list; /* those after us, they depend upon us */ 59 + struct list_head link; 60 + struct i915_sched_attr attr; 61 + }; 62 + 63 + struct i915_dependency { 64 + struct i915_sched_node *signaler; 65 + struct list_head signal_link; 66 + struct list_head wait_link; 67 + struct list_head dfs_link; 68 + unsigned long flags; 69 + #define I915_DEPENDENCY_ALLOC BIT(0) 70 + }; 71 + 72 + #endif /* _I915_SCHEDULER_H_ */
+105
drivers/gpu/drm/i915/i915_timeline.c
··· 1 + /* 2 + * SPDX-License-Identifier: MIT 3 + * 4 + * Copyright © 2016-2018 Intel Corporation 5 + */ 6 + 7 + #include "i915_drv.h" 8 + 9 + #include "i915_timeline.h" 10 + #include "i915_syncmap.h" 11 + 12 + void i915_timeline_init(struct drm_i915_private *i915, 13 + struct i915_timeline *timeline, 14 + const char *name) 15 + { 16 + lockdep_assert_held(&i915->drm.struct_mutex); 17 + 18 + /* 19 + * Ideally we want a set of engines on a single leaf as we expect 20 + * to mostly be tracking synchronisation between engines. It is not 21 + * a huge issue if this is not the case, but we may want to mitigate 22 + * any page crossing penalties if they become an issue. 23 + */ 24 + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); 25 + 26 + timeline->name = name; 27 + 28 + list_add(&timeline->link, &i915->gt.timelines); 29 + 30 + /* Called during early_init before we know how many engines there are */ 31 + 32 + timeline->fence_context = dma_fence_context_alloc(1); 33 + 34 + spin_lock_init(&timeline->lock); 35 + 36 + init_request_active(&timeline->last_request, NULL); 37 + INIT_LIST_HEAD(&timeline->requests); 38 + 39 + i915_syncmap_init(&timeline->sync); 40 + } 41 + 42 + /** 43 + * i915_timelines_park - called when the driver idles 44 + * @i915: the drm_i915_private device 45 + * 46 + * When the driver is completely idle, we know that all of our sync points 47 + * have been signaled and our tracking is then entirely redundant. Any request 48 + * to wait upon an older sync point will be completed instantly as we know 49 + * the fence is signaled and therefore we will not even look them up in the 50 + * sync point map. 
51 + */ 52 + void i915_timelines_park(struct drm_i915_private *i915) 53 + { 54 + struct i915_timeline *timeline; 55 + 56 + lockdep_assert_held(&i915->drm.struct_mutex); 57 + 58 + list_for_each_entry(timeline, &i915->gt.timelines, link) { 59 + /* 60 + * All known fences are completed so we can scrap 61 + * the current sync point tracking and start afresh, 62 + * any attempt to wait upon a previous sync point 63 + * will be skipped as the fence was signaled. 64 + */ 65 + i915_syncmap_free(&timeline->sync); 66 + } 67 + } 68 + 69 + void i915_timeline_fini(struct i915_timeline *timeline) 70 + { 71 + GEM_BUG_ON(!list_empty(&timeline->requests)); 72 + 73 + i915_syncmap_free(&timeline->sync); 74 + 75 + list_del(&timeline->link); 76 + } 77 + 78 + struct i915_timeline * 79 + i915_timeline_create(struct drm_i915_private *i915, const char *name) 80 + { 81 + struct i915_timeline *timeline; 82 + 83 + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); 84 + if (!timeline) 85 + return ERR_PTR(-ENOMEM); 86 + 87 + i915_timeline_init(i915, timeline, name); 88 + kref_init(&timeline->kref); 89 + 90 + return timeline; 91 + } 92 + 93 + void __i915_timeline_free(struct kref *kref) 94 + { 95 + struct i915_timeline *timeline = 96 + container_of(kref, typeof(*timeline), kref); 97 + 98 + i915_timeline_fini(timeline); 99 + kfree(timeline); 100 + } 101 + 102 + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 103 + #include "selftests/mock_timeline.c" 104 + #include "selftests/i915_timeline.c" 105 + #endif
+55 -68
drivers/gpu/drm/i915/i915_trace.h
··· 679 679 TP_ARGS(rq) 680 680 ); 681 681 682 - DECLARE_EVENT_CLASS(i915_request_hw, 683 - TP_PROTO(struct i915_request *rq, unsigned int port), 684 - TP_ARGS(rq, port), 682 + TRACE_EVENT(i915_request_in, 683 + TP_PROTO(struct i915_request *rq, unsigned int port), 684 + TP_ARGS(rq, port), 685 685 686 - TP_STRUCT__entry( 687 - __field(u32, dev) 688 - __field(u32, hw_id) 689 - __field(u32, ring) 690 - __field(u32, ctx) 691 - __field(u32, seqno) 692 - __field(u32, global_seqno) 693 - __field(u32, port) 694 - ), 686 + TP_STRUCT__entry( 687 + __field(u32, dev) 688 + __field(u32, hw_id) 689 + __field(u32, ring) 690 + __field(u32, ctx) 691 + __field(u32, seqno) 692 + __field(u32, global_seqno) 693 + __field(u32, port) 694 + __field(u32, prio) 695 + ), 695 696 696 - TP_fast_assign( 697 - __entry->dev = rq->i915->drm.primary->index; 698 - __entry->hw_id = rq->ctx->hw_id; 699 - __entry->ring = rq->engine->id; 700 - __entry->ctx = rq->fence.context; 701 - __entry->seqno = rq->fence.seqno; 702 - __entry->global_seqno = rq->global_seqno; 703 - __entry->port = port; 704 - ), 697 + TP_fast_assign( 698 + __entry->dev = rq->i915->drm.primary->index; 699 + __entry->hw_id = rq->ctx->hw_id; 700 + __entry->ring = rq->engine->id; 701 + __entry->ctx = rq->fence.context; 702 + __entry->seqno = rq->fence.seqno; 703 + __entry->global_seqno = rq->global_seqno; 704 + __entry->prio = rq->sched.attr.priority; 705 + __entry->port = port; 706 + ), 705 707 706 - TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", 708 + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, prio=%u, global=%u, port=%u", 709 + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, 710 + __entry->seqno, __entry->prio, __entry->global_seqno, 711 + __entry->port) 712 + ); 713 + 714 + TRACE_EVENT(i915_request_out, 715 + TP_PROTO(struct i915_request *rq), 716 + TP_ARGS(rq), 717 + 718 + TP_STRUCT__entry( 719 + __field(u32, dev) 720 + __field(u32, hw_id) 721 + __field(u32, ring) 722 + 
__field(u32, ctx) 723 + __field(u32, seqno) 724 + __field(u32, global_seqno) 725 + __field(u32, completed) 726 + ), 727 + 728 + TP_fast_assign( 729 + __entry->dev = rq->i915->drm.primary->index; 730 + __entry->hw_id = rq->ctx->hw_id; 731 + __entry->ring = rq->engine->id; 732 + __entry->ctx = rq->fence.context; 733 + __entry->seqno = rq->fence.seqno; 734 + __entry->global_seqno = rq->global_seqno; 735 + __entry->completed = i915_request_completed(rq); 736 + ), 737 + 738 + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, completed?=%u", 707 739 __entry->dev, __entry->hw_id, __entry->ring, 708 740 __entry->ctx, __entry->seqno, 709 - __entry->global_seqno, __entry->port) 741 + __entry->global_seqno, __entry->completed) 710 742 ); 711 743 712 - DEFINE_EVENT(i915_request_hw, i915_request_in, 713 - TP_PROTO(struct i915_request *rq, unsigned int port), 714 - TP_ARGS(rq, port) 715 - ); 716 - 717 - DEFINE_EVENT(i915_request, i915_request_out, 718 - TP_PROTO(struct i915_request *rq), 719 - TP_ARGS(rq) 720 - ); 721 744 #else 722 745 #if !defined(TRACE_HEADER_MULTI_READ) 723 746 static inline void ··· 832 809 DEFINE_EVENT(i915_request, i915_request_wait_end, 833 810 TP_PROTO(struct i915_request *rq), 834 811 TP_ARGS(rq) 835 - ); 836 - 837 - TRACE_EVENT(i915_flip_request, 838 - TP_PROTO(int plane, struct drm_i915_gem_object *obj), 839 - 840 - TP_ARGS(plane, obj), 841 - 842 - TP_STRUCT__entry( 843 - __field(int, plane) 844 - __field(struct drm_i915_gem_object *, obj) 845 - ), 846 - 847 - TP_fast_assign( 848 - __entry->plane = plane; 849 - __entry->obj = obj; 850 - ), 851 - 852 - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) 853 - ); 854 - 855 - TRACE_EVENT(i915_flip_complete, 856 - TP_PROTO(int plane, struct drm_i915_gem_object *obj), 857 - 858 - TP_ARGS(plane, obj), 859 - 860 - TP_STRUCT__entry( 861 - __field(int, plane) 862 - __field(struct drm_i915_gem_object *, obj) 863 - ), 864 - 865 - TP_fast_assign( 866 - __entry->plane = plane; 867 - 
__entry->obj = obj; 868 - ), 869 - 870 - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) 871 812 ); 872 813 873 814 TRACE_EVENT_CONDITION(i915_reg_rw,
+6
drivers/gpu/drm/i915/i915_utils.h
··· 120 120 121 121 #include <linux/list.h> 122 122 123 + static inline int list_is_first(const struct list_head *list, 124 + const struct list_head *head) 125 + { 126 + return head->next == list; 127 + } 128 + 123 129 static inline void __list_del_many(struct list_head *head, 124 130 struct list_head *first) 125 131 {
+56 -17
drivers/gpu/drm/i915/i915_vma.c
··· 46 46 47 47 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); 48 48 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 49 - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) 50 - WARN_ON(i915_vma_unbind(vma)); 51 49 52 50 GEM_BUG_ON(!i915_gem_object_is_active(obj)); 53 51 if (--obj->active_count) ··· 230 232 if (!vma) 231 233 vma = vma_create(obj, vm, view); 232 234 233 - GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); 234 235 GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); 235 236 GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); 236 237 return vma; ··· 681 684 return ret; 682 685 } 683 686 684 - static void i915_vma_destroy(struct i915_vma *vma) 687 + void i915_vma_close(struct i915_vma *vma) 688 + { 689 + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 690 + 691 + GEM_BUG_ON(i915_vma_is_closed(vma)); 692 + vma->flags |= I915_VMA_CLOSED; 693 + 694 + /* 695 + * We defer actually closing, unbinding and destroying the VMA until 696 + * the next idle point, or if the object is freed in the meantime. By 697 + * postponing the unbind, we allow for it to be resurrected by the 698 + * client, avoiding the work required to rebind the VMA. This is 699 + * advantageous for DRI, where the client/server pass objects 700 + * between themselves, temporarily opening a local VMA to the 701 + * object, and then closing it again. The same object is then reused 702 + * on the next frame (or two, depending on the depth of the swap queue) 703 + * causing us to rebind the VMA once more. This ends up being a lot 704 + * of wasted work for the steady state. 
705 + */ 706 + list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma); 707 + } 708 + 709 + void i915_vma_reopen(struct i915_vma *vma) 710 + { 711 + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 712 + 713 + if (vma->flags & I915_VMA_CLOSED) { 714 + vma->flags &= ~I915_VMA_CLOSED; 715 + list_del(&vma->closed_link); 716 + } 717 + } 718 + 719 + static void __i915_vma_destroy(struct i915_vma *vma) 685 720 { 686 721 int i; 687 722 688 723 GEM_BUG_ON(vma->node.allocated); 689 - GEM_BUG_ON(i915_vma_is_active(vma)); 690 - GEM_BUG_ON(!i915_vma_is_closed(vma)); 691 724 GEM_BUG_ON(vma->fence); 692 725 693 726 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) ··· 726 699 727 700 list_del(&vma->obj_link); 728 701 list_del(&vma->vm_link); 702 + rb_erase(&vma->obj_node, &vma->obj->vma_tree); 729 703 730 704 if (!i915_vma_is_ggtt(vma)) 731 705 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); ··· 734 706 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 735 707 } 736 708 737 - void i915_vma_close(struct i915_vma *vma) 709 + void i915_vma_destroy(struct i915_vma *vma) 738 710 { 739 - GEM_BUG_ON(i915_vma_is_closed(vma)); 740 - vma->flags |= I915_VMA_CLOSED; 711 + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 741 712 742 - rb_erase(&vma->obj_node, &vma->obj->vma_tree); 713 + GEM_BUG_ON(i915_vma_is_active(vma)); 714 + GEM_BUG_ON(i915_vma_is_pinned(vma)); 743 715 744 - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) 745 - WARN_ON(i915_vma_unbind(vma)); 716 + if (i915_vma_is_closed(vma)) 717 + list_del(&vma->closed_link); 718 + 719 + WARN_ON(i915_vma_unbind(vma)); 720 + __i915_vma_destroy(vma); 721 + } 722 + 723 + void i915_vma_parked(struct drm_i915_private *i915) 724 + { 725 + struct i915_vma *vma, *next; 726 + 727 + list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { 728 + GEM_BUG_ON(!i915_vma_is_closed(vma)); 729 + i915_vma_destroy(vma); 730 + } 731 + 732 + GEM_BUG_ON(!list_empty(&i915->gt.closed_vma)); 746 733 } 747 734 748 
735 static void __i915_vma_iounmap(struct i915_vma *vma) ··· 847 804 return -EBUSY; 848 805 849 806 if (!drm_mm_node_allocated(&vma->node)) 850 - goto destroy; 807 + return 0; 851 808 852 809 GEM_BUG_ON(obj->bind_count == 0); 853 810 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); ··· 883 840 vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); 884 841 885 842 i915_vma_remove(vma); 886 - 887 - destroy: 888 - if (unlikely(i915_vma_is_closed(vma))) 889 - i915_vma_destroy(vma); 890 843 891 844 return 0; 892 845 }
+6
drivers/gpu/drm/i915/i915_vma.h
··· 119 119 /** This vma's place in the eviction list */ 120 120 struct list_head evict_link; 121 121 122 + struct list_head closed_link; 123 + 122 124 /** 123 125 * Used for performing relocations during execbuffer insertion. 124 126 */ ··· 287 285 int __must_check i915_vma_unbind(struct i915_vma *vma); 288 286 void i915_vma_unlink_ctx(struct i915_vma *vma); 289 287 void i915_vma_close(struct i915_vma *vma); 288 + void i915_vma_reopen(struct i915_vma *vma); 289 + void i915_vma_destroy(struct i915_vma *vma); 290 290 291 291 int __i915_vma_do_pin(struct i915_vma *vma, 292 292 u64 size, u64 alignment, u64 flags); ··· 411 407 if (vma->fence) 412 408 __i915_vma_unpin_fence(vma); 413 409 } 410 + 411 + void i915_vma_parked(struct drm_i915_private *i915); 414 412 415 413 #define for_each_until(cond) if (cond) break; else 416 414
+6 -1
drivers/gpu/drm/i915/intel_atomic_plane.c
··· 183 183 } 184 184 185 185 /* FIXME pre-g4x don't work like this */ 186 - if (intel_state->base.visible) 186 + if (state->visible) 187 187 crtc_state->active_planes |= BIT(intel_plane->id); 188 188 else 189 189 crtc_state->active_planes &= ~BIT(intel_plane->id); 190 + 191 + if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) 192 + crtc_state->nv12_planes |= BIT(intel_plane->id); 193 + else 194 + crtc_state->nv12_planes &= ~BIT(intel_plane->id); 190 195 191 196 return intel_plane_atomic_calc_changes(old_crtc_state, 192 197 &crtc_state->base,
+1
drivers/gpu/drm/i915/intel_bios.c
··· 530 530 */ 531 531 if (!driver->drrs_enabled) 532 532 dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED; 533 + dev_priv->vbt.psr.enable = driver->psr_enabled; 533 534 } 534 535 535 536 static void
+21 -10
drivers/gpu/drm/i915/intel_breadcrumbs.c
··· 82 82 83 83 static noinline void missed_breadcrumb(struct intel_engine_cs *engine) 84 84 { 85 - if (drm_debug & DRM_UT_DRIVER) { 85 + if (GEM_SHOW_DEBUG()) { 86 86 struct drm_printer p = drm_debug_printer(__func__); 87 87 88 88 intel_engine_dump(engine, &p, ··· 130 130 131 131 static void intel_breadcrumbs_fake_irq(struct timer_list *t) 132 132 { 133 - struct intel_engine_cs *engine = from_timer(engine, t, 134 - breadcrumbs.fake_irq); 133 + struct intel_engine_cs *engine = 134 + from_timer(engine, t, breadcrumbs.fake_irq); 135 135 struct intel_breadcrumbs *b = &engine->breadcrumbs; 136 136 137 - /* The timer persists in case we cannot enable interrupts, 137 + /* 138 + * The timer persists in case we cannot enable interrupts, 138 139 * or if we have previously seen seqno/interrupt incoherency 139 140 * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). 140 141 * Here the worker will wake up every jiffie in order to kick the ··· 148 147 spin_unlock_irq(&b->irq_lock); 149 148 if (!b->irq_armed) 150 149 return; 150 + 151 + /* If the user has disabled the fake-irq, restore the hangchecking */ 152 + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { 153 + mod_timer(&b->hangcheck, wait_timeout()); 154 + return; 155 + } 151 156 152 157 mod_timer(&b->fake_irq, jiffies + 1); 153 158 } ··· 838 831 { 839 832 struct intel_breadcrumbs *b = &engine->breadcrumbs; 840 833 834 + del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ 841 835 del_timer_sync(&b->hangcheck); 842 - del_timer_sync(&b->fake_irq); 843 836 clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); 844 837 } 845 838 ··· 847 840 { 848 841 struct intel_breadcrumbs *b = &engine->breadcrumbs; 849 842 850 - cancel_fake_irq(engine); 851 843 spin_lock_irq(&b->irq_lock); 844 + 845 + /* 846 + * Leave the fake_irq timer enabled (if it is running), but clear the 847 + * bit so that it turns itself off on its next wake up and goes back 848 + * to the long hangcheck 
interval if still required. 849 + */ 850 + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); 852 851 853 852 if (b->irq_enabled) 854 853 irq_enable(engine); 855 854 else 856 855 irq_disable(engine); 857 856 858 - /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the 857 + /* 858 + * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the 859 859 * GPU is active and may have already executed the MI_USER_INTERRUPT 860 860 * before the CPU is ready to receive. However, the engine is currently 861 861 * idle (we haven't started it yet), there is no possibility for a ··· 870 856 * immediate wakeup (until a real interrupt arrives for the waiter). 871 857 */ 872 858 clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); 873 - 874 - if (b->irq_armed) 875 - enable_fake_irq(b); 876 859 877 860 spin_unlock_irq(&b->irq_lock); 878 861 }
+37 -4
drivers/gpu/drm/i915/intel_cdclk.c
··· 2302 2302 return 0; 2303 2303 } 2304 2304 2305 + static int skl_dpll0_vco(struct intel_atomic_state *intel_state) 2306 + { 2307 + struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev); 2308 + struct intel_crtc *crtc; 2309 + struct intel_crtc_state *crtc_state; 2310 + int vco, i; 2311 + 2312 + vco = intel_state->cdclk.logical.vco; 2313 + if (!vco) 2314 + vco = dev_priv->skl_preferred_vco_freq; 2315 + 2316 + for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) { 2317 + if (!crtc_state->base.enable) 2318 + continue; 2319 + 2320 + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) 2321 + continue; 2322 + 2323 + /* 2324 + * DPLL0 VCO may need to be adjusted to get the correct 2325 + * clock for eDP. This will affect cdclk as well. 2326 + */ 2327 + switch (crtc_state->port_clock / 2) { 2328 + case 108000: 2329 + case 216000: 2330 + vco = 8640000; 2331 + break; 2332 + default: 2333 + vco = 8100000; 2334 + break; 2335 + } 2336 + } 2337 + 2338 + return vco; 2339 + } 2340 + 2305 2341 static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) 2306 2342 { 2307 - struct drm_i915_private *dev_priv = to_i915(state->dev); 2308 2343 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 2309 2344 int min_cdclk, cdclk, vco; 2310 2345 ··· 2347 2312 if (min_cdclk < 0) 2348 2313 return min_cdclk; 2349 2314 2350 - vco = intel_state->cdclk.logical.vco; 2351 - if (!vco) 2352 - vco = dev_priv->skl_preferred_vco_freq; 2315 + vco = skl_dpll0_vco(intel_state); 2353 2316 2354 2317 /* 2355 2318 * FIXME should also account for plane ratio
+7 -2
drivers/gpu/drm/i915/intel_csr.c
··· 298 298 299 299 csr->version = css_header->version; 300 300 301 - if (IS_CANNONLAKE(dev_priv)) { 301 + if (csr->fw_path == i915_modparams.dmc_firmware_path) { 302 + /* Bypass version check for firmware override. */ 303 + required_version = csr->version; 304 + } else if (IS_CANNONLAKE(dev_priv)) { 302 305 required_version = CNL_CSR_VERSION_REQUIRED; 303 306 } else if (IS_GEMINILAKE(dev_priv)) { 304 307 required_version = GLK_CSR_VERSION_REQUIRED; ··· 456 453 if (!HAS_CSR(dev_priv)) 457 454 return; 458 455 459 - if (IS_CANNONLAKE(dev_priv)) 456 + if (i915_modparams.dmc_firmware_path) 457 + csr->fw_path = i915_modparams.dmc_firmware_path; 458 + else if (IS_CANNONLAKE(dev_priv)) 460 459 csr->fw_path = I915_CSR_CNL; 461 460 else if (IS_GEMINILAKE(dev_priv)) 462 461 csr->fw_path = I915_CSR_GLK;
+289 -8
drivers/gpu/drm/i915/intel_ddi.c
··· 870 870 } 871 871 } 872 872 873 + static const struct icl_combo_phy_ddi_buf_trans * 874 + icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, 875 + int type, int *n_entries) 876 + { 877 + u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK; 878 + 879 + if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { 880 + switch (voltage) { 881 + case VOLTAGE_INFO_0_85V: 882 + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V); 883 + return icl_combo_phy_ddi_translations_edp_0_85V; 884 + case VOLTAGE_INFO_0_95V: 885 + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V); 886 + return icl_combo_phy_ddi_translations_edp_0_95V; 887 + case VOLTAGE_INFO_1_05V: 888 + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V); 889 + return icl_combo_phy_ddi_translations_edp_1_05V; 890 + default: 891 + MISSING_CASE(voltage); 892 + return NULL; 893 + } 894 + } else { 895 + switch (voltage) { 896 + case VOLTAGE_INFO_0_85V: 897 + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V); 898 + return icl_combo_phy_ddi_translations_dp_hdmi_0_85V; 899 + case VOLTAGE_INFO_0_95V: 900 + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V); 901 + return icl_combo_phy_ddi_translations_dp_hdmi_0_95V; 902 + case VOLTAGE_INFO_1_05V: 903 + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V); 904 + return icl_combo_phy_ddi_translations_dp_hdmi_1_05V; 905 + default: 906 + MISSING_CASE(voltage); 907 + return NULL; 908 + } 909 + } 910 + } 911 + 873 912 static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) 874 913 { 875 914 int n_entries, level, default_entry; ··· 1049 1010 default: 1050 1011 MISSING_CASE(pll->info->id); 1051 1012 return PORT_CLK_SEL_NONE; 1013 + } 1014 + } 1015 + 1016 + static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, 1017 + const struct intel_shared_dpll *pll) 1018 + { 1019 + const enum intel_dpll_id 
id = pll->info->id; 1020 + 1021 + switch (id) { 1022 + default: 1023 + MISSING_CASE(id); 1024 + case DPLL_ID_ICL_DPLL0: 1025 + case DPLL_ID_ICL_DPLL1: 1026 + return DDI_CLK_SEL_NONE; 1027 + case DPLL_ID_ICL_MGPLL1: 1028 + case DPLL_ID_ICL_MGPLL2: 1029 + case DPLL_ID_ICL_MGPLL3: 1030 + case DPLL_ID_ICL_MGPLL4: 1031 + return DDI_CLK_SEL_MG; 1052 1032 } 1053 1033 } 1054 1034 ··· 2083 2025 enum port port = encoder->port; 2084 2026 int n_entries; 2085 2027 2086 - if (IS_CANNONLAKE(dev_priv)) { 2028 + if (IS_ICELAKE(dev_priv)) { 2029 + if (port == PORT_A || port == PORT_B) 2030 + icl_get_combo_buf_trans(dev_priv, port, encoder->type, 2031 + &n_entries); 2032 + else 2033 + n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); 2034 + } else if (IS_CANNONLAKE(dev_priv)) { 2087 2035 if (encoder->type == INTEL_OUTPUT_EDP) 2088 2036 cnl_get_buf_trans_edp(dev_priv, &n_entries); 2089 2037 else ··· 2246 2182 I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); 2247 2183 } 2248 2184 2185 + static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, 2186 + u32 level, enum port port, int type) 2187 + { 2188 + const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL; 2189 + u32 n_entries, val; 2190 + int ln; 2191 + 2192 + ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type, 2193 + &n_entries); 2194 + if (!ddi_translations) 2195 + return; 2196 + 2197 + if (level >= n_entries) { 2198 + DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); 2199 + level = n_entries - 1; 2200 + } 2201 + 2202 + /* Set PORT_TX_DW5 Rterm Sel to 110b. 
*/ 2203 + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); 2204 + val &= ~RTERM_SELECT_MASK; 2205 + val |= RTERM_SELECT(0x6); 2206 + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); 2207 + 2208 + /* Program PORT_TX_DW5 */ 2209 + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); 2210 + /* Set DisableTap2 and DisableTap3 if MIPI DSI 2211 + * Clear DisableTap2 and DisableTap3 for all other Ports 2212 + */ 2213 + if (type == INTEL_OUTPUT_DSI) { 2214 + val |= TAP2_DISABLE; 2215 + val |= TAP3_DISABLE; 2216 + } else { 2217 + val &= ~TAP2_DISABLE; 2218 + val &= ~TAP3_DISABLE; 2219 + } 2220 + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); 2221 + 2222 + /* Program PORT_TX_DW2 */ 2223 + val = I915_READ(ICL_PORT_TX_DW2_LN0(port)); 2224 + val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | 2225 + RCOMP_SCALAR_MASK); 2226 + val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select); 2227 + val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select); 2228 + /* Program Rcomp scalar for every table entry */ 2229 + val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar); 2230 + I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val); 2231 + 2232 + /* Program PORT_TX_DW4 */ 2233 + /* We cannot write to GRP. It would overwrite individual loadgen. 
*/ 2234 + for (ln = 0; ln <= 3; ln++) { 2235 + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); 2236 + val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | 2237 + CURSOR_COEFF_MASK); 2238 + val |= ddi_translations[level].dw4_scaling; 2239 + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); 2240 + } 2241 + } 2242 + 2243 + static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, 2244 + u32 level, 2245 + enum intel_output_type type) 2246 + { 2247 + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 2248 + enum port port = encoder->port; 2249 + int width = 0; 2250 + int rate = 0; 2251 + u32 val; 2252 + int ln = 0; 2253 + 2254 + if (type == INTEL_OUTPUT_HDMI) { 2255 + width = 4; 2256 + /* Rate is always < than 6GHz for HDMI */ 2257 + } else { 2258 + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); 2259 + 2260 + width = intel_dp->lane_count; 2261 + rate = intel_dp->link_rate; 2262 + } 2263 + 2264 + /* 2265 + * 1. If port type is eDP or DP, 2266 + * set PORT_PCS_DW1 cmnkeeper_enable to 1b, 2267 + * else clear to 0b. 2268 + */ 2269 + val = I915_READ(ICL_PORT_PCS_DW1_LN0(port)); 2270 + if (type == INTEL_OUTPUT_HDMI) 2271 + val &= ~COMMON_KEEPER_EN; 2272 + else 2273 + val |= COMMON_KEEPER_EN; 2274 + I915_WRITE(ICL_PORT_PCS_DW1_GRP(port), val); 2275 + 2276 + /* 2. Program loadgen select */ 2277 + /* 2278 + * Program PORT_TX_DW4_LN depending on Bit rate and used lanes 2279 + * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1) 2280 + * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0) 2281 + * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) 2282 + */ 2283 + for (ln = 0; ln <= 3; ln++) { 2284 + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); 2285 + val &= ~LOADGEN_SELECT; 2286 + 2287 + if ((rate <= 600000 && width == 4 && ln >= 1) || 2288 + (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) { 2289 + val |= LOADGEN_SELECT; 2290 + } 2291 + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); 2292 + } 2293 + 2294 + /* 3. 
Set PORT_CL_DW5 SUS Clock Config to 11b */ 2295 + val = I915_READ(ICL_PORT_CL_DW5(port)); 2296 + val |= SUS_CLOCK_CONFIG; 2297 + I915_WRITE(ICL_PORT_CL_DW5(port), val); 2298 + 2299 + /* 4. Clear training enable to change swing values */ 2300 + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); 2301 + val &= ~TX_TRAINING_EN; 2302 + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); 2303 + 2304 + /* 5. Program swing and de-emphasis */ 2305 + icl_ddi_combo_vswing_program(dev_priv, level, port, type); 2306 + 2307 + /* 6. Set training enable to trigger update */ 2308 + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); 2309 + val |= TX_TRAINING_EN; 2310 + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); 2311 + } 2312 + 2313 + static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level, 2314 + enum intel_output_type type) 2315 + { 2316 + enum port port = encoder->port; 2317 + 2318 + if (port == PORT_A || port == PORT_B) 2319 + icl_combo_phy_ddi_vswing_sequence(encoder, level, type); 2320 + else 2321 + /* Not Implemented Yet */ 2322 + WARN_ON(1); 2323 + } 2324 + 2249 2325 static uint32_t translate_signal_level(int signal_levels) 2250 2326 { 2251 2327 int i; ··· 2417 2213 struct intel_encoder *encoder = &dport->base; 2418 2214 int level = intel_ddi_dp_level(intel_dp); 2419 2215 2420 - if (IS_CANNONLAKE(dev_priv)) 2216 + if (IS_ICELAKE(dev_priv)) 2217 + icl_ddi_vswing_sequence(encoder, level, encoder->type); 2218 + else if (IS_CANNONLAKE(dev_priv)) 2421 2219 cnl_ddi_vswing_sequence(encoder, level, encoder->type); 2422 2220 else 2423 2221 bxt_ddi_vswing_sequence(encoder, level, encoder->type); ··· 2440 2234 return DDI_BUF_TRANS_SELECT(level); 2441 2235 } 2442 2236 2237 + void icl_map_plls_to_ports(struct drm_crtc *crtc, 2238 + struct intel_crtc_state *crtc_state, 2239 + struct drm_atomic_state *old_state) 2240 + { 2241 + struct intel_shared_dpll *pll = crtc_state->shared_dpll; 2242 + struct drm_i915_private *dev_priv = to_i915(crtc->dev); 2243 + struct drm_connector_state 
*conn_state; 2244 + struct drm_connector *conn; 2245 + int i; 2246 + 2247 + for_each_new_connector_in_state(old_state, conn, conn_state, i) { 2248 + struct intel_encoder *encoder = 2249 + to_intel_encoder(conn_state->best_encoder); 2250 + enum port port = encoder->port; 2251 + uint32_t val; 2252 + 2253 + if (conn_state->crtc != crtc) 2254 + continue; 2255 + 2256 + mutex_lock(&dev_priv->dpll_lock); 2257 + 2258 + val = I915_READ(DPCLKA_CFGCR0_ICL); 2259 + WARN_ON((val & DPCLKA_CFGCR0_DDI_CLK_OFF(port)) == 0); 2260 + 2261 + if (port == PORT_A || port == PORT_B) { 2262 + val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); 2263 + val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); 2264 + I915_WRITE(DPCLKA_CFGCR0_ICL, val); 2265 + POSTING_READ(DPCLKA_CFGCR0_ICL); 2266 + } 2267 + 2268 + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); 2269 + I915_WRITE(DPCLKA_CFGCR0_ICL, val); 2270 + 2271 + mutex_unlock(&dev_priv->dpll_lock); 2272 + } 2273 + } 2274 + 2275 + void icl_unmap_plls_to_ports(struct drm_crtc *crtc, 2276 + struct intel_crtc_state *crtc_state, 2277 + struct drm_atomic_state *old_state) 2278 + { 2279 + struct drm_i915_private *dev_priv = to_i915(crtc->dev); 2280 + struct drm_connector_state *old_conn_state; 2281 + struct drm_connector *conn; 2282 + int i; 2283 + 2284 + for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { 2285 + struct intel_encoder *encoder = 2286 + to_intel_encoder(old_conn_state->best_encoder); 2287 + enum port port = encoder->port; 2288 + 2289 + if (old_conn_state->crtc != crtc) 2290 + continue; 2291 + 2292 + mutex_lock(&dev_priv->dpll_lock); 2293 + I915_WRITE(DPCLKA_CFGCR0_ICL, 2294 + I915_READ(DPCLKA_CFGCR0_ICL) | 2295 + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); 2296 + mutex_unlock(&dev_priv->dpll_lock); 2297 + } 2298 + } 2299 + 2443 2300 static void intel_ddi_clk_select(struct intel_encoder *encoder, 2444 2301 const struct intel_shared_dpll *pll) 2445 2302 { ··· 2515 2246 2516 2247 mutex_lock(&dev_priv->dpll_lock); 2517 2248 2518 - if 
(IS_CANNONLAKE(dev_priv)) { 2249 + if (IS_ICELAKE(dev_priv)) { 2250 + if (port >= PORT_C) 2251 + I915_WRITE(DDI_CLK_SEL(port), 2252 + icl_pll_to_ddi_pll_sel(encoder, pll)); 2253 + } else if (IS_CANNONLAKE(dev_priv)) { 2519 2254 /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ 2520 2255 val = I915_READ(DPCLKA_CFGCR0); 2521 2256 val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); ··· 2557 2284 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 2558 2285 enum port port = encoder->port; 2559 2286 2560 - if (IS_CANNONLAKE(dev_priv)) 2287 + if (IS_ICELAKE(dev_priv)) { 2288 + if (port >= PORT_C) 2289 + I915_WRITE(DDI_CLK_SEL(port), DDI_CLK_SEL_NONE); 2290 + } else if (IS_CANNONLAKE(dev_priv)) { 2561 2291 I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | 2562 2292 DPCLKA_CFGCR0_DDI_CLK_OFF(port)); 2563 - else if (IS_GEN9_BC(dev_priv)) 2293 + } else if (IS_GEN9_BC(dev_priv)) { 2564 2294 I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) | 2565 2295 DPLL_CTRL2_DDI_CLK_OFF(port)); 2566 - else if (INTEL_GEN(dev_priv) < 9) 2296 + } else if (INTEL_GEN(dev_priv) < 9) { 2567 2297 I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); 2298 + } 2568 2299 } 2569 2300 2570 2301 static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, ··· 2593 2316 2594 2317 intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); 2595 2318 2596 - if (IS_CANNONLAKE(dev_priv)) 2319 + if (IS_ICELAKE(dev_priv)) 2320 + icl_ddi_vswing_sequence(encoder, level, encoder->type); 2321 + else if (IS_CANNONLAKE(dev_priv)) 2597 2322 cnl_ddi_vswing_sequence(encoder, level, encoder->type); 2598 2323 else if (IS_GEN9_LP(dev_priv)) 2599 2324 bxt_ddi_vswing_sequence(encoder, level, encoder->type); ··· 2626 2347 2627 2348 intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); 2628 2349 2629 - if (IS_CANNONLAKE(dev_priv)) 2350 + if (IS_ICELAKE(dev_priv)) 2351 + icl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); 2352 + else if (IS_CANNONLAKE(dev_priv)) 2630 2353 
cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); 2631 2354 else if (IS_GEN9_LP(dev_priv)) 2632 2355 bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI);
+1 -1
drivers/gpu/drm/i915/intel_device_info.c
··· 848 848 gen9_sseu_info_init(dev_priv); 849 849 else if (INTEL_GEN(dev_priv) == 10) 850 850 gen10_sseu_info_init(dev_priv); 851 - else if (INTEL_INFO(dev_priv)->gen >= 11) 851 + else if (INTEL_GEN(dev_priv) >= 11) 852 852 gen11_sseu_info_init(dev_priv); 853 853 854 854 /* Initialize command stream timestamp frequency */
+215 -42
drivers/gpu/drm/i915/intel_display.c
··· 88 88 DRM_FORMAT_VYUY, 89 89 }; 90 90 91 + static const uint32_t skl_pri_planar_formats[] = { 92 + DRM_FORMAT_C8, 93 + DRM_FORMAT_RGB565, 94 + DRM_FORMAT_XRGB8888, 95 + DRM_FORMAT_XBGR8888, 96 + DRM_FORMAT_ARGB8888, 97 + DRM_FORMAT_ABGR8888, 98 + DRM_FORMAT_XRGB2101010, 99 + DRM_FORMAT_XBGR2101010, 100 + DRM_FORMAT_YUYV, 101 + DRM_FORMAT_YVYU, 102 + DRM_FORMAT_UYVY, 103 + DRM_FORMAT_VYUY, 104 + DRM_FORMAT_NV12, 105 + }; 106 + 91 107 static const uint64_t skl_format_modifiers_noccs[] = { 92 108 I915_FORMAT_MOD_Yf_TILED, 93 109 I915_FORMAT_MOD_Y_TILED, ··· 505 489 }; 506 490 507 491 static void 492 + skl_wa_528(struct drm_i915_private *dev_priv, int pipe, bool enable) 493 + { 494 + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) 495 + return; 496 + 497 + if (enable) 498 + I915_WRITE(CHICKEN_PIPESL_1(pipe), HSW_FBCQ_DIS); 499 + else 500 + I915_WRITE(CHICKEN_PIPESL_1(pipe), 0); 501 + } 502 + 503 + static void 508 504 skl_wa_clkgate(struct drm_i915_private *dev_priv, int pipe, bool enable) 509 505 { 510 - if (IS_SKYLAKE(dev_priv)) 506 + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) 511 507 return; 512 508 513 509 if (enable) ··· 3118 3090 return 0; 3119 3091 } 3120 3092 3093 + static int 3094 + skl_check_nv12_surface(const struct intel_crtc_state *crtc_state, 3095 + struct intel_plane_state *plane_state) 3096 + { 3097 + /* Display WA #1106 */ 3098 + if (plane_state->base.rotation != 3099 + (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90) && 3100 + plane_state->base.rotation != DRM_MODE_ROTATE_270) 3101 + return 0; 3102 + 3103 + /* 3104 + * src coordinates are rotated here. 
3105 + * We check height but report it as width 3106 + */ 3107 + if (((drm_rect_height(&plane_state->base.src) >> 16) % 4) != 0) { 3108 + DRM_DEBUG_KMS("src width must be multiple " 3109 + "of 4 for rotated NV12\n"); 3110 + return -EINVAL; 3111 + } 3112 + 3113 + return 0; 3114 + } 3115 + 3121 3116 static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) 3122 3117 { 3123 3118 const struct drm_framebuffer *fb = plane_state->base.fb; ··· 3224 3173 * the main surface setup depends on it. 3225 3174 */ 3226 3175 if (fb->format->format == DRM_FORMAT_NV12) { 3176 + ret = skl_check_nv12_surface(crtc_state, plane_state); 3177 + if (ret) 3178 + return ret; 3227 3179 ret = skl_check_nv12_aux_surface(plane_state); 3228 3180 if (ret) 3229 3181 return ret; ··· 3677 3623 u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, 3678 3624 const struct intel_plane_state *plane_state) 3679 3625 { 3626 + struct drm_i915_private *dev_priv = 3627 + to_i915(plane_state->base.plane->dev); 3680 3628 const struct drm_framebuffer *fb = plane_state->base.fb; 3681 3629 u32 plane_color_ctl = 0; 3682 3630 3683 - plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; 3684 - plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; 3631 + if (INTEL_GEN(dev_priv) < 11) { 3632 + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; 3633 + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; 3634 + } 3685 3635 plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; 3686 3636 plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format); 3687 3637 ··· 4848 4790 } 4849 4791 4850 4792 if (plane_scaler_check && pixel_format == DRM_FORMAT_NV12 && 4851 - (src_h < SKL_MIN_YUV_420_SRC_H || (src_w % 4) != 0 || 4852 - (src_h % 4) != 0)) { 4793 + (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) { 4853 4794 DRM_DEBUG_KMS("NV12: src dimensions not met\n"); 4854 4795 return -EINVAL; 4855 4796 } ··· 5195 5138 return !old_crtc_state->ips_enabled; 5196 5139 } 5197 5140 5141 + static bool 
needs_nv12_wa(struct drm_i915_private *dev_priv, 5142 + const struct intel_crtc_state *crtc_state) 5143 + { 5144 + if (!crtc_state->nv12_planes) 5145 + return false; 5146 + 5147 + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) 5148 + return false; 5149 + 5150 + if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || 5151 + IS_CANNONLAKE(dev_priv)) 5152 + return true; 5153 + 5154 + return false; 5155 + } 5156 + 5198 5157 static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) 5199 5158 { 5200 5159 struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); ··· 5235 5162 if (old_primary_state) { 5236 5163 struct drm_plane_state *new_primary_state = 5237 5164 drm_atomic_get_new_plane_state(old_state, primary); 5238 - struct drm_framebuffer *fb = new_primary_state->fb; 5239 5165 5240 5166 intel_fbc_post_update(crtc); 5241 5167 ··· 5242 5170 (needs_modeset(&pipe_config->base) || 5243 5171 !old_primary_state->visible)) 5244 5172 intel_post_enable_primary(&crtc->base, pipe_config); 5173 + } 5245 5174 5246 - /* Display WA 827 */ 5247 - if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || 5248 - IS_CANNONLAKE(dev_priv)) { 5249 - if (fb && fb->format->format == DRM_FORMAT_NV12) 5250 - skl_wa_clkgate(dev_priv, crtc->pipe, false); 5251 - } 5252 - 5175 + /* Display WA 827 */ 5176 + if (needs_nv12_wa(dev_priv, old_crtc_state) && 5177 + !needs_nv12_wa(dev_priv, pipe_config)) { 5178 + skl_wa_clkgate(dev_priv, crtc->pipe, false); 5179 + skl_wa_528(dev_priv, crtc->pipe, false); 5253 5180 } 5254 5181 } 5255 5182 ··· 5273 5202 struct intel_plane_state *new_primary_state = 5274 5203 intel_atomic_get_new_plane_state(old_intel_state, 5275 5204 to_intel_plane(primary)); 5276 - struct drm_framebuffer *fb = new_primary_state->base.fb; 5277 - 5278 - /* Display WA 827 */ 5279 - if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || 5280 - IS_CANNONLAKE(dev_priv)) { 5281 - if (fb && fb->format->format == DRM_FORMAT_NV12) 5282 - 
skl_wa_clkgate(dev_priv, crtc->pipe, true); 5283 - } 5284 5205 5285 5206 intel_fbc_pre_update(crtc, pipe_config, new_primary_state); 5286 5207 /* ··· 5282 5219 if (IS_GEN2(dev_priv) && old_primary_state->visible && 5283 5220 (modeset || !new_primary_state->base.visible)) 5284 5221 intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); 5222 + } 5223 + 5224 + /* Display WA 827 */ 5225 + if (!needs_nv12_wa(dev_priv, old_crtc_state) && 5226 + needs_nv12_wa(dev_priv, pipe_config)) { 5227 + skl_wa_clkgate(dev_priv, crtc->pipe, true); 5228 + skl_wa_528(dev_priv, crtc->pipe, true); 5285 5229 } 5286 5230 5287 5231 /* ··· 5625 5555 if (intel_crtc->config->shared_dpll) 5626 5556 intel_enable_shared_dpll(intel_crtc); 5627 5557 5558 + if (INTEL_GEN(dev_priv) >= 11) 5559 + icl_map_plls_to_ports(crtc, pipe_config, old_state); 5560 + 5628 5561 if (intel_crtc_has_dp_encoder(intel_crtc->config)) 5629 5562 intel_dp_set_m_n(intel_crtc, M1_N1); 5630 5563 ··· 5825 5752 intel_ddi_disable_pipe_clock(intel_crtc->config); 5826 5753 5827 5754 intel_encoders_post_disable(crtc, old_crtc_state, old_state); 5755 + 5756 + if (INTEL_GEN(dev_priv) >= 11) 5757 + icl_unmap_plls_to_ports(crtc, old_crtc_state, old_state); 5828 5758 } 5829 5759 5830 5760 static void i9xx_pfit_enable(struct intel_crtc *crtc) ··· 11219 11143 (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && 11220 11144 !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); 11221 11145 11222 - #define PIPE_CONF_CHECK_X(name) \ 11146 + #define PIPE_CONF_CHECK_X(name) do { \ 11223 11147 if (current_config->name != pipe_config->name) { \ 11224 11148 pipe_config_err(adjust, __stringify(name), \ 11225 11149 "(expected 0x%08x, found 0x%08x)\n", \ 11226 11150 current_config->name, \ 11227 11151 pipe_config->name); \ 11228 11152 ret = false; \ 11229 - } 11153 + } \ 11154 + } while (0) 11230 11155 11231 - #define PIPE_CONF_CHECK_I(name) \ 11156 + #define PIPE_CONF_CHECK_I(name) do { \ 11232 11157 if 
(current_config->name != pipe_config->name) { \ 11233 11158 pipe_config_err(adjust, __stringify(name), \ 11234 11159 "(expected %i, found %i)\n", \ 11235 11160 current_config->name, \ 11236 11161 pipe_config->name); \ 11237 11162 ret = false; \ 11238 - } 11163 + } \ 11164 + } while (0) 11239 11165 11240 - #define PIPE_CONF_CHECK_BOOL(name) \ 11166 + #define PIPE_CONF_CHECK_BOOL(name) do { \ 11241 11167 if (current_config->name != pipe_config->name) { \ 11242 11168 pipe_config_err(adjust, __stringify(name), \ 11243 11169 "(expected %s, found %s)\n", \ 11244 11170 yesno(current_config->name), \ 11245 11171 yesno(pipe_config->name)); \ 11246 11172 ret = false; \ 11247 - } 11173 + } \ 11174 + } while (0) 11248 11175 11249 11176 /* 11250 11177 * Checks state where we only read out the enabling, but not the entire 11251 11178 * state itself (like full infoframes or ELD for audio). These states 11252 11179 * require a full modeset on bootup to fix up. 11253 11180 */ 11254 - #define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) \ 11181 + #define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) do { \ 11255 11182 if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ 11256 11183 PIPE_CONF_CHECK_BOOL(name); \ 11257 11184 } else { \ ··· 11263 11184 yesno(current_config->name), \ 11264 11185 yesno(pipe_config->name)); \ 11265 11186 ret = false; \ 11266 - } 11187 + } \ 11188 + } while (0) 11267 11189 11268 - #define PIPE_CONF_CHECK_P(name) \ 11190 + #define PIPE_CONF_CHECK_P(name) do { \ 11269 11191 if (current_config->name != pipe_config->name) { \ 11270 11192 pipe_config_err(adjust, __stringify(name), \ 11271 11193 "(expected %p, found %p)\n", \ 11272 11194 current_config->name, \ 11273 11195 pipe_config->name); \ 11274 11196 ret = false; \ 11275 - } 11197 + } \ 11198 + } while (0) 11276 11199 11277 - #define PIPE_CONF_CHECK_M_N(name) \ 11200 + #define PIPE_CONF_CHECK_M_N(name) do { \ 11278 11201 if (!intel_compare_link_m_n(&current_config->name, \ 11279 11202 
&pipe_config->name,\ 11280 11203 adjust)) { \ ··· 11294 11213 pipe_config->name.link_m, \ 11295 11214 pipe_config->name.link_n); \ 11296 11215 ret = false; \ 11297 - } 11216 + } \ 11217 + } while (0) 11298 11218 11299 11219 /* This is required for BDW+ where there is only one set of registers for 11300 11220 * switching between high and low RR. 11301 11221 * This macro can be used whenever a comparison has to be made between one 11302 11222 * hw state and multiple sw state variables. 11303 11223 */ 11304 - #define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) \ 11224 + #define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) do { \ 11305 11225 if (!intel_compare_link_m_n(&current_config->name, \ 11306 11226 &pipe_config->name, adjust) && \ 11307 11227 !intel_compare_link_m_n(&current_config->alt_name, \ ··· 11327 11245 pipe_config->name.link_m, \ 11328 11246 pipe_config->name.link_n); \ 11329 11247 ret = false; \ 11330 - } 11248 + } \ 11249 + } while (0) 11331 11250 11332 - #define PIPE_CONF_CHECK_FLAGS(name, mask) \ 11251 + #define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ 11333 11252 if ((current_config->name ^ pipe_config->name) & (mask)) { \ 11334 11253 pipe_config_err(adjust, __stringify(name), \ 11335 11254 "(%x) (expected %i, found %i)\n", \ ··· 11338 11255 current_config->name & (mask), \ 11339 11256 pipe_config->name & (mask)); \ 11340 11257 ret = false; \ 11341 - } 11258 + } \ 11259 + } while (0) 11342 11260 11343 - #define PIPE_CONF_CHECK_CLOCK_FUZZY(name) \ 11261 + #define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ 11344 11262 if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ 11345 11263 pipe_config_err(adjust, __stringify(name), \ 11346 11264 "(expected %i, found %i)\n", \ 11347 11265 current_config->name, \ 11348 11266 pipe_config->name); \ 11349 11267 ret = false; \ 11350 - } 11268 + } \ 11269 + } while (0) 11351 11270 11352 11271 #define PIPE_CONF_QUIRK(quirk) \ 11353 11272 ((current_config->quirks | pipe_config->quirks) & (quirk)) ··· 11458 
11373 PIPE_CONF_CHECK_X(dpll_hw_state.pll9); 11459 11374 PIPE_CONF_CHECK_X(dpll_hw_state.pll10); 11460 11375 PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); 11376 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_refclkin_ctl); 11377 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_coreclkctl1); 11378 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_hsclkctl); 11379 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div0); 11380 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div1); 11381 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_lf); 11382 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_frac_lock); 11383 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_ssc); 11384 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_bias); 11385 + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_tdc_coldst_bias); 11461 11386 11462 11387 PIPE_CONF_CHECK_X(dsi_pll.ctrl); 11463 11388 PIPE_CONF_CHECK_X(dsi_pll.div); ··· 11531 11436 skl_ddb_get_hw_state(dev_priv, &hw_ddb); 11532 11437 sw_ddb = &dev_priv->wm.skl_hw.ddb; 11533 11438 11439 + if (INTEL_GEN(dev_priv) >= 11) 11440 + if (hw_ddb.enabled_slices != sw_ddb->enabled_slices) 11441 + DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n", 11442 + sw_ddb->enabled_slices, 11443 + hw_ddb.enabled_slices); 11534 11444 /* planes */ 11535 11445 for_each_universal_plane(dev_priv, pipe, plane) { 11536 11446 hw_plane_wm = &hw_wm.planes[plane]; ··· 12342 12242 bool progress; 12343 12243 enum pipe pipe; 12344 12244 int i; 12245 + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; 12246 + u8 required_slices = intel_state->wm_results.ddb.enabled_slices; 12345 12247 12346 12248 const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {}; 12347 12249 ··· 12351 12249 /* ignore allocations for crtc's that have been turned off. 
*/ 12352 12250 if (new_crtc_state->active) 12353 12251 entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb; 12252 + 12253 + /* If 2nd DBuf slice required, enable it here */ 12254 + if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices) 12255 + icl_dbuf_slices_update(dev_priv, required_slices); 12354 12256 12355 12257 /* 12356 12258 * Whenever the number of active pipes changes, we need to make sure we ··· 12406 12300 progress = true; 12407 12301 } 12408 12302 } while (progress); 12303 + 12304 + /* If 2nd DBuf slice is no more required disable it */ 12305 + if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices) 12306 + icl_dbuf_slices_update(dev_priv, required_slices); 12409 12307 } 12410 12308 12411 12309 static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) ··· 12873 12763 intel_unpin_fb_vma(vma, old_plane_state->flags); 12874 12764 } 12875 12765 12766 + static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) 12767 + { 12768 + struct i915_sched_attr attr = { 12769 + .priority = I915_PRIORITY_DISPLAY, 12770 + }; 12771 + 12772 + i915_gem_object_wait_priority(obj, 0, &attr); 12773 + } 12774 + 12876 12775 /** 12877 12776 * intel_prepare_plane_fb - Prepare fb for usage on plane 12878 12777 * @plane: drm plane to prepare for ··· 12958 12839 12959 12840 ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); 12960 12841 12961 - i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); 12842 + fb_obj_bump_render_priority(obj); 12962 12843 12963 12844 mutex_unlock(&dev_priv->drm.struct_mutex); 12964 12845 i915_gem_object_unpin_pages(obj); ··· 13234 13115 case DRM_FORMAT_YVYU: 13235 13116 case DRM_FORMAT_UYVY: 13236 13117 case DRM_FORMAT_VYUY: 13118 + case DRM_FORMAT_NV12: 13237 13119 if (modifier == I915_FORMAT_MOD_Yf_TILED) 13238 13120 return true; 13239 13121 /* fall through */ ··· 13442 13322 return pipe == PIPE_A && plane_id == PLANE_PRIMARY; 13443 13323 } 13444 13324 13325 + bool 
skl_plane_has_planar(struct drm_i915_private *dev_priv, 13326 + enum pipe pipe, enum plane_id plane_id) 13327 + { 13328 + if (plane_id == PLANE_PRIMARY) { 13329 + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) 13330 + return false; 13331 + else if ((INTEL_GEN(dev_priv) == 9 && pipe == PIPE_C) && 13332 + !IS_GEMINILAKE(dev_priv)) 13333 + return false; 13334 + } else if (plane_id >= PLANE_SPRITE0) { 13335 + if (plane_id == PLANE_CURSOR) 13336 + return false; 13337 + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) == 10) { 13338 + if (plane_id != PLANE_SPRITE0) 13339 + return false; 13340 + } else { 13341 + if (plane_id != PLANE_SPRITE0 || pipe == PIPE_C || 13342 + IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) 13343 + return false; 13344 + } 13345 + } 13346 + return true; 13347 + } 13348 + 13445 13349 static struct intel_plane * 13446 13350 intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) 13447 13351 { ··· 13526 13382 primary->check_plane = intel_check_primary_plane; 13527 13383 13528 13384 if (INTEL_GEN(dev_priv) >= 9) { 13529 - intel_primary_formats = skl_primary_formats; 13530 - num_formats = ARRAY_SIZE(skl_primary_formats); 13385 + if (skl_plane_has_planar(dev_priv, pipe, PLANE_PRIMARY)) { 13386 + intel_primary_formats = skl_pri_planar_formats; 13387 + num_formats = ARRAY_SIZE(skl_pri_planar_formats); 13388 + } else { 13389 + intel_primary_formats = skl_primary_formats; 13390 + num_formats = ARRAY_SIZE(skl_primary_formats); 13391 + } 13531 13392 13532 13393 if (skl_plane_has_ccs(dev_priv, pipe, PLANE_PRIMARY)) 13533 13394 modifiers = skl_format_modifiers_ccs; ··· 14353 14204 goto err; 14354 14205 } 14355 14206 break; 14207 + case DRM_FORMAT_NV12: 14208 + if (mode_cmd->modifier[0] == I915_FORMAT_MOD_Y_TILED_CCS || 14209 + mode_cmd->modifier[0] == I915_FORMAT_MOD_Yf_TILED_CCS) { 14210 + DRM_DEBUG_KMS("RC not to be enabled with NV12\n"); 14211 + goto err; 14212 + } 14213 + if (INTEL_GEN(dev_priv) < 9 || IS_SKYLAKE(dev_priv) || 14214 
+ IS_BROXTON(dev_priv)) { 14215 + DRM_DEBUG_KMS("unsupported pixel format: %s\n", 14216 + drm_get_format_name(mode_cmd->pixel_format, 14217 + &format_name)); 14218 + goto err; 14219 + } 14220 + break; 14356 14221 default: 14357 14222 DRM_DEBUG_KMS("unsupported pixel format: %s\n", 14358 14223 drm_get_format_name(mode_cmd->pixel_format, &format_name)); ··· 14378 14215 goto err; 14379 14216 14380 14217 drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); 14218 + 14219 + if (fb->format->format == DRM_FORMAT_NV12 && 14220 + (fb->width < SKL_MIN_YUV_420_SRC_W || 14221 + fb->height < SKL_MIN_YUV_420_SRC_H || 14222 + (fb->width % 4) != 0 || (fb->height % 4) != 0)) { 14223 + DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); 14224 + return -EINVAL; 14225 + } 14381 14226 14382 14227 for (i = 0; i < fb->format->num_planes; i++) { 14383 14228 u32 stride_alignment; ··· 15441 15270 memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); 15442 15271 if (crtc_state->base.active) { 15443 15272 intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); 15273 + crtc->base.mode.hdisplay = crtc_state->pipe_src_w; 15274 + crtc->base.mode.vdisplay = crtc_state->pipe_src_h; 15444 15275 intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); 15445 15276 WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); 15446 15277
+4
drivers/gpu/drm/i915/intel_display.h
··· 218 218 for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ 219 219 for_each_if((__mask) & BIT(__p)) 220 220 221 + #define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ 222 + for ((__t) = 0; (__t) < I915_MAX_TRANSCODERS; (__t)++) \ 223 + for_each_if ((__mask) & (1 << (__t))) 224 + 221 225 #define for_each_universal_plane(__dev_priv, __pipe, __p) \ 222 226 for ((__p) = 0; \ 223 227 (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \
+164 -133
drivers/gpu/drm/i915/intel_dp.c
··· 1647 1647 } 1648 1648 } 1649 1649 1650 + struct link_config_limits { 1651 + int min_clock, max_clock; 1652 + int min_lane_count, max_lane_count; 1653 + int min_bpp, max_bpp; 1654 + }; 1655 + 1650 1656 static int intel_dp_compute_bpp(struct intel_dp *intel_dp, 1651 1657 struct intel_crtc_state *pipe_config) 1652 1658 { 1659 + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); 1660 + struct intel_connector *intel_connector = intel_dp->attached_connector; 1653 1661 int bpp, bpc; 1654 1662 1655 1663 bpp = pipe_config->pipe_bpp; ··· 1666 1658 if (bpc > 0) 1667 1659 bpp = min(bpp, 3*bpc); 1668 1660 1669 - /* For DP Compliance we override the computed bpp for the pipe */ 1670 - if (intel_dp->compliance.test_data.bpc != 0) { 1671 - pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; 1672 - pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; 1673 - DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", 1674 - pipe_config->pipe_bpp); 1661 + if (intel_dp_is_edp(intel_dp)) { 1662 + /* Get bpp from vbt only for panels that dont have bpp in edid */ 1663 + if (intel_connector->base.display_info.bpc == 0 && 1664 + dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp) { 1665 + DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", 1666 + dev_priv->vbt.edp.bpp); 1667 + bpp = dev_priv->vbt.edp.bpp; 1668 + } 1675 1669 } 1670 + 1676 1671 return bpp; 1677 1672 } 1678 1673 ··· 1696 1685 return bres; 1697 1686 } 1698 1687 1688 + /* Adjust link config limits based on compliance test requests. 
*/ 1689 + static void 1690 + intel_dp_adjust_compliance_config(struct intel_dp *intel_dp, 1691 + struct intel_crtc_state *pipe_config, 1692 + struct link_config_limits *limits) 1693 + { 1694 + /* For DP Compliance we override the computed bpp for the pipe */ 1695 + if (intel_dp->compliance.test_data.bpc != 0) { 1696 + int bpp = 3 * intel_dp->compliance.test_data.bpc; 1697 + 1698 + limits->min_bpp = limits->max_bpp = bpp; 1699 + pipe_config->dither_force_disable = bpp == 6 * 3; 1700 + 1701 + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", bpp); 1702 + } 1703 + 1704 + /* Use values requested by Compliance Test Request */ 1705 + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { 1706 + int index; 1707 + 1708 + /* Validate the compliance test data since max values 1709 + * might have changed due to link train fallback. 1710 + */ 1711 + if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, 1712 + intel_dp->compliance.test_lane_count)) { 1713 + index = intel_dp_rate_index(intel_dp->common_rates, 1714 + intel_dp->num_common_rates, 1715 + intel_dp->compliance.test_link_rate); 1716 + if (index >= 0) 1717 + limits->min_clock = limits->max_clock = index; 1718 + limits->min_lane_count = limits->max_lane_count = 1719 + intel_dp->compliance.test_lane_count; 1720 + } 1721 + } 1722 + } 1723 + 1724 + /* Optimize link config in order: max bpp, min clock, min lanes */ 1725 + static bool 1726 + intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, 1727 + struct intel_crtc_state *pipe_config, 1728 + const struct link_config_limits *limits) 1729 + { 1730 + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; 1731 + int bpp, clock, lane_count; 1732 + int mode_rate, link_clock, link_avail; 1733 + 1734 + for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { 1735 + mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, 1736 + bpp); 1737 + 1738 + for (clock = limits->min_clock; clock <= limits->max_clock; 
clock++) { 1739 + for (lane_count = limits->min_lane_count; 1740 + lane_count <= limits->max_lane_count; 1741 + lane_count <<= 1) { 1742 + link_clock = intel_dp->common_rates[clock]; 1743 + link_avail = intel_dp_max_data_rate(link_clock, 1744 + lane_count); 1745 + 1746 + if (mode_rate <= link_avail) { 1747 + pipe_config->lane_count = lane_count; 1748 + pipe_config->pipe_bpp = bpp; 1749 + pipe_config->port_clock = link_clock; 1750 + 1751 + return true; 1752 + } 1753 + } 1754 + } 1755 + } 1756 + 1757 + return false; 1758 + } 1759 + 1760 + static bool 1761 + intel_dp_compute_link_config(struct intel_encoder *encoder, 1762 + struct intel_crtc_state *pipe_config) 1763 + { 1764 + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; 1765 + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); 1766 + struct link_config_limits limits; 1767 + int common_len; 1768 + 1769 + common_len = intel_dp_common_len_rate_limit(intel_dp, 1770 + intel_dp->max_link_rate); 1771 + 1772 + /* No common link rates between source and sink */ 1773 + WARN_ON(common_len <= 0); 1774 + 1775 + limits.min_clock = 0; 1776 + limits.max_clock = common_len - 1; 1777 + 1778 + limits.min_lane_count = 1; 1779 + limits.max_lane_count = intel_dp_max_lane_count(intel_dp); 1780 + 1781 + limits.min_bpp = 6 * 3; 1782 + limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); 1783 + 1784 + if (intel_dp_is_edp(intel_dp)) { 1785 + /* 1786 + * Use the maximum clock and number of lanes the eDP panel 1787 + * advertizes being capable of. The panels are generally 1788 + * designed to support only a single clock and lane 1789 + * configuration, and typically these values correspond to the 1790 + * native resolution of the panel. 
1791 + */ 1792 + limits.min_lane_count = limits.max_lane_count; 1793 + limits.min_clock = limits.max_clock; 1794 + } 1795 + 1796 + intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits); 1797 + 1798 + DRM_DEBUG_KMS("DP link computation with max lane count %i " 1799 + "max rate %d max bpp %d pixel clock %iKHz\n", 1800 + limits.max_lane_count, 1801 + intel_dp->common_rates[limits.max_clock], 1802 + limits.max_bpp, adjusted_mode->crtc_clock); 1803 + 1804 + /* 1805 + * Optimize for slow and wide. This is the place to add alternative 1806 + * optimization policy. 1807 + */ 1808 + if (!intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits)) 1809 + return false; 1810 + 1811 + DRM_DEBUG_KMS("DP lane count %d clock %d bpp %d\n", 1812 + pipe_config->lane_count, pipe_config->port_clock, 1813 + pipe_config->pipe_bpp); 1814 + 1815 + DRM_DEBUG_KMS("DP link rate required %i available %i\n", 1816 + intel_dp_link_required(adjusted_mode->crtc_clock, 1817 + pipe_config->pipe_bpp), 1818 + intel_dp_max_data_rate(pipe_config->port_clock, 1819 + pipe_config->lane_count)); 1820 + 1821 + return true; 1822 + } 1823 + 1699 1824 bool 1700 1825 intel_dp_compute_config(struct intel_encoder *encoder, 1701 1826 struct intel_crtc_state *pipe_config, ··· 1845 1698 struct intel_connector *intel_connector = intel_dp->attached_connector; 1846 1699 struct intel_digital_connector_state *intel_conn_state = 1847 1700 to_intel_digital_connector_state(conn_state); 1848 - int lane_count, clock; 1849 - int min_lane_count = 1; 1850 - int max_lane_count = intel_dp_max_lane_count(intel_dp); 1851 - /* Conveniently, the link BW constants become indices with a shift...*/ 1852 - int min_clock = 0; 1853 - int max_clock; 1854 - int bpp, mode_rate; 1855 - int link_avail, link_clock; 1856 - int common_len; 1857 - uint8_t link_bw, rate_select; 1858 1701 bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, 1859 1702 DP_DPCD_QUIRK_LIMITED_M_N); 1860 - 1861 - common_len = 
intel_dp_common_len_rate_limit(intel_dp, 1862 - intel_dp->max_link_rate); 1863 - 1864 - /* No common link rates between source and sink */ 1865 - WARN_ON(common_len <= 0); 1866 - 1867 - max_clock = common_len - 1; 1868 1703 1869 1704 if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A) 1870 1705 pipe_config->has_pch_encoder = true; ··· 1873 1744 1874 1745 if (INTEL_GEN(dev_priv) >= 9) { 1875 1746 int ret; 1747 + 1876 1748 ret = skl_update_scaler_crtc(pipe_config); 1877 1749 if (ret) 1878 1750 return ret; ··· 1894 1764 if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) 1895 1765 return false; 1896 1766 1897 - /* Use values requested by Compliance Test Request */ 1898 - if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { 1899 - int index; 1767 + if (!intel_dp_compute_link_config(encoder, pipe_config)) 1768 + return false; 1900 1769 1901 - /* Validate the compliance test data since max values 1902 - * might have changed due to link train fallback. 1903 - */ 1904 - if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, 1905 - intel_dp->compliance.test_lane_count)) { 1906 - index = intel_dp_rate_index(intel_dp->common_rates, 1907 - intel_dp->num_common_rates, 1908 - intel_dp->compliance.test_link_rate); 1909 - if (index >= 0) 1910 - min_clock = max_clock = index; 1911 - min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; 1912 - } 1913 - } 1914 - DRM_DEBUG_KMS("DP link computation with max lane count %i " 1915 - "max bw %d pixel clock %iKHz\n", 1916 - max_lane_count, intel_dp->common_rates[max_clock], 1917 - adjusted_mode->crtc_clock); 1918 - 1919 - /* Walk through all bpp values. Luckily they're all nicely spaced with 2 1920 - * bpc in between. 
*/ 1921 - bpp = intel_dp_compute_bpp(intel_dp, pipe_config); 1922 - if (intel_dp_is_edp(intel_dp)) { 1923 - 1924 - /* Get bpp from vbt only for panels that dont have bpp in edid */ 1925 - if (intel_connector->base.display_info.bpc == 0 && 1926 - (dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp)) { 1927 - DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", 1928 - dev_priv->vbt.edp.bpp); 1929 - bpp = dev_priv->vbt.edp.bpp; 1930 - } 1931 - 1932 - /* 1933 - * Use the maximum clock and number of lanes the eDP panel 1934 - * advertizes being capable of. The panels are generally 1935 - * designed to support only a single clock and lane 1936 - * configuration, and typically these values correspond to the 1937 - * native resolution of the panel. 1938 - */ 1939 - min_lane_count = max_lane_count; 1940 - min_clock = max_clock; 1941 - } 1942 - 1943 - for (; bpp >= 6*3; bpp -= 2*3) { 1944 - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, 1945 - bpp); 1946 - 1947 - for (clock = min_clock; clock <= max_clock; clock++) { 1948 - for (lane_count = min_lane_count; 1949 - lane_count <= max_lane_count; 1950 - lane_count <<= 1) { 1951 - 1952 - link_clock = intel_dp->common_rates[clock]; 1953 - link_avail = intel_dp_max_data_rate(link_clock, 1954 - lane_count); 1955 - 1956 - if (mode_rate <= link_avail) { 1957 - goto found; 1958 - } 1959 - } 1960 - } 1961 - } 1962 - 1963 - return false; 1964 - 1965 - found: 1966 1770 if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { 1967 1771 /* 1968 1772 * See: ··· 1904 1840 * VESA DisplayPort Ver.1.2a - 5.1.1.1 Video Colorimetry 1905 1841 */ 1906 1842 pipe_config->limited_color_range = 1907 - bpp != 18 && 1843 + pipe_config->pipe_bpp != 18 && 1908 1844 drm_default_rgb_quant_range(adjusted_mode) == 1909 1845 HDMI_QUANTIZATION_RANGE_LIMITED; 1910 1846 } else { ··· 1912 1848 intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; 1913 1849 } 1914 1850 1915 - pipe_config->lane_count = lane_count; 
1916 - 1917 - pipe_config->pipe_bpp = bpp; 1918 - pipe_config->port_clock = intel_dp->common_rates[clock]; 1919 - 1920 - intel_dp_compute_rate(intel_dp, pipe_config->port_clock, 1921 - &link_bw, &rate_select); 1922 - 1923 - DRM_DEBUG_KMS("DP link bw %02x rate select %02x lane count %d clock %d bpp %d\n", 1924 - link_bw, rate_select, pipe_config->lane_count, 1925 - pipe_config->port_clock, bpp); 1926 - DRM_DEBUG_KMS("DP link bw required %i available %i\n", 1927 - mode_rate, link_avail); 1928 - 1929 - intel_link_compute_m_n(bpp, lane_count, 1851 + intel_link_compute_m_n(pipe_config->pipe_bpp, pipe_config->lane_count, 1930 1852 adjusted_mode->crtc_clock, 1931 1853 pipe_config->port_clock, 1932 1854 &pipe_config->dp_m_n, ··· 1921 1871 if (intel_connector->panel.downclock_mode != NULL && 1922 1872 dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { 1923 1873 pipe_config->has_drrs = true; 1924 - intel_link_compute_m_n(bpp, lane_count, 1925 - intel_connector->panel.downclock_mode->clock, 1926 - pipe_config->port_clock, 1927 - &pipe_config->dp_m2_n2, 1928 - reduce_m_n); 1929 - } 1930 - 1931 - /* 1932 - * DPLL0 VCO may need to be adjusted to get the correct 1933 - * clock for eDP. This will affect cdclk as well. 1934 - */ 1935 - if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { 1936 - int vco; 1937 - 1938 - switch (pipe_config->port_clock / 2) { 1939 - case 108000: 1940 - case 216000: 1941 - vco = 8640000; 1942 - break; 1943 - default: 1944 - vco = 8100000; 1945 - break; 1946 - } 1947 - 1948 - to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; 1874 + intel_link_compute_m_n(pipe_config->pipe_bpp, 1875 + pipe_config->lane_count, 1876 + intel_connector->panel.downclock_mode->clock, 1877 + pipe_config->port_clock, 1878 + &pipe_config->dp_m2_n2, 1879 + reduce_m_n); 1949 1880 } 1950 1881 1951 1882 if (!HAS_DDI(dev_priv))
+653 -3
drivers/gpu/drm/i915/intel_dpll_mgr.c
··· 2218 2218 struct skl_wrpll_params *wrpll_params) 2219 2219 { 2220 2220 u32 afe_clock = clock * 5; 2221 + uint32_t ref_clock; 2221 2222 u32 dco_min = 7998000; 2222 2223 u32 dco_max = 10000000; 2223 2224 u32 dco_mid = (dco_min + dco_max) / 2; ··· 2251 2250 2252 2251 cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); 2253 2252 2254 - cnl_wrpll_params_populate(wrpll_params, best_dco, 2255 - dev_priv->cdclk.hw.ref, pdiv, qdiv, kdiv); 2253 + ref_clock = dev_priv->cdclk.hw.ref; 2254 + 2255 + /* 2256 + * For ICL, the spec states: if reference frequency is 38.4, use 19.2 2257 + * because the DPLL automatically divides that by 2. 2258 + */ 2259 + if (IS_ICELAKE(dev_priv) && ref_clock == 38400) 2260 + ref_clock = 19200; 2261 + 2262 + cnl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, pdiv, qdiv, 2263 + kdiv); 2256 2264 2257 2265 return true; 2258 2266 } ··· 2409 2399 .dump_hw_state = cnl_dump_hw_state, 2410 2400 }; 2411 2401 2402 + /* 2403 + * These values alrea already adjusted: they're the bits we write to the 2404 + * registers, not the logical values. 
2405 + */ 2406 + static const struct skl_wrpll_params icl_dp_combo_pll_24MHz_values[] = { 2407 + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [0]: 5.4 */ 2408 + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, 2409 + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [1]: 2.7 */ 2410 + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, 2411 + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [2]: 1.62 */ 2412 + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, 2413 + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [3]: 3.24 */ 2414 + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, 2415 + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [4]: 2.16 */ 2416 + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, 2417 + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [5]: 4.32 */ 2418 + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, 2419 + { .dco_integer = 0x195, .dco_fraction = 0x0000, /* [6]: 6.48 */ 2420 + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, 2421 + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [7]: 8.1 */ 2422 + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, 2423 + }; 2424 + 2425 + /* Also used for 38.4 MHz values. 
*/ 2426 + static const struct skl_wrpll_params icl_dp_combo_pll_19_2MHz_values[] = { 2427 + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [0]: 5.4 */ 2428 + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, 2429 + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [1]: 2.7 */ 2430 + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, 2431 + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [2]: 1.62 */ 2432 + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, 2433 + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [3]: 3.24 */ 2434 + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, 2435 + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [4]: 2.16 */ 2436 + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, 2437 + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [5]: 4.32 */ 2438 + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, 2439 + { .dco_integer = 0x1FA, .dco_fraction = 0x2000, /* [6]: 6.48 */ 2440 + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, 2441 + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [7]: 8.1 */ 2442 + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, 2443 + }; 2444 + 2445 + static bool icl_calc_dp_combo_pll(struct drm_i915_private *dev_priv, int clock, 2446 + struct skl_wrpll_params *pll_params) 2447 + { 2448 + const struct skl_wrpll_params *params; 2449 + 2450 + params = dev_priv->cdclk.hw.ref == 24000 ? 
2451 + icl_dp_combo_pll_24MHz_values : 2452 + icl_dp_combo_pll_19_2MHz_values; 2453 + 2454 + switch (clock) { 2455 + case 540000: 2456 + *pll_params = params[0]; 2457 + break; 2458 + case 270000: 2459 + *pll_params = params[1]; 2460 + break; 2461 + case 162000: 2462 + *pll_params = params[2]; 2463 + break; 2464 + case 324000: 2465 + *pll_params = params[3]; 2466 + break; 2467 + case 216000: 2468 + *pll_params = params[4]; 2469 + break; 2470 + case 432000: 2471 + *pll_params = params[5]; 2472 + break; 2473 + case 648000: 2474 + *pll_params = params[6]; 2475 + break; 2476 + case 810000: 2477 + *pll_params = params[7]; 2478 + break; 2479 + default: 2480 + MISSING_CASE(clock); 2481 + return false; 2482 + } 2483 + 2484 + return true; 2485 + } 2486 + 2487 + static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, 2488 + struct intel_encoder *encoder, int clock, 2489 + struct intel_dpll_hw_state *pll_state) 2490 + { 2491 + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 2492 + uint32_t cfgcr0, cfgcr1; 2493 + struct skl_wrpll_params pll_params = { 0 }; 2494 + bool ret; 2495 + 2496 + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) 2497 + ret = cnl_ddi_calculate_wrpll(clock, dev_priv, &pll_params); 2498 + else 2499 + ret = icl_calc_dp_combo_pll(dev_priv, clock, &pll_params); 2500 + 2501 + if (!ret) 2502 + return false; 2503 + 2504 + cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params.dco_fraction) | 2505 + pll_params.dco_integer; 2506 + 2507 + cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params.qdiv_ratio) | 2508 + DPLL_CFGCR1_QDIV_MODE(pll_params.qdiv_mode) | 2509 + DPLL_CFGCR1_KDIV(pll_params.kdiv) | 2510 + DPLL_CFGCR1_PDIV(pll_params.pdiv) | 2511 + DPLL_CFGCR1_CENTRAL_FREQ_8400; 2512 + 2513 + pll_state->cfgcr0 = cfgcr0; 2514 + pll_state->cfgcr1 = cfgcr1; 2515 + return true; 2516 + } 2517 + 2518 + static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) 2519 + { 2520 + return id - DPLL_ID_ICL_MGPLL1 + PORT_C; 2521 + } 2522 + 2523 + static enum 
intel_dpll_id icl_port_to_mg_pll_id(enum port port) 2524 + { 2525 + return port - PORT_C + DPLL_ID_ICL_MGPLL1; 2526 + } 2527 + 2528 + static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, 2529 + uint32_t *target_dco_khz, 2530 + struct intel_dpll_hw_state *state) 2531 + { 2532 + uint32_t dco_min_freq, dco_max_freq; 2533 + int div1_vals[] = {7, 5, 3, 2}; 2534 + unsigned int i; 2535 + int div2; 2536 + 2537 + dco_min_freq = is_dp ? 8100000 : use_ssc ? 8000000 : 7992000; 2538 + dco_max_freq = is_dp ? 8100000 : 10000000; 2539 + 2540 + for (i = 0; i < ARRAY_SIZE(div1_vals); i++) { 2541 + int div1 = div1_vals[i]; 2542 + 2543 + for (div2 = 10; div2 > 0; div2--) { 2544 + int dco = div1 * div2 * clock_khz * 5; 2545 + int a_divratio, tlinedrv, inputsel, hsdiv; 2546 + 2547 + if (dco < dco_min_freq || dco > dco_max_freq) 2548 + continue; 2549 + 2550 + if (div2 >= 2) { 2551 + a_divratio = is_dp ? 10 : 5; 2552 + tlinedrv = 2; 2553 + } else { 2554 + a_divratio = 5; 2555 + tlinedrv = 0; 2556 + } 2557 + inputsel = is_dp ? 
0 : 1; 2558 + 2559 + switch (div1) { 2560 + default: 2561 + MISSING_CASE(div1); 2562 + case 2: 2563 + hsdiv = 0; 2564 + break; 2565 + case 3: 2566 + hsdiv = 1; 2567 + break; 2568 + case 5: 2569 + hsdiv = 2; 2570 + break; 2571 + case 7: 2572 + hsdiv = 3; 2573 + break; 2574 + } 2575 + 2576 + *target_dco_khz = dco; 2577 + 2578 + state->mg_refclkin_ctl = MG_REFCLKIN_CTL_OD_2_MUX(1); 2579 + 2580 + state->mg_clktop2_coreclkctl1 = 2581 + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(a_divratio); 2582 + 2583 + state->mg_clktop2_hsclkctl = 2584 + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(tlinedrv) | 2585 + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(inputsel) | 2586 + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(hsdiv) | 2587 + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(div2); 2588 + 2589 + return true; 2590 + } 2591 + } 2592 + 2593 + return false; 2594 + } 2595 + 2596 + /* 2597 + * The specification for this function uses real numbers, so the math had to be 2598 + * adapted to integer-only calculation, that's why it looks so different. 2599 + */ 2600 + static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, 2601 + struct intel_encoder *encoder, int clock, 2602 + struct intel_dpll_hw_state *pll_state) 2603 + { 2604 + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 2605 + int refclk_khz = dev_priv->cdclk.hw.ref; 2606 + uint32_t dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; 2607 + uint32_t iref_ndiv, iref_trim, iref_pulse_w; 2608 + uint32_t prop_coeff, int_coeff; 2609 + uint32_t tdc_targetcnt, feedfwgain; 2610 + uint64_t ssc_stepsize, ssc_steplen, ssc_steplog; 2611 + uint64_t tmp; 2612 + bool use_ssc = false; 2613 + bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); 2614 + 2615 + if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz, 2616 + pll_state)) { 2617 + DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock); 2618 + return false; 2619 + } 2620 + 2621 + m1div = 2; 2622 + m2div_int = dco_khz / (refclk_khz * m1div); 2623 + if (m2div_int > 255) { 2624 + m1div 
= 4; 2625 + m2div_int = dco_khz / (refclk_khz * m1div); 2626 + if (m2div_int > 255) { 2627 + DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n", 2628 + clock); 2629 + return false; 2630 + } 2631 + } 2632 + m2div_rem = dco_khz % (refclk_khz * m1div); 2633 + 2634 + tmp = (uint64_t)m2div_rem * (1 << 22); 2635 + do_div(tmp, refclk_khz * m1div); 2636 + m2div_frac = tmp; 2637 + 2638 + switch (refclk_khz) { 2639 + case 19200: 2640 + iref_ndiv = 1; 2641 + iref_trim = 28; 2642 + iref_pulse_w = 1; 2643 + break; 2644 + case 24000: 2645 + iref_ndiv = 1; 2646 + iref_trim = 25; 2647 + iref_pulse_w = 2; 2648 + break; 2649 + case 38400: 2650 + iref_ndiv = 2; 2651 + iref_trim = 28; 2652 + iref_pulse_w = 1; 2653 + break; 2654 + default: 2655 + MISSING_CASE(refclk_khz); 2656 + return false; 2657 + } 2658 + 2659 + /* 2660 + * tdc_res = 0.000003 2661 + * tdc_targetcnt = int(2 / (tdc_res * 8 * 50 * 1.1) / refclk_mhz + 0.5) 2662 + * 2663 + * The multiplication by 1000 is due to refclk MHz to KHz conversion. It 2664 + * was supposed to be a division, but we rearranged the operations of 2665 + * the formula to avoid early divisions so we don't multiply the 2666 + * rounding errors. 2667 + * 2668 + * 0.000003 * 8 * 50 * 1.1 = 0.00132, also known as 132 / 100000, which 2669 + * we also rearrange to work with integers. 2670 + * 2671 + * The 0.5 transformed to 5 results in a multiplication by 10 and the 2672 + * last division by 10. 2673 + */ 2674 + tdc_targetcnt = (2 * 1000 * 100000 * 10 / (132 * refclk_khz) + 5) / 10; 2675 + 2676 + /* 2677 + * Here we divide dco_khz by 10 in order to allow the dividend to fit in 2678 + * 32 bits. That's not a problem since we round the division down 2679 + * anyway. 2680 + */ 2681 + feedfwgain = (use_ssc || m2div_rem > 0) ? 
2682 + m1div * 1000000 * 100 / (dco_khz * 3 / 10) : 0; 2683 + 2684 + if (dco_khz >= 9000000) { 2685 + prop_coeff = 5; 2686 + int_coeff = 10; 2687 + } else { 2688 + prop_coeff = 4; 2689 + int_coeff = 8; 2690 + } 2691 + 2692 + if (use_ssc) { 2693 + tmp = (uint64_t)dco_khz * 47 * 32; 2694 + do_div(tmp, refclk_khz * m1div * 10000); 2695 + ssc_stepsize = tmp; 2696 + 2697 + tmp = (uint64_t)dco_khz * 1000; 2698 + ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32); 2699 + } else { 2700 + ssc_stepsize = 0; 2701 + ssc_steplen = 0; 2702 + } 2703 + ssc_steplog = 4; 2704 + 2705 + pll_state->mg_pll_div0 = (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | 2706 + MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | 2707 + MG_PLL_DIV0_FBDIV_INT(m2div_int); 2708 + 2709 + pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | 2710 + MG_PLL_DIV1_DITHER_DIV_2 | 2711 + MG_PLL_DIV1_NDIVRATIO(1) | 2712 + MG_PLL_DIV1_FBPREDIV(m1div); 2713 + 2714 + pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | 2715 + MG_PLL_LF_AFCCNTSEL_512 | 2716 + MG_PLL_LF_GAINCTRL(1) | 2717 + MG_PLL_LF_INT_COEFF(int_coeff) | 2718 + MG_PLL_LF_PROP_COEFF(prop_coeff); 2719 + 2720 + pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | 2721 + MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | 2722 + MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | 2723 + MG_PLL_FRAC_LOCK_DCODITHEREN | 2724 + MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); 2725 + if (use_ssc || m2div_rem > 0) 2726 + pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; 2727 + 2728 + pll_state->mg_pll_ssc = (use_ssc ? 
MG_PLL_SSC_EN : 0) | 2729 + MG_PLL_SSC_TYPE(2) | 2730 + MG_PLL_SSC_STEPLENGTH(ssc_steplen) | 2731 + MG_PLL_SSC_STEPNUM(ssc_steplog) | 2732 + MG_PLL_SSC_FLLEN | 2733 + MG_PLL_SSC_STEPSIZE(ssc_stepsize); 2734 + 2735 + pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART; 2736 + 2737 + if (refclk_khz != 38400) { 2738 + pll_state->mg_pll_tdc_coldst_bias |= 2739 + MG_PLL_TDC_COLDST_IREFINT_EN | 2740 + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | 2741 + MG_PLL_TDC_COLDST_COLDSTART | 2742 + MG_PLL_TDC_TDCOVCCORR_EN | 2743 + MG_PLL_TDC_TDCSEL(3); 2744 + 2745 + pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | 2746 + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | 2747 + MG_PLL_BIAS_BIAS_BONUS(10) | 2748 + MG_PLL_BIAS_BIASCAL_EN | 2749 + MG_PLL_BIAS_CTRIM(12) | 2750 + MG_PLL_BIAS_VREF_RDAC(4) | 2751 + MG_PLL_BIAS_IREFTRIM(iref_trim); 2752 + } 2753 + 2754 + return true; 2755 + } 2756 + 2757 + static struct intel_shared_dpll * 2758 + icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, 2759 + struct intel_encoder *encoder) 2760 + { 2761 + struct intel_shared_dpll *pll; 2762 + struct intel_dpll_hw_state pll_state = {}; 2763 + enum port port = encoder->port; 2764 + enum intel_dpll_id min, max; 2765 + int clock = crtc_state->port_clock; 2766 + bool ret; 2767 + 2768 + switch (port) { 2769 + case PORT_A: 2770 + case PORT_B: 2771 + min = DPLL_ID_ICL_DPLL0; 2772 + max = DPLL_ID_ICL_DPLL1; 2773 + ret = icl_calc_dpll_state(crtc_state, encoder, clock, 2774 + &pll_state); 2775 + break; 2776 + case PORT_C: 2777 + case PORT_D: 2778 + case PORT_E: 2779 + case PORT_F: 2780 + min = icl_port_to_mg_pll_id(port); 2781 + max = min; 2782 + ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, 2783 + &pll_state); 2784 + break; 2785 + default: 2786 + MISSING_CASE(port); 2787 + return NULL; 2788 + } 2789 + 2790 + if (!ret) { 2791 + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); 2792 + return NULL; 2793 + } 2794 + 2795 + crtc_state->dpll_hw_state = pll_state; 
2796 + 2797 + pll = intel_find_shared_dpll(crtc, crtc_state, min, max); 2798 + if (!pll) { 2799 + DRM_DEBUG_KMS("No PLL selected\n"); 2800 + return NULL; 2801 + } 2802 + 2803 + intel_reference_shared_dpll(pll, crtc_state); 2804 + 2805 + return pll; 2806 + } 2807 + 2808 + static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) 2809 + { 2810 + switch (id) { 2811 + default: 2812 + MISSING_CASE(id); 2813 + case DPLL_ID_ICL_DPLL0: 2814 + case DPLL_ID_ICL_DPLL1: 2815 + return CNL_DPLL_ENABLE(id); 2816 + case DPLL_ID_ICL_MGPLL1: 2817 + case DPLL_ID_ICL_MGPLL2: 2818 + case DPLL_ID_ICL_MGPLL3: 2819 + case DPLL_ID_ICL_MGPLL4: 2820 + return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); 2821 + } 2822 + } 2823 + 2824 + static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, 2825 + struct intel_shared_dpll *pll, 2826 + struct intel_dpll_hw_state *hw_state) 2827 + { 2828 + const enum intel_dpll_id id = pll->info->id; 2829 + uint32_t val; 2830 + enum port port; 2831 + bool ret = false; 2832 + 2833 + if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) 2834 + return false; 2835 + 2836 + val = I915_READ(icl_pll_id_to_enable_reg(id)); 2837 + if (!(val & PLL_ENABLE)) 2838 + goto out; 2839 + 2840 + switch (id) { 2841 + case DPLL_ID_ICL_DPLL0: 2842 + case DPLL_ID_ICL_DPLL1: 2843 + hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); 2844 + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); 2845 + break; 2846 + case DPLL_ID_ICL_MGPLL1: 2847 + case DPLL_ID_ICL_MGPLL2: 2848 + case DPLL_ID_ICL_MGPLL3: 2849 + case DPLL_ID_ICL_MGPLL4: 2850 + port = icl_mg_pll_id_to_port(id); 2851 + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); 2852 + hw_state->mg_clktop2_coreclkctl1 = 2853 + I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); 2854 + hw_state->mg_clktop2_hsclkctl = 2855 + I915_READ(MG_CLKTOP2_HSCLKCTL(port)); 2856 + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); 2857 + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); 2858 + hw_state->mg_pll_lf 
= I915_READ(MG_PLL_LF(port)); 2859 + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); 2860 + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); 2861 + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); 2862 + hw_state->mg_pll_tdc_coldst_bias = 2863 + I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); 2864 + break; 2865 + default: 2866 + MISSING_CASE(id); 2867 + } 2868 + 2869 + ret = true; 2870 + out: 2871 + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); 2872 + return ret; 2873 + } 2874 + 2875 + static void icl_dpll_write(struct drm_i915_private *dev_priv, 2876 + struct intel_shared_dpll *pll) 2877 + { 2878 + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; 2879 + const enum intel_dpll_id id = pll->info->id; 2880 + 2881 + I915_WRITE(ICL_DPLL_CFGCR0(id), hw_state->cfgcr0); 2882 + I915_WRITE(ICL_DPLL_CFGCR1(id), hw_state->cfgcr1); 2883 + POSTING_READ(ICL_DPLL_CFGCR1(id)); 2884 + } 2885 + 2886 + static void icl_mg_pll_write(struct drm_i915_private *dev_priv, 2887 + struct intel_shared_dpll *pll) 2888 + { 2889 + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; 2890 + enum port port = icl_mg_pll_id_to_port(pll->info->id); 2891 + 2892 + I915_WRITE(MG_REFCLKIN_CTL(port), hw_state->mg_refclkin_ctl); 2893 + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), 2894 + hw_state->mg_clktop2_coreclkctl1); 2895 + I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), hw_state->mg_clktop2_hsclkctl); 2896 + I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); 2897 + I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); 2898 + I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); 2899 + I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); 2900 + I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); 2901 + I915_WRITE(MG_PLL_BIAS(port), hw_state->mg_pll_bias); 2902 + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), 2903 + hw_state->mg_pll_tdc_coldst_bias); 2904 + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); 2905 + } 2906 + 2907 + static void icl_pll_enable(struct 
drm_i915_private *dev_priv, 2908 + struct intel_shared_dpll *pll) 2909 + { 2910 + const enum intel_dpll_id id = pll->info->id; 2911 + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); 2912 + uint32_t val; 2913 + 2914 + val = I915_READ(enable_reg); 2915 + val |= PLL_POWER_ENABLE; 2916 + I915_WRITE(enable_reg, val); 2917 + 2918 + /* 2919 + * The spec says we need to "wait" but it also says it should be 2920 + * immediate. 2921 + */ 2922 + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, 2923 + PLL_POWER_STATE, 1)) 2924 + DRM_ERROR("PLL %d Power not enabled\n", id); 2925 + 2926 + switch (id) { 2927 + case DPLL_ID_ICL_DPLL0: 2928 + case DPLL_ID_ICL_DPLL1: 2929 + icl_dpll_write(dev_priv, pll); 2930 + break; 2931 + case DPLL_ID_ICL_MGPLL1: 2932 + case DPLL_ID_ICL_MGPLL2: 2933 + case DPLL_ID_ICL_MGPLL3: 2934 + case DPLL_ID_ICL_MGPLL4: 2935 + icl_mg_pll_write(dev_priv, pll); 2936 + break; 2937 + default: 2938 + MISSING_CASE(id); 2939 + } 2940 + 2941 + /* 2942 + * DVFS pre sequence would be here, but in our driver the cdclk code 2943 + * paths should already be setting the appropriate voltage, hence we do 2944 + * nothign here. 2945 + */ 2946 + 2947 + val = I915_READ(enable_reg); 2948 + val |= PLL_ENABLE; 2949 + I915_WRITE(enable_reg, val); 2950 + 2951 + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, PLL_LOCK, 2952 + 1)) /* 600us actually. */ 2953 + DRM_ERROR("PLL %d not locked\n", id); 2954 + 2955 + /* DVFS post sequence would be here. See the comment above. */ 2956 + } 2957 + 2958 + static void icl_pll_disable(struct drm_i915_private *dev_priv, 2959 + struct intel_shared_dpll *pll) 2960 + { 2961 + const enum intel_dpll_id id = pll->info->id; 2962 + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); 2963 + uint32_t val; 2964 + 2965 + /* The first steps are done by intel_ddi_post_disable(). 
*/ 2966 + 2967 + /* 2968 + * DVFS pre sequence would be here, but in our driver the cdclk code 2969 + * paths should already be setting the appropriate voltage, hence we do 2970 + * nothign here. 2971 + */ 2972 + 2973 + val = I915_READ(enable_reg); 2974 + val &= ~PLL_ENABLE; 2975 + I915_WRITE(enable_reg, val); 2976 + 2977 + /* Timeout is actually 1us. */ 2978 + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, 0, 1)) 2979 + DRM_ERROR("PLL %d locked\n", id); 2980 + 2981 + /* DVFS post sequence would be here. See the comment above. */ 2982 + 2983 + val = I915_READ(enable_reg); 2984 + val &= ~PLL_POWER_ENABLE; 2985 + I915_WRITE(enable_reg, val); 2986 + 2987 + /* 2988 + * The spec says we need to "wait" but it also says it should be 2989 + * immediate. 2990 + */ 2991 + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, 0, 2992 + 1)) 2993 + DRM_ERROR("PLL %d Power not disabled\n", id); 2994 + } 2995 + 2996 + static void icl_dump_hw_state(struct drm_i915_private *dev_priv, 2997 + struct intel_dpll_hw_state *hw_state) 2998 + { 2999 + DRM_DEBUG_KMS("dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, " 3000 + "mg_refclkin_ctl: 0x%x, hg_clktop2_coreclkctl1: 0x%x, " 3001 + "mg_clktop2_hsclkctl: 0x%x, mg_pll_div0: 0x%x, " 3002 + "mg_pll_div2: 0x%x, mg_pll_lf: 0x%x, " 3003 + "mg_pll_frac_lock: 0x%x, mg_pll_ssc: 0x%x, " 3004 + "mg_pll_bias: 0x%x, mg_pll_tdc_coldst_bias: 0x%x\n", 3005 + hw_state->cfgcr0, hw_state->cfgcr1, 3006 + hw_state->mg_refclkin_ctl, 3007 + hw_state->mg_clktop2_coreclkctl1, 3008 + hw_state->mg_clktop2_hsclkctl, 3009 + hw_state->mg_pll_div0, 3010 + hw_state->mg_pll_div1, 3011 + hw_state->mg_pll_lf, 3012 + hw_state->mg_pll_frac_lock, 3013 + hw_state->mg_pll_ssc, 3014 + hw_state->mg_pll_bias, 3015 + hw_state->mg_pll_tdc_coldst_bias); 3016 + } 3017 + 3018 + static const struct intel_shared_dpll_funcs icl_pll_funcs = { 3019 + .enable = icl_pll_enable, 3020 + .disable = icl_pll_disable, 3021 + .get_hw_state = icl_pll_get_hw_state, 3022 + }; 3023 
+ 3024 + static const struct dpll_info icl_plls[] = { 3025 + { "DPLL 0", &icl_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, 3026 + { "DPLL 1", &icl_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, 3027 + { "MG PLL 1", &icl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, 3028 + { "MG PLL 2", &icl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, 3029 + { "MG PLL 3", &icl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, 3030 + { "MG PLL 4", &icl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, 3031 + { }, 3032 + }; 3033 + 3034 + static const struct intel_dpll_mgr icl_pll_mgr = { 3035 + .dpll_info = icl_plls, 3036 + .get_dpll = icl_get_dpll, 3037 + .dump_hw_state = icl_dump_hw_state, 3038 + }; 3039 + 2412 3040 /** 2413 3041 * intel_shared_dpll_init - Initialize shared DPLLs 2414 3042 * @dev: drm device ··· 3060 2412 const struct dpll_info *dpll_info; 3061 2413 int i; 3062 2414 3063 - if (IS_CANNONLAKE(dev_priv)) 2415 + if (IS_ICELAKE(dev_priv)) 2416 + dpll_mgr = &icl_pll_mgr; 2417 + else if (IS_CANNONLAKE(dev_priv)) 3064 2418 dpll_mgr = &cnl_pll_mgr; 3065 2419 else if (IS_GEN9_BC(dev_priv)) 3066 2420 dpll_mgr = &skl_pll_mgr;
+41
drivers/gpu/drm/i915/intel_dpll_mgr.h
··· 103 103 * @DPLL_ID_SKL_DPLL3: SKL and later DPLL3 104 104 */ 105 105 DPLL_ID_SKL_DPLL3 = 3, 106 + 107 + 108 + /** 109 + * @DPLL_ID_ICL_DPLL0: ICL combo PHY DPLL0 110 + */ 111 + DPLL_ID_ICL_DPLL0 = 0, 112 + /** 113 + * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1 114 + */ 115 + DPLL_ID_ICL_DPLL1 = 1, 116 + /** 117 + * @DPLL_ID_ICL_MGPLL1: ICL MG PLL 1 port 1 (C) 118 + */ 119 + DPLL_ID_ICL_MGPLL1 = 2, 120 + /** 121 + * @DPLL_ID_ICL_MGPLL2: ICL MG PLL 1 port 2 (D) 122 + */ 123 + DPLL_ID_ICL_MGPLL2 = 3, 124 + /** 125 + * @DPLL_ID_ICL_MGPLL3: ICL MG PLL 1 port 3 (E) 126 + */ 127 + DPLL_ID_ICL_MGPLL3 = 4, 128 + /** 129 + * @DPLL_ID_ICL_MGPLL4: ICL MG PLL 1 port 4 (F) 130 + */ 131 + DPLL_ID_ICL_MGPLL4 = 5, 106 132 }; 107 133 #define I915_NUM_PLLS 6 108 134 ··· 161 135 /* bxt */ 162 136 uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, 163 137 pcsdw12; 138 + 139 + /* 140 + * ICL uses the following, already defined: 141 + * uint32_t cfgcr0, cfgcr1; 142 + */ 143 + uint32_t mg_refclkin_ctl; 144 + uint32_t mg_clktop2_coreclkctl1; 145 + uint32_t mg_clktop2_hsclkctl; 146 + uint32_t mg_pll_div0; 147 + uint32_t mg_pll_div1; 148 + uint32_t mg_pll_lf; 149 + uint32_t mg_pll_frac_lock; 150 + uint32_t mg_pll_ssc; 151 + uint32_t mg_pll_bias; 152 + uint32_t mg_pll_tdc_coldst_bias; 164 153 }; 165 154 166 155 /**
+24 -2
drivers/gpu/drm/i915/intel_drv.h
··· 56 56 for (;;) { \ 57 57 const bool expired__ = ktime_after(ktime_get_raw(), end__); \ 58 58 OP; \ 59 + /* Guarantee COND check prior to timeout */ \ 60 + barrier(); \ 59 61 if (COND) { \ 60 62 ret__ = 0; \ 61 63 break; \ ··· 98 96 u64 now = local_clock(); \ 99 97 if (!(ATOMIC)) \ 100 98 preempt_enable(); \ 99 + /* Guarantee COND check prior to timeout */ \ 100 + barrier(); \ 101 101 if (COND) { \ 102 102 ret = 0; \ 103 103 break; \ ··· 143 139 144 140 #define KHz(x) (1000 * (x)) 145 141 #define MHz(x) KHz(1000 * (x)) 142 + 143 + #define KBps(x) (1000 * (x)) 144 + #define MBps(x) KBps(1000 * (x)) 145 + #define GBps(x) ((u64)1000 * MBps((x))) 146 146 147 147 /* 148 148 * Display related stuff ··· 890 882 891 883 /* bitmask of visible planes (enum plane_id) */ 892 884 u8 active_planes; 885 + u8 nv12_planes; 893 886 894 887 /* HDMI scrambling status */ 895 888 bool hdmi_scrambling; ··· 1338 1329 void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); 1339 1330 1340 1331 /* i915_irq.c */ 1332 + bool gen11_reset_one_iir(struct drm_i915_private * const i915, 1333 + const unsigned int bank, 1334 + const unsigned int bit); 1341 1335 void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); 1342 1336 void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); 1343 1337 void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); ··· 1410 1398 u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); 1411 1399 int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, 1412 1400 bool enable); 1401 + void icl_map_plls_to_ports(struct drm_crtc *crtc, 1402 + struct intel_crtc_state *crtc_state, 1403 + struct drm_atomic_state *old_state); 1404 + void icl_unmap_plls_to_ports(struct drm_crtc *crtc, 1405 + struct intel_crtc_state *crtc_state, 1406 + struct drm_atomic_state *old_state); 1413 1407 1414 1408 unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, 1415 1409 int plane, unsigned int 
height); ··· 1598 1580 void bxt_disable_dc9(struct drm_i915_private *dev_priv); 1599 1581 void gen9_enable_dc5(struct drm_i915_private *dev_priv); 1600 1582 unsigned int skl_cdclk_get_vco(unsigned int freq); 1601 - void skl_enable_dc6(struct drm_i915_private *dev_priv); 1602 - void skl_disable_dc6(struct drm_i915_private *dev_priv); 1603 1583 void intel_dp_get_m_n(struct intel_crtc *crtc, 1604 1584 struct intel_crtc_state *pipe_config); 1605 1585 void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n); ··· 1917 1901 unsigned frontbuffer_bits); 1918 1902 void intel_psr_compute_config(struct intel_dp *intel_dp, 1919 1903 struct intel_crtc_state *crtc_state); 1904 + void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug); 1905 + void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); 1920 1906 1921 1907 /* intel_runtime_pm.c */ 1922 1908 int intel_power_domains_init(struct drm_i915_private *); ··· 1942 1924 enum intel_display_power_domain domain); 1943 1925 void intel_display_power_put(struct drm_i915_private *dev_priv, 1944 1926 enum intel_display_power_domain domain); 1927 + void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, 1928 + u8 req_slices); 1945 1929 1946 1930 static inline void 1947 1931 assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) ··· 2082 2062 bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, 2083 2063 enum pipe pipe, enum plane_id plane_id); 2084 2064 bool intel_format_is_yuv(uint32_t format); 2065 + bool skl_plane_has_planar(struct drm_i915_private *dev_priv, 2066 + enum pipe pipe, enum plane_id plane_id); 2085 2067 2086 2068 /* intel_tv.c */ 2087 2069 void intel_tv_init(struct drm_i915_private *dev_priv);
+18 -16
drivers/gpu/drm/i915/intel_dsi_vbt.c
··· 647 647 /* prepare count */ 648 648 prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); 649 649 650 + if (prepare_cnt > PREPARE_CNT_MAX) { 651 + DRM_DEBUG_KMS("prepare count too high %u\n", prepare_cnt); 652 + prepare_cnt = PREPARE_CNT_MAX; 653 + } 654 + 650 655 /* exit zero count */ 651 656 exit_zero_cnt = DIV_ROUND_UP( 652 657 (ths_prepare_hszero - ths_prepare_ns) * ui_den, ··· 667 662 if (exit_zero_cnt < (55 * ui_den / ui_num) && (55 * ui_den) % ui_num) 668 663 exit_zero_cnt += 1; 669 664 665 + if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) { 666 + DRM_DEBUG_KMS("exit zero count too high %u\n", exit_zero_cnt); 667 + exit_zero_cnt = EXIT_ZERO_CNT_MAX; 668 + } 669 + 670 670 /* clk zero count */ 671 671 clk_zero_cnt = DIV_ROUND_UP( 672 672 (tclk_prepare_clkzero - ths_prepare_ns) 673 673 * ui_den, ui_num * mul); 674 674 675 + if (clk_zero_cnt > CLK_ZERO_CNT_MAX) { 676 + DRM_DEBUG_KMS("clock zero count too high %u\n", clk_zero_cnt); 677 + clk_zero_cnt = CLK_ZERO_CNT_MAX; 678 + } 679 + 675 680 /* trail count */ 676 681 tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); 677 682 trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); 678 683 679 - if (prepare_cnt > PREPARE_CNT_MAX || 680 - exit_zero_cnt > EXIT_ZERO_CNT_MAX || 681 - clk_zero_cnt > CLK_ZERO_CNT_MAX || 682 - trail_cnt > TRAIL_CNT_MAX) 683 - DRM_DEBUG_DRIVER("Values crossing maximum limits, restricting to max values\n"); 684 - 685 - if (prepare_cnt > PREPARE_CNT_MAX) 686 - prepare_cnt = PREPARE_CNT_MAX; 687 - 688 - if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) 689 - exit_zero_cnt = EXIT_ZERO_CNT_MAX; 690 - 691 - if (clk_zero_cnt > CLK_ZERO_CNT_MAX) 692 - clk_zero_cnt = CLK_ZERO_CNT_MAX; 693 - 694 - if (trail_cnt > TRAIL_CNT_MAX) 684 + if (trail_cnt > TRAIL_CNT_MAX) { 685 + DRM_DEBUG_KMS("trail count too high %u\n", trail_cnt); 695 686 trail_cnt = TRAIL_CNT_MAX; 687 + } 696 688 697 689 /* B080 */ 698 690 intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 |
+92 -57
drivers/gpu/drm/i915/intel_engine_cs.c
··· 306 306 /* Nothing to do here, execute in order of dependencies */ 307 307 engine->schedule = NULL; 308 308 309 - spin_lock_init(&engine->stats.lock); 309 + seqlock_init(&engine->stats.lock); 310 310 311 311 ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); 312 312 ··· 451 451 GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); 452 452 } 453 453 454 - static void intel_engine_init_timeline(struct intel_engine_cs *engine) 455 - { 456 - engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; 457 - } 458 - 459 454 static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) 460 455 { 461 456 i915_gem_batch_pool_init(&engine->batch_pool, engine); ··· 458 463 459 464 static bool csb_force_mmio(struct drm_i915_private *i915) 460 465 { 461 - /* 462 - * IOMMU adds unpredictable latency causing the CSB write (from the 463 - * GPU into the HWSP) to only be visible some time after the interrupt 464 - * (missed breadcrumb syndrome). 465 - */ 466 - if (intel_vtd_active()) 467 - return true; 468 - 469 466 /* Older GVT emulation depends upon intercepting CSB mmio */ 470 467 if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) 471 - return true; 472 - 473 - if (IS_CANNONLAKE(i915)) 474 468 return true; 475 469 476 470 return false; ··· 491 507 */ 492 508 void intel_engine_setup_common(struct intel_engine_cs *engine) 493 509 { 510 + i915_timeline_init(engine->i915, &engine->timeline, engine->name); 511 + 494 512 intel_engine_init_execlist(engine); 495 - intel_engine_init_timeline(engine); 496 513 intel_engine_init_hangcheck(engine); 497 514 intel_engine_init_batch_pool(engine); 498 515 intel_engine_init_cmd_parser(engine); ··· 526 541 goto err_unref; 527 542 528 543 engine->scratch = vma; 529 - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", 530 - engine->name, i915_ggtt_offset(vma)); 531 544 return 0; 532 545 533 546 err_unref: ··· 619 636 engine->status_page.vma = vma; 620 637 engine->status_page.ggtt_offset = 
i915_ggtt_offset(vma); 621 638 engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); 622 - 623 - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", 624 - engine->name, i915_ggtt_offset(vma)); 625 639 return 0; 626 640 627 641 err_unpin: ··· 670 690 * be available. To avoid this we always pin the default 671 691 * context. 672 692 */ 673 - ring = engine->context_pin(engine, engine->i915->kernel_context); 693 + ring = intel_context_pin(engine->i915->kernel_context, engine); 674 694 if (IS_ERR(ring)) 675 695 return PTR_ERR(ring); 676 696 ··· 679 699 * we can interrupt the engine at any time. 680 700 */ 681 701 if (engine->i915->preempt_context) { 682 - ring = engine->context_pin(engine, 683 - engine->i915->preempt_context); 702 + ring = intel_context_pin(engine->i915->preempt_context, engine); 684 703 if (IS_ERR(ring)) { 685 704 ret = PTR_ERR(ring); 686 705 goto err_unpin_kernel; ··· 703 724 intel_engine_fini_breadcrumbs(engine); 704 725 err_unpin_preempt: 705 726 if (engine->i915->preempt_context) 706 - engine->context_unpin(engine, engine->i915->preempt_context); 727 + intel_context_unpin(engine->i915->preempt_context, engine); 707 728 err_unpin_kernel: 708 - engine->context_unpin(engine, engine->i915->kernel_context); 729 + intel_context_unpin(engine->i915->kernel_context, engine); 709 730 return ret; 710 731 } 711 732 ··· 733 754 i915_gem_object_put(engine->default_state); 734 755 735 756 if (engine->i915->preempt_context) 736 - engine->context_unpin(engine, engine->i915->preempt_context); 737 - engine->context_unpin(engine, engine->i915->kernel_context); 757 + intel_context_unpin(engine->i915->preempt_context, engine); 758 + intel_context_unpin(engine->i915->kernel_context, engine); 759 + 760 + i915_timeline_fini(&engine->timeline); 738 761 } 739 762 740 763 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) ··· 989 1008 * the last request that remains in the timeline. 
When idle, it is 990 1009 * the last executed context as tracked by retirement. 991 1010 */ 992 - rq = __i915_gem_active_peek(&engine->timeline->last_request); 1011 + rq = __i915_gem_active_peek(&engine->timeline.last_request); 993 1012 if (rq) 994 1013 return rq->ctx == kernel_context; 995 1014 else ··· 1062 1081 for_each_engine(engine, i915, id) { 1063 1082 if (engine->unpark) 1064 1083 engine->unpark(engine); 1084 + 1085 + intel_engine_init_hangcheck(engine); 1065 1086 } 1066 1087 } 1067 1088 ··· 1096 1113 return which; 1097 1114 } 1098 1115 1116 + static int print_sched_attr(struct drm_i915_private *i915, 1117 + const struct i915_sched_attr *attr, 1118 + char *buf, int x, int len) 1119 + { 1120 + if (attr->priority == I915_PRIORITY_INVALID) 1121 + return x; 1122 + 1123 + x += snprintf(buf + x, len - x, 1124 + " prio=%d", attr->priority); 1125 + 1126 + return x; 1127 + } 1128 + 1099 1129 static void print_request(struct drm_printer *m, 1100 1130 struct i915_request *rq, 1101 1131 const char *prefix) 1102 1132 { 1103 1133 const char *name = rq->fence.ops->get_timeline_name(&rq->fence); 1134 + char buf[80]; 1135 + int x = 0; 1104 1136 1105 - drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, 1137 + x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); 1138 + 1139 + drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n", 1140 + prefix, 1106 1141 rq->global_seqno, 1107 1142 i915_request_completed(rq) ? "!" 
: "", 1108 1143 rq->fence.context, rq->fence.seqno, 1109 - rq->priotree.priority, 1144 + buf, 1110 1145 jiffies_to_msecs(jiffies - rq->emitted_jiffies), 1111 1146 name); 1112 1147 } ··· 1267 1266 char hdr[80]; 1268 1267 1269 1268 snprintf(hdr, sizeof(hdr), 1270 - "\t\tELSP[%d] count=%d, rq: ", 1271 - idx, count); 1269 + "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", 1270 + idx, count, 1271 + i915_ggtt_offset(rq->ring->vma)); 1272 1272 print_request(m, rq, hdr); 1273 1273 } else { 1274 1274 drm_printf(m, "\t\tELSP[%d] idle\n", idx); ··· 1291 1289 struct drm_printer *m, 1292 1290 const char *header, ...) 1293 1291 { 1292 + const int MAX_REQUESTS_TO_SHOW = 8; 1294 1293 struct intel_breadcrumbs * const b = &engine->breadcrumbs; 1295 1294 const struct intel_engine_execlists * const execlists = &engine->execlists; 1296 1295 struct i915_gpu_error * const error = &engine->i915->gpu_error; 1297 - struct i915_request *rq; 1296 + struct i915_request *rq, *last; 1298 1297 struct rb_node *rb; 1298 + int count; 1299 1299 1300 1300 if (header) { 1301 1301 va_list ap; ··· 1310 1306 if (i915_terminally_wedged(&engine->i915->gpu_error)) 1311 1307 drm_printf(m, "*** WEDGED ***\n"); 1312 1308 1313 - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", 1309 + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", 1314 1310 intel_engine_get_seqno(engine), 1315 1311 intel_engine_last_submit(engine), 1316 1312 engine->hangcheck.seqno, 1317 - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), 1318 - engine->timeline->inflight_seqnos); 1313 + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); 1319 1314 drm_printf(m, "\tReset count: %d (global %d)\n", 1320 1315 i915_reset_engine_count(error, engine), 1321 1316 i915_reset_count(error)); ··· 1323 1320 1324 1321 drm_printf(m, "\tRequests:\n"); 1325 1322 1326 - rq = list_first_entry(&engine->timeline->requests, 1323 + rq = list_first_entry(&engine->timeline.requests, 1327 
1324 struct i915_request, link); 1328 - if (&rq->link != &engine->timeline->requests) 1325 + if (&rq->link != &engine->timeline.requests) 1329 1326 print_request(m, rq, "\t\tfirst "); 1330 1327 1331 - rq = list_last_entry(&engine->timeline->requests, 1328 + rq = list_last_entry(&engine->timeline.requests, 1332 1329 struct i915_request, link); 1333 - if (&rq->link != &engine->timeline->requests) 1330 + if (&rq->link != &engine->timeline.requests) 1334 1331 print_request(m, rq, "\t\tlast "); 1335 1332 1336 1333 rq = i915_gem_find_active_request(engine); ··· 1362 1359 drm_printf(m, "\tDevice is asleep; skipping register dump\n"); 1363 1360 } 1364 1361 1365 - spin_lock_irq(&engine->timeline->lock); 1366 - list_for_each_entry(rq, &engine->timeline->requests, link) 1367 - print_request(m, rq, "\t\tE "); 1362 + spin_lock_irq(&engine->timeline.lock); 1363 + 1364 + last = NULL; 1365 + count = 0; 1366 + list_for_each_entry(rq, &engine->timeline.requests, link) { 1367 + if (count++ < MAX_REQUESTS_TO_SHOW - 1) 1368 + print_request(m, rq, "\t\tE "); 1369 + else 1370 + last = rq; 1371 + } 1372 + if (last) { 1373 + if (count > MAX_REQUESTS_TO_SHOW) { 1374 + drm_printf(m, 1375 + "\t\t...skipping %d executing requests...\n", 1376 + count - MAX_REQUESTS_TO_SHOW); 1377 + } 1378 + print_request(m, last, "\t\tE "); 1379 + } 1380 + 1381 + last = NULL; 1382 + count = 0; 1368 1383 drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); 1369 1384 for (rb = execlists->first; rb; rb = rb_next(rb)) { 1370 1385 struct i915_priolist *p = 1371 1386 rb_entry(rb, typeof(*p), node); 1372 1387 1373 - list_for_each_entry(rq, &p->requests, priotree.link) 1374 - print_request(m, rq, "\t\tQ "); 1388 + list_for_each_entry(rq, &p->requests, sched.link) { 1389 + if (count++ < MAX_REQUESTS_TO_SHOW - 1) 1390 + print_request(m, rq, "\t\tQ "); 1391 + else 1392 + last = rq; 1393 + } 1375 1394 } 1376 - spin_unlock_irq(&engine->timeline->lock); 1395 + if (last) { 1396 + if (count > 
MAX_REQUESTS_TO_SHOW) { 1397 + drm_printf(m, 1398 + "\t\t...skipping %d queued requests...\n", 1399 + count - MAX_REQUESTS_TO_SHOW); 1400 + } 1401 + print_request(m, last, "\t\tQ "); 1402 + } 1403 + 1404 + spin_unlock_irq(&engine->timeline.lock); 1377 1405 1378 1406 spin_lock_irq(&b->rb_lock); 1379 1407 for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { ··· 1469 1435 return -ENODEV; 1470 1436 1471 1437 tasklet_disable(&execlists->tasklet); 1472 - spin_lock_irqsave(&engine->stats.lock, flags); 1438 + write_seqlock_irqsave(&engine->stats.lock, flags); 1473 1439 1474 1440 if (unlikely(engine->stats.enabled == ~0)) { 1475 1441 err = -EBUSY; ··· 1493 1459 } 1494 1460 1495 1461 unlock: 1496 - spin_unlock_irqrestore(&engine->stats.lock, flags); 1462 + write_sequnlock_irqrestore(&engine->stats.lock, flags); 1497 1463 tasklet_enable(&execlists->tasklet); 1498 1464 1499 1465 return err; ··· 1522 1488 */ 1523 1489 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) 1524 1490 { 1491 + unsigned int seq; 1525 1492 ktime_t total; 1526 - unsigned long flags; 1527 1493 1528 - spin_lock_irqsave(&engine->stats.lock, flags); 1529 - total = __intel_engine_get_busy_time(engine); 1530 - spin_unlock_irqrestore(&engine->stats.lock, flags); 1494 + do { 1495 + seq = read_seqbegin(&engine->stats.lock); 1496 + total = __intel_engine_get_busy_time(engine); 1497 + } while (read_seqretry(&engine->stats.lock, seq)); 1531 1498 1532 1499 return total; 1533 1500 } ··· 1546 1511 if (!intel_engine_supports_stats(engine)) 1547 1512 return; 1548 1513 1549 - spin_lock_irqsave(&engine->stats.lock, flags); 1514 + write_seqlock_irqsave(&engine->stats.lock, flags); 1550 1515 WARN_ON_ONCE(engine->stats.enabled == 0); 1551 1516 if (--engine->stats.enabled == 0) { 1552 1517 engine->stats.total = __intel_engine_get_busy_time(engine); 1553 1518 engine->stats.active = 0; 1554 1519 } 1555 - spin_unlock_irqrestore(&engine->stats.lock, flags); 1520 + write_sequnlock_irqrestore(&engine->stats.lock, 
flags); 1556 1521 } 1557 1522 1558 1523 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-24
drivers/gpu/drm/i915/intel_guc.c
··· 203 203 guc_shared_data_destroy(guc); 204 204 } 205 205 206 - static u32 get_gt_type(struct drm_i915_private *dev_priv) 207 - { 208 - /* XXX: GT type based on PCI device ID? field seems unused by fw */ 209 - return 0; 210 - } 211 - 212 - static u32 get_core_family(struct drm_i915_private *dev_priv) 213 - { 214 - u32 gen = INTEL_GEN(dev_priv); 215 - 216 - switch (gen) { 217 - case 9: 218 - return GUC_CORE_FAMILY_GEN9; 219 - 220 - default: 221 - MISSING_CASE(gen); 222 - return GUC_CORE_FAMILY_UNKNOWN; 223 - } 224 - } 225 - 226 206 static u32 get_log_control_flags(void) 227 207 { 228 208 u32 level = i915_modparams.guc_log_level; ··· 234 254 int i; 235 255 236 256 memset(params, 0, sizeof(params)); 237 - 238 - params[GUC_CTL_DEVICE_INFO] |= 239 - (get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) | 240 - (get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT); 241 257 242 258 /* 243 259 * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one
+2 -1
drivers/gpu/drm/i915/intel_guc_ads.c
··· 121 121 * to find it. Note that we have to skip our header (1 page), 122 122 * because our GuC shared data is there. 123 123 */ 124 - kernel_ctx_vma = dev_priv->kernel_context->engine[RCS].state; 124 + kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, 125 + dev_priv->engine[RCS])->state; 125 126 blob->ads.golden_context_lrca = 126 127 intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; 127 128
-5
drivers/gpu/drm/i915/intel_guc_fwif.h
··· 23 23 #ifndef _INTEL_GUC_FWIF_H 24 24 #define _INTEL_GUC_FWIF_H 25 25 26 - #define GUC_CORE_FAMILY_GEN9 12 27 - #define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff 28 - 29 26 #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 30 27 #define GUC_CLIENT_PRIORITY_HIGH 1 31 28 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 ··· 79 82 #define GUC_CTL_ARAT_LOW 2 80 83 81 84 #define GUC_CTL_DEVICE_INFO 3 82 - #define GUC_CTL_GT_TYPE_SHIFT 0 83 - #define GUC_CTL_CORE_FAMILY_SHIFT 7 84 85 85 86 #define GUC_CTL_LOG_PARAMS 4 86 87 #define GUC_LOG_VALID (1 << 0)
+44 -25
drivers/gpu/drm/i915/intel_guc_submission.c
··· 124 124 return 0; 125 125 } 126 126 127 + static bool has_doorbell(struct intel_guc_client *client) 128 + { 129 + if (client->doorbell_id == GUC_DOORBELL_INVALID) 130 + return false; 131 + 132 + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); 133 + } 134 + 127 135 static void unreserve_doorbell(struct intel_guc_client *client) 128 136 { 129 - GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); 137 + GEM_BUG_ON(!has_doorbell(client)); 130 138 131 139 __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); 132 140 client->doorbell_id = GUC_DOORBELL_INVALID; ··· 192 184 return client->vaddr + client->doorbell_offset; 193 185 } 194 186 195 - static bool has_doorbell(struct intel_guc_client *client) 196 - { 197 - if (client->doorbell_id == GUC_DOORBELL_INVALID) 198 - return false; 199 - 200 - return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); 201 - } 202 - 203 187 static void __create_doorbell(struct intel_guc_client *client) 204 188 { 205 189 struct guc_doorbell_info *doorbell; ··· 206 206 struct drm_i915_private *dev_priv = guc_to_i915(client->guc); 207 207 struct guc_doorbell_info *doorbell; 208 208 u16 db_id = client->doorbell_id; 209 - 210 209 211 210 doorbell = __get_doorbell(client); 212 211 doorbell->db_status = GUC_DOORBELL_DISABLED; ··· 222 223 static int create_doorbell(struct intel_guc_client *client) 223 224 { 224 225 int ret; 226 + 227 + if (WARN_ON(!has_doorbell(client))) 228 + return -ENODEV; /* internal setup error, should never happen */ 225 229 226 230 __update_doorbell_desc(client, client->doorbell_id); 227 231 __create_doorbell(client); ··· 364 362 desc->db_id = client->doorbell_id; 365 363 366 364 for_each_engine_masked(engine, dev_priv, client->engines, tmp) { 367 - struct intel_context *ce = &ctx->engine[engine->id]; 365 + struct intel_context *ce = to_intel_context(ctx, engine); 368 366 u32 guc_engine_id = engine->guc_id; 369 367 struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; 
370 368 ··· 661 659 662 660 static inline int rq_prio(const struct i915_request *rq) 663 661 { 664 - return rq->priotree.priority; 662 + return rq->sched.attr.priority; 665 663 } 666 664 667 665 static inline int port_prio(const struct execlist_port *port) ··· 669 667 return rq_prio(port_request(port)); 670 668 } 671 669 672 - static void guc_dequeue(struct intel_engine_cs *engine) 670 + static bool __guc_dequeue(struct intel_engine_cs *engine) 673 671 { 674 672 struct intel_engine_execlists * const execlists = &engine->execlists; 675 673 struct execlist_port *port = execlists->port; ··· 679 677 bool submit = false; 680 678 struct rb_node *rb; 681 679 682 - spin_lock_irq(&engine->timeline->lock); 680 + lockdep_assert_held(&engine->timeline.lock); 681 + 683 682 rb = execlists->first; 684 683 GEM_BUG_ON(rb_first(&execlists->queue) != rb); 685 684 ··· 695 692 EXECLISTS_ACTIVE_PREEMPT); 696 693 queue_work(engine->i915->guc.preempt_wq, 697 694 &preempt_work->work); 698 - goto unlock; 695 + return false; 699 696 } 700 697 } 701 698 702 699 port++; 703 700 if (port_isset(port)) 704 - goto unlock; 701 + return false; 705 702 } 706 703 GEM_BUG_ON(port_isset(port)); 707 704 ··· 709 706 struct i915_priolist *p = to_priolist(rb); 710 707 struct i915_request *rq, *rn; 711 708 712 - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { 709 + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { 713 710 if (last && rq->ctx != last->ctx) { 714 711 if (port == last_port) { 715 712 __list_del_many(&p->requests, 716 - &rq->priotree.link); 713 + &rq->sched.link); 717 714 goto done; 718 715 } 719 716 ··· 722 719 port++; 723 720 } 724 721 725 - INIT_LIST_HEAD(&rq->priotree.link); 722 + INIT_LIST_HEAD(&rq->sched.link); 726 723 727 724 __i915_request_submit(rq); 728 725 trace_i915_request_in(rq, port_index(port, execlists)); ··· 739 736 done: 740 737 execlists->queue_priority = rb ? 
to_priolist(rb)->priority : INT_MIN; 741 738 execlists->first = rb; 742 - if (submit) { 739 + if (submit) 743 740 port_assign(port, last); 741 + if (last) 744 742 execlists_user_begin(execlists, execlists->port); 745 - guc_submit(engine); 746 - } 747 743 748 744 /* We must always keep the beast fed if we have work piled up */ 749 745 GEM_BUG_ON(port_isset(execlists->port) && 750 746 !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); 751 747 GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); 752 748 753 - unlock: 754 - spin_unlock_irq(&engine->timeline->lock); 749 + return submit; 750 + } 751 + 752 + static void guc_dequeue(struct intel_engine_cs *engine) 753 + { 754 + unsigned long flags; 755 + bool submit; 756 + 757 + local_irq_save(flags); 758 + 759 + spin_lock(&engine->timeline.lock); 760 + submit = __guc_dequeue(engine); 761 + spin_unlock(&engine->timeline.lock); 762 + 763 + if (submit) 764 + guc_submit(engine); 765 + 766 + local_irq_restore(flags); 755 767 } 756 768 757 769 static void guc_submission_tasklet(unsigned long data) ··· 1008 990 enum intel_engine_id id; 1009 991 1010 992 for_each_engine(engine, dev_priv, id) { 1011 - struct intel_context *ce = &client->owner->engine[id]; 993 + struct intel_context *ce = 994 + to_intel_context(client->owner, engine); 1012 995 u32 addr = intel_hws_preempt_done_address(engine); 1013 996 u32 *cs; 1014 997
+2 -1
drivers/gpu/drm/i915/intel_hangcheck.c
··· 356 356 break; 357 357 358 358 case ENGINE_DEAD: 359 - if (drm_debug & DRM_UT_DRIVER) { 359 + if (GEM_SHOW_DEBUG()) { 360 360 struct drm_printer p = drm_debug_printer("hangcheck"); 361 361 intel_engine_dump(engine, &p, "%s\n", engine->name); 362 362 } ··· 452 452 void intel_engine_init_hangcheck(struct intel_engine_cs *engine) 453 453 { 454 454 memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); 455 + engine->hangcheck.action_timestamp = jiffies; 455 456 } 456 457 457 458 void intel_hangcheck_init(struct drm_i915_private *i915)
+207 -132
drivers/gpu/drm/i915/intel_lrc.c
··· 177 177 178 178 static inline int rq_prio(const struct i915_request *rq) 179 179 { 180 - return rq->priotree.priority; 180 + return rq->sched.attr.priority; 181 181 } 182 182 183 183 static inline bool need_preempt(const struct intel_engine_cs *engine, ··· 185 185 int prio) 186 186 { 187 187 return (intel_engine_has_preemption(engine) && 188 - __execlists_need_preempt(prio, rq_prio(last))); 188 + __execlists_need_preempt(prio, rq_prio(last)) && 189 + !i915_request_completed(last)); 189 190 } 190 191 191 192 /** ··· 224 223 intel_lr_context_descriptor_update(struct i915_gem_context *ctx, 225 224 struct intel_engine_cs *engine) 226 225 { 227 - struct intel_context *ce = &ctx->engine[engine->id]; 226 + struct intel_context *ce = to_intel_context(ctx, engine); 228 227 u64 desc; 229 228 230 229 BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); ··· 258 257 } 259 258 260 259 static struct i915_priolist * 261 - lookup_priolist(struct intel_engine_cs *engine, 262 - struct i915_priotree *pt, 263 - int prio) 260 + lookup_priolist(struct intel_engine_cs *engine, int prio) 264 261 { 265 262 struct intel_engine_execlists * const execlists = &engine->execlists; 266 263 struct i915_priolist *p; ··· 329 330 struct i915_priolist *uninitialized_var(p); 330 331 int last_prio = I915_PRIORITY_INVALID; 331 332 332 - lockdep_assert_held(&engine->timeline->lock); 333 + lockdep_assert_held(&engine->timeline.lock); 333 334 334 335 list_for_each_entry_safe_reverse(rq, rn, 335 - &engine->timeline->requests, 336 + &engine->timeline.requests, 336 337 link) { 337 338 if (i915_request_completed(rq)) 338 339 return; ··· 343 344 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 344 345 if (rq_prio(rq) != last_prio) { 345 346 last_prio = rq_prio(rq); 346 - p = lookup_priolist(engine, &rq->priotree, last_prio); 347 + p = lookup_priolist(engine, last_prio); 347 348 } 348 349 349 - list_add(&rq->priotree.link, &p->requests); 350 + GEM_BUG_ON(p->priority != rq_prio(rq)); 351 + 
list_add(&rq->sched.link, &p->requests); 350 352 } 351 353 } 352 354 ··· 356 356 { 357 357 struct intel_engine_cs *engine = 358 358 container_of(execlists, typeof(*engine), execlists); 359 + unsigned long flags; 359 360 360 - spin_lock_irq(&engine->timeline->lock); 361 + spin_lock_irqsave(&engine->timeline.lock, flags); 362 + 361 363 __unwind_incomplete_requests(engine); 362 - spin_unlock_irq(&engine->timeline->lock); 364 + 365 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 363 366 } 364 367 365 368 static inline void ··· 400 397 } 401 398 402 399 static inline void 403 - execlists_context_schedule_out(struct i915_request *rq) 400 + execlists_context_schedule_out(struct i915_request *rq, unsigned long status) 404 401 { 405 402 intel_engine_context_out(rq->engine); 406 - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 403 + execlists_context_status_change(rq, status); 404 + trace_i915_request_out(rq); 407 405 } 408 406 409 407 static void ··· 418 414 419 415 static u64 execlists_update_context(struct i915_request *rq) 420 416 { 421 - struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; 417 + struct intel_context *ce = to_intel_context(rq->ctx, rq->engine); 422 418 struct i915_hw_ppgtt *ppgtt = 423 419 rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; 424 420 u32 *reg_state = ce->lrc_reg_state; ··· 527 523 { 528 524 struct intel_engine_execlists *execlists = &engine->execlists; 529 525 struct intel_context *ce = 530 - &engine->i915->preempt_context->engine[engine->id]; 526 + to_intel_context(engine->i915->preempt_context, engine); 531 527 unsigned int n; 532 528 533 529 GEM_BUG_ON(execlists->preempt_complete_status != ··· 556 552 execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); 557 553 } 558 554 559 - static void execlists_dequeue(struct intel_engine_cs *engine) 555 + static bool __execlists_dequeue(struct intel_engine_cs *engine) 560 556 { 561 557 struct intel_engine_execlists * const execlists = &engine->execlists; 
562 558 struct execlist_port *port = execlists->port; ··· 565 561 struct i915_request *last = port_request(port); 566 562 struct rb_node *rb; 567 563 bool submit = false; 564 + 565 + lockdep_assert_held(&engine->timeline.lock); 568 566 569 567 /* Hardware submission is through 2 ports. Conceptually each port 570 568 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is ··· 589 583 * and context switches) submission. 590 584 */ 591 585 592 - spin_lock_irq(&engine->timeline->lock); 593 586 rb = execlists->first; 594 587 GEM_BUG_ON(rb_first(&execlists->queue) != rb); 595 588 ··· 603 598 EXECLISTS_ACTIVE_USER)); 604 599 GEM_BUG_ON(!port_count(&port[0])); 605 600 if (port_count(&port[0]) > 1) 606 - goto unlock; 601 + return false; 607 602 608 603 /* 609 604 * If we write to ELSP a second time before the HW has had ··· 613 608 * the HW to indicate that it has had a chance to respond. 614 609 */ 615 610 if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) 616 - goto unlock; 611 + return false; 617 612 618 613 if (need_preempt(engine, last, execlists->queue_priority)) { 619 614 inject_preempt_context(engine); 620 - goto unlock; 615 + return false; 621 616 } 622 617 623 618 /* ··· 642 637 * priorities of the ports haven't been switch. 643 638 */ 644 639 if (port_count(&port[1])) 645 - goto unlock; 640 + return false; 646 641 647 642 /* 648 643 * WaIdleLiteRestore:bdw,skl ··· 659 654 struct i915_priolist *p = to_priolist(rb); 660 655 struct i915_request *rq, *rn; 661 656 662 - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { 657 + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { 663 658 /* 664 659 * Can we combine this request with the current port? 
665 660 * It has to be the same context/ringbuffer and not ··· 679 674 */ 680 675 if (port == last_port) { 681 676 __list_del_many(&p->requests, 682 - &rq->priotree.link); 677 + &rq->sched.link); 683 678 goto done; 684 679 } 685 680 ··· 693 688 if (ctx_single_port_submission(last->ctx) || 694 689 ctx_single_port_submission(rq->ctx)) { 695 690 __list_del_many(&p->requests, 696 - &rq->priotree.link); 691 + &rq->sched.link); 697 692 goto done; 698 693 } 699 694 ··· 706 701 GEM_BUG_ON(port_isset(port)); 707 702 } 708 703 709 - INIT_LIST_HEAD(&rq->priotree.link); 704 + INIT_LIST_HEAD(&rq->sched.link); 710 705 __i915_request_submit(rq); 711 706 trace_i915_request_in(rq, port_index(port, execlists)); 712 707 last = rq; ··· 747 742 /* We must always keep the beast fed if we have work piled up */ 748 743 GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); 749 744 750 - unlock: 751 - spin_unlock_irq(&engine->timeline->lock); 752 - 753 - if (submit) { 745 + /* Re-evaluate the executing context setup after each preemptive kick */ 746 + if (last) 754 747 execlists_user_begin(execlists, execlists->port); 748 + 749 + return submit; 750 + } 751 + 752 + static void execlists_dequeue(struct intel_engine_cs *engine) 753 + { 754 + struct intel_engine_execlists * const execlists = &engine->execlists; 755 + unsigned long flags; 756 + bool submit; 757 + 758 + spin_lock_irqsave(&engine->timeline.lock, flags); 759 + submit = __execlists_dequeue(engine); 760 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 761 + 762 + if (submit) 755 763 execlists_submit_ports(engine); 756 - } 757 764 758 765 GEM_BUG_ON(port_isset(execlists->port) && 759 766 !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); ··· 788 771 intel_engine_get_seqno(rq->engine)); 789 772 790 773 GEM_BUG_ON(!execlists->active); 791 - intel_engine_context_out(rq->engine); 792 - 793 - execlists_context_status_change(rq, 794 - i915_request_completed(rq) ? 
795 - INTEL_CONTEXT_SCHEDULE_OUT : 796 - INTEL_CONTEXT_SCHEDULE_PREEMPTED); 774 + execlists_context_schedule_out(rq, 775 + i915_request_completed(rq) ? 776 + INTEL_CONTEXT_SCHEDULE_OUT : 777 + INTEL_CONTEXT_SCHEDULE_PREEMPTED); 797 778 798 779 i915_request_put(rq); 799 780 ··· 804 789 805 790 static void clear_gtiir(struct intel_engine_cs *engine) 806 791 { 807 - static const u8 gtiir[] = { 808 - [RCS] = 0, 809 - [BCS] = 0, 810 - [VCS] = 1, 811 - [VCS2] = 1, 812 - [VECS] = 3, 813 - }; 814 792 struct drm_i915_private *dev_priv = engine->i915; 815 793 int i; 816 - 817 - /* TODO: correctly reset irqs for gen11 */ 818 - if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11)) 819 - return; 820 - 821 - GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); 822 794 823 795 /* 824 796 * Clear any pending interrupt state. ··· 814 812 * double buffered, and so if we only reset it once there may 815 813 * still be an interrupt pending. 816 814 */ 817 - for (i = 0; i < 2; i++) { 818 - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), 815 + if (INTEL_GEN(dev_priv) >= 11) { 816 + static const struct { 817 + u8 bank; 818 + u8 bit; 819 + } gen11_gtiir[] = { 820 + [RCS] = {0, GEN11_RCS0}, 821 + [BCS] = {0, GEN11_BCS}, 822 + [_VCS(0)] = {1, GEN11_VCS(0)}, 823 + [_VCS(1)] = {1, GEN11_VCS(1)}, 824 + [_VCS(2)] = {1, GEN11_VCS(2)}, 825 + [_VCS(3)] = {1, GEN11_VCS(3)}, 826 + [_VECS(0)] = {1, GEN11_VECS(0)}, 827 + [_VECS(1)] = {1, GEN11_VECS(1)}, 828 + }; 829 + unsigned long irqflags; 830 + 831 + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir)); 832 + 833 + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); 834 + for (i = 0; i < 2; i++) { 835 + gen11_reset_one_iir(dev_priv, 836 + gen11_gtiir[engine->id].bank, 837 + gen11_gtiir[engine->id].bit); 838 + } 839 + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); 840 + } else { 841 + static const u8 gtiir[] = { 842 + [RCS] = 0, 843 + [BCS] = 0, 844 + [VCS] = 1, 845 + [VCS2] = 1, 846 + [VECS] = 3, 847 + }; 848 + 849 + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); 
850 + 851 + for (i = 0; i < 2; i++) { 852 + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), 853 + engine->irq_keep_mask); 854 + POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); 855 + } 856 + GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & 819 857 engine->irq_keep_mask); 820 - POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); 821 858 } 822 - GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & 823 - engine->irq_keep_mask); 824 859 } 825 860 826 861 static void reset_irq(struct intel_engine_cs *engine) ··· 907 868 execlists_cancel_port_requests(execlists); 908 869 reset_irq(engine); 909 870 910 - spin_lock(&engine->timeline->lock); 871 + spin_lock(&engine->timeline.lock); 911 872 912 873 /* Mark all executing requests as skipped. */ 913 - list_for_each_entry(rq, &engine->timeline->requests, link) { 874 + list_for_each_entry(rq, &engine->timeline.requests, link) { 914 875 GEM_BUG_ON(!rq->global_seqno); 915 876 if (!i915_request_completed(rq)) 916 877 dma_fence_set_error(&rq->fence, -EIO); ··· 921 882 while (rb) { 922 883 struct i915_priolist *p = to_priolist(rb); 923 884 924 - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { 925 - INIT_LIST_HEAD(&rq->priotree.link); 885 + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { 886 + INIT_LIST_HEAD(&rq->sched.link); 926 887 927 888 dma_fence_set_error(&rq->fence, -EIO); 928 889 __i915_request_submit(rq); ··· 942 903 execlists->first = NULL; 943 904 GEM_BUG_ON(port_isset(execlists->port)); 944 905 945 - spin_unlock(&engine->timeline->lock); 906 + spin_unlock(&engine->timeline.lock); 946 907 947 908 local_irq_restore(flags); 948 909 } ··· 1008 969 1009 970 head = execlists->csb_head; 1010 971 tail = READ_ONCE(buf[write_idx]); 972 + rmb(); /* Hopefully paired with a wmb() in HW */ 1011 973 } 1012 974 GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", 1013 975 engine->name, ··· 1119 1079 */ 1120 1080 GEM_BUG_ON(!i915_request_completed(rq)); 1121 1081 1122 - execlists_context_schedule_out(rq); 1123 - 
trace_i915_request_out(rq); 1082 + execlists_context_schedule_out(rq, 1083 + INTEL_CONTEXT_SCHEDULE_OUT); 1124 1084 i915_request_put(rq); 1125 1085 1126 1086 GEM_TRACE("%s completed ctx=%d\n", ··· 1156 1116 } 1157 1117 1158 1118 static void queue_request(struct intel_engine_cs *engine, 1159 - struct i915_priotree *pt, 1119 + struct i915_sched_node *node, 1160 1120 int prio) 1161 1121 { 1162 - list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); 1122 + list_add_tail(&node->link, 1123 + &lookup_priolist(engine, prio)->requests); 1163 1124 } 1164 1125 1165 1126 static void __submit_queue(struct intel_engine_cs *engine, int prio) ··· 1181 1140 unsigned long flags; 1182 1141 1183 1142 /* Will be called from irq-context when using foreign fences. */ 1184 - spin_lock_irqsave(&engine->timeline->lock, flags); 1143 + spin_lock_irqsave(&engine->timeline.lock, flags); 1185 1144 1186 - queue_request(engine, &request->priotree, rq_prio(request)); 1145 + queue_request(engine, &request->sched, rq_prio(request)); 1187 1146 submit_queue(engine, rq_prio(request)); 1188 1147 1189 1148 GEM_BUG_ON(!engine->execlists.first); 1190 - GEM_BUG_ON(list_empty(&request->priotree.link)); 1149 + GEM_BUG_ON(list_empty(&request->sched.link)); 1191 1150 1192 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 1151 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 1193 1152 } 1194 1153 1195 - static struct i915_request *pt_to_request(struct i915_priotree *pt) 1154 + static struct i915_request *sched_to_request(struct i915_sched_node *node) 1196 1155 { 1197 - return container_of(pt, struct i915_request, priotree); 1156 + return container_of(node, struct i915_request, sched); 1198 1157 } 1199 1158 1200 1159 static struct intel_engine_cs * 1201 - pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) 1160 + sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) 1202 1161 { 1203 - struct intel_engine_cs *engine = 
pt_to_request(pt)->engine; 1162 + struct intel_engine_cs *engine = sched_to_request(node)->engine; 1204 1163 1205 1164 GEM_BUG_ON(!locked); 1206 1165 1207 1166 if (engine != locked) { 1208 - spin_unlock(&locked->timeline->lock); 1209 - spin_lock(&engine->timeline->lock); 1167 + spin_unlock(&locked->timeline.lock); 1168 + spin_lock(&engine->timeline.lock); 1210 1169 } 1211 1170 1212 1171 return engine; 1213 1172 } 1214 1173 1215 - static void execlists_schedule(struct i915_request *request, int prio) 1174 + static void execlists_schedule(struct i915_request *request, 1175 + const struct i915_sched_attr *attr) 1216 1176 { 1217 - struct intel_engine_cs *engine; 1177 + struct i915_priolist *uninitialized_var(pl); 1178 + struct intel_engine_cs *engine, *last; 1218 1179 struct i915_dependency *dep, *p; 1219 1180 struct i915_dependency stack; 1181 + const int prio = attr->priority; 1220 1182 LIST_HEAD(dfs); 1221 1183 1222 1184 GEM_BUG_ON(prio == I915_PRIORITY_INVALID); ··· 1227 1183 if (i915_request_completed(request)) 1228 1184 return; 1229 1185 1230 - if (prio <= READ_ONCE(request->priotree.priority)) 1186 + if (prio <= READ_ONCE(request->sched.attr.priority)) 1231 1187 return; 1232 1188 1233 1189 /* Need BKL in order to use the temporary link inside i915_dependency */ 1234 1190 lockdep_assert_held(&request->i915->drm.struct_mutex); 1235 1191 1236 - stack.signaler = &request->priotree; 1192 + stack.signaler = &request->sched; 1237 1193 list_add(&stack.dfs_link, &dfs); 1238 1194 1239 1195 /* 1240 1196 * Recursively bump all dependent priorities to match the new request. 
1241 1197 * 1242 1198 * A naive approach would be to use recursion: 1243 - * static void update_priorities(struct i915_priotree *pt, prio) { 1244 - * list_for_each_entry(dep, &pt->signalers_list, signal_link) 1199 + * static void update_priorities(struct i915_sched_node *node, prio) { 1200 + * list_for_each_entry(dep, &node->signalers_list, signal_link) 1245 1201 * update_priorities(dep->signal, prio) 1246 - * queue_request(pt); 1202 + * queue_request(node); 1247 1203 * } 1248 1204 * but that may have unlimited recursion depth and so runs a very 1249 1205 * real risk of overunning the kernel stack. Instead, we build ··· 1255 1211 * last element in the list is the request we must execute first. 1256 1212 */ 1257 1213 list_for_each_entry(dep, &dfs, dfs_link) { 1258 - struct i915_priotree *pt = dep->signaler; 1214 + struct i915_sched_node *node = dep->signaler; 1259 1215 1260 1216 /* 1261 1217 * Within an engine, there can be no cycle, but we may ··· 1263 1219 * (redundant dependencies are not eliminated) and across 1264 1220 * engines. 1265 1221 */ 1266 - list_for_each_entry(p, &pt->signalers_list, signal_link) { 1222 + list_for_each_entry(p, &node->signalers_list, signal_link) { 1267 1223 GEM_BUG_ON(p == dep); /* no cycles! */ 1268 1224 1269 - if (i915_priotree_signaled(p->signaler)) 1225 + if (i915_sched_node_signaled(p->signaler)) 1270 1226 continue; 1271 1227 1272 - GEM_BUG_ON(p->signaler->priority < pt->priority); 1273 - if (prio > READ_ONCE(p->signaler->priority)) 1228 + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); 1229 + if (prio > READ_ONCE(p->signaler->attr.priority)) 1274 1230 list_move_tail(&p->dfs_link, &dfs); 1275 1231 } 1276 1232 } ··· 1281 1237 * execlists_submit_request()), we can set our own priority and skip 1282 1238 * acquiring the engine locks. 
1283 1239 */ 1284 - if (request->priotree.priority == I915_PRIORITY_INVALID) { 1285 - GEM_BUG_ON(!list_empty(&request->priotree.link)); 1286 - request->priotree.priority = prio; 1240 + if (request->sched.attr.priority == I915_PRIORITY_INVALID) { 1241 + GEM_BUG_ON(!list_empty(&request->sched.link)); 1242 + request->sched.attr = *attr; 1287 1243 if (stack.dfs_link.next == stack.dfs_link.prev) 1288 1244 return; 1289 1245 __list_del_entry(&stack.dfs_link); 1290 1246 } 1291 1247 1248 + last = NULL; 1292 1249 engine = request->engine; 1293 - spin_lock_irq(&engine->timeline->lock); 1250 + spin_lock_irq(&engine->timeline.lock); 1294 1251 1295 1252 /* Fifo and depth-first replacement ensure our deps execute before us */ 1296 1253 list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { 1297 - struct i915_priotree *pt = dep->signaler; 1254 + struct i915_sched_node *node = dep->signaler; 1298 1255 1299 1256 INIT_LIST_HEAD(&dep->dfs_link); 1300 1257 1301 - engine = pt_lock_engine(pt, engine); 1258 + engine = sched_lock_engine(node, engine); 1302 1259 1303 - if (prio <= pt->priority) 1260 + if (prio <= node->attr.priority) 1304 1261 continue; 1305 1262 1306 - pt->priority = prio; 1307 - if (!list_empty(&pt->link)) { 1308 - __list_del_entry(&pt->link); 1309 - queue_request(engine, pt, prio); 1263 + node->attr.priority = prio; 1264 + if (!list_empty(&node->link)) { 1265 + if (last != engine) { 1266 + pl = lookup_priolist(engine, prio); 1267 + last = engine; 1268 + } 1269 + GEM_BUG_ON(pl->priority != prio); 1270 + list_move_tail(&node->link, &pl->requests); 1310 1271 } 1311 1272 1312 1273 if (prio > engine->execlists.queue_priority && 1313 - i915_sw_fence_done(&pt_to_request(pt)->submit)) 1274 + i915_sw_fence_done(&sched_to_request(node)->submit)) 1314 1275 __submit_queue(engine, prio); 1315 1276 } 1316 1277 1317 - spin_unlock_irq(&engine->timeline->lock); 1278 + spin_unlock_irq(&engine->timeline.lock); 1318 1279 } 1319 1280 1320 1281 static int __context_pin(struct 
i915_gem_context *ctx, struct i915_vma *vma) ··· 1349 1300 execlists_context_pin(struct intel_engine_cs *engine, 1350 1301 struct i915_gem_context *ctx) 1351 1302 { 1352 - struct intel_context *ce = &ctx->engine[engine->id]; 1303 + struct intel_context *ce = to_intel_context(ctx, engine); 1353 1304 void *vaddr; 1354 1305 int ret; 1355 1306 ··· 1402 1353 static void execlists_context_unpin(struct intel_engine_cs *engine, 1403 1354 struct i915_gem_context *ctx) 1404 1355 { 1405 - struct intel_context *ce = &ctx->engine[engine->id]; 1356 + struct intel_context *ce = to_intel_context(ctx, engine); 1406 1357 1407 1358 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 1408 1359 GEM_BUG_ON(ce->pin_count == 0); ··· 1421 1372 1422 1373 static int execlists_request_alloc(struct i915_request *request) 1423 1374 { 1424 - struct intel_engine_cs *engine = request->engine; 1425 - struct intel_context *ce = &request->ctx->engine[engine->id]; 1375 + struct intel_context *ce = 1376 + to_intel_context(request->ctx, request->engine); 1426 1377 int ret; 1427 1378 1428 1379 GEM_BUG_ON(!ce->pin_count); ··· 1682 1633 return -EINVAL; 1683 1634 1684 1635 switch (INTEL_GEN(engine->i915)) { 1636 + case 11: 1637 + return 0; 1685 1638 case 10: 1686 1639 wa_bb_fn[0] = gen10_init_indirectctx_bb; 1687 1640 wa_bb_fn[1] = NULL; ··· 1795 1744 if (ret) 1796 1745 return ret; 1797 1746 1798 - ret = intel_whitelist_workarounds_apply(engine); 1799 - if (ret) 1800 - return ret; 1747 + intel_whitelist_workarounds_apply(engine); 1801 1748 1802 1749 /* We need to disable the AsyncFlip performance optimisations in order 1803 1750 * to use MI_WAIT_FOR_EVENT within the CS. 
It should already be ··· 1818 1769 if (ret) 1819 1770 return ret; 1820 1771 1821 - ret = intel_whitelist_workarounds_apply(engine); 1822 - if (ret) 1823 - return ret; 1772 + intel_whitelist_workarounds_apply(engine); 1824 1773 1825 1774 return 0; 1826 1775 } ··· 1827 1780 struct i915_request *request) 1828 1781 { 1829 1782 struct intel_engine_execlists * const execlists = &engine->execlists; 1830 - struct intel_context *ce; 1831 1783 unsigned long flags; 1784 + u32 *regs; 1832 1785 1833 1786 GEM_TRACE("%s request global=%x, current=%d\n", 1834 1787 engine->name, request ? request->global_seqno : 0, ··· 1850 1803 reset_irq(engine); 1851 1804 1852 1805 /* Push back any incomplete requests for replay after the reset. */ 1853 - spin_lock(&engine->timeline->lock); 1806 + spin_lock(&engine->timeline.lock); 1854 1807 __unwind_incomplete_requests(engine); 1855 - spin_unlock(&engine->timeline->lock); 1808 + spin_unlock(&engine->timeline.lock); 1856 1809 1857 1810 local_irq_restore(flags); 1858 1811 ··· 1878 1831 * future request will be after userspace has had the opportunity 1879 1832 * to recreate its own state. 
1880 1833 */ 1881 - ce = &request->ctx->engine[engine->id]; 1882 - execlists_init_reg_state(ce->lrc_reg_state, 1883 - request->ctx, engine, ce->ring); 1834 + regs = to_intel_context(request->ctx, engine)->lrc_reg_state; 1835 + if (engine->default_state) { 1836 + void *defaults; 1837 + 1838 + defaults = i915_gem_object_pin_map(engine->default_state, 1839 + I915_MAP_WB); 1840 + if (!IS_ERR(defaults)) { 1841 + memcpy(regs, /* skip restoring the vanilla PPHWSP */ 1842 + defaults + LRC_STATE_PN * PAGE_SIZE, 1843 + engine->context_size - PAGE_SIZE); 1844 + i915_gem_object_unpin_map(engine->default_state); 1845 + } 1846 + } 1847 + execlists_init_reg_state(regs, request->ctx, engine, request->ring); 1884 1848 1885 1849 /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ 1886 - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = 1887 - i915_ggtt_offset(ce->ring->vma); 1888 - ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; 1850 + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); 1851 + regs[CTX_RING_HEAD + 1] = request->postfix; 1889 1852 1890 1853 request->ring->head = request->postfix; 1891 1854 intel_ring_update_space(request->ring); ··· 1956 1899 rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); 1957 1900 } 1958 1901 1959 - cs = intel_ring_begin(rq, 4); 1902 + cs = intel_ring_begin(rq, 6); 1960 1903 if (IS_ERR(cs)) 1961 1904 return PTR_ERR(cs); 1962 1905 ··· 1985 1928 (flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0); 1986 1929 *cs++ = lower_32_bits(offset); 1987 1930 *cs++ = upper_32_bits(offset); 1931 + 1932 + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1933 + *cs++ = MI_NOOP; 1988 1934 intel_ring_advance(rq, cs); 1989 1935 1990 1936 return 0; ··· 2130 2070 cs = gen8_emit_ggtt_write(cs, request->global_seqno, 2131 2071 intel_hws_seqno_address(request->engine)); 2132 2072 *cs++ = MI_USER_INTERRUPT; 2133 - *cs++ = MI_NOOP; 2073 + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2134 2074 request->tail = intel_ring_offset(request, cs); 2135 2075 assert_ring_tail_valid(request->ring, request->tail); 2136 2076 ··· 2146 2086 cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, 2147 2087 intel_hws_seqno_address(request->engine)); 2148 2088 *cs++ = MI_USER_INTERRUPT; 2149 - *cs++ = MI_NOOP; 2089 + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2150 2090 request->tail = intel_ring_offset(request, cs); 2151 2091 assert_ring_tail_valid(request->ring, request->tail); 2152 2092 ··· 2332 2272 } 2333 2273 2334 2274 engine->execlists.preempt_complete_status = ~0u; 2335 - if (engine->i915->preempt_context) 2275 + if (engine->i915->preempt_context) { 2276 + struct intel_context *ce = 2277 + to_intel_context(engine->i915->preempt_context, engine); 2278 + 2336 2279 engine->execlists.preempt_complete_status = 2337 - upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc); 2280 + upper_32_bits(ce->lrc_desc); 2281 + } 2338 2282 2339 2283 return 0; 2340 2284 ··· 2592 2528 2593 2529 defaults = i915_gem_object_pin_map(engine->default_state, 2594 2530 I915_MAP_WB); 2595 - if (IS_ERR(defaults)) 2596 - return PTR_ERR(defaults); 2531 + if (IS_ERR(defaults)) { 2532 + ret = PTR_ERR(defaults); 2533 + goto err_unpin_ctx; 2534 + } 2597 2535 2598 2536 memcpy(vaddr + start, defaults + start, engine->context_size); 2599 2537 i915_gem_object_unpin_map(engine->default_state); ··· 2613 2547 _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | 2614 2548 
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); 2615 2549 2550 + err_unpin_ctx: 2616 2551 i915_gem_object_unpin_map(ctx_obj); 2617 - 2618 - return 0; 2552 + return ret; 2619 2553 } 2620 2554 2621 2555 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, 2622 2556 struct intel_engine_cs *engine) 2623 2557 { 2624 2558 struct drm_i915_gem_object *ctx_obj; 2625 - struct intel_context *ce = &ctx->engine[engine->id]; 2559 + struct intel_context *ce = to_intel_context(ctx, engine); 2626 2560 struct i915_vma *vma; 2627 2561 uint32_t context_size; 2628 2562 struct intel_ring *ring; 2563 + struct i915_timeline *timeline; 2629 2564 int ret; 2630 2565 2631 2566 if (ce->state) ··· 2642 2575 2643 2576 ctx_obj = i915_gem_object_create(ctx->i915, context_size); 2644 2577 if (IS_ERR(ctx_obj)) { 2645 - DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); 2646 - return PTR_ERR(ctx_obj); 2578 + ret = PTR_ERR(ctx_obj); 2579 + goto error_deref_obj; 2647 2580 } 2648 2581 2649 2582 vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); ··· 2652 2585 goto error_deref_obj; 2653 2586 } 2654 2587 2655 - ring = intel_engine_create_ring(engine, ctx->ring_size); 2588 + timeline = i915_timeline_create(ctx->i915, ctx->name); 2589 + if (IS_ERR(timeline)) { 2590 + ret = PTR_ERR(timeline); 2591 + goto error_deref_obj; 2592 + } 2593 + 2594 + ring = intel_engine_create_ring(engine, timeline, ctx->ring_size); 2595 + i915_timeline_put(timeline); 2656 2596 if (IS_ERR(ring)) { 2657 2597 ret = PTR_ERR(ring); 2658 2598 goto error_deref_obj; ··· 2701 2627 */ 2702 2628 list_for_each_entry(ctx, &dev_priv->contexts.list, link) { 2703 2629 for_each_engine(engine, dev_priv, id) { 2704 - struct intel_context *ce = &ctx->engine[engine->id]; 2630 + struct intel_context *ce = 2631 + to_intel_context(ctx, engine); 2705 2632 u32 *reg; 2706 2633 2707 2634 if (!ce->state)
+1 -1
drivers/gpu/drm/i915/intel_lrc.h
··· 108 108 intel_lr_context_descriptor(struct i915_gem_context *ctx, 109 109 struct intel_engine_cs *engine) 110 110 { 111 - return ctx->engine[engine->id].lrc_desc; 111 + return to_intel_context(ctx, engine)->lrc_desc; 112 112 } 113 113 114 114 #endif /* _INTEL_LRC_H_ */
+2 -1
drivers/gpu/drm/i915/intel_lvds.c
··· 326 326 327 327 I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON); 328 328 POSTING_READ(lvds_encoder->reg); 329 - if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000)) 329 + 330 + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 5000)) 330 331 DRM_ERROR("timed out waiting for panel to power on\n"); 331 332 332 333 intel_panel_enable_backlight(pipe_config, conn_state);
+4 -1
drivers/gpu/drm/i915/intel_mocs.c
··· 178 178 { 179 179 bool result = false; 180 180 181 - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 181 + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || 182 + IS_ICELAKE(dev_priv)) { 182 183 table->size = ARRAY_SIZE(skylake_mocs_table); 183 184 table->table = skylake_mocs_table; 184 185 result = true; ··· 218 217 return GEN9_VEBOX_MOCS(index); 219 218 case VCS2: 220 219 return GEN9_MFX1_MOCS(index); 220 + case VCS3: 221 + return GEN11_MFX2_MOCS(index); 221 222 default: 222 223 MISSING_CASE(engine_id); 223 224 return INVALID_MMIO_REG;
+10 -12
drivers/gpu/drm/i915/intel_pipe_crc.c
··· 766 766 { 767 767 int i; 768 768 769 - for (i = 0; i < ARRAY_SIZE(pipe_crc_objects); i++) 770 - if (!strcmp(buf, pipe_crc_objects[i])) { 771 - *o = i; 772 - return 0; 773 - } 769 + i = match_string(pipe_crc_objects, ARRAY_SIZE(pipe_crc_objects), buf); 770 + if (i < 0) 771 + return i; 774 772 775 - return -EINVAL; 773 + *o = i; 774 + return 0; 776 775 } 777 776 778 777 static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, ··· 797 798 return 0; 798 799 } 799 800 800 - for (i = 0; i < ARRAY_SIZE(pipe_crc_sources); i++) 801 - if (!strcmp(buf, pipe_crc_sources[i])) { 802 - *s = i; 803 - return 0; 804 - } 801 + i = match_string(pipe_crc_sources, ARRAY_SIZE(pipe_crc_sources), buf); 802 + if (i < 0) 803 + return i; 805 804 806 - return -EINVAL; 805 + *s = i; 806 + return 0; 807 807 } 808 808 809 809 static int display_crc_ctl_parse(struct drm_i915_private *dev_priv,
+95 -19
drivers/gpu/drm/i915/intel_pm.c
··· 3567 3567 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); 3568 3568 } 3569 3569 3570 + static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) 3571 + { 3572 + u8 enabled_slices; 3573 + 3574 + /* Slice 1 will always be enabled */ 3575 + enabled_slices = 1; 3576 + 3577 + /* Gen prior to GEN11 have only one DBuf slice */ 3578 + if (INTEL_GEN(dev_priv) < 11) 3579 + return enabled_slices; 3580 + 3581 + if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE) 3582 + enabled_slices++; 3583 + 3584 + return enabled_slices; 3585 + } 3586 + 3570 3587 /* 3571 3588 * FIXME: We still don't have the proper code detect if we need to apply the WA, 3572 3589 * so assume we'll always need it in order to avoid underruns. ··· 3771 3754 return true; 3772 3755 } 3773 3756 3757 + static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv, 3758 + const struct intel_crtc_state *cstate, 3759 + const unsigned int total_data_rate, 3760 + const int num_active, 3761 + struct skl_ddb_allocation *ddb) 3762 + { 3763 + const struct drm_display_mode *adjusted_mode; 3764 + u64 total_data_bw; 3765 + u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size; 3766 + 3767 + WARN_ON(ddb_size == 0); 3768 + 3769 + if (INTEL_GEN(dev_priv) < 11) 3770 + return ddb_size - 4; /* 4 blocks for bypass path allocation */ 3771 + 3772 + adjusted_mode = &cstate->base.adjusted_mode; 3773 + total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode); 3774 + 3775 + /* 3776 + * 12GB/s is maximum BW supported by single DBuf slice. 
3777 + */ 3778 + if (total_data_bw >= GBps(12) || num_active > 1) { 3779 + ddb->enabled_slices = 2; 3780 + } else { 3781 + ddb->enabled_slices = 1; 3782 + ddb_size /= 2; 3783 + } 3784 + 3785 + return ddb_size; 3786 + } 3787 + 3774 3788 static void 3775 3789 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, 3776 3790 const struct intel_crtc_state *cstate, 3791 + const unsigned int total_data_rate, 3792 + struct skl_ddb_allocation *ddb, 3777 3793 struct skl_ddb_entry *alloc, /* out */ 3778 3794 int *num_active /* out */) 3779 3795 { ··· 3829 3779 else 3830 3780 *num_active = hweight32(dev_priv->active_crtcs); 3831 3781 3832 - ddb_size = INTEL_INFO(dev_priv)->ddb_size; 3833 - WARN_ON(ddb_size == 0); 3834 - 3835 - if (INTEL_GEN(dev_priv) < 11) 3836 - ddb_size -= 4; /* 4 blocks for bypass path allocation */ 3782 + ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate, 3783 + *num_active, ddb); 3837 3784 3838 3785 /* 3839 3786 * If the state doesn't change the active CRTC's, then there's ··· 3864 3817 return 8; 3865 3818 } 3866 3819 3867 - static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) 3820 + static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv, 3821 + struct skl_ddb_entry *entry, u32 reg) 3868 3822 { 3869 - entry->start = reg & 0x3ff; 3870 - entry->end = (reg >> 16) & 0x3ff; 3823 + u16 mask; 3824 + 3825 + if (INTEL_GEN(dev_priv) >= 11) 3826 + mask = ICL_DDB_ENTRY_MASK; 3827 + else 3828 + mask = SKL_DDB_ENTRY_MASK; 3829 + entry->start = reg & mask; 3830 + entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask; 3831 + 3871 3832 if (entry->end) 3872 3833 entry->end += 1; 3873 3834 } ··· 3892 3837 /* Cursor doesn't support NV12/planar, so no extra calculation needed */ 3893 3838 if (plane_id == PLANE_CURSOR) { 3894 3839 val = I915_READ(CUR_BUF_CFG(pipe)); 3895 - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); 3840 + skl_ddb_entry_init_from_hw(dev_priv, 3841 + &ddb->plane[pipe][plane_id], val); 3896 
3842 return; 3897 3843 } 3898 3844 ··· 3912 3856 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id)); 3913 3857 3914 3858 if (fourcc == DRM_FORMAT_NV12) { 3915 - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val2); 3916 - skl_ddb_entry_init_from_hw(&ddb->uv_plane[pipe][plane_id], val); 3859 + skl_ddb_entry_init_from_hw(dev_priv, 3860 + &ddb->plane[pipe][plane_id], val2); 3861 + skl_ddb_entry_init_from_hw(dev_priv, 3862 + &ddb->uv_plane[pipe][plane_id], val); 3917 3863 } else { 3918 - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); 3864 + skl_ddb_entry_init_from_hw(dev_priv, 3865 + &ddb->plane[pipe][plane_id], val); 3919 3866 } 3920 3867 } 3921 3868 ··· 3928 3869 struct intel_crtc *crtc; 3929 3870 3930 3871 memset(ddb, 0, sizeof(*ddb)); 3872 + 3873 + ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv); 3931 3874 3932 3875 for_each_intel_crtc(&dev_priv->drm, crtc) { 3933 3876 enum intel_display_power_domain power_domain; ··· 4303 4242 return 0; 4304 4243 } 4305 4244 4306 - skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); 4245 + total_data_rate = skl_get_total_relative_data_rate(cstate, 4246 + plane_data_rate, 4247 + uv_plane_data_rate); 4248 + skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb, 4249 + alloc, &num_active); 4307 4250 alloc_size = skl_ddb_entry_size(alloc); 4308 4251 if (alloc_size == 0) 4309 4252 return 0; ··· 4342 4277 * 4343 4278 * FIXME: we may not allocate every single block here. 
4344 4279 */ 4345 - total_data_rate = skl_get_total_relative_data_rate(cstate, 4346 - plane_data_rate, 4347 - uv_plane_data_rate); 4348 4280 if (total_data_rate == 0) 4349 4281 return 0; 4350 4282 ··· 5150 5088 sizeof(dst->ddb.uv_plane[pipe])); 5151 5089 memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], 5152 5090 sizeof(dst->ddb.plane[pipe])); 5091 + dst->ddb.enabled_slices = src->ddb.enabled_slices; 5153 5092 } 5154 5093 5155 5094 static void ··· 5535 5472 /* Fully recompute DDB on first atomic commit */ 5536 5473 dev_priv->wm.distrust_bios_wm = true; 5537 5474 } else { 5538 - /* Easy/common case; just sanitize DDB now if everything off */ 5539 - memset(ddb, 0, sizeof(*ddb)); 5475 + /* 5476 + * Easy/common case; just sanitize DDB now if everything off 5477 + * Keep dbuf slice info intact 5478 + */ 5479 + memset(ddb->plane, 0, sizeof(ddb->plane)); 5480 + memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane)); 5540 5481 } 5541 5482 } 5542 5483 ··· 8664 8597 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 8665 8598 } 8666 8599 8600 + static void icl_init_clock_gating(struct drm_i915_private *dev_priv) 8601 + { 8602 + /* This is not an Wa. Enable to reduce Sampler power */ 8603 + I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN, 8604 + I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE); 8605 + } 8606 + 8667 8607 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) 8668 8608 { 8669 8609 if (!HAS_PCH_CNP(dev_priv)) ··· 9197 9123 */ 9198 9124 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) 9199 9125 { 9200 - if (IS_CANNONLAKE(dev_priv)) 9126 + if (IS_ICELAKE(dev_priv)) 9127 + dev_priv->display.init_clock_gating = icl_init_clock_gating; 9128 + else if (IS_CANNONLAKE(dev_priv)) 9201 9129 dev_priv->display.init_clock_gating = cnl_init_clock_gating; 9202 9130 else if (IS_COFFEELAKE(dev_priv)) 9203 9131 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
+117 -6
drivers/gpu/drm/i915/intel_psr.c
··· 93 93 intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); 94 94 } 95 95 96 + void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug) 97 + { 98 + u32 debug_mask, mask; 99 + 100 + /* No PSR interrupts on VLV/CHV */ 101 + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 102 + return; 103 + 104 + mask = EDP_PSR_ERROR(TRANSCODER_EDP); 105 + debug_mask = EDP_PSR_POST_EXIT(TRANSCODER_EDP) | 106 + EDP_PSR_PRE_ENTRY(TRANSCODER_EDP); 107 + 108 + if (INTEL_GEN(dev_priv) >= 8) { 109 + mask |= EDP_PSR_ERROR(TRANSCODER_A) | 110 + EDP_PSR_ERROR(TRANSCODER_B) | 111 + EDP_PSR_ERROR(TRANSCODER_C); 112 + 113 + debug_mask |= EDP_PSR_POST_EXIT(TRANSCODER_A) | 114 + EDP_PSR_PRE_ENTRY(TRANSCODER_A) | 115 + EDP_PSR_POST_EXIT(TRANSCODER_B) | 116 + EDP_PSR_PRE_ENTRY(TRANSCODER_B) | 117 + EDP_PSR_POST_EXIT(TRANSCODER_C) | 118 + EDP_PSR_PRE_ENTRY(TRANSCODER_C); 119 + } 120 + 121 + if (debug) 122 + mask |= debug_mask; 123 + 124 + WRITE_ONCE(dev_priv->psr.debug, debug); 125 + I915_WRITE(EDP_PSR_IMR, ~mask); 126 + } 127 + 128 + static void psr_event_print(u32 val, bool psr2_enabled) 129 + { 130 + DRM_DEBUG_KMS("PSR exit events: 0x%x\n", val); 131 + if (val & PSR_EVENT_PSR2_WD_TIMER_EXPIRE) 132 + DRM_DEBUG_KMS("\tPSR2 watchdog timer expired\n"); 133 + if ((val & PSR_EVENT_PSR2_DISABLED) && psr2_enabled) 134 + DRM_DEBUG_KMS("\tPSR2 disabled\n"); 135 + if (val & PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN) 136 + DRM_DEBUG_KMS("\tSU dirty FIFO underrun\n"); 137 + if (val & PSR_EVENT_SU_CRC_FIFO_UNDERRUN) 138 + DRM_DEBUG_KMS("\tSU CRC FIFO underrun\n"); 139 + if (val & PSR_EVENT_GRAPHICS_RESET) 140 + DRM_DEBUG_KMS("\tGraphics reset\n"); 141 + if (val & PSR_EVENT_PCH_INTERRUPT) 142 + DRM_DEBUG_KMS("\tPCH interrupt\n"); 143 + if (val & PSR_EVENT_MEMORY_UP) 144 + DRM_DEBUG_KMS("\tMemory up\n"); 145 + if (val & PSR_EVENT_FRONT_BUFFER_MODIFY) 146 + DRM_DEBUG_KMS("\tFront buffer modification\n"); 147 + if (val & PSR_EVENT_WD_TIMER_EXPIRE) 148 + DRM_DEBUG_KMS("\tPSR watchdog timer 
expired\n"); 149 + if (val & PSR_EVENT_PIPE_REGISTERS_UPDATE) 150 + DRM_DEBUG_KMS("\tPIPE registers updated\n"); 151 + if (val & PSR_EVENT_REGISTER_UPDATE) 152 + DRM_DEBUG_KMS("\tRegister updated\n"); 153 + if (val & PSR_EVENT_HDCP_ENABLE) 154 + DRM_DEBUG_KMS("\tHDCP enabled\n"); 155 + if (val & PSR_EVENT_KVMR_SESSION_ENABLE) 156 + DRM_DEBUG_KMS("\tKVMR session enabled\n"); 157 + if (val & PSR_EVENT_VBI_ENABLE) 158 + DRM_DEBUG_KMS("\tVBI enabled\n"); 159 + if (val & PSR_EVENT_LPSP_MODE_EXIT) 160 + DRM_DEBUG_KMS("\tLPSP mode exited\n"); 161 + if ((val & PSR_EVENT_PSR_DISABLE) && !psr2_enabled) 162 + DRM_DEBUG_KMS("\tPSR disabled\n"); 163 + } 164 + 165 + void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) 166 + { 167 + u32 transcoders = BIT(TRANSCODER_EDP); 168 + enum transcoder cpu_transcoder; 169 + ktime_t time_ns = ktime_get(); 170 + 171 + if (INTEL_GEN(dev_priv) >= 8) 172 + transcoders |= BIT(TRANSCODER_A) | 173 + BIT(TRANSCODER_B) | 174 + BIT(TRANSCODER_C); 175 + 176 + for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { 177 + /* FIXME: Exit PSR and link train manually when this happens. 
*/ 178 + if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) 179 + DRM_DEBUG_KMS("[transcoder %s] PSR aux error\n", 180 + transcoder_name(cpu_transcoder)); 181 + 182 + if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) { 183 + dev_priv->psr.last_entry_attempt = time_ns; 184 + DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", 185 + transcoder_name(cpu_transcoder)); 186 + } 187 + 188 + if (psr_iir & EDP_PSR_POST_EXIT(cpu_transcoder)) { 189 + dev_priv->psr.last_exit = time_ns; 190 + DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", 191 + transcoder_name(cpu_transcoder)); 192 + 193 + if (INTEL_GEN(dev_priv) >= 9) { 194 + u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); 195 + bool psr2_enabled = dev_priv->psr.psr2_enabled; 196 + 197 + I915_WRITE(PSR_EVENT(cpu_transcoder), val); 198 + psr_event_print(val, psr2_enabled); 199 + } 200 + } 201 + } 202 + } 203 + 96 204 static bool intel_dp_get_y_coord_required(struct intel_dp *intel_dp) 97 205 { 98 206 uint8_t psr_caps = 0; ··· 508 400 * mesh at all with our frontbuffer tracking. And the hw alone isn't 509 401 * good enough. */ 510 402 val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; 511 - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { 512 - val |= EDP_Y_COORDINATE_VALID | EDP_Y_COORDINATE_ENABLE; 513 - } 403 + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) 404 + val |= EDP_Y_COORDINATE_ENABLE; 514 405 515 406 val |= EDP_PSR2_FRAME_BEFORE_SU(dev_priv->psr.sink_sync_latency + 1); 516 407 ··· 711 604 I915_WRITE(EDP_PSR_DEBUG, 712 605 EDP_PSR_DEBUG_MASK_MEMUP | 713 606 EDP_PSR_DEBUG_MASK_HPD | 714 - EDP_PSR_DEBUG_MASK_LPSP); 607 + EDP_PSR_DEBUG_MASK_LPSP | 608 + EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); 715 609 } 716 610 } 717 611 ··· 1173 1065 if (!dev_priv->psr.sink_support) 1174 1066 return; 1175 1067 1176 - /* Per platform default: all disabled. 
*/ 1177 - if (i915_modparams.enable_psr == -1) 1068 + if (i915_modparams.enable_psr == -1) { 1069 + i915_modparams.enable_psr = dev_priv->vbt.psr.enable; 1070 + 1071 + /* Per platform default: all disabled. */ 1178 1072 i915_modparams.enable_psr = 0; 1073 + } 1179 1074 1180 1075 /* Set link_standby x link_off defaults */ 1181 1076 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+38 -24
drivers/gpu/drm/i915/intel_ringbuffer.c
··· 558 558 */ 559 559 if (request) { 560 560 struct drm_i915_private *dev_priv = request->i915; 561 - struct intel_context *ce = &request->ctx->engine[engine->id]; 561 + struct intel_context *ce = to_intel_context(request->ctx, 562 + engine); 562 563 struct i915_hw_ppgtt *ppgtt; 563 564 564 565 if (ce->state) { ··· 619 618 if (ret) 620 619 return ret; 621 620 622 - ret = intel_whitelist_workarounds_apply(engine); 623 - if (ret) 624 - return ret; 621 + intel_whitelist_workarounds_apply(engine); 625 622 626 623 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ 627 624 if (IS_GEN(dev_priv, 4, 6)) ··· 697 698 struct i915_request *request; 698 699 unsigned long flags; 699 700 700 - spin_lock_irqsave(&engine->timeline->lock, flags); 701 + spin_lock_irqsave(&engine->timeline.lock, flags); 701 702 702 703 /* Mark all submitted requests as skipped. */ 703 - list_for_each_entry(request, &engine->timeline->requests, link) { 704 + list_for_each_entry(request, &engine->timeline.requests, link) { 704 705 GEM_BUG_ON(!request->global_seqno); 705 706 if (!i915_request_completed(request)) 706 707 dma_fence_set_error(&request->fence, -EIO); 707 708 } 708 709 /* Remaining _unready_ requests will be nop'ed when submitted */ 709 710 710 - spin_unlock_irqrestore(&engine->timeline->lock, flags); 711 + spin_unlock_irqrestore(&engine->timeline.lock, flags); 711 712 } 712 713 713 714 static void i9xx_submit_request(struct i915_request *request) ··· 1066 1067 1067 1068 void intel_ring_reset(struct intel_ring *ring, u32 tail) 1068 1069 { 1069 - GEM_BUG_ON(!list_empty(&ring->request_list)); 1070 1070 ring->tail = tail; 1071 1071 ring->head = tail; 1072 1072 ring->emit = tail; ··· 1117 1119 } 1118 1120 1119 1121 struct intel_ring * 1120 - intel_engine_create_ring(struct intel_engine_cs *engine, int size) 1122 + intel_engine_create_ring(struct intel_engine_cs *engine, 1123 + struct i915_timeline *timeline, 1124 + int size) 1121 1125 { 1122 1126 struct intel_ring *ring; 1123 1127 struct 
i915_vma *vma; 1124 1128 1125 1129 GEM_BUG_ON(!is_power_of_2(size)); 1126 1130 GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); 1131 + GEM_BUG_ON(timeline == &engine->timeline); 1132 + lockdep_assert_held(&engine->i915->drm.struct_mutex); 1127 1133 1128 1134 ring = kzalloc(sizeof(*ring), GFP_KERNEL); 1129 1135 if (!ring) 1130 1136 return ERR_PTR(-ENOMEM); 1131 1137 1132 1138 INIT_LIST_HEAD(&ring->request_list); 1139 + ring->timeline = i915_timeline_get(timeline); 1133 1140 1134 1141 ring->size = size; 1135 1142 /* Workaround an erratum on the i830 which causes a hang if ··· 1165 1162 i915_vma_close(ring->vma); 1166 1163 __i915_gem_object_release_unless_active(obj); 1167 1164 1165 + i915_timeline_put(ring->timeline); 1168 1166 kfree(ring); 1169 1167 } 1170 1168 1171 - static int context_pin(struct i915_gem_context *ctx) 1169 + static int context_pin(struct intel_context *ce) 1172 1170 { 1173 - struct i915_vma *vma = ctx->engine[RCS].state; 1171 + struct i915_vma *vma = ce->state; 1174 1172 int ret; 1175 1173 1176 1174 /* ··· 1262 1258 intel_ring_context_pin(struct intel_engine_cs *engine, 1263 1259 struct i915_gem_context *ctx) 1264 1260 { 1265 - struct intel_context *ce = &ctx->engine[engine->id]; 1261 + struct intel_context *ce = to_intel_context(ctx, engine); 1266 1262 int ret; 1267 1263 1268 1264 lockdep_assert_held(&ctx->i915->drm.struct_mutex); ··· 1284 1280 } 1285 1281 1286 1282 if (ce->state) { 1287 - ret = context_pin(ctx); 1283 + ret = context_pin(ce); 1288 1284 if (ret) 1289 1285 goto err; 1290 1286 ··· 1305 1301 static void intel_ring_context_unpin(struct intel_engine_cs *engine, 1306 1302 struct i915_gem_context *ctx) 1307 1303 { 1308 - struct intel_context *ce = &ctx->engine[engine->id]; 1304 + struct intel_context *ce = to_intel_context(ctx, engine); 1309 1305 1310 1306 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 1311 1307 GEM_BUG_ON(ce->pin_count == 0); ··· 1324 1320 static int intel_init_ring_buffer(struct intel_engine_cs *engine) 1325 1321 
{ 1326 1322 struct intel_ring *ring; 1323 + struct i915_timeline *timeline; 1327 1324 int err; 1328 1325 1329 1326 intel_engine_setup_common(engine); ··· 1333 1328 if (err) 1334 1329 goto err; 1335 1330 1336 - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); 1331 + timeline = i915_timeline_create(engine->i915, engine->name); 1332 + if (IS_ERR(timeline)) { 1333 + err = PTR_ERR(timeline); 1334 + goto err; 1335 + } 1336 + 1337 + ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); 1338 + i915_timeline_put(timeline); 1337 1339 if (IS_ERR(ring)) { 1338 1340 err = PTR_ERR(ring); 1339 1341 goto err; ··· 1441 1429 1442 1430 *cs++ = MI_NOOP; 1443 1431 *cs++ = MI_SET_CONTEXT; 1444 - *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags; 1432 + *cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags; 1445 1433 /* 1446 1434 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP 1447 1435 * WaMiSetContext_Hang:snb,ivb,vlv ··· 1532 1520 hw_flags = MI_FORCE_RESTORE; 1533 1521 } 1534 1522 1535 - if (to_ctx->engine[engine->id].state && 1523 + if (to_intel_context(to_ctx, engine)->state && 1536 1524 (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { 1537 1525 GEM_BUG_ON(engine->id != RCS); 1538 1526 ··· 1580 1568 { 1581 1569 int ret; 1582 1570 1583 - GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); 1571 + GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count); 1584 1572 1585 1573 /* Flush enough space to reduce the likelihood of waiting after 1586 1574 * we start building the request - in which case we will just ··· 1731 1719 /* Align the ring tail to a cacheline boundary */ 1732 1720 int intel_ring_cacheline_align(struct i915_request *rq) 1733 1721 { 1734 - int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); 1735 - u32 *cs; 1722 + int num_dwords; 1723 + void *cs; 1736 1724 1725 + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); 1737 1726 if (num_dwords == 
0) 1738 1727 return 0; 1739 1728 1740 - num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords; 1729 + num_dwords = CACHELINE_DWORDS - num_dwords; 1730 + GEM_BUG_ON(num_dwords & 1); 1731 + 1741 1732 cs = intel_ring_begin(rq, num_dwords); 1742 1733 if (IS_ERR(cs)) 1743 1734 return PTR_ERR(cs); 1744 1735 1745 - while (num_dwords--) 1746 - *cs++ = MI_NOOP; 1747 - 1736 + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); 1748 1737 intel_ring_advance(rq, cs); 1749 1738 1739 + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); 1750 1740 return 0; 1751 1741 } 1752 1742
+18 -10
drivers/gpu/drm/i915/intel_ringbuffer.h
··· 3 3 #define _INTEL_RINGBUFFER_H_ 4 4 5 5 #include <linux/hashtable.h> 6 + #include <linux/seqlock.h> 6 7 7 8 #include "i915_gem_batch_pool.h" 8 - #include "i915_gem_timeline.h" 9 9 10 10 #include "i915_reg.h" 11 11 #include "i915_pmu.h" 12 12 #include "i915_request.h" 13 13 #include "i915_selftest.h" 14 + #include "i915_timeline.h" 14 15 #include "intel_gpu_commands.h" 15 16 16 17 struct drm_printer; 18 + struct i915_sched_attr; 17 19 18 20 #define I915_CMD_HASH_ORDER 9 19 21 ··· 129 127 struct i915_vma *vma; 130 128 void *vaddr; 131 129 130 + struct i915_timeline *timeline; 132 131 struct list_head request_list; 132 + struct list_head active_link; 133 133 134 134 u32 head; 135 135 u32 tail; ··· 338 334 u32 mmio_base; 339 335 340 336 struct intel_ring *buffer; 341 - struct intel_timeline *timeline; 337 + 338 + struct i915_timeline timeline; 342 339 343 340 struct drm_i915_gem_object *default_state; 344 341 ··· 465 460 * 466 461 * Called under the struct_mutex. 467 462 */ 468 - void (*schedule)(struct i915_request *request, int priority); 463 + void (*schedule)(struct i915_request *request, 464 + const struct i915_sched_attr *attr); 469 465 470 466 /* 471 467 * Cancel all requests on the hardware, or queued for execution. ··· 599 593 /** 600 594 * @lock: Lock protecting the below fields. 601 595 */ 602 - spinlock_t lock; 596 + seqlock_t lock; 603 597 /** 604 598 * @enabled: Reference count indicating number of listeners. 605 599 */ ··· 770 764 #define CNL_HWS_CSB_WRITE_INDEX 0x2f 771 765 772 766 struct intel_ring * 773 - intel_engine_create_ring(struct intel_engine_cs *engine, int size); 767 + intel_engine_create_ring(struct intel_engine_cs *engine, 768 + struct i915_timeline *timeline, 769 + int size); 774 770 int intel_ring_pin(struct intel_ring *ring, 775 771 struct drm_i915_private *i915, 776 772 unsigned int offset_bias); ··· 890 882 * wtih serialising this hint with anything, so document it as 891 883 * a hint and nothing more. 
892 884 */ 893 - return READ_ONCE(engine->timeline->seqno); 885 + return READ_ONCE(engine->timeline.seqno); 894 886 } 895 887 896 888 void intel_engine_get_instdone(struct intel_engine_cs *engine, ··· 1070 1062 if (READ_ONCE(engine->stats.enabled) == 0) 1071 1063 return; 1072 1064 1073 - spin_lock_irqsave(&engine->stats.lock, flags); 1065 + write_seqlock_irqsave(&engine->stats.lock, flags); 1074 1066 1075 1067 if (engine->stats.enabled > 0) { 1076 1068 if (engine->stats.active++ == 0) ··· 1078 1070 GEM_BUG_ON(engine->stats.active == 0); 1079 1071 } 1080 1072 1081 - spin_unlock_irqrestore(&engine->stats.lock, flags); 1073 + write_sequnlock_irqrestore(&engine->stats.lock, flags); 1082 1074 } 1083 1075 1084 1076 static inline void intel_engine_context_out(struct intel_engine_cs *engine) ··· 1088 1080 if (READ_ONCE(engine->stats.enabled) == 0) 1089 1081 return; 1090 1082 1091 - spin_lock_irqsave(&engine->stats.lock, flags); 1083 + write_seqlock_irqsave(&engine->stats.lock, flags); 1092 1084 1093 1085 if (engine->stats.enabled > 0) { 1094 1086 ktime_t last; ··· 1115 1107 } 1116 1108 } 1117 1109 1118 - spin_unlock_irqrestore(&engine->stats.lock, flags); 1110 + write_sequnlock_irqrestore(&engine->stats.lock, flags); 1119 1111 } 1120 1112 1121 1113 int intel_enable_engine_stats(struct intel_engine_cs *engine);
+81 -24
drivers/gpu/drm/i915/intel_runtime_pm.c
··· 542 542 dev_priv->csr.dc_state = val; 543 543 } 544 544 545 + /** 546 + * gen9_set_dc_state - set target display C power state 547 + * @dev_priv: i915 device instance 548 + * @state: target DC power state 549 + * - DC_STATE_DISABLE 550 + * - DC_STATE_EN_UPTO_DC5 551 + * - DC_STATE_EN_UPTO_DC6 552 + * - DC_STATE_EN_DC9 553 + * 554 + * Signal to DMC firmware/HW the target DC power state passed in @state. 555 + * DMC/HW can turn off individual display clocks and power rails when entering 556 + * a deeper DC power state (higher in number) and turns these back when exiting 557 + * that state to a shallower power state (lower in number). The HW will decide 558 + * when to actually enter a given state on an on-demand basis, for instance 559 + * depending on the active state of display pipes. The state of display 560 + * registers backed by affected power rails are saved/restored as needed. 561 + * 562 + * Based on the above enabling a deeper DC power state is asynchronous wrt. 563 + * enabling it. Disabling a deeper power state is synchronous: for instance 564 + * setting %DC_STATE_DISABLE won't complete until all HW resources are turned 565 + * back on and register state is restored. This is guaranteed by the MMIO write 566 + * to DC_STATE_EN blocking until the state is restored. 
567 + */ 545 568 static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state) 546 569 { 547 570 uint32_t val; ··· 658 635 assert_csr_loaded(dev_priv); 659 636 } 660 637 661 - void skl_enable_dc6(struct drm_i915_private *dev_priv) 638 + static void skl_enable_dc6(struct drm_i915_private *dev_priv) 662 639 { 663 640 assert_can_enable_dc6(dev_priv); 664 641 ··· 670 647 SKL_SELECT_ALTERNATE_DC_EXIT); 671 648 672 649 gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); 673 - } 674 - 675 - void skl_disable_dc6(struct drm_i915_private *dev_priv) 676 - { 677 - DRM_DEBUG_KMS("Disabling DC6\n"); 678 - 679 - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); 680 650 } 681 651 682 652 static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, ··· 2642 2626 mutex_unlock(&power_domains->lock); 2643 2627 } 2644 2628 2645 - static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) 2629 + static inline 2630 + bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, 2631 + i915_reg_t reg, bool enable) 2646 2632 { 2647 - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST); 2648 - POSTING_READ(DBUF_CTL); 2633 + u32 val, status; 2649 2634 2635 + val = I915_READ(reg); 2636 + val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST); 2637 + I915_WRITE(reg, val); 2638 + POSTING_READ(reg); 2650 2639 udelay(10); 2651 2640 2652 - if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE)) 2653 - DRM_ERROR("DBuf power enable timeout\n"); 2641 + status = I915_READ(reg) & DBUF_POWER_STATE; 2642 + if ((enable && !status) || (!enable && status)) { 2643 + DRM_ERROR("DBus power %s timeout!\n", 2644 + enable ? 
"enable" : "disable"); 2645 + return false; 2646 + } 2647 + return true; 2648 + } 2649 + 2650 + static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) 2651 + { 2652 + intel_dbuf_slice_set(dev_priv, DBUF_CTL, true); 2654 2653 } 2655 2654 2656 2655 static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) 2657 2656 { 2658 - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST); 2659 - POSTING_READ(DBUF_CTL); 2660 - 2661 - udelay(10); 2662 - 2663 - if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE) 2664 - DRM_ERROR("DBuf power disable timeout!\n"); 2657 + intel_dbuf_slice_set(dev_priv, DBUF_CTL, false); 2665 2658 } 2666 2659 2667 - /* 2668 - * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when 2669 - * needed and keep it disabled as much as possible. 2670 - */ 2660 + static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv) 2661 + { 2662 + if (INTEL_GEN(dev_priv) < 11) 2663 + return 1; 2664 + return 2; 2665 + } 2666 + 2667 + void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, 2668 + u8 req_slices) 2669 + { 2670 + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; 2671 + u32 val; 2672 + bool ret; 2673 + 2674 + if (req_slices > intel_dbuf_max_slices(dev_priv)) { 2675 + DRM_ERROR("Invalid number of dbuf slices requested\n"); 2676 + return; 2677 + } 2678 + 2679 + if (req_slices == hw_enabled_slices || req_slices == 0) 2680 + return; 2681 + 2682 + val = I915_READ(DBUF_CTL_S2); 2683 + if (req_slices > hw_enabled_slices) 2684 + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true); 2685 + else 2686 + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false); 2687 + 2688 + if (ret) 2689 + dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices; 2690 + } 2691 + 2671 2692 static void icl_dbuf_enable(struct drm_i915_private *dev_priv) 2672 2693 { 2673 2694 I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST); ··· 2716 2663 if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || 2717 2664 
!(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) 2718 2665 DRM_ERROR("DBuf power enable timeout\n"); 2666 + else 2667 + dev_priv->wm.skl_hw.ddb.enabled_slices = 2; 2719 2668 } 2720 2669 2721 2670 static void icl_dbuf_disable(struct drm_i915_private *dev_priv) ··· 2731 2676 if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || 2732 2677 (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) 2733 2678 DRM_ERROR("DBuf power disable timeout!\n"); 2679 + else 2680 + dev_priv->wm.skl_hw.ddb.enabled_slices = 0; 2734 2681 } 2735 2682 2736 2683 static void icl_mbus_init(struct drm_i915_private *dev_priv)
+24 -3
drivers/gpu/drm/i915/intel_sprite.c
··· 131 131 if (scanline < min || scanline > max) 132 132 break; 133 133 134 - if (timeout <= 0) { 134 + if (!timeout) { 135 135 DRM_ERROR("Potential atomic update failure on pipe %c\n", 136 136 pipe_name(crtc->pipe)); 137 137 break; ··· 1011 1011 src->y2 = (src_y + src_h) << 16; 1012 1012 1013 1013 if (intel_format_is_yuv(fb->format->format) && 1014 + fb->format->format != DRM_FORMAT_NV12 && 1014 1015 (src_x % 2 || src_w % 2)) { 1015 1016 DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n", 1016 1017 src_x, src_w); ··· 1180 1179 DRM_FORMAT_VYUY, 1181 1180 }; 1182 1181 1182 + static uint32_t skl_planar_formats[] = { 1183 + DRM_FORMAT_RGB565, 1184 + DRM_FORMAT_ABGR8888, 1185 + DRM_FORMAT_ARGB8888, 1186 + DRM_FORMAT_XBGR8888, 1187 + DRM_FORMAT_XRGB8888, 1188 + DRM_FORMAT_YUYV, 1189 + DRM_FORMAT_YVYU, 1190 + DRM_FORMAT_UYVY, 1191 + DRM_FORMAT_VYUY, 1192 + DRM_FORMAT_NV12, 1193 + }; 1194 + 1183 1195 static const uint64_t skl_plane_format_modifiers_noccs[] = { 1184 1196 I915_FORMAT_MOD_Yf_TILED, 1185 1197 I915_FORMAT_MOD_Y_TILED, ··· 1287 1273 case DRM_FORMAT_YVYU: 1288 1274 case DRM_FORMAT_UYVY: 1289 1275 case DRM_FORMAT_VYUY: 1276 + case DRM_FORMAT_NV12: 1290 1277 if (modifier == I915_FORMAT_MOD_Yf_TILED) 1291 1278 return true; 1292 1279 /* fall through */ ··· 1387 1372 intel_plane->disable_plane = skl_disable_plane; 1388 1373 intel_plane->get_hw_state = skl_plane_get_hw_state; 1389 1374 1390 - plane_formats = skl_plane_formats; 1391 - num_plane_formats = ARRAY_SIZE(skl_plane_formats); 1375 + if (skl_plane_has_planar(dev_priv, pipe, 1376 + PLANE_SPRITE0 + plane)) { 1377 + plane_formats = skl_planar_formats; 1378 + num_plane_formats = ARRAY_SIZE(skl_planar_formats); 1379 + } else { 1380 + plane_formats = skl_plane_formats; 1381 + num_plane_formats = ARRAY_SIZE(skl_plane_formats); 1382 + } 1392 1383 1393 1384 if (skl_plane_has_ccs(dev_priv, pipe, PLANE_SPRITE0 + plane)) 1394 1385 modifiers = skl_plane_format_modifiers_ccs;
+1 -1
drivers/gpu/drm/i915/intel_uc_fw.h
··· 30 30 struct i915_vma; 31 31 32 32 /* Home of GuC, HuC and DMC firmwares */ 33 - #define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" 33 + #define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" 34 34 35 35 enum intel_uc_fw_status { 36 36 INTEL_UC_FIRMWARE_FAIL = -1,
+5 -2
drivers/gpu/drm/i915/intel_uncore.c
··· 139 139 * in the hope that the original ack will be delivered along with 140 140 * the fallback ack. 141 141 * 142 - * This workaround is described in HSDES #1604254524 142 + * This workaround is described in HSDES #1604254524 and it's known as: 143 + * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl 144 + * although the name is a bit misleading. 143 145 */ 144 146 145 147 pass = 1; ··· 1396 1394 if (INTEL_GEN(dev_priv) >= 11) { 1397 1395 int i; 1398 1396 1399 - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; 1397 + dev_priv->uncore.funcs.force_wake_get = 1398 + fw_domains_get_with_fallback; 1400 1399 dev_priv->uncore.funcs.force_wake_put = fw_domains_put; 1401 1400 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, 1402 1401 FORCEWAKE_RENDER_GEN9,
+210 -117
drivers/gpu/drm/i915/intel_workarounds.c
··· 270 270 GEN9_PREEMPT_GPGPU_LEVEL_MASK, 271 271 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 272 272 273 + /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ 274 + if (IS_GEN9_LP(dev_priv)) 275 + WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); 276 + 273 277 return 0; 274 278 } 275 279 ··· 445 441 return 0; 446 442 } 447 443 444 + static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv) 445 + { 446 + /* Wa_1604370585:icl (pre-prod) 447 + * Formerly known as WaPushConstantDereferenceHoldDisable 448 + */ 449 + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) 450 + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, 451 + PUSH_CONSTANT_DEREF_DISABLE); 452 + 453 + /* WaForceEnableNonCoherent:icl 454 + * This is not the same workaround as in early Gen9 platforms, where 455 + * lacking this could cause system hangs, but coherency performance 456 + * overhead is high and only a few compute workloads really need it 457 + * (the register is whitelisted in hardware now, so UMDs can opt in 458 + * for coherency if they have a good reason). 459 + */ 460 + WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); 461 + 462 + return 0; 463 + } 464 + 448 465 int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv) 449 466 { 450 467 int err = 0; ··· 490 465 err = cfl_ctx_workarounds_init(dev_priv); 491 466 else if (IS_CANNONLAKE(dev_priv)) 492 467 err = cnl_ctx_workarounds_init(dev_priv); 468 + else if (IS_ICELAKE(dev_priv)) 469 + err = icl_ctx_workarounds_init(dev_priv); 493 470 else 494 471 MISSING_CASE(INTEL_GEN(dev_priv)); 495 472 if (err) ··· 690 663 _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); 691 664 } 692 665 666 + static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv) 667 + { 668 + /* This is not an Wa. 
Enable for better image quality */ 669 + I915_WRITE(_3D_CHICKEN3, 670 + _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); 671 + 672 + /* WaInPlaceDecompressionHang:icl */ 673 + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | 674 + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 675 + 676 + /* WaPipelineFlushCoherentLines:icl */ 677 + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | 678 + GEN8_LQSC_FLUSH_COHERENT_LINES); 679 + 680 + /* Wa_1405543622:icl 681 + * Formerly known as WaGAPZPriorityScheme 682 + */ 683 + I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) | 684 + GEN11_ARBITRATION_PRIO_ORDER_MASK); 685 + 686 + /* Wa_1604223664:icl 687 + * Formerly known as WaL3BankAddressHashing 688 + */ 689 + I915_WRITE(GEN8_GARBCNTL, 690 + (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) | 691 + GEN11_HASH_CTRL_EXCL_BIT0); 692 + I915_WRITE(GEN11_GLBLINVL, 693 + (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) | 694 + GEN11_BANK_HASH_ADDR_EXCL_BIT0); 695 + 696 + /* WaModifyGamTlbPartitioning:icl */ 697 + I915_WRITE(GEN11_GACB_PERF_CTRL, 698 + (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) | 699 + GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); 700 + 701 + /* Wa_1405733216:icl 702 + * Formerly known as WaDisableCleanEvicts 703 + */ 704 + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | 705 + GEN11_LQSC_CLEAN_EVICT_DISABLE); 706 + 707 + /* Wa_1405766107:icl 708 + * Formerly known as WaCL2SFHalfMaxAlloc 709 + */ 710 + I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) | 711 + GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | 712 + GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); 713 + 714 + /* Wa_220166154:icl 715 + * Formerly known as WaDisCtxReload 716 + */ 717 + I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) | 718 + GAMW_ECO_DEV_CTX_RELOAD_DISABLE); 719 + 720 + /* Wa_1405779004:icl (pre-prod) */ 721 + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0)) 722 + 
I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, 723 + I915_READ(SLICE_UNIT_LEVEL_CLKGATE) | 724 + MSCUNIT_CLKGATE_DIS); 725 + 726 + /* Wa_1406680159:icl */ 727 + I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, 728 + I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) | 729 + GWUNIT_CLKGATE_DIS); 730 + 731 + /* Wa_1604302699:icl */ 732 + I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER, 733 + I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) | 734 + GEN11_I2M_WRITE_DISABLE); 735 + 736 + /* Wa_1406838659:icl (pre-prod) */ 737 + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) 738 + I915_WRITE(INF_UNIT_LEVEL_CLKGATE, 739 + I915_READ(INF_UNIT_LEVEL_CLKGATE) | 740 + CGPSF_CLKGATE_DIS); 741 + 742 + /* WaForwardProgressSoftReset:icl */ 743 + I915_WRITE(GEN10_SCRATCH_LNCF2, 744 + I915_READ(GEN10_SCRATCH_LNCF2) | 745 + PMFLUSHDONE_LNICRSDROP | 746 + PMFLUSH_GAPL3UNBLOCK | 747 + PMFLUSHDONE_LNEBLK); 748 + } 749 + 693 750 void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) 694 751 { 695 752 if (INTEL_GEN(dev_priv) < 8) ··· 794 683 cfl_gt_workarounds_apply(dev_priv); 795 684 else if (IS_CANNONLAKE(dev_priv)) 796 685 cnl_gt_workarounds_apply(dev_priv); 686 + else if (IS_ICELAKE(dev_priv)) 687 + icl_gt_workarounds_apply(dev_priv); 797 688 else 798 689 MISSING_CASE(INTEL_GEN(dev_priv)); 799 690 } 800 691 801 - static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, 802 - i915_reg_t reg) 692 + struct whitelist { 693 + i915_reg_t reg[RING_MAX_NONPRIV_SLOTS]; 694 + unsigned int count; 695 + u32 nopid; 696 + }; 697 + 698 + static void whitelist_reg(struct whitelist *w, i915_reg_t reg) 803 699 { 804 - struct drm_i915_private *dev_priv = engine->i915; 805 - struct i915_workarounds *wa = &dev_priv->workarounds; 806 - const unsigned int index = wa->hw_whitelist_count[engine->id]; 700 + if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS)) 701 + return; 807 702 808 - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) 809 - return -EINVAL; 810 - 811 - I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), 
812 - i915_mmio_reg_offset(reg)); 813 - wa->hw_whitelist_count[engine->id]++; 814 - 815 - return 0; 703 + w->reg[w->count++] = reg; 816 704 } 817 705 818 - static int bdw_whitelist_workarounds_apply(struct intel_engine_cs *engine) 706 + static void bdw_whitelist_build(struct whitelist *w) 819 707 { 820 - return 0; 821 708 } 822 709 823 - static int chv_whitelist_workarounds_apply(struct intel_engine_cs *engine) 710 + static void chv_whitelist_build(struct whitelist *w) 824 711 { 825 - return 0; 826 712 } 827 713 828 - static int gen9_whitelist_workarounds_apply(struct intel_engine_cs *engine) 714 + static void gen9_whitelist_build(struct whitelist *w) 829 715 { 830 - int ret; 831 - 832 716 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ 833 - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); 834 - if (ret) 835 - return ret; 717 + whitelist_reg(w, GEN9_CTX_PREEMPT_REG); 836 718 837 719 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ 838 - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); 839 - if (ret) 840 - return ret; 720 + whitelist_reg(w, GEN8_CS_CHICKEN1); 841 721 842 722 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ 843 - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); 844 - if (ret) 845 - return ret; 846 - 847 - return 0; 723 + whitelist_reg(w, GEN8_HDC_CHICKEN1); 848 724 } 849 725 850 - static int skl_whitelist_workarounds_apply(struct intel_engine_cs *engine) 726 + static void skl_whitelist_build(struct whitelist *w) 851 727 { 852 - int ret; 853 - 854 - ret = gen9_whitelist_workarounds_apply(engine); 855 - if (ret) 856 - return ret; 728 + gen9_whitelist_build(w); 857 729 858 730 /* WaDisableLSQCROPERFforOCL:skl */ 859 - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); 860 - if (ret) 861 - return ret; 862 - 863 - return 0; 731 + whitelist_reg(w, GEN8_L3SQCREG4); 864 732 } 865 733 866 - static int bxt_whitelist_workarounds_apply(struct intel_engine_cs *engine) 734 + static void 
bxt_whitelist_build(struct whitelist *w) 867 735 { 868 - int ret; 869 - 870 - ret = gen9_whitelist_workarounds_apply(engine); 871 - if (ret) 872 - return ret; 873 - 874 - return 0; 736 + gen9_whitelist_build(w); 875 737 } 876 738 877 - static int kbl_whitelist_workarounds_apply(struct intel_engine_cs *engine) 739 + static void kbl_whitelist_build(struct whitelist *w) 878 740 { 879 - int ret; 880 - 881 - ret = gen9_whitelist_workarounds_apply(engine); 882 - if (ret) 883 - return ret; 741 + gen9_whitelist_build(w); 884 742 885 743 /* WaDisableLSQCROPERFforOCL:kbl */ 886 - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); 887 - if (ret) 888 - return ret; 889 - 890 - return 0; 744 + whitelist_reg(w, GEN8_L3SQCREG4); 891 745 } 892 746 893 - static int glk_whitelist_workarounds_apply(struct intel_engine_cs *engine) 747 + static void glk_whitelist_build(struct whitelist *w) 894 748 { 895 - int ret; 896 - 897 - ret = gen9_whitelist_workarounds_apply(engine); 898 - if (ret) 899 - return ret; 749 + gen9_whitelist_build(w); 900 750 901 751 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. 
*/ 902 - ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); 903 - if (ret) 904 - return ret; 905 - 906 - return 0; 752 + whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); 907 753 } 908 754 909 - static int cfl_whitelist_workarounds_apply(struct intel_engine_cs *engine) 755 + static void cfl_whitelist_build(struct whitelist *w) 910 756 { 911 - int ret; 912 - 913 - ret = gen9_whitelist_workarounds_apply(engine); 914 - if (ret) 915 - return ret; 916 - 917 - return 0; 757 + gen9_whitelist_build(w); 918 758 } 919 759 920 - static int cnl_whitelist_workarounds_apply(struct intel_engine_cs *engine) 760 + static void cnl_whitelist_build(struct whitelist *w) 921 761 { 922 - int ret; 923 - 924 762 /* WaEnablePreemptionGranularityControlByUMD:cnl */ 925 - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); 926 - if (ret) 927 - return ret; 928 - 929 - return 0; 763 + whitelist_reg(w, GEN8_CS_CHICKEN1); 930 764 } 931 765 932 - int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) 766 + static void icl_whitelist_build(struct whitelist *w) 767 + { 768 + } 769 + 770 + static struct whitelist *whitelist_build(struct intel_engine_cs *engine, 771 + struct whitelist *w) 772 + { 773 + struct drm_i915_private *i915 = engine->i915; 774 + 775 + GEM_BUG_ON(engine->id != RCS); 776 + 777 + w->count = 0; 778 + w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base)); 779 + 780 + if (INTEL_GEN(i915) < 8) 781 + return NULL; 782 + else if (IS_BROADWELL(i915)) 783 + bdw_whitelist_build(w); 784 + else if (IS_CHERRYVIEW(i915)) 785 + chv_whitelist_build(w); 786 + else if (IS_SKYLAKE(i915)) 787 + skl_whitelist_build(w); 788 + else if (IS_BROXTON(i915)) 789 + bxt_whitelist_build(w); 790 + else if (IS_KABYLAKE(i915)) 791 + kbl_whitelist_build(w); 792 + else if (IS_GEMINILAKE(i915)) 793 + glk_whitelist_build(w); 794 + else if (IS_COFFEELAKE(i915)) 795 + cfl_whitelist_build(w); 796 + else if (IS_CANNONLAKE(i915)) 797 + cnl_whitelist_build(w); 798 + else if 
(IS_ICELAKE(i915)) 799 + icl_whitelist_build(w); 800 + else 801 + MISSING_CASE(INTEL_GEN(i915)); 802 + 803 + return w; 804 + } 805 + 806 + static void whitelist_apply(struct intel_engine_cs *engine, 807 + const struct whitelist *w) 933 808 { 934 809 struct drm_i915_private *dev_priv = engine->i915; 935 - int err = 0; 810 + const u32 base = engine->mmio_base; 811 + unsigned int i; 936 812 937 - WARN_ON(engine->id != RCS); 813 + if (!w) 814 + return; 938 815 939 - dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; 816 + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 940 817 941 - if (INTEL_GEN(dev_priv) < 8) 942 - err = 0; 943 - else if (IS_BROADWELL(dev_priv)) 944 - err = bdw_whitelist_workarounds_apply(engine); 945 - else if (IS_CHERRYVIEW(dev_priv)) 946 - err = chv_whitelist_workarounds_apply(engine); 947 - else if (IS_SKYLAKE(dev_priv)) 948 - err = skl_whitelist_workarounds_apply(engine); 949 - else if (IS_BROXTON(dev_priv)) 950 - err = bxt_whitelist_workarounds_apply(engine); 951 - else if (IS_KABYLAKE(dev_priv)) 952 - err = kbl_whitelist_workarounds_apply(engine); 953 - else if (IS_GEMINILAKE(dev_priv)) 954 - err = glk_whitelist_workarounds_apply(engine); 955 - else if (IS_COFFEELAKE(dev_priv)) 956 - err = cfl_whitelist_workarounds_apply(engine); 957 - else if (IS_CANNONLAKE(dev_priv)) 958 - err = cnl_whitelist_workarounds_apply(engine); 959 - else 960 - MISSING_CASE(INTEL_GEN(dev_priv)); 961 - if (err) 962 - return err; 818 + for (i = 0; i < w->count; i++) 819 + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), 820 + i915_mmio_reg_offset(w->reg[i])); 963 821 964 - DRM_DEBUG_DRIVER("%s: Number of whitelist w/a: %d\n", engine->name, 965 - dev_priv->workarounds.hw_whitelist_count[engine->id]); 966 - return 0; 822 + /* And clear the rest just in case of garbage */ 823 + for (; i < RING_MAX_NONPRIV_SLOTS; i++) 824 + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid); 825 + 826 + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 967 827 } 
828 + 829 + void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) 830 + { 831 + struct whitelist w; 832 + 833 + whitelist_apply(engine, whitelist_build(engine, &w)); 834 + } 835 + 836 + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 837 + #include "selftests/intel_workarounds.c" 838 + #endif
+1 -1
drivers/gpu/drm/i915/intel_workarounds.h
··· 12 12 13 13 void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv); 14 14 15 - int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); 15 + void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); 16 16 17 17 #endif
+4 -1
drivers/gpu/drm/i915/selftests/huge_pages.c
··· 1091 1091 out_vma_unpin: 1092 1092 i915_vma_unpin(vma); 1093 1093 out_vma_close: 1094 - i915_vma_close(vma); 1094 + i915_vma_destroy(vma); 1095 1095 1096 1096 return err; 1097 1097 } ··· 1756 1756 err = PTR_ERR(ctx); 1757 1757 goto out_unlock; 1758 1758 } 1759 + 1760 + if (ctx->ppgtt) 1761 + ctx->ppgtt->base.scrub_64K = true; 1759 1762 1760 1763 err = i915_subtests(tests, ctx); 1761 1764
+3
drivers/gpu/drm/i915/selftests/i915_gem_context.c
··· 23 23 */ 24 24 25 25 #include "../i915_selftest.h" 26 + #include "igt_flush_test.h" 26 27 27 28 #include "mock_drm.h" 28 29 #include "huge_gem_object.h" ··· 412 411 } 413 412 414 413 out_unlock: 414 + if (igt_flush_test(i915, I915_WAIT_LOCKED)) 415 + err = -EIO; 415 416 mutex_unlock(&i915->drm.struct_mutex); 416 417 417 418 mock_file_free(i915, file);
+31 -63
drivers/gpu/drm/i915/selftests/i915_gem_timeline.c drivers/gpu/drm/i915/selftests/i915_timeline.c
··· 1 1 /* 2 - * Copyright © 2017 Intel Corporation 2 + * SPDX-License-Identifier: MIT 3 3 * 4 - * Permission is hereby granted, free of charge, to any person obtaining a 5 - * copy of this software and associated documentation files (the "Software"), 6 - * to deal in the Software without restriction, including without limitation 7 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 - * and/or sell copies of the Software, and to permit persons to whom the 9 - * Software is furnished to do so, subject to the following conditions: 10 - * 11 - * The above copyright notice and this permission notice (including the next 12 - * paragraph) shall be included in all copies or substantial portions of the 13 - * Software. 14 - * 15 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 - * IN THE SOFTWARE. 
22 - * 4 + * Copyright © 2017-2018 Intel Corporation 23 5 */ 24 6 25 7 #include "../i915_selftest.h" ··· 17 35 bool set; 18 36 }; 19 37 20 - static int __igt_sync(struct intel_timeline *tl, 38 + static int __igt_sync(struct i915_timeline *tl, 21 39 u64 ctx, 22 40 const struct __igt_sync *p, 23 41 const char *name) 24 42 { 25 43 int ret; 26 44 27 - if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { 45 + if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { 28 46 pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", 29 47 name, p->name, ctx, p->seqno, yesno(p->expected)); 30 48 return -EINVAL; 31 49 } 32 50 33 51 if (p->set) { 34 - ret = __intel_timeline_sync_set(tl, ctx, p->seqno); 52 + ret = __i915_timeline_sync_set(tl, ctx, p->seqno); 35 53 if (ret) 36 54 return ret; 37 55 } ··· 59 77 { "unwrap", UINT_MAX, true, false }, 60 78 {}, 61 79 }, *p; 62 - struct intel_timeline *tl; 80 + struct i915_timeline tl; 63 81 int order, offset; 64 82 int ret = -ENODEV; 65 83 66 - tl = mock_timeline(0); 67 - if (!tl) 68 - return -ENOMEM; 69 - 84 + mock_timeline_init(&tl, 0); 70 85 for (p = pass; p->name; p++) { 71 86 for (order = 1; order < 64; order++) { 72 87 for (offset = -1; offset <= (order > 1); offset++) { 73 88 u64 ctx = BIT_ULL(order) + offset; 74 89 75 - ret = __igt_sync(tl, ctx, p, "1"); 90 + ret = __igt_sync(&tl, ctx, p, "1"); 76 91 if (ret) 77 92 goto out; 78 93 } 79 94 } 80 95 } 81 - mock_timeline_destroy(tl); 96 + mock_timeline_fini(&tl); 82 97 83 - tl = mock_timeline(0); 84 - if (!tl) 85 - return -ENOMEM; 86 - 98 + mock_timeline_init(&tl, 0); 87 99 for (order = 1; order < 64; order++) { 88 100 for (offset = -1; offset <= (order > 1); offset++) { 89 101 u64 ctx = BIT_ULL(order) + offset; 90 102 91 103 for (p = pass; p->name; p++) { 92 - ret = __igt_sync(tl, ctx, p, "2"); 104 + ret = __igt_sync(&tl, ctx, p, "2"); 93 105 if (ret) 94 106 goto out; 95 107 } ··· 91 115 } 92 116 93 117 out: 94 - 
mock_timeline_destroy(tl); 118 + mock_timeline_fini(&tl); 95 119 return ret; 96 120 } 97 121 ··· 103 127 static int bench_sync(void *arg) 104 128 { 105 129 struct rnd_state prng; 106 - struct intel_timeline *tl; 130 + struct i915_timeline tl; 107 131 unsigned long end_time, count; 108 132 u64 prng32_1M; 109 133 ktime_t kt; 110 134 int order, last_order; 111 135 112 - tl = mock_timeline(0); 113 - if (!tl) 114 - return -ENOMEM; 136 + mock_timeline_init(&tl, 0); 115 137 116 138 /* Lookups from cache are very fast and so the random number generation 117 139 * and the loop itself becomes a significant factor in the per-iteration ··· 141 167 do { 142 168 u64 id = i915_prandom_u64_state(&prng); 143 169 144 - __intel_timeline_sync_set(tl, id, 0); 170 + __i915_timeline_sync_set(&tl, id, 0); 145 171 count++; 146 172 } while (!time_after(jiffies, end_time)); 147 173 kt = ktime_sub(ktime_get(), kt); ··· 156 182 while (end_time--) { 157 183 u64 id = i915_prandom_u64_state(&prng); 158 184 159 - if (!__intel_timeline_sync_is_later(tl, id, 0)) { 160 - mock_timeline_destroy(tl); 185 + if (!__i915_timeline_sync_is_later(&tl, id, 0)) { 186 + mock_timeline_fini(&tl); 161 187 pr_err("Lookup of %llu failed\n", id); 162 188 return -EINVAL; 163 189 } ··· 167 193 pr_info("%s: %lu random lookups, %lluns/lookup\n", 168 194 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 169 195 170 - mock_timeline_destroy(tl); 196 + mock_timeline_fini(&tl); 171 197 cond_resched(); 172 198 173 - tl = mock_timeline(0); 174 - if (!tl) 175 - return -ENOMEM; 199 + mock_timeline_init(&tl, 0); 176 200 177 201 /* Benchmark setting the first N (in order) contexts */ 178 202 count = 0; 179 203 kt = ktime_get(); 180 204 end_time = jiffies + HZ/10; 181 205 do { 182 - __intel_timeline_sync_set(tl, count++, 0); 206 + __i915_timeline_sync_set(&tl, count++, 0); 183 207 } while (!time_after(jiffies, end_time)); 184 208 kt = ktime_sub(ktime_get(), kt); 185 209 pr_info("%s: %lu in-order insertions, 
%lluns/insert\n", ··· 187 215 end_time = count; 188 216 kt = ktime_get(); 189 217 while (end_time--) { 190 - if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { 218 + if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { 191 219 pr_err("Lookup of %lu failed\n", end_time); 192 - mock_timeline_destroy(tl); 220 + mock_timeline_fini(&tl); 193 221 return -EINVAL; 194 222 } 195 223 } ··· 197 225 pr_info("%s: %lu in-order lookups, %lluns/lookup\n", 198 226 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 199 227 200 - mock_timeline_destroy(tl); 228 + mock_timeline_fini(&tl); 201 229 cond_resched(); 202 230 203 - tl = mock_timeline(0); 204 - if (!tl) 205 - return -ENOMEM; 231 + mock_timeline_init(&tl, 0); 206 232 207 233 /* Benchmark searching for a random context id and maybe changing it */ 208 234 prandom_seed_state(&prng, i915_selftest.random_seed); ··· 211 241 u32 id = random_engine(&prng); 212 242 u32 seqno = prandom_u32_state(&prng); 213 243 214 - if (!__intel_timeline_sync_is_later(tl, id, seqno)) 215 - __intel_timeline_sync_set(tl, id, seqno); 244 + if (!__i915_timeline_sync_is_later(&tl, id, seqno)) 245 + __i915_timeline_sync_set(&tl, id, seqno); 216 246 217 247 count++; 218 248 } while (!time_after(jiffies, end_time)); ··· 220 250 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); 221 251 pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", 222 252 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 223 - mock_timeline_destroy(tl); 253 + mock_timeline_fini(&tl); 224 254 cond_resched(); 225 255 226 256 /* Benchmark searching for a known context id and changing the seqno */ ··· 228 258 ({ int tmp = last_order; last_order = order; order += tmp; })) { 229 259 unsigned int mask = BIT(order) - 1; 230 260 231 - tl = mock_timeline(0); 232 - if (!tl) 233 - return -ENOMEM; 261 + mock_timeline_init(&tl, 0); 234 262 235 263 count = 0; 236 264 kt = ktime_get(); ··· 240 272 */ 241 273 u64 id = (u64)(count & mask) << order; 242 274 243 - 
__intel_timeline_sync_is_later(tl, id, 0); 244 - __intel_timeline_sync_set(tl, id, 0); 275 + __i915_timeline_sync_is_later(&tl, id, 0); 276 + __i915_timeline_sync_set(&tl, id, 0); 245 277 246 278 count++; 247 279 } while (!time_after(jiffies, end_time)); ··· 249 281 pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", 250 282 __func__, count, order, 251 283 (long long)div64_ul(ktime_to_ns(kt), count)); 252 - mock_timeline_destroy(tl); 284 + mock_timeline_fini(&tl); 253 285 cond_resched(); 254 286 } 255 287
+1
drivers/gpu/drm/i915/selftests/i915_live_selftests.h
··· 11 11 */ 12 12 selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ 13 13 selftest(uncore, intel_uncore_live_selftests) 14 + selftest(workarounds, intel_workarounds_live_selftests) 14 15 selftest(requests, i915_request_live_selftests) 15 16 selftest(objects, i915_gem_object_live_selftests) 16 17 selftest(dmabuf, i915_gem_dmabuf_live_selftests)
+1 -1
drivers/gpu/drm/i915/selftests/i915_vma.c
··· 81 81 } 82 82 83 83 if (i915_vma_compare(vma, vm, view)) { 84 - pr_err("i915_vma_compare failed with create parmaters!\n"); 84 + pr_err("i915_vma_compare failed with create parameters!\n"); 85 85 return ERR_PTR(-EINVAL); 86 86 } 87 87
+70
drivers/gpu/drm/i915/selftests/igt_flush_test.c
··· 1 + /* 2 + * SPDX-License-Identifier: MIT 3 + * 4 + * Copyright © 2018 Intel Corporation 5 + */ 6 + 7 + #include "../i915_drv.h" 8 + 9 + #include "../i915_selftest.h" 10 + #include "igt_flush_test.h" 11 + 12 + struct wedge_me { 13 + struct delayed_work work; 14 + struct drm_i915_private *i915; 15 + const void *symbol; 16 + }; 17 + 18 + static void wedge_me(struct work_struct *work) 19 + { 20 + struct wedge_me *w = container_of(work, typeof(*w), work.work); 21 + 22 + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); 23 + 24 + GEM_TRACE("%pS timed out.\n", w->symbol); 25 + GEM_TRACE_DUMP(); 26 + 27 + i915_gem_set_wedged(w->i915); 28 + } 29 + 30 + static void __init_wedge(struct wedge_me *w, 31 + struct drm_i915_private *i915, 32 + long timeout, 33 + const void *symbol) 34 + { 35 + w->i915 = i915; 36 + w->symbol = symbol; 37 + 38 + INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); 39 + schedule_delayed_work(&w->work, timeout); 40 + } 41 + 42 + static void __fini_wedge(struct wedge_me *w) 43 + { 44 + cancel_delayed_work_sync(&w->work); 45 + destroy_delayed_work_on_stack(&w->work); 46 + w->i915 = NULL; 47 + } 48 + 49 + #define wedge_on_timeout(W, DEV, TIMEOUT) \ 50 + for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ 51 + (W)->i915; \ 52 + __fini_wedge((W))) 53 + 54 + int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) 55 + { 56 + struct wedge_me w; 57 + 58 + cond_resched(); 59 + 60 + if (flags & I915_WAIT_LOCKED && 61 + i915_gem_switch_to_kernel_context(i915)) { 62 + pr_err("Failed to switch back to kernel context; declaring wedged\n"); 63 + i915_gem_set_wedged(i915); 64 + } 65 + 66 + wedge_on_timeout(&w, i915, HZ) 67 + i915_gem_wait_for_idle(i915, flags); 68 + 69 + return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; 70 + }
+14
drivers/gpu/drm/i915/selftests/igt_flush_test.h
··· 1 + /* 2 + * SPDX-License-Identifier: MIT 3 + * 4 + * Copyright © 2018 Intel Corporation 5 + */ 6 + 7 + #ifndef IGT_FLUSH_TEST_H 8 + #define IGT_FLUSH_TEST_H 9 + 10 + struct drm_i915_private; 11 + 12 + int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); 13 + 14 + #endif /* IGT_FLUSH_TEST_H */
+3 -2
drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
··· 412 412 * that they are ready for the next test. We wait until all 413 413 * threads are complete and waiting for us (i.e. not a seqno). 414 414 */ 415 - err = wait_var_event_timeout(&done, !atomic_read(&done), 10 * HZ); 416 - if (err) { 415 + if (!wait_var_event_timeout(&done, 416 + !atomic_read(&done), 10 * HZ)) { 417 417 pr_err("Timed out waiting for %d remaining waiters\n", 418 418 atomic_read(&done)); 419 + err = -ETIMEDOUT; 419 420 break; 420 421 } 421 422
+57 -63
drivers/gpu/drm/i915/selftests/intel_hangcheck.c
··· 26 26 27 27 #include "../i915_selftest.h" 28 28 #include "i915_random.h" 29 + #include "igt_flush_test.h" 29 30 30 31 #include "mock_context.h" 31 32 #include "mock_drm.h" 33 + 34 + #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ 32 35 33 36 struct hang { 34 37 struct drm_i915_private *i915; ··· 254 251 return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); 255 252 } 256 253 257 - struct wedge_me { 258 - struct delayed_work work; 259 - struct drm_i915_private *i915; 260 - const void *symbol; 261 - }; 262 - 263 - static void wedge_me(struct work_struct *work) 264 - { 265 - struct wedge_me *w = container_of(work, typeof(*w), work.work); 266 - 267 - pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); 268 - 269 - GEM_TRACE("%pS timed out.\n", w->symbol); 270 - GEM_TRACE_DUMP(); 271 - 272 - i915_gem_set_wedged(w->i915); 273 - } 274 - 275 - static void __init_wedge(struct wedge_me *w, 276 - struct drm_i915_private *i915, 277 - long timeout, 278 - const void *symbol) 279 - { 280 - w->i915 = i915; 281 - w->symbol = symbol; 282 - 283 - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); 284 - schedule_delayed_work(&w->work, timeout); 285 - } 286 - 287 - static void __fini_wedge(struct wedge_me *w) 288 - { 289 - cancel_delayed_work_sync(&w->work); 290 - destroy_delayed_work_on_stack(&w->work); 291 - w->i915 = NULL; 292 - } 293 - 294 - #define wedge_on_timeout(W, DEV, TIMEOUT) \ 295 - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ 296 - (W)->i915; \ 297 - __fini_wedge((W))) 298 - 299 - static noinline int 300 - flush_test(struct drm_i915_private *i915, unsigned int flags) 301 - { 302 - struct wedge_me w; 303 - 304 - cond_resched(); 305 - 306 - wedge_on_timeout(&w, i915, HZ) 307 - i915_gem_wait_for_idle(i915, flags); 308 - 309 - return i915_terminally_wedged(&i915->gpu_error) ? 
-EIO : 0; 310 - } 311 - 312 254 static void hang_fini(struct hang *h) 313 255 { 314 256 *h->batch = MI_BATCH_BUFFER_END; ··· 267 319 268 320 kernel_context_close(h->ctx); 269 321 270 - flush_test(h->i915, I915_WAIT_LOCKED); 322 + igt_flush_test(h->i915, I915_WAIT_LOCKED); 271 323 } 272 324 273 325 static bool wait_until_running(struct hang *h, struct i915_request *rq) ··· 402 454 return err; 403 455 } 404 456 457 + static bool wait_for_idle(struct intel_engine_cs *engine) 458 + { 459 + return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; 460 + } 461 + 405 462 static int __igt_reset_engine(struct drm_i915_private *i915, bool active) 406 463 { 407 464 struct intel_engine_cs *engine; ··· 433 480 434 481 if (active && !intel_engine_can_store_dword(engine)) 435 482 continue; 483 + 484 + if (!wait_for_idle(engine)) { 485 + pr_err("%s failed to idle before reset\n", 486 + engine->name); 487 + err = -EIO; 488 + break; 489 + } 436 490 437 491 reset_count = i915_reset_count(&i915->gpu_error); 438 492 reset_engine_count = i915_reset_engine_count(&i915->gpu_error, ··· 502 542 err = -EINVAL; 503 543 break; 504 544 } 545 + 546 + if (!wait_for_idle(engine)) { 547 + struct drm_printer p = 548 + drm_info_printer(i915->drm.dev); 549 + 550 + pr_err("%s failed to idle after reset\n", 551 + engine->name); 552 + intel_engine_dump(engine, &p, 553 + "%s\n", engine->name); 554 + 555 + err = -EIO; 556 + break; 557 + } 505 558 } while (time_before(jiffies, end_time)); 506 559 clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); 507 560 508 561 if (err) 509 562 break; 510 563 511 - err = flush_test(i915, 0); 564 + err = igt_flush_test(i915, 0); 512 565 if (err) 513 566 break; 514 567 } ··· 601 628 } 602 629 603 630 if (arg->flags & TEST_PRIORITY) 604 - ctx[idx]->priority = 631 + ctx[idx]->sched.priority = 605 632 i915_prandom_u32_max_state(512, &prng); 606 633 607 634 rq[idx] = i915_request_get(new); ··· 656 683 return err; 657 684 658 685 if (flags & TEST_PRIORITY) 659 
- h.ctx->priority = 1024; 686 + h.ctx->sched.priority = 1024; 660 687 } 661 688 662 689 for_each_engine(engine, i915, id) { ··· 668 695 if (flags & TEST_ACTIVE && 669 696 !intel_engine_can_store_dword(engine)) 670 697 continue; 698 + 699 + if (!wait_for_idle(engine)) { 700 + pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", 701 + engine->name, test_name); 702 + err = -EIO; 703 + break; 704 + } 671 705 672 706 memset(threads, 0, sizeof(threads)); 673 707 for_each_engine(other, i915, tmp) { ··· 752 772 i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 753 773 i915_request_put(rq); 754 774 } 775 + 776 + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { 777 + struct drm_printer p = 778 + drm_info_printer(i915->drm.dev); 779 + 780 + pr_err("i915_reset_engine(%s:%s):" 781 + " failed to idle after reset\n", 782 + engine->name, test_name); 783 + intel_engine_dump(engine, &p, 784 + "%s\n", engine->name); 785 + 786 + err = -EIO; 787 + break; 788 + } 755 789 } while (time_before(jiffies, end_time)); 756 790 clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); 757 791 pr_info("i915_reset_engine(%s:%s): %lu resets\n", ··· 820 826 if (err) 821 827 break; 822 828 823 - err = flush_test(i915, 0); 829 + err = igt_flush_test(i915, 0); 824 830 if (err) 825 831 break; 826 832 } ··· 975 981 if (engine == exclude) 976 982 continue; 977 983 978 - if (wait_for(intel_engine_is_idle(engine), 10)) 984 + if (!wait_for_idle(engine)) 979 985 return -EIO; 980 986 } 981 987 ··· 1114 1120 1115 1121 i915_request_put(prev); 1116 1122 1117 - err = flush_test(i915, I915_WAIT_LOCKED); 1123 + err = igt_flush_test(i915, I915_WAIT_LOCKED); 1118 1124 if (err) 1119 1125 break; 1120 1126 } ··· 1226 1232 err = i915_subtests(tests, i915); 1227 1233 1228 1234 mutex_lock(&i915->drm.struct_mutex); 1229 - flush_test(i915, I915_WAIT_LOCKED); 1235 + igt_flush_test(i915, I915_WAIT_LOCKED); 1230 1236 mutex_unlock(&i915->drm.struct_mutex); 1231 1237 1232 1238 i915_modparams.enable_hangcheck = 
saved_hangcheck;
+16 -64
drivers/gpu/drm/i915/selftests/intel_lrc.c
··· 5 5 */ 6 6 7 7 #include "../i915_selftest.h" 8 + #include "igt_flush_test.h" 8 9 9 10 #include "mock_context.h" 10 11 ··· 169 168 return READ_ONCE(*seqno); 170 169 } 171 170 172 - struct wedge_me { 173 - struct delayed_work work; 174 - struct drm_i915_private *i915; 175 - const void *symbol; 176 - }; 177 - 178 - static void wedge_me(struct work_struct *work) 179 - { 180 - struct wedge_me *w = container_of(work, typeof(*w), work.work); 181 - 182 - pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); 183 - 184 - GEM_TRACE("%pS timed out.\n", w->symbol); 185 - GEM_TRACE_DUMP(); 186 - 187 - i915_gem_set_wedged(w->i915); 188 - } 189 - 190 - static void __init_wedge(struct wedge_me *w, 191 - struct drm_i915_private *i915, 192 - long timeout, 193 - const void *symbol) 194 - { 195 - w->i915 = i915; 196 - w->symbol = symbol; 197 - 198 - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); 199 - schedule_delayed_work(&w->work, timeout); 200 - } 201 - 202 - static void __fini_wedge(struct wedge_me *w) 203 - { 204 - cancel_delayed_work_sync(&w->work); 205 - destroy_delayed_work_on_stack(&w->work); 206 - w->i915 = NULL; 207 - } 208 - 209 - #define wedge_on_timeout(W, DEV, TIMEOUT) \ 210 - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ 211 - (W)->i915; \ 212 - __fini_wedge((W))) 213 - 214 - static noinline int 215 - flush_test(struct drm_i915_private *i915, unsigned int flags) 216 - { 217 - struct wedge_me w; 218 - 219 - cond_resched(); 220 - 221 - wedge_on_timeout(&w, i915, HZ) 222 - i915_gem_wait_for_idle(i915, flags); 223 - 224 - return i915_terminally_wedged(&i915->gpu_error) ? 
-EIO : 0; 225 - } 226 - 227 171 static void spinner_end(struct spinner *spin) 228 172 { 229 173 *spin->batch = MI_BATCH_BUFFER_END; ··· 241 295 } 242 296 243 297 spinner_end(&spin); 244 - if (flush_test(i915, I915_WAIT_LOCKED)) { 298 + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { 245 299 err = -EIO; 246 300 goto err_ctx; 247 301 } ··· 253 307 err_spin: 254 308 spinner_fini(&spin); 255 309 err_unlock: 256 - flush_test(i915, I915_WAIT_LOCKED); 310 + igt_flush_test(i915, I915_WAIT_LOCKED); 257 311 mutex_unlock(&i915->drm.struct_mutex); 258 312 return err; 259 313 } ··· 281 335 ctx_hi = kernel_context(i915); 282 336 if (!ctx_hi) 283 337 goto err_spin_lo; 284 - ctx_hi->priority = I915_CONTEXT_MAX_USER_PRIORITY; 338 + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; 285 339 286 340 ctx_lo = kernel_context(i915); 287 341 if (!ctx_lo) 288 342 goto err_ctx_hi; 289 - ctx_lo->priority = I915_CONTEXT_MIN_USER_PRIORITY; 343 + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; 290 344 291 345 for_each_engine(engine, i915, id) { 292 346 struct i915_request *rq; ··· 326 380 327 381 spinner_end(&spin_hi); 328 382 spinner_end(&spin_lo); 329 - if (flush_test(i915, I915_WAIT_LOCKED)) { 383 + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { 330 384 err = -EIO; 331 385 goto err_ctx_lo; 332 386 } ··· 342 396 err_spin_hi: 343 397 spinner_fini(&spin_hi); 344 398 err_unlock: 345 - flush_test(i915, I915_WAIT_LOCKED); 399 + igt_flush_test(i915, I915_WAIT_LOCKED); 346 400 mutex_unlock(&i915->drm.struct_mutex); 347 401 return err; 348 402 } ··· 353 407 struct i915_gem_context *ctx_hi, *ctx_lo; 354 408 struct spinner spin_hi, spin_lo; 355 409 struct intel_engine_cs *engine; 410 + struct i915_sched_attr attr = {}; 356 411 enum intel_engine_id id; 357 412 int err = -ENOMEM; 358 413 ··· 405 458 goto err_wedged; 406 459 } 407 460 408 - engine->schedule(rq, I915_PRIORITY_MAX); 461 + attr.priority = I915_PRIORITY_MAX; 462 + engine->schedule(rq, &attr); 409 463 410 464 if 
(!wait_for_spinner(&spin_hi, rq)) { 411 465 pr_err("High priority context failed to preempt the low priority context\n"); ··· 416 468 417 469 spinner_end(&spin_hi); 418 470 spinner_end(&spin_lo); 419 - if (flush_test(i915, I915_WAIT_LOCKED)) { 471 + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { 420 472 err = -EIO; 421 473 goto err_ctx_lo; 422 474 } ··· 432 484 err_spin_hi: 433 485 spinner_fini(&spin_hi); 434 486 err_unlock: 435 - flush_test(i915, I915_WAIT_LOCKED); 487 + igt_flush_test(i915, I915_WAIT_LOCKED); 436 488 mutex_unlock(&i915->drm.struct_mutex); 437 489 return err; 438 490 ··· 451 503 SUBTEST(live_preempt), 452 504 SUBTEST(live_late_preempt), 453 505 }; 506 + 507 + if (!HAS_EXECLISTS(i915)) 508 + return 0; 509 + 454 510 return i915_subtests(tests, i915); 455 511 }
+291
drivers/gpu/drm/i915/selftests/intel_workarounds.c
··· 1 + /* 2 + * SPDX-License-Identifier: MIT 3 + * 4 + * Copyright © 2018 Intel Corporation 5 + */ 6 + 7 + #include "../i915_selftest.h" 8 + 9 + #include "mock_context.h" 10 + 11 + static struct drm_i915_gem_object * 12 + read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) 13 + { 14 + struct drm_i915_gem_object *result; 15 + struct i915_request *rq; 16 + struct i915_vma *vma; 17 + const u32 base = engine->mmio_base; 18 + u32 srm, *cs; 19 + int err; 20 + int i; 21 + 22 + result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); 23 + if (IS_ERR(result)) 24 + return result; 25 + 26 + i915_gem_object_set_cache_level(result, I915_CACHE_LLC); 27 + 28 + cs = i915_gem_object_pin_map(result, I915_MAP_WB); 29 + if (IS_ERR(cs)) { 30 + err = PTR_ERR(cs); 31 + goto err_obj; 32 + } 33 + memset(cs, 0xc5, PAGE_SIZE); 34 + i915_gem_object_unpin_map(result); 35 + 36 + vma = i915_vma_instance(result, &engine->i915->ggtt.base, NULL); 37 + if (IS_ERR(vma)) { 38 + err = PTR_ERR(vma); 39 + goto err_obj; 40 + } 41 + 42 + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 43 + if (err) 44 + goto err_obj; 45 + 46 + rq = i915_request_alloc(engine, ctx); 47 + if (IS_ERR(rq)) { 48 + err = PTR_ERR(rq); 49 + goto err_pin; 50 + } 51 + 52 + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; 53 + if (INTEL_GEN(ctx->i915) >= 8) 54 + srm++; 55 + 56 + cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); 57 + if (IS_ERR(cs)) { 58 + err = PTR_ERR(cs); 59 + goto err_req; 60 + } 61 + 62 + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { 63 + *cs++ = srm; 64 + *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i)); 65 + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; 66 + *cs++ = 0; 67 + } 68 + intel_ring_advance(rq, cs); 69 + 70 + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 71 + reservation_object_lock(vma->resv, NULL); 72 + reservation_object_add_excl_fence(vma->resv, &rq->fence); 73 + reservation_object_unlock(vma->resv); 74 + 75 + i915_gem_object_get(result); 
76 + i915_gem_object_set_active_reference(result); 77 + 78 + __i915_request_add(rq, true); 79 + i915_vma_unpin(vma); 80 + 81 + return result; 82 + 83 + err_req: 84 + i915_request_add(rq); 85 + err_pin: 86 + i915_vma_unpin(vma); 87 + err_obj: 88 + i915_gem_object_put(result); 89 + return ERR_PTR(err); 90 + } 91 + 92 + static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i) 93 + { 94 + return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid; 95 + } 96 + 97 + static void print_results(const struct whitelist *w, const u32 *results) 98 + { 99 + unsigned int i; 100 + 101 + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { 102 + u32 expected = get_whitelist_reg(w, i); 103 + u32 actual = results[i]; 104 + 105 + pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n", 106 + i, expected, actual); 107 + } 108 + } 109 + 110 + static int check_whitelist(const struct whitelist *w, 111 + struct i915_gem_context *ctx, 112 + struct intel_engine_cs *engine) 113 + { 114 + struct drm_i915_gem_object *results; 115 + u32 *vaddr; 116 + int err; 117 + int i; 118 + 119 + results = read_nonprivs(ctx, engine); 120 + if (IS_ERR(results)) 121 + return PTR_ERR(results); 122 + 123 + err = i915_gem_object_set_to_cpu_domain(results, false); 124 + if (err) 125 + goto out_put; 126 + 127 + vaddr = i915_gem_object_pin_map(results, I915_MAP_WB); 128 + if (IS_ERR(vaddr)) { 129 + err = PTR_ERR(vaddr); 130 + goto out_put; 131 + } 132 + 133 + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { 134 + u32 expected = get_whitelist_reg(w, i); 135 + u32 actual = vaddr[i]; 136 + 137 + if (expected != actual) { 138 + print_results(w, vaddr); 139 + pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n", 140 + i, expected, actual); 141 + 142 + err = -EINVAL; 143 + break; 144 + } 145 + } 146 + 147 + i915_gem_object_unpin_map(results); 148 + out_put: 149 + i915_gem_object_put(results); 150 + return err; 151 + } 152 + 153 + static int do_device_reset(struct intel_engine_cs *engine) 154 
+ { 155 + i915_reset(engine->i915, ENGINE_MASK(engine->id), NULL); 156 + return 0; 157 + } 158 + 159 + static int do_engine_reset(struct intel_engine_cs *engine) 160 + { 161 + return i915_reset_engine(engine, NULL); 162 + } 163 + 164 + static int switch_to_scratch_context(struct intel_engine_cs *engine) 165 + { 166 + struct i915_gem_context *ctx; 167 + struct i915_request *rq; 168 + 169 + ctx = kernel_context(engine->i915); 170 + if (IS_ERR(ctx)) 171 + return PTR_ERR(ctx); 172 + 173 + rq = i915_request_alloc(engine, ctx); 174 + kernel_context_close(ctx); 175 + if (IS_ERR(rq)) 176 + return PTR_ERR(rq); 177 + 178 + i915_request_add(rq); 179 + 180 + return 0; 181 + } 182 + 183 + static int check_whitelist_across_reset(struct intel_engine_cs *engine, 184 + int (*reset)(struct intel_engine_cs *), 185 + const struct whitelist *w, 186 + const char *name) 187 + { 188 + struct i915_gem_context *ctx; 189 + int err; 190 + 191 + ctx = kernel_context(engine->i915); 192 + if (IS_ERR(ctx)) 193 + return PTR_ERR(ctx); 194 + 195 + err = check_whitelist(w, ctx, engine); 196 + if (err) { 197 + pr_err("Invalid whitelist *before* %s reset!\n", name); 198 + goto out; 199 + } 200 + 201 + err = switch_to_scratch_context(engine); 202 + if (err) 203 + goto out; 204 + 205 + err = reset(engine); 206 + if (err) { 207 + pr_err("%s reset failed\n", name); 208 + goto out; 209 + } 210 + 211 + err = check_whitelist(w, ctx, engine); 212 + if (err) { 213 + pr_err("Whitelist not preserved in context across %s reset!\n", 214 + name); 215 + goto out; 216 + } 217 + 218 + kernel_context_close(ctx); 219 + 220 + ctx = kernel_context(engine->i915); 221 + if (IS_ERR(ctx)) 222 + return PTR_ERR(ctx); 223 + 224 + err = check_whitelist(w, ctx, engine); 225 + if (err) { 226 + pr_err("Invalid whitelist *after* %s reset in fresh context!\n", 227 + name); 228 + goto out; 229 + } 230 + 231 + out: 232 + kernel_context_close(ctx); 233 + return err; 234 + } 235 + 236 + static int live_reset_whitelist(void *arg) 237 + { 
238 + struct drm_i915_private *i915 = arg; 239 + struct intel_engine_cs *engine = i915->engine[RCS]; 240 + struct i915_gpu_error *error = &i915->gpu_error; 241 + struct whitelist w; 242 + int err = 0; 243 + 244 + /* If we reset the gpu, we should not lose the RING_NONPRIV */ 245 + 246 + if (!engine) 247 + return 0; 248 + 249 + if (!whitelist_build(engine, &w)) 250 + return 0; 251 + 252 + pr_info("Checking %d whitelisted registers (RING_NONPRIV)\n", w.count); 253 + 254 + set_bit(I915_RESET_BACKOFF, &error->flags); 255 + set_bit(I915_RESET_ENGINE + engine->id, &error->flags); 256 + 257 + if (intel_has_reset_engine(i915)) { 258 + err = check_whitelist_across_reset(engine, 259 + do_engine_reset, &w, 260 + "engine"); 261 + if (err) 262 + goto out; 263 + } 264 + 265 + if (intel_has_gpu_reset(i915)) { 266 + err = check_whitelist_across_reset(engine, 267 + do_device_reset, &w, 268 + "device"); 269 + if (err) 270 + goto out; 271 + } 272 + 273 + out: 274 + clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); 275 + clear_bit(I915_RESET_BACKOFF, &error->flags); 276 + return err; 277 + } 278 + 279 + int intel_workarounds_live_selftests(struct drm_i915_private *i915) 280 + { 281 + static const struct i915_subtest tests[] = { 282 + SUBTEST(live_reset_whitelist), 283 + }; 284 + int err; 285 + 286 + mutex_lock(&i915->drm.struct_mutex); 287 + err = i915_subtests(tests, i915); 288 + mutex_unlock(&i915->drm.struct_mutex); 289 + 290 + return err; 291 + }
+47 -20
drivers/gpu/drm/i915/selftests/mock_engine.c
··· 25 25 #include "mock_engine.h" 26 26 #include "mock_request.h" 27 27 28 + struct mock_ring { 29 + struct intel_ring base; 30 + struct i915_timeline timeline; 31 + }; 32 + 28 33 static struct mock_request *first_request(struct mock_engine *engine) 29 34 { 30 35 return list_first_entry_or_null(&engine->hw_queue, ··· 76 71 mock_context_pin(struct intel_engine_cs *engine, 77 72 struct i915_gem_context *ctx) 78 73 { 79 - i915_gem_context_get(ctx); 74 + struct intel_context *ce = to_intel_context(ctx, engine); 75 + 76 + if (!ce->pin_count++) 77 + i915_gem_context_get(ctx); 78 + 80 79 return engine->buffer; 81 80 } 82 81 83 82 static void mock_context_unpin(struct intel_engine_cs *engine, 84 83 struct i915_gem_context *ctx) 85 84 { 86 - i915_gem_context_put(ctx); 85 + struct intel_context *ce = to_intel_context(ctx, engine); 86 + 87 + if (!--ce->pin_count) 88 + i915_gem_context_put(ctx); 87 89 } 88 90 89 91 static int mock_request_alloc(struct i915_request *request) ··· 137 125 static struct intel_ring *mock_ring(struct intel_engine_cs *engine) 138 126 { 139 127 const unsigned long sz = PAGE_SIZE / 2; 140 - struct intel_ring *ring; 128 + struct mock_ring *ring; 141 129 142 130 BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz); 143 131 ··· 145 133 if (!ring) 146 134 return NULL; 147 135 148 - ring->size = sz; 149 - ring->effective_size = sz; 150 - ring->vaddr = (void *)(ring + 1); 136 + i915_timeline_init(engine->i915, &ring->timeline, engine->name); 151 137 152 - INIT_LIST_HEAD(&ring->request_list); 153 - intel_ring_update_space(ring); 138 + ring->base.size = sz; 139 + ring->base.effective_size = sz; 140 + ring->base.vaddr = (void *)(ring + 1); 141 + ring->base.timeline = &ring->timeline; 154 142 155 - return ring; 143 + INIT_LIST_HEAD(&ring->base.request_list); 144 + intel_ring_update_space(&ring->base); 145 + 146 + return &ring->base; 147 + } 148 + 149 + static void mock_ring_free(struct intel_ring *base) 150 + { 151 + struct mock_ring *ring = container_of(base, 
typeof(*ring), base); 152 + 153 + i915_timeline_fini(&ring->timeline); 154 + kfree(ring); 156 155 } 157 156 158 157 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, ··· 178 155 if (!engine) 179 156 return NULL; 180 157 181 - engine->base.buffer = mock_ring(&engine->base); 182 - if (!engine->base.buffer) { 183 - kfree(engine); 184 - return NULL; 185 - } 186 - 187 158 /* minimal engine setup for requests */ 188 159 engine->base.i915 = i915; 189 160 snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); ··· 191 174 engine->base.emit_breadcrumb = mock_emit_breadcrumb; 192 175 engine->base.submit_request = mock_submit_request; 193 176 194 - engine->base.timeline = 195 - &i915->gt.global_timeline.engine[engine->base.id]; 196 - 177 + i915_timeline_init(i915, &engine->base.timeline, engine->base.name); 197 178 intel_engine_init_breadcrumbs(&engine->base); 198 179 engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ 199 180 ··· 200 185 timer_setup(&engine->hw_delay, hw_delay_complete, 0); 201 186 INIT_LIST_HEAD(&engine->hw_queue); 202 187 188 + engine->base.buffer = mock_ring(&engine->base); 189 + if (!engine->base.buffer) 190 + goto err_breadcrumbs; 191 + 203 192 return &engine->base; 193 + 194 + err_breadcrumbs: 195 + intel_engine_fini_breadcrumbs(&engine->base); 196 + i915_timeline_fini(&engine->base.timeline); 197 + kfree(engine); 198 + return NULL; 204 199 } 205 200 206 201 void mock_engine_flush(struct intel_engine_cs *engine) ··· 242 217 GEM_BUG_ON(timer_pending(&mock->hw_delay)); 243 218 244 219 if (engine->last_retired_context) 245 - engine->context_unpin(engine, engine->last_retired_context); 220 + intel_context_unpin(engine->last_retired_context, engine); 221 + 222 + mock_ring_free(engine->buffer); 246 223 247 224 intel_engine_fini_breadcrumbs(engine); 225 + i915_timeline_fini(&engine->timeline); 248 226 249 - kfree(engine->buffer); 250 227 kfree(engine); 251 228 }
+11 -10
drivers/gpu/drm/i915/selftests/mock_gem_device.c
··· 44 44 mock_engine_flush(engine); 45 45 46 46 i915_retire_requests(i915); 47 + GEM_BUG_ON(i915->gt.active_requests); 47 48 } 48 49 49 50 static void mock_device_release(struct drm_device *dev) ··· 73 72 74 73 mutex_lock(&i915->drm.struct_mutex); 75 74 mock_fini_ggtt(i915); 76 - i915_gem_timeline_fini(&i915->gt.global_timeline); 77 75 mutex_unlock(&i915->drm.struct_mutex); 76 + WARN_ON(!list_empty(&i915->gt.timelines)); 78 77 79 78 destroy_workqueue(i915->wq); 80 79 ··· 224 223 if (!i915->priorities) 225 224 goto err_dependencies; 226 225 227 - mutex_lock(&i915->drm.struct_mutex); 228 226 INIT_LIST_HEAD(&i915->gt.timelines); 229 - err = i915_gem_timeline_init__global(i915); 230 - if (err) { 231 - mutex_unlock(&i915->drm.struct_mutex); 232 - goto err_priorities; 233 - } 227 + INIT_LIST_HEAD(&i915->gt.active_rings); 228 + INIT_LIST_HEAD(&i915->gt.closed_vma); 229 + 230 + mutex_lock(&i915->drm.struct_mutex); 234 231 235 232 mock_init_ggtt(i915); 236 - mutex_unlock(&i915->drm.struct_mutex); 237 233 238 234 mkwrite_device_info(i915)->ring_mask = BIT(0); 239 235 i915->engine[RCS] = mock_engine(i915, "mock", RCS); 240 236 if (!i915->engine[RCS]) 241 - goto err_priorities; 237 + goto err_unlock; 242 238 243 239 i915->kernel_context = mock_context(i915, NULL); 244 240 if (!i915->kernel_context) 245 241 goto err_engine; 242 + 243 + mutex_unlock(&i915->drm.struct_mutex); 246 244 247 245 WARN_ON(i915_gemfs_init(i915)); 248 246 ··· 250 250 err_engine: 251 251 for_each_engine(engine, i915, id) 252 252 mock_engine_free(engine); 253 - err_priorities: 253 + err_unlock: 254 + mutex_unlock(&i915->drm.struct_mutex); 254 255 kmem_cache_destroy(i915->priorities); 255 256 err_dependencies: 256 257 kmem_cache_destroy(i915->dependencies);
-1
drivers/gpu/drm/i915/selftests/mock_gtt.c
··· 76 76 77 77 INIT_LIST_HEAD(&ppgtt->base.global_link); 78 78 drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); 79 - i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); 80 79 81 80 ppgtt->base.clear_range = nop_clear_range; 82 81 ppgtt->base.insert_page = mock_insert_page;
+14 -31
drivers/gpu/drm/i915/selftests/mock_timeline.c
··· 1 1 /* 2 - * Copyright © 2017 Intel Corporation 2 + * SPDX-License-Identifier: MIT 3 3 * 4 - * Permission is hereby granted, free of charge, to any person obtaining a 5 - * copy of this software and associated documentation files (the "Software"), 6 - * to deal in the Software without restriction, including without limitation 7 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 - * and/or sell copies of the Software, and to permit persons to whom the 9 - * Software is furnished to do so, subject to the following conditions: 10 - * 11 - * The above copyright notice and this permission notice (including the next 12 - * paragraph) shall be included in all copies or substantial portions of the 13 - * Software. 14 - * 15 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 - * IN THE SOFTWARE. 
22 - * 4 + * Copyright © 2017-2018 Intel Corporation 23 5 */ 6 + 7 + #include "../i915_timeline.h" 24 8 25 9 #include "mock_timeline.h" 26 10 27 - struct intel_timeline *mock_timeline(u64 context) 11 + void mock_timeline_init(struct i915_timeline *timeline, u64 context) 28 12 { 29 - static struct lock_class_key class; 30 - struct intel_timeline *tl; 13 + timeline->fence_context = context; 31 14 32 - tl = kzalloc(sizeof(*tl), GFP_KERNEL); 33 - if (!tl) 34 - return NULL; 15 + spin_lock_init(&timeline->lock); 35 16 36 - __intel_timeline_init(tl, NULL, context, &class, "mock"); 17 + init_request_active(&timeline->last_request, NULL); 18 + INIT_LIST_HEAD(&timeline->requests); 37 19 38 - return tl; 20 + i915_syncmap_init(&timeline->sync); 21 + 22 + INIT_LIST_HEAD(&timeline->link); 39 23 } 40 24 41 - void mock_timeline_destroy(struct intel_timeline *tl) 25 + void mock_timeline_fini(struct i915_timeline *timeline) 42 26 { 43 - __intel_timeline_fini(tl); 44 - kfree(tl); 27 + i915_timeline_fini(timeline); 45 28 }
+5 -23
drivers/gpu/drm/i915/selftests/mock_timeline.h
··· 1 1 /* 2 - * Copyright © 2017 Intel Corporation 2 + * SPDX-License-Identifier: MIT 3 3 * 4 - * Permission is hereby granted, free of charge, to any person obtaining a 5 - * copy of this software and associated documentation files (the "Software"), 6 - * to deal in the Software without restriction, including without limitation 7 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 - * and/or sell copies of the Software, and to permit persons to whom the 9 - * Software is furnished to do so, subject to the following conditions: 10 - * 11 - * The above copyright notice and this permission notice (including the next 12 - * paragraph) shall be included in all copies or substantial portions of the 13 - * Software. 14 - * 15 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 - * IN THE SOFTWARE. 22 - * 4 + * Copyright © 2017-2018 Intel Corporation 23 5 */ 24 6 25 7 #ifndef __MOCK_TIMELINE__ 26 8 #define __MOCK_TIMELINE__ 27 9 28 - #include "../i915_gem_timeline.h" 10 + struct i915_timeline; 29 11 30 - struct intel_timeline *mock_timeline(u64 context); 31 - void mock_timeline_destroy(struct intel_timeline *tl); 12 + void mock_timeline_init(struct i915_timeline *timeline, u64 context); 13 + void mock_timeline_fini(struct i915_timeline *timeline); 32 14 33 15 #endif /* !__MOCK_TIMELINE__ */
+1
include/drm/i915_pciids.h
··· 349 349 #define INTEL_KBL_GT2_IDS(info) \ 350 350 INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \ 351 351 INTEL_VGA_DEVICE(0x5917, info), /* Mobile GT2 */ \ 352 + INTEL_VGA_DEVICE(0x591C, info), /* ULX GT2 */ \ 352 353 INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \ 353 354 INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \ 354 355 INTEL_VGA_DEVICE(0x5912, info), /* DT GT2 */ \