Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-fixes-2020-06-19' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
"Just i915 and amd here.

i915 has some workaround movement so they get applied at the right
times, and a timeslicing fix, along with some display fixes.

AMD has a few display floating point fixes and a devcgroup fix for
amdkfd.

i915:
- Fix for timeslicing and virtual engines/unpreemptable requests (+ 1
dependency patch)
- Fixes into TypeC register programming and interrupt storm detecting
- Disable DIP on MST ports with the transcoder clock still on
- Avoid missing GT workarounds at reset for HSW and older gens
- Fix for unwinding multiple requests missing force restore
- Fix encoder type check for DDI vswing sequence
- Build warning fixes

amdgpu:
- Fix kvfree/kfree mixup
- Fix hawaii device id in powertune configuration
- Display FP fixes
- Documentation fixes

amdkfd:
- devcgroup check fix"

* tag 'drm-fixes-2020-06-19' of git://anongit.freedesktop.org/drm/drm: (23 commits)
drm/amdgpu: fix documentation around busy_percentage
drm/amdgpu/pm: update comment to clarify Overdrive interfaces
drm/amdkfd: Use correct major in devcgroup check
drm/i915/display: Fix the encoder type check
drm/i915/icl+: Fix hotplug interrupt disabling after storm detection
drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move ilk GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move snb GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move vlv GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move ivb GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds
drm/i915/icl: Disable DIP on MST ports with the transcoder clock still on
drm/i915/gt: Incrementally check for rewinding
drm/i915/tc: fix the reset of ln0
drm/i915/gt: Prevent timeslicing into unpreemptable requests
drm/i915/selftests: Restore to default heartbeat
drm/i915: work around false-positive maybe-uninitialized warning
drm/i915/pmu: avoid an maybe-uninitialized warning
drm/i915/gt: Incorporate the virtual engine into timeslicing
drm/amd/display: Rework dsc to isolate FPU operations
...

+834 -432
+6 -3
Documentation/gpu/amdgpu.rst
··· 197 197 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 198 198 :doc: pp_power_profile_mode 199 199 200 - busy_percent 201 - ~~~~~~~~~~~~ 200 + *_busy_percent 201 + ~~~~~~~~~~~~~~ 202 202 203 203 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 204 - :doc: busy_percent 204 + :doc: gpu_busy_percent 205 + 206 + .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 207 + :doc: mem_busy_percent 205 208 206 209 GPU Product Information 207 210 =======================
+2 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
··· 696 696 * default power levels, write "r" (reset) to the file to reset them. 697 697 * 698 698 * 699 - * < For Vega20 > 699 + * < For Vega20 and newer ASICs > 700 700 * 701 701 * Reading the file will display: 702 702 * ··· 1668 1668 } 1669 1669 1670 1670 /** 1671 - * DOC: busy_percent 1671 + * DOC: gpu_busy_percent 1672 1672 * 1673 1673 * The amdgpu driver provides a sysfs API for reading how busy the GPU 1674 1674 * is as a percentage. The file gpu_busy_percent is used for this.
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 40 40 #include <drm/drm_file.h> 41 41 #include <drm/drm_drv.h> 42 42 #include <drm/drm_device.h> 43 + #include <drm/drm_ioctl.h> 43 44 #include <kgd_kfd_interface.h> 44 45 #include <linux/swap.h> 45 46 ··· 1077 1076 #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) 1078 1077 struct drm_device *ddev = kfd->ddev; 1079 1078 1080 - return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major, 1079 + return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, 1081 1080 ddev->render->index, 1082 1081 DEVCG_ACC_WRITE | DEVCG_ACC_READ); 1083 1082 #else
-2
drivers/gpu/drm/amd/display/dc/dsc/Makefile
··· 28 28 endif 29 29 30 30 CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags) 31 - CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags) 32 - CFLAGS_$(AMDDALPATH)/dc/dsc/dc_dsc.o := $(dsc_ccflags) 33 31 34 32 DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o 35 33
+2 -16
drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
··· 22 22 * Author: AMD 23 23 */ 24 24 25 + #include <drm/drm_dsc.h> 25 26 #include "dc_hw_types.h" 26 27 #include "dsc.h" 27 28 #include <drm/drm_dp_helper.h> 28 29 #include "dc.h" 30 + #include "rc_calc.h" 29 31 30 32 /* This module's internal functions */ 31 33 ··· 304 302 static inline uint32_t dsc_div_by_10_round_up(uint32_t value) 305 303 { 306 304 return (value + 9) / 10; 307 - } 308 - 309 - static inline uint32_t calc_dsc_bpp_x16(uint32_t stream_bandwidth_kbps, uint32_t pix_clk_100hz, uint32_t bpp_increment_div) 310 - { 311 - uint32_t dsc_target_bpp_x16; 312 - float f_dsc_target_bpp; 313 - float f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; 314 - uint32_t precision = bpp_increment_div; // bpp_increment_div is actually precision 315 - 316 - f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; 317 - 318 - // Round down to the nearest precision stop to bring it into DSC spec range 319 - dsc_target_bpp_x16 = (uint32_t)(f_dsc_target_bpp * precision); 320 - dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; 321 - 322 - return dsc_target_bpp_x16; 323 305 } 324 306 325 307 /* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock
+145 -6
drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
··· 23 23 * Authors: AMD 24 24 * 25 25 */ 26 + #include <drm/drm_dsc.h> 26 27 27 28 #include "os_types.h" 28 29 #include "rc_calc.h" ··· 41 40 break 42 41 43 42 44 - void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp) 43 + static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, 44 + enum max_min max_min, float bpp) 45 45 { 46 46 int mode = MODE_SELECT(444, 422, 420); 47 47 int sel = table_hash(mode, bpc, max_min); ··· 87 85 memcpy(qps, table[index].qps, sizeof(qp_set)); 88 86 } 89 87 90 - double dsc_roundf(double num) 88 + static double dsc_roundf(double num) 91 89 { 92 90 if (num < 0.0) 93 91 num = num - 0.5; ··· 97 95 return (int)(num); 98 96 } 99 97 100 - double dsc_ceil(double num) 98 + static double dsc_ceil(double num) 101 99 { 102 100 double retval = (int)num; 103 101 ··· 107 105 return (int)retval; 108 106 } 109 107 110 - void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) 108 + static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) 111 109 { 112 110 int *p = ofs; 113 111 ··· 162 160 } 163 161 } 164 162 165 - int median3(int a, int b, int c) 163 + static int median3(int a, int b, int c) 166 164 { 167 165 if (a > b) 168 166 swap(a, b); ··· 174 172 return b; 175 173 } 176 174 177 - void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version) 175 + static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm, 176 + enum bits_per_comp bpc, u8 drm_bpp, 177 + bool is_navite_422_or_420, 178 + int slice_width, int slice_height, 179 + int minor_version) 178 180 { 181 + float bpp; 179 182 float bpp_group; 180 183 float initial_xmit_delay_factor; 181 184 int padding_pixels; 182 185 int i; 186 + 187 + bpp = ((float)drm_bpp / 16.0); 188 + /* in native_422 or native_420 modes, the bits_per_pixel is double the 189 + * target bpp (the latter is what calc_rc_params 
expects) 190 + */ 191 + if (is_navite_422_or_420) 192 + bpp /= 2.0; 183 193 184 194 rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 185 195 rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); ··· 265 251 rc->rc_buf_thresh[13] = 8064; 266 252 } 267 253 254 + static u32 _do_bytes_per_pixel_calc(int slice_width, u8 drm_bpp, 255 + bool is_navite_422_or_420) 256 + { 257 + float bpp; 258 + u32 bytes_per_pixel; 259 + double d_bytes_per_pixel; 260 + 261 + bpp = ((float)drm_bpp / 16.0); 262 + d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; 263 + // TODO: Make sure the formula for calculating this is precise (ceiling 264 + // vs. floor, and at what point they should be applied) 265 + if (is_navite_422_or_420) 266 + d_bytes_per_pixel /= 2; 267 + 268 + bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000); 269 + 270 + return bytes_per_pixel; 271 + } 272 + 273 + static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, 274 + u32 bpp_increment_div) 275 + { 276 + u32 dsc_target_bpp_x16; 277 + float f_dsc_target_bpp; 278 + float f_stream_bandwidth_100bps; 279 + // bpp_increment_div is actually precision 280 + u32 precision = bpp_increment_div; 281 + 282 + f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; 283 + f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; 284 + 285 + // Round down to the nearest precision stop to bring it into DSC spec 286 + // range 287 + dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision); 288 + dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; 289 + 290 + return dsc_target_bpp_x16; 291 + } 292 + 293 + /** 294 + * calc_rc_params - reads the user's cmdline mode 295 + * @rc: DC internal DSC parameters 296 + * @pps: DRM struct with all required DSC values 297 + * 298 + * This function expects a drm_dsc_config data struct 
with all the required DSC 299 + * values previously filled out by our driver and based on this information it 300 + * computes some of the DSC values. 301 + * 302 + * @note This calculation requires float point operation, most of it executes 303 + * under kernel_fpu_{begin,end}. 304 + */ 305 + void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps) 306 + { 307 + enum colour_mode mode; 308 + enum bits_per_comp bpc; 309 + bool is_navite_422_or_420; 310 + u8 drm_bpp = pps->bits_per_pixel; 311 + int slice_width = pps->slice_width; 312 + int slice_height = pps->slice_height; 313 + 314 + mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 : 315 + (pps->native_422 ? CM_422 : 316 + pps->native_420 ? CM_420 : CM_444)); 317 + bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10) 318 + ? BPC_10 : BPC_12; 319 + 320 + is_navite_422_or_420 = pps->native_422 || pps->native_420; 321 + 322 + DC_FP_START(); 323 + _do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420, 324 + slice_width, slice_height, 325 + pps->dsc_version_minor); 326 + DC_FP_END(); 327 + } 328 + 329 + /** 330 + * calc_dsc_bytes_per_pixel - calculate bytes per pixel 331 + * @pps: DRM struct with all required DSC values 332 + * 333 + * Based on the information inside drm_dsc_config, this function calculates the 334 + * total of bytes per pixel. 335 + * 336 + * @note This calculation requires float point operation, most of it executes 337 + * under kernel_fpu_{begin,end}. 
338 + * 339 + * Return: 340 + * Return the number of bytes per pixel 341 + */ 342 + u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps) 343 + 344 + { 345 + u32 ret; 346 + u8 drm_bpp = pps->bits_per_pixel; 347 + int slice_width = pps->slice_width; 348 + bool is_navite_422_or_420 = pps->native_422 || pps->native_420; 349 + 350 + DC_FP_START(); 351 + ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp, 352 + is_navite_422_or_420); 353 + DC_FP_END(); 354 + return ret; 355 + } 356 + 357 + /** 358 + * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel 359 + * @stream_bandwidth_kbps: 360 + * @pix_clk_100hz: 361 + * @bpp_increment_div: 362 + * 363 + * Calculate the total of bits per pixel for DSC configuration. 364 + * 365 + * @note This calculation requires float point operation, most of it executes 366 + * under kernel_fpu_{begin,end}. 367 + */ 368 + u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, 369 + u32 bpp_increment_div) 370 + { 371 + u32 dsc_bpp; 372 + 373 + DC_FP_START(); 374 + dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz, 375 + bpp_increment_div); 376 + DC_FP_END(); 377 + return dsc_bpp; 378 + }
+4 -1
drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
··· 77 77 78 78 typedef struct qp_entry qp_table[]; 79 79 80 - void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version); 80 + void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps); 81 + u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps); 82 + u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, 83 + u32 bpp_increment_div); 81 84 82 85 #endif 83 86
+2 -25
drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
··· 27 27 #include "dscc_types.h" 28 28 #include "rc_calc.h" 29 29 30 - double dsc_ceil(double num); 31 - 32 30 static void copy_pps_fields(struct drm_dsc_config *to, const struct drm_dsc_config *from) 33 31 { 34 32 to->line_buf_depth = from->line_buf_depth; ··· 98 100 99 101 int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_parameters *dsc_params) 100 102 { 101 - enum colour_mode mode = pps->convert_rgb ? CM_RGB : 102 - (pps->simple_422 ? CM_444 : 103 - (pps->native_422 ? CM_422 : 104 - pps->native_420 ? CM_420 : CM_444)); 105 - enum bits_per_comp bpc = (pps->bits_per_component == 8) ? BPC_8 : 106 - (pps->bits_per_component == 10) ? BPC_10 : BPC_12; 107 - float bpp = ((float) pps->bits_per_pixel / 16.0); 108 - int slice_width = pps->slice_width; 109 - int slice_height = pps->slice_height; 110 103 int ret; 111 104 struct rc_params rc; 112 105 struct drm_dsc_config dsc_cfg; 113 106 114 - double d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; 107 + dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps); 115 108 116 - // TODO: Make sure the formula for calculating this is precise (ceiling vs. floor, and at what point they should be applied) 117 - if (pps->native_422 || pps->native_420) 118 - d_bytes_per_pixel /= 2; 119 - 120 - dsc_params->bytes_per_pixel = (uint32_t)dsc_ceil(d_bytes_per_pixel * 0x10000000); 121 - 122 - /* in native_422 or native_420 modes, the bits_per_pixel is double the target bpp 123 - * (the latter is what calc_rc_params expects) 124 - */ 125 - if (pps->native_422 || pps->native_420) 126 - bpp /= 2.0; 127 - 128 - calc_rc_params(&rc, mode, bpc, bpp, slice_width, slice_height, pps->dsc_version_minor); 109 + calc_rc_params(&rc, pps); 129 110 dsc_params->pps = *pps; 130 111 dsc_params->pps.initial_scale_value = 8 * rc.rc_model_size / (rc.rc_model_size - rc.initial_fullness_offset); 131 112
+2 -2
drivers/gpu/drm/amd/display/modules/color/color_gamma.c
··· 843 843 pow_buffer_ptr = -1; // reset back to no optimize 844 844 ret = true; 845 845 release: 846 - kfree(coeff); 846 + kvfree(coeff); 847 847 return ret; 848 848 } 849 849 ··· 1777 1777 1778 1778 kfree(rgb_regamma); 1779 1779 rgb_regamma_alloc_fail: 1780 - kvfree(rgb_user); 1780 + kfree(rgb_user); 1781 1781 rgb_user_alloc_fail: 1782 1782 return ret; 1783 1783 }
+1 -1
drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
··· 239 239 240 240 switch (dev_id) { 241 241 case 0x67BA: 242 - case 0x66B1: 242 + case 0x67B1: 243 243 smu_data->power_tune_defaults = &defaults_hawaii_pro; 244 244 break; 245 245 case 0x67B8:
+7 -5
drivers/gpu/drm/i915/display/intel_ddi.c
··· 2579 2579 2580 2580 static void 2581 2581 tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, 2582 - u32 level) 2582 + u32 level, enum intel_output_type type) 2583 2583 { 2584 2584 struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 2585 2585 enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); 2586 2586 const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; 2587 2587 u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; 2588 2588 2589 - if (encoder->type == INTEL_OUTPUT_HDMI) { 2589 + if (type == INTEL_OUTPUT_HDMI) { 2590 2590 n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); 2591 2591 ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; 2592 2592 } else { ··· 2638 2638 if (intel_phy_is_combo(dev_priv, phy)) 2639 2639 icl_combo_phy_ddi_vswing_sequence(encoder, level, type); 2640 2640 else 2641 - tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level); 2641 + tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level, type); 2642 2642 } 2643 2643 2644 2644 static u32 translate_signal_level(struct intel_dp *intel_dp, int signal_levels) ··· 2987 2987 ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port)); 2988 2988 } 2989 2989 2990 - ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); 2990 + ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); 2991 2991 ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); 2992 2992 2993 2993 /* DPPATC */ ··· 3472 3472 INTEL_OUTPUT_DP_MST); 3473 3473 enum phy phy = intel_port_to_phy(dev_priv, encoder->port); 3474 3474 3475 - intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state); 3475 + if (!is_mst) 3476 + intel_dp_set_infoframes(encoder, false, 3477 + old_crtc_state, old_conn_state); 3476 3478 3477 3479 /* 3478 3480 * Power down sink before disabling the port, otherwise we end
+8
drivers/gpu/drm/i915/display/intel_dp_mst.c
··· 397 397 */ 398 398 drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, 399 399 false); 400 + 401 + /* 402 + * BSpec 4287: disable DIP after the transcoder is disabled and before 403 + * the transcoder clock select is set to none. 404 + */ 405 + if (last_mst_stream) 406 + intel_dp_set_infoframes(&intel_dig_port->base, false, 407 + old_crtc_state, NULL); 400 408 /* 401 409 * From TGL spec: "If multi-stream slave transcoder: Configure 402 410 * Transcoder Clock Select to direct no clock to the transcoder"
+3 -1
drivers/gpu/drm/i915/gt/intel_engine_cs.c
··· 646 646 struct measure_breadcrumb { 647 647 struct i915_request rq; 648 648 struct intel_ring ring; 649 - u32 cs[1024]; 649 + u32 cs[2048]; 650 650 }; 651 651 652 652 static int measure_breadcrumb_dw(struct intel_context *ce) ··· 668 668 669 669 frame->ring.vaddr = frame->cs; 670 670 frame->ring.size = sizeof(frame->cs); 671 + frame->ring.wrap = 672 + BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size); 671 673 frame->ring.effective_size = frame->ring.size; 672 674 intel_ring_update_space(&frame->ring); 673 675 frame->rq.ring = &frame->ring;
+45 -7
drivers/gpu/drm/i915/gt/intel_lrc.c
··· 1134 1134 list_move(&rq->sched.link, pl); 1135 1135 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1136 1136 1137 + /* Check in case we rollback so far we wrap [size/2] */ 1138 + if (intel_ring_direction(rq->ring, 1139 + intel_ring_wrap(rq->ring, 1140 + rq->tail), 1141 + rq->ring->tail) > 0) 1142 + rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; 1143 + 1137 1144 active = rq; 1138 1145 } else { 1139 1146 struct intel_engine_cs *owner = rq->context->engine; ··· 1505 1498 * HW has a tendency to ignore us rewinding the TAIL to the end of 1506 1499 * an earlier request. 1507 1500 */ 1501 + GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); 1502 + prev = rq->ring->tail; 1508 1503 tail = intel_ring_set_tail(rq->ring, rq->tail); 1509 - prev = ce->lrc_reg_state[CTX_RING_TAIL]; 1510 1504 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) 1511 1505 desc |= CTX_DESC_FORCE_RESTORE; 1512 1506 ce->lrc_reg_state[CTX_RING_TAIL] = tail; ··· 1903 1895 1904 1896 static bool 1905 1897 need_timeslice(const struct intel_engine_cs *engine, 1906 - const struct i915_request *rq) 1898 + const struct i915_request *rq, 1899 + const struct rb_node *rb) 1907 1900 { 1908 1901 int hint; 1909 1902 ··· 1912 1903 return false; 1913 1904 1914 1905 hint = engine->execlists.queue_priority_hint; 1906 + 1907 + if (rb) { 1908 + const struct virtual_engine *ve = 1909 + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 1910 + const struct intel_engine_cs *inflight = 1911 + intel_context_inflight(&ve->context); 1912 + 1913 + if (!inflight || inflight == engine) { 1914 + struct i915_request *next; 1915 + 1916 + rcu_read_lock(); 1917 + next = READ_ONCE(ve->request); 1918 + if (next) 1919 + hint = max(hint, rq_prio(next)); 1920 + rcu_read_unlock(); 1921 + } 1922 + } 1923 + 1915 1924 if (!list_is_last(&rq->sched.link, &engine->active.requests)) 1916 1925 hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); 1917 1926 1927 + GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE); 
1918 1928 return hint >= effective_prio(rq); 1919 1929 } 1920 1930 ··· 2005 1977 set_timer_ms(&engine->execlists.timer, duration); 2006 1978 } 2007 1979 2008 - static void start_timeslice(struct intel_engine_cs *engine) 1980 + static void start_timeslice(struct intel_engine_cs *engine, int prio) 2009 1981 { 2010 1982 struct intel_engine_execlists *execlists = &engine->execlists; 2011 - const int prio = queue_prio(execlists); 2012 1983 unsigned long duration; 2013 1984 2014 1985 if (!intel_engine_has_timeslices(engine)) ··· 2167 2140 __unwind_incomplete_requests(engine); 2168 2141 2169 2142 last = NULL; 2170 - } else if (need_timeslice(engine, last) && 2143 + } else if (need_timeslice(engine, last, rb) && 2171 2144 timeslice_expired(execlists, last)) { 2172 2145 if (i915_request_completed(last)) { 2173 2146 tasklet_hi_schedule(&execlists->tasklet); ··· 2215 2188 * Even if ELSP[1] is occupied and not worthy 2216 2189 * of timeslices, our queue might be. 2217 2190 */ 2218 - start_timeslice(engine); 2191 + start_timeslice(engine, queue_prio(execlists)); 2219 2192 return; 2220 2193 } 2221 2194 } ··· 2250 2223 2251 2224 if (last && !can_merge_rq(last, rq)) { 2252 2225 spin_unlock(&ve->base.active.lock); 2253 - start_timeslice(engine); 2226 + start_timeslice(engine, rq_prio(rq)); 2254 2227 return; /* leave this for another sibling */ 2255 2228 } 2256 2229 ··· 4766 4739 return 0; 4767 4740 } 4768 4741 4742 + static void assert_request_valid(struct i915_request *rq) 4743 + { 4744 + struct intel_ring *ring __maybe_unused = rq->ring; 4745 + 4746 + /* Can we unwind this request without appearing to go forwards? 
*/ 4747 + GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0); 4748 + } 4749 + 4769 4750 /* 4770 4751 * Reserve space for 2 NOOPs at the end of each request to be 4771 4752 * used as a workaround for not being allowed to do lite ··· 4785 4750 *cs++ = MI_ARB_CHECK; 4786 4751 *cs++ = MI_NOOP; 4787 4752 request->wa_tail = intel_ring_offset(request, cs); 4753 + 4754 + /* Check that entire request is less than half the ring */ 4755 + assert_request_valid(request); 4788 4756 4789 4757 return cs; 4790 4758 }
+4
drivers/gpu/drm/i915/gt/intel_ring.c
··· 315 315 GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); 316 316 return 0; 317 317 } 318 + 319 + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 320 + #include "selftest_ring.c" 321 + #endif
+241
drivers/gpu/drm/i915/gt/intel_workarounds.c
··· 179 179 } 180 180 181 181 static void 182 + wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) 183 + { 184 + wa_write_masked_or(wal, reg, clr, 0); 185 + } 186 + 187 + static void 182 188 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 183 189 { 184 190 wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val); ··· 693 687 } 694 688 695 689 static void 690 + gen4_gt_workarounds_init(struct drm_i915_private *i915, 691 + struct i915_wa_list *wal) 692 + { 693 + /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ 694 + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); 695 + } 696 + 697 + static void 698 + g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 699 + { 700 + gen4_gt_workarounds_init(i915, wal); 701 + 702 + /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ 703 + wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); 704 + } 705 + 706 + static void 707 + ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 708 + { 709 + g4x_gt_workarounds_init(i915, wal); 710 + 711 + wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); 712 + } 713 + 714 + static void 715 + snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 716 + { 717 + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ 718 + wa_masked_en(wal, 719 + _3D_CHICKEN, 720 + _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); 721 + 722 + /* WaDisable_RenderCache_OperationalFlush:snb */ 723 + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); 724 + 725 + /* 726 + * BSpec recommends 8x4 when MSAA is used, 727 + * however in practice 16x4 seems fastest. 728 + * 729 + * Note that PS/WM thread counts depend on the WIZ hashing 730 + * disable bit, which we don't touch here, but it's good 731 + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
732 + */ 733 + wa_add(wal, 734 + GEN6_GT_MODE, 0, 735 + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), 736 + GEN6_WIZ_HASHING_16x4); 737 + 738 + wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB); 739 + 740 + wa_masked_en(wal, 741 + _3D_CHICKEN3, 742 + /* WaStripsFansDisableFastClipPerformanceFix:snb */ 743 + _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | 744 + /* 745 + * Bspec says: 746 + * "This bit must be set if 3DSTATE_CLIP clip mode is set 747 + * to normal and 3DSTATE_SF number of SF output attributes 748 + * is more than 16." 749 + */ 750 + _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); 751 + } 752 + 753 + static void 754 + ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 755 + { 756 + /* WaDisableEarlyCull:ivb */ 757 + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); 758 + 759 + /* WaDisablePSDDualDispatchEnable:ivb */ 760 + if (IS_IVB_GT1(i915)) 761 + wa_masked_en(wal, 762 + GEN7_HALF_SLICE_CHICKEN1, 763 + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); 764 + 765 + /* WaDisable_RenderCache_OperationalFlush:ivb */ 766 + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); 767 + 768 + /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ 769 + wa_masked_dis(wal, 770 + GEN7_COMMON_SLICE_CHICKEN1, 771 + GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 772 + 773 + /* WaApplyL3ControlAndL3ChickenMode:ivb */ 774 + wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); 775 + wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); 776 + 777 + /* WaForceL3Serialization:ivb */ 778 + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); 779 + 780 + /* 781 + * WaVSThreadDispatchOverride:ivb,vlv 782 + * 783 + * This actually overrides the dispatch 784 + * mode for all thread types. 
785 + */ 786 + wa_write_masked_or(wal, GEN7_FF_THREAD_MODE, 787 + GEN7_FF_SCHED_MASK, 788 + GEN7_FF_TS_SCHED_HW | 789 + GEN7_FF_VS_SCHED_HW | 790 + GEN7_FF_DS_SCHED_HW); 791 + 792 + if (0) { /* causes HiZ corruption on ivb:gt1 */ 793 + /* enable HiZ Raw Stall Optimization */ 794 + wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); 795 + } 796 + 797 + /* WaDisable4x2SubspanOptimization:ivb */ 798 + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 799 + 800 + /* 801 + * BSpec recommends 8x4 when MSAA is used, 802 + * however in practice 16x4 seems fastest. 803 + * 804 + * Note that PS/WM thread counts depend on the WIZ hashing 805 + * disable bit, which we don't touch here, but it's good 806 + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 807 + */ 808 + wa_add(wal, GEN7_GT_MODE, 0, 809 + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), 810 + GEN6_WIZ_HASHING_16x4); 811 + } 812 + 813 + static void 814 + vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 815 + { 816 + /* WaDisableEarlyCull:vlv */ 817 + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); 818 + 819 + /* WaPsdDispatchEnable:vlv */ 820 + /* WaDisablePSDDualDispatchEnable:vlv */ 821 + wa_masked_en(wal, 822 + GEN7_HALF_SLICE_CHICKEN1, 823 + GEN7_MAX_PS_THREAD_DEP | 824 + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); 825 + 826 + /* WaDisable_RenderCache_OperationalFlush:vlv */ 827 + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); 828 + 829 + /* WaForceL3Serialization:vlv */ 830 + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); 831 + 832 + /* 833 + * WaVSThreadDispatchOverride:ivb,vlv 834 + * 835 + * This actually overrides the dispatch 836 + * mode for all thread types. 
837 + */ 838 + wa_write_masked_or(wal, 839 + GEN7_FF_THREAD_MODE, 840 + GEN7_FF_SCHED_MASK, 841 + GEN7_FF_TS_SCHED_HW | 842 + GEN7_FF_VS_SCHED_HW | 843 + GEN7_FF_DS_SCHED_HW); 844 + 845 + /* 846 + * BSpec says this must be set, even though 847 + * WaDisable4x2SubspanOptimization isn't listed for VLV. 848 + */ 849 + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 850 + 851 + /* 852 + * BSpec recommends 8x4 when MSAA is used, 853 + * however in practice 16x4 seems fastest. 854 + * 855 + * Note that PS/WM thread counts depend on the WIZ hashing 856 + * disable bit, which we don't touch here, but it's good 857 + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 858 + */ 859 + wa_add(wal, GEN7_GT_MODE, 0, 860 + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), 861 + GEN6_WIZ_HASHING_16x4); 862 + 863 + /* 864 + * WaIncreaseL3CreditsForVLVB0:vlv 865 + * This is the hardware default actually. 866 + */ 867 + wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); 868 + } 869 + 870 + static void 871 + hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 872 + { 873 + /* L3 caching of data atomics doesn't work -- disable it. */ 874 + wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 875 + 876 + wa_add(wal, 877 + HSW_ROW_CHICKEN3, 0, 878 + _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), 879 + 0 /* XXX does this reg exist? 
*/); 880 + 881 + /* WaVSRefCountFullforceMissDisable:hsw */ 882 + wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); 883 + 884 + wa_masked_dis(wal, 885 + CACHE_MODE_0_GEN7, 886 + /* WaDisable_RenderCache_OperationalFlush:hsw */ 887 + RC_OP_FLUSH_ENABLE | 888 + /* enable HiZ Raw Stall Optimization */ 889 + HIZ_RAW_STALL_OPT_DISABLE); 890 + 891 + /* WaDisable4x2SubspanOptimization:hsw */ 892 + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 893 + 894 + /* 895 + * BSpec recommends 8x4 when MSAA is used, 896 + * however in practice 16x4 seems fastest. 897 + * 898 + * Note that PS/WM thread counts depend on the WIZ hashing 899 + * disable bit, which we don't touch here, but it's good 900 + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 901 + */ 902 + wa_add(wal, GEN7_GT_MODE, 0, 903 + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), 904 + GEN6_WIZ_HASHING_16x4); 905 + 906 + /* WaSampleCChickenBitEnable:hsw */ 907 + wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); 908 + } 909 + 910 + static void 696 911 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) 697 912 { 698 913 /* WaDisableKillLogic:bxt,skl,kbl */ ··· 1190 963 bxt_gt_workarounds_init(i915, wal); 1191 964 else if (IS_SKYLAKE(i915)) 1192 965 skl_gt_workarounds_init(i915, wal); 966 + else if (IS_HASWELL(i915)) 967 + hsw_gt_workarounds_init(i915, wal); 968 + else if (IS_VALLEYVIEW(i915)) 969 + vlv_gt_workarounds_init(i915, wal); 970 + else if (IS_IVYBRIDGE(i915)) 971 + ivb_gt_workarounds_init(i915, wal); 972 + else if (IS_GEN(i915, 6)) 973 + snb_gt_workarounds_init(i915, wal); 974 + else if (IS_GEN(i915, 5)) 975 + ilk_gt_workarounds_init(i915, wal); 976 + else if (IS_G4X(i915)) 977 + g4x_gt_workarounds_init(i915, wal); 978 + else if (IS_GEN(i915, 4)) 979 + gen4_gt_workarounds_init(i915, wal); 1193 980 else if (INTEL_GEN(i915) <= 8) 1194 981 return; 1195 982 else
+10 -15
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
··· 310 310 1000)); 311 311 } 312 312 313 - static void engine_heartbeat_disable(struct intel_engine_cs *engine, 314 - unsigned long *saved) 313 + static void engine_heartbeat_disable(struct intel_engine_cs *engine) 315 314 { 316 - *saved = engine->props.heartbeat_interval_ms; 317 315 engine->props.heartbeat_interval_ms = 0; 318 316 319 317 intel_engine_pm_get(engine); 320 318 intel_engine_park_heartbeat(engine); 321 319 } 322 320 323 - static void engine_heartbeat_enable(struct intel_engine_cs *engine, 324 - unsigned long saved) 321 + static void engine_heartbeat_enable(struct intel_engine_cs *engine) 325 322 { 326 323 intel_engine_pm_put(engine); 327 324 328 - engine->props.heartbeat_interval_ms = saved; 325 + engine->props.heartbeat_interval_ms = 326 + engine->defaults.heartbeat_interval_ms; 329 327 } 330 328 331 329 static int igt_hang_sanitycheck(void *arg) ··· 471 473 for_each_engine(engine, gt, id) { 472 474 unsigned int reset_count, reset_engine_count, count; 473 475 struct intel_context *ce; 474 - unsigned long heartbeat; 475 476 IGT_TIMEOUT(end_time); 476 477 int err; 477 478 ··· 482 485 reset_engine_count = i915_reset_engine_count(global, engine); 483 486 count = 0; 484 487 485 - engine_heartbeat_disable(engine, &heartbeat); 488 + engine_heartbeat_disable(engine); 486 489 set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); 487 490 do { 488 491 int i; ··· 526 529 } 527 530 } while (time_before(jiffies, end_time)); 528 531 clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); 529 - engine_heartbeat_enable(engine, heartbeat); 532 + engine_heartbeat_enable(engine); 530 533 531 534 pr_info("%s(%s): %d resets\n", __func__, engine->name, count); 532 535 ··· 561 564 562 565 for_each_engine(engine, gt, id) { 563 566 unsigned int reset_count, reset_engine_count; 564 - unsigned long heartbeat; 565 567 IGT_TIMEOUT(end_time); 566 568 567 569 if (active && !intel_engine_can_store_dword(engine)) ··· 576 580 reset_count = i915_reset_count(global); 577 581 
reset_engine_count = i915_reset_engine_count(global, engine); 578 582 579 - engine_heartbeat_disable(engine, &heartbeat); 583 + engine_heartbeat_disable(engine); 580 584 set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); 581 585 do { 582 586 if (active) { ··· 628 632 } 629 633 } while (time_before(jiffies, end_time)); 630 634 clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); 631 - engine_heartbeat_enable(engine, heartbeat); 635 + engine_heartbeat_enable(engine); 632 636 633 637 if (err) 634 638 break; ··· 785 789 struct active_engine threads[I915_NUM_ENGINES] = {}; 786 790 unsigned long device = i915_reset_count(global); 787 791 unsigned long count = 0, reported; 788 - unsigned long heartbeat; 789 792 IGT_TIMEOUT(end_time); 790 793 791 794 if (flags & TEST_ACTIVE && ··· 827 832 828 833 yield(); /* start all threads before we begin */ 829 834 830 - engine_heartbeat_disable(engine, &heartbeat); 835 + engine_heartbeat_disable(engine); 831 836 set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); 832 837 do { 833 838 struct i915_request *rq = NULL; ··· 901 906 } 902 907 } while (time_before(jiffies, end_time)); 903 908 clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); 904 - engine_heartbeat_enable(engine, heartbeat); 909 + engine_heartbeat_enable(engine); 905 910 906 911 pr_info("i915_reset_engine(%s:%s): %lu resets\n", 907 912 engine->name, test_name, count);
+140 -45
drivers/gpu/drm/i915/gt/selftest_lrc.c
··· 51 51 return vma; 52 52 } 53 53 54 - static void engine_heartbeat_disable(struct intel_engine_cs *engine, 55 - unsigned long *saved) 54 + static void engine_heartbeat_disable(struct intel_engine_cs *engine) 56 55 { 57 - *saved = engine->props.heartbeat_interval_ms; 58 56 engine->props.heartbeat_interval_ms = 0; 59 57 60 58 intel_engine_pm_get(engine); 61 59 intel_engine_park_heartbeat(engine); 62 60 } 63 61 64 - static void engine_heartbeat_enable(struct intel_engine_cs *engine, 65 - unsigned long saved) 62 + static void engine_heartbeat_enable(struct intel_engine_cs *engine) 66 63 { 67 64 intel_engine_pm_put(engine); 68 65 69 - engine->props.heartbeat_interval_ms = saved; 66 + engine->props.heartbeat_interval_ms = 67 + engine->defaults.heartbeat_interval_ms; 70 68 } 71 69 72 70 static bool is_active(struct i915_request *rq) ··· 222 224 struct intel_context *ce[2] = {}; 223 225 struct i915_request *rq[2]; 224 226 struct igt_live_test t; 225 - unsigned long saved; 226 227 int n; 227 228 228 229 if (prio && !intel_engine_has_preemption(engine)) ··· 234 237 err = -EIO; 235 238 break; 236 239 } 237 - engine_heartbeat_disable(engine, &saved); 240 + engine_heartbeat_disable(engine); 238 241 239 242 for (n = 0; n < ARRAY_SIZE(ce); n++) { 240 243 struct intel_context *tmp; ··· 342 345 intel_context_put(ce[n]); 343 346 } 344 347 345 - engine_heartbeat_enable(engine, saved); 348 + engine_heartbeat_enable(engine); 346 349 if (igt_live_test_end(&t)) 347 350 err = -EIO; 348 351 if (err) ··· 463 466 464 467 for_each_engine(engine, gt, id) { 465 468 struct intel_context *ce; 466 - unsigned long heartbeat; 467 469 struct i915_request *rq; 468 470 469 471 ce = intel_context_create(engine); ··· 471 475 break; 472 476 } 473 477 474 - engine_heartbeat_disable(engine, &heartbeat); 478 + engine_heartbeat_disable(engine); 475 479 476 480 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 477 481 if (IS_ERR(rq)) { ··· 531 535 i915_request_put(rq); 532 536 533 537 out: 534 - 
engine_heartbeat_enable(engine, heartbeat); 538 + engine_heartbeat_enable(engine); 535 539 intel_context_put(ce); 536 540 if (err) 537 541 break; ··· 576 580 577 581 for_each_engine(engine, gt, id) { 578 582 const struct error_phase *p; 579 - unsigned long heartbeat; 580 583 int err = 0; 581 584 582 - engine_heartbeat_disable(engine, &heartbeat); 585 + engine_heartbeat_disable(engine); 583 586 584 587 for (p = phases; p->error[0] != GOOD; p++) { 585 588 struct i915_request *client[ARRAY_SIZE(phases->error)]; ··· 677 682 } 678 683 } 679 684 680 - engine_heartbeat_enable(engine, heartbeat); 685 + engine_heartbeat_enable(engine); 681 686 if (err) { 682 687 intel_gt_set_wedged(gt); 683 688 return err; ··· 823 828 } 824 829 } 825 830 826 - err = release_queue(outer, vma, n, INT_MAX); 831 + err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); 827 832 if (err) 828 833 goto out; 829 834 ··· 890 895 enum intel_engine_id id; 891 896 892 897 for_each_engine(engine, gt, id) { 893 - unsigned long saved; 894 - 895 898 if (!intel_engine_has_preemption(engine)) 896 899 continue; 897 900 898 901 memset(vaddr, 0, PAGE_SIZE); 899 902 900 - engine_heartbeat_disable(engine, &saved); 903 + engine_heartbeat_disable(engine); 901 904 err = slice_semaphore_queue(engine, vma, count); 902 - engine_heartbeat_enable(engine, saved); 905 + engine_heartbeat_enable(engine); 903 906 if (err) 904 907 goto err_pin; 905 908 ··· 1002 1009 enum { X = 1, Z, Y }; 1003 1010 struct i915_request *rq[3] = {}; 1004 1011 struct intel_context *ce; 1005 - unsigned long heartbeat; 1006 1012 unsigned long timeslice; 1007 1013 int i, err = 0; 1008 1014 u32 *slot; ··· 1020 1028 * Expect execution/evaluation order XZY 1021 1029 */ 1022 1030 1023 - engine_heartbeat_disable(engine, &heartbeat); 1031 + engine_heartbeat_disable(engine); 1024 1032 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1025 1033 1026 1034 slot = memset32(engine->status_page.addr + 1000, 0, 4); ··· 1114 1122 wmb(); 1115 1123 1116 
1124 engine->props.timeslice_duration_ms = timeslice; 1117 - engine_heartbeat_enable(engine, heartbeat); 1125 + engine_heartbeat_enable(engine); 1118 1126 for (i = 0; i < 3; i++) 1119 1127 i915_request_put(rq[i]); 1120 1128 if (igt_flush_test(gt->i915)) ··· 1194 1202 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1195 1203 }; 1196 1204 struct i915_request *rq, *nop; 1197 - unsigned long saved; 1198 1205 1199 1206 if (!intel_engine_has_preemption(engine)) 1200 1207 continue; 1201 1208 1202 - engine_heartbeat_disable(engine, &saved); 1209 + engine_heartbeat_disable(engine); 1203 1210 memset(vaddr, 0, PAGE_SIZE); 1204 1211 1205 1212 /* ELSP[0]: semaphore wait */ ··· 1275 1284 err_rq: 1276 1285 i915_request_put(rq); 1277 1286 err_heartbeat: 1278 - engine_heartbeat_enable(engine, saved); 1287 + engine_heartbeat_enable(engine); 1279 1288 if (err) 1280 1289 break; 1281 1290 } ··· 1286 1295 i915_gem_object_unpin_map(obj); 1287 1296 err_obj: 1288 1297 i915_gem_object_put(obj); 1298 + return err; 1299 + } 1300 + 1301 + static int live_timeslice_nopreempt(void *arg) 1302 + { 1303 + struct intel_gt *gt = arg; 1304 + struct intel_engine_cs *engine; 1305 + enum intel_engine_id id; 1306 + struct igt_spinner spin; 1307 + int err = 0; 1308 + 1309 + /* 1310 + * We should not timeslice into a request that is marked with 1311 + * I915_REQUEST_NOPREEMPT. 
1312 + */ 1313 + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1314 + return 0; 1315 + 1316 + if (igt_spinner_init(&spin, gt)) 1317 + return -ENOMEM; 1318 + 1319 + for_each_engine(engine, gt, id) { 1320 + struct intel_context *ce; 1321 + struct i915_request *rq; 1322 + unsigned long timeslice; 1323 + 1324 + if (!intel_engine_has_preemption(engine)) 1325 + continue; 1326 + 1327 + ce = intel_context_create(engine); 1328 + if (IS_ERR(ce)) { 1329 + err = PTR_ERR(ce); 1330 + break; 1331 + } 1332 + 1333 + engine_heartbeat_disable(engine); 1334 + timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1335 + 1336 + /* Create an unpreemptible spinner */ 1337 + 1338 + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 1339 + intel_context_put(ce); 1340 + if (IS_ERR(rq)) { 1341 + err = PTR_ERR(rq); 1342 + goto out_heartbeat; 1343 + } 1344 + 1345 + i915_request_get(rq); 1346 + i915_request_add(rq); 1347 + 1348 + if (!igt_wait_for_spinner(&spin, rq)) { 1349 + i915_request_put(rq); 1350 + err = -ETIME; 1351 + goto out_spin; 1352 + } 1353 + 1354 + set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); 1355 + i915_request_put(rq); 1356 + 1357 + /* Followed by a maximum priority barrier (heartbeat) */ 1358 + 1359 + ce = intel_context_create(engine); 1360 + if (IS_ERR(ce)) { 1361 + err = PTR_ERR(rq); 1362 + goto out_spin; 1363 + } 1364 + 1365 + rq = intel_context_create_request(ce); 1366 + intel_context_put(ce); 1367 + if (IS_ERR(rq)) { 1368 + err = PTR_ERR(rq); 1369 + goto out_spin; 1370 + } 1371 + 1372 + rq->sched.attr.priority = I915_PRIORITY_BARRIER; 1373 + i915_request_get(rq); 1374 + i915_request_add(rq); 1375 + 1376 + /* 1377 + * Wait until the barrier is in ELSP, and we know timeslicing 1378 + * will have been activated. 
1379 + */ 1380 + if (wait_for_submit(engine, rq, HZ / 2)) { 1381 + i915_request_put(rq); 1382 + err = -ETIME; 1383 + goto out_spin; 1384 + } 1385 + 1386 + /* 1387 + * Since the ELSP[0] request is unpreemptible, it should not 1388 + * allow the maximum priority barrier through. Wait long 1389 + * enough to see if it is timesliced in by mistake. 1390 + */ 1391 + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) { 1392 + pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", 1393 + engine->name); 1394 + err = -EINVAL; 1395 + } 1396 + i915_request_put(rq); 1397 + 1398 + out_spin: 1399 + igt_spinner_end(&spin); 1400 + out_heartbeat: 1401 + xchg(&engine->props.timeslice_duration_ms, timeslice); 1402 + engine_heartbeat_enable(engine); 1403 + if (err) 1404 + break; 1405 + 1406 + if (igt_flush_test(gt->i915)) { 1407 + err = -EIO; 1408 + break; 1409 + } 1410 + } 1411 + 1412 + igt_spinner_fini(&spin); 1289 1413 return err; 1290 1414 } 1291 1415 ··· 4259 4153 { 4260 4154 struct intel_engine_cs *engine; 4261 4155 struct intel_context *ve; 4262 - unsigned long *heartbeat; 4263 4156 struct igt_spinner spin; 4264 4157 struct i915_request *rq; 4265 4158 unsigned int n; ··· 4270 4165 * descendents are not executed while the capture is in progress. 
4271 4166 */ 4272 4167 4273 - heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); 4274 - if (!heartbeat) 4168 + if (igt_spinner_init(&spin, gt)) 4275 4169 return -ENOMEM; 4276 - 4277 - if (igt_spinner_init(&spin, gt)) { 4278 - err = -ENOMEM; 4279 - goto out_free; 4280 - } 4281 4170 4282 4171 ve = intel_execlists_create_virtual(siblings, nsibling); 4283 4172 if (IS_ERR(ve)) { ··· 4280 4181 } 4281 4182 4282 4183 for (n = 0; n < nsibling; n++) 4283 - engine_heartbeat_disable(siblings[n], &heartbeat[n]); 4184 + engine_heartbeat_disable(siblings[n]); 4284 4185 4285 4186 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 4286 4187 if (IS_ERR(rq)) { ··· 4351 4252 i915_request_put(rq); 4352 4253 out_heartbeat: 4353 4254 for (n = 0; n < nsibling; n++) 4354 - engine_heartbeat_enable(siblings[n], heartbeat[n]); 4255 + engine_heartbeat_enable(siblings[n]); 4355 4256 4356 4257 intel_context_put(ve); 4357 4258 out_spin: 4358 4259 igt_spinner_fini(&spin); 4359 - out_free: 4360 - kfree(heartbeat); 4361 4260 return err; 4362 4261 } 4363 4262 ··· 4411 4314 SUBTEST(live_timeslice_preempt), 4412 4315 SUBTEST(live_timeslice_rewind), 4413 4316 SUBTEST(live_timeslice_queue), 4317 + SUBTEST(live_timeslice_nopreempt), 4414 4318 SUBTEST(live_busywait_preempt), 4415 4319 SUBTEST(live_preempt), 4416 4320 SUBTEST(live_late_preempt), ··· 5030 4932 return PTR_ERR(scratch); 5031 4933 5032 4934 for_each_engine(engine, gt, id) { 5033 - unsigned long heartbeat; 5034 - 5035 - engine_heartbeat_disable(engine, &heartbeat); 4935 + engine_heartbeat_disable(engine); 5036 4936 5037 4937 err = __live_lrc_gpr(engine, scratch, false); 5038 4938 if (err) ··· 5041 4945 goto err; 5042 4946 5043 4947 err: 5044 - engine_heartbeat_enable(engine, heartbeat); 4948 + engine_heartbeat_enable(engine); 5045 4949 if (igt_flush_test(gt->i915)) 5046 4950 err = -EIO; 5047 4951 if (err) ··· 5188 5092 */ 5189 5093 5190 5094 for_each_engine(data.engine, gt, id) { 5191 - unsigned long heartbeat; 5192 5095 
int i, err = 0; 5193 5096 5194 - engine_heartbeat_disable(data.engine, &heartbeat); 5097 + engine_heartbeat_disable(data.engine); 5195 5098 5196 5099 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5197 5100 struct intel_context *tmp; ··· 5223 5128 } 5224 5129 5225 5130 err: 5226 - engine_heartbeat_enable(data.engine, heartbeat); 5131 + engine_heartbeat_enable(data.engine); 5227 5132 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5228 5133 if (!data.ce[i]) 5229 5134 break;
+16 -2
drivers/gpu/drm/i915/gt/selftest_mocs.c
··· 18 18 void *vaddr; 19 19 }; 20 20 21 + static struct intel_context *mocs_context_create(struct intel_engine_cs *engine) 22 + { 23 + struct intel_context *ce; 24 + 25 + ce = intel_context_create(engine); 26 + if (IS_ERR(ce)) 27 + return ce; 28 + 29 + /* We build large requests to read the registers from the ring */ 30 + ce->ring = __intel_context_ring_size(SZ_16K); 31 + 32 + return ce; 33 + } 34 + 21 35 static int request_add_sync(struct i915_request *rq, int err) 22 36 { 23 37 i915_request_get(rq); ··· 315 301 for_each_engine(engine, gt, id) { 316 302 struct intel_context *ce; 317 303 318 - ce = intel_context_create(engine); 304 + ce = mocs_context_create(engine); 319 305 if (IS_ERR(ce)) { 320 306 err = PTR_ERR(ce); 321 307 break; ··· 409 395 for_each_engine(engine, gt, id) { 410 396 struct intel_context *ce; 411 397 412 - ce = intel_context_create(engine); 398 + ce = mocs_context_create(engine); 413 399 if (IS_ERR(ce)) { 414 400 err = PTR_ERR(ce); 415 401 break;
+110
drivers/gpu/drm/i915/gt/selftest_ring.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright © 2020 Intel Corporation 4 + */ 5 + 6 + static struct intel_ring *mock_ring(unsigned long sz) 7 + { 8 + struct intel_ring *ring; 9 + 10 + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); 11 + if (!ring) 12 + return NULL; 13 + 14 + kref_init(&ring->ref); 15 + ring->size = sz; 16 + ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz); 17 + ring->effective_size = sz; 18 + ring->vaddr = (void *)(ring + 1); 19 + atomic_set(&ring->pin_count, 1); 20 + 21 + intel_ring_update_space(ring); 22 + 23 + return ring; 24 + } 25 + 26 + static void mock_ring_free(struct intel_ring *ring) 27 + { 28 + kfree(ring); 29 + } 30 + 31 + static int check_ring_direction(struct intel_ring *ring, 32 + u32 next, u32 prev, 33 + int expected) 34 + { 35 + int result; 36 + 37 + result = intel_ring_direction(ring, next, prev); 38 + if (result < 0) 39 + result = -1; 40 + else if (result > 0) 41 + result = 1; 42 + 43 + if (result != expected) { 44 + pr_err("intel_ring_direction(%u, %u):%d != %d\n", 45 + next, prev, result, expected); 46 + return -EINVAL; 47 + } 48 + 49 + return 0; 50 + } 51 + 52 + static int check_ring_step(struct intel_ring *ring, u32 x, u32 step) 53 + { 54 + u32 prev = x, next = intel_ring_wrap(ring, x + step); 55 + int err = 0; 56 + 57 + err |= check_ring_direction(ring, next, next, 0); 58 + err |= check_ring_direction(ring, prev, prev, 0); 59 + err |= check_ring_direction(ring, next, prev, 1); 60 + err |= check_ring_direction(ring, prev, next, -1); 61 + 62 + return err; 63 + } 64 + 65 + static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step) 66 + { 67 + int err = 0; 68 + 69 + err |= check_ring_step(ring, x, step); 70 + err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step); 71 + err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step); 72 + 73 + return err; 74 + } 75 + 76 + static int igt_ring_direction(void *dummy) 77 + { 78 + struct intel_ring *ring; 79 + unsigned int half = 2048; 80 
+ int step, err = 0; 81 + 82 + ring = mock_ring(2 * half); 83 + if (!ring) 84 + return -ENOMEM; 85 + 86 + GEM_BUG_ON(ring->size != 2 * half); 87 + 88 + /* Precision of wrap detection is limited to ring->size / 2 */ 89 + for (step = 1; step < half; step <<= 1) { 90 + err |= check_ring_offset(ring, 0, step); 91 + err |= check_ring_offset(ring, half, step); 92 + } 93 + err |= check_ring_step(ring, 0, half - 64); 94 + 95 + /* And check unwrapped handling for good measure */ 96 + err |= check_ring_offset(ring, 0, 2 * half + 64); 97 + err |= check_ring_offset(ring, 3 * half, 1); 98 + 99 + mock_ring_free(ring); 100 + return err; 101 + } 102 + 103 + int intel_ring_mock_selftests(void) 104 + { 105 + static const struct i915_subtest tests[] = { 106 + SUBTEST(igt_ring_direction), 107 + }; 108 + 109 + return i915_subtests(tests, NULL); 110 + }
+28 -41
drivers/gpu/drm/i915/gt/selftest_rps.c
··· 20 20 /* Try to isolate the impact of cstates from determing frequency response */ 21 21 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ 22 22 23 - static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine) 23 + static void engine_heartbeat_disable(struct intel_engine_cs *engine) 24 24 { 25 - unsigned long old; 26 - 27 - old = fetch_and_zero(&engine->props.heartbeat_interval_ms); 25 + engine->props.heartbeat_interval_ms = 0; 28 26 29 27 intel_engine_pm_get(engine); 30 28 intel_engine_park_heartbeat(engine); 31 - 32 - return old; 33 29 } 34 30 35 - static void engine_heartbeat_enable(struct intel_engine_cs *engine, 36 - unsigned long saved) 31 + static void engine_heartbeat_enable(struct intel_engine_cs *engine) 37 32 { 38 33 intel_engine_pm_put(engine); 39 34 40 - engine->props.heartbeat_interval_ms = saved; 35 + engine->props.heartbeat_interval_ms = 36 + engine->defaults.heartbeat_interval_ms; 41 37 } 42 38 43 39 static void dummy_rps_work(struct work_struct *wrk) ··· 242 246 intel_gt_check_clock_frequency(gt); 243 247 244 248 for_each_engine(engine, gt, id) { 245 - unsigned long saved_heartbeat; 246 249 struct i915_request *rq; 247 250 u32 cycles; 248 251 u64 dt; ··· 249 254 if (!intel_engine_can_store_dword(engine)) 250 255 continue; 251 256 252 - saved_heartbeat = engine_heartbeat_disable(engine); 257 + engine_heartbeat_disable(engine); 253 258 254 259 rq = igt_spinner_create_request(&spin, 255 260 engine->kernel_context, 256 261 MI_NOOP); 257 262 if (IS_ERR(rq)) { 258 - engine_heartbeat_enable(engine, saved_heartbeat); 263 + engine_heartbeat_enable(engine); 259 264 err = PTR_ERR(rq); 260 265 break; 261 266 } ··· 266 271 pr_err("%s: RPS spinner did not start\n", 267 272 engine->name); 268 273 igt_spinner_end(&spin); 269 - engine_heartbeat_enable(engine, saved_heartbeat); 274 + engine_heartbeat_enable(engine); 270 275 intel_gt_set_wedged(engine->gt); 271 276 err = -EIO; 272 277 break; ··· 322 327 
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); 323 328 324 329 igt_spinner_end(&spin); 325 - engine_heartbeat_enable(engine, saved_heartbeat); 330 + engine_heartbeat_enable(engine); 326 331 327 332 if (err == 0) { 328 333 u64 time = intel_gt_pm_interval_to_ns(gt, cycles); ··· 400 405 401 406 intel_gt_pm_get(gt); 402 407 for_each_engine(engine, gt, id) { 403 - unsigned long saved_heartbeat; 404 408 struct i915_request *rq; 405 409 ktime_t min_dt, max_dt; 406 410 int f, limit; ··· 408 414 if (!intel_engine_can_store_dword(engine)) 409 415 continue; 410 416 411 - saved_heartbeat = engine_heartbeat_disable(engine); 417 + engine_heartbeat_disable(engine); 412 418 413 419 rq = igt_spinner_create_request(&spin, 414 420 engine->kernel_context, ··· 424 430 pr_err("%s: RPS spinner did not start\n", 425 431 engine->name); 426 432 igt_spinner_end(&spin); 427 - engine_heartbeat_enable(engine, saved_heartbeat); 433 + engine_heartbeat_enable(engine); 428 434 intel_gt_set_wedged(engine->gt); 429 435 err = -EIO; 430 436 break; ··· 434 440 pr_err("%s: could not set minimum frequency [%x], only %x!\n", 435 441 engine->name, rps->min_freq, read_cagf(rps)); 436 442 igt_spinner_end(&spin); 437 - engine_heartbeat_enable(engine, saved_heartbeat); 443 + engine_heartbeat_enable(engine); 438 444 show_pstate_limits(rps); 439 445 err = -EINVAL; 440 446 break; ··· 451 457 pr_err("%s: could not restore minimum frequency [%x], only %x!\n", 452 458 engine->name, rps->min_freq, read_cagf(rps)); 453 459 igt_spinner_end(&spin); 454 - engine_heartbeat_enable(engine, saved_heartbeat); 460 + engine_heartbeat_enable(engine); 455 461 show_pstate_limits(rps); 456 462 err = -EINVAL; 457 463 break; ··· 466 472 min_dt = ktime_sub(ktime_get(), min_dt); 467 473 468 474 igt_spinner_end(&spin); 469 - engine_heartbeat_enable(engine, saved_heartbeat); 475 + engine_heartbeat_enable(engine); 470 476 471 477 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", 472 478 
engine->name, ··· 629 635 rps->work.func = dummy_rps_work; 630 636 631 637 for_each_engine(engine, gt, id) { 632 - unsigned long saved_heartbeat; 633 638 struct i915_request *rq; 634 639 struct i915_vma *vma; 635 640 u32 *cancel, *cntr; ··· 637 644 int freq; 638 645 } min, max; 639 646 640 - saved_heartbeat = engine_heartbeat_disable(engine); 647 + engine_heartbeat_disable(engine); 641 648 642 649 vma = create_spin_counter(engine, 643 650 engine->kernel_context->vm, false, 644 651 &cancel, &cntr); 645 652 if (IS_ERR(vma)) { 646 653 err = PTR_ERR(vma); 647 - engine_heartbeat_enable(engine, saved_heartbeat); 654 + engine_heartbeat_enable(engine); 648 655 break; 649 656 } 650 657 ··· 725 732 i915_vma_unpin(vma); 726 733 i915_vma_put(vma); 727 734 728 - engine_heartbeat_enable(engine, saved_heartbeat); 735 + engine_heartbeat_enable(engine); 729 736 if (igt_flush_test(gt->i915)) 730 737 err = -EIO; 731 738 if (err) ··· 771 778 rps->work.func = dummy_rps_work; 772 779 773 780 for_each_engine(engine, gt, id) { 774 - unsigned long saved_heartbeat; 775 781 struct i915_request *rq; 776 782 struct i915_vma *vma; 777 783 u32 *cancel, *cntr; ··· 779 787 int freq; 780 788 } min, max; 781 789 782 - saved_heartbeat = engine_heartbeat_disable(engine); 790 + engine_heartbeat_disable(engine); 783 791 784 792 vma = create_spin_counter(engine, 785 793 engine->kernel_context->vm, true, 786 794 &cancel, &cntr); 787 795 if (IS_ERR(vma)) { 788 796 err = PTR_ERR(vma); 789 - engine_heartbeat_enable(engine, saved_heartbeat); 797 + engine_heartbeat_enable(engine); 790 798 break; 791 799 } 792 800 ··· 866 874 i915_vma_unpin(vma); 867 875 i915_vma_put(vma); 868 876 869 - engine_heartbeat_enable(engine, saved_heartbeat); 877 + engine_heartbeat_enable(engine); 870 878 if (igt_flush_test(gt->i915)) 871 879 err = -EIO; 872 880 if (err) ··· 1058 1066 for_each_engine(engine, gt, id) { 1059 1067 /* Keep the engine busy with a spinner; expect an UP! 
*/ 1060 1068 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) { 1061 - unsigned long saved_heartbeat; 1062 - 1063 1069 intel_gt_pm_wait_for_idle(engine->gt); 1064 1070 GEM_BUG_ON(intel_rps_is_active(rps)); 1065 1071 1066 - saved_heartbeat = engine_heartbeat_disable(engine); 1072 + engine_heartbeat_disable(engine); 1067 1073 1068 1074 err = __rps_up_interrupt(rps, engine, &spin); 1069 1075 1070 - engine_heartbeat_enable(engine, saved_heartbeat); 1076 + engine_heartbeat_enable(engine); 1071 1077 if (err) 1072 1078 goto out; 1073 1079 ··· 1074 1084 1075 1085 /* Keep the engine awake but idle and check for DOWN */ 1076 1086 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { 1077 - unsigned long saved_heartbeat; 1078 - 1079 - saved_heartbeat = engine_heartbeat_disable(engine); 1087 + engine_heartbeat_disable(engine); 1080 1088 intel_rc6_disable(&gt->rc6); 1081 1089 1082 1090 err = __rps_down_interrupt(rps, engine); 1083 1091 1084 1092 intel_rc6_enable(&gt->rc6); 1085 - engine_heartbeat_enable(engine, saved_heartbeat); 1093 + engine_heartbeat_enable(engine); 1086 1094 if (err) 1087 1095 goto out; 1088 1096 } ··· 1156 1168 rps->work.func = dummy_rps_work; 1157 1169 1158 1170 for_each_engine(engine, gt, id) { 1159 - unsigned long saved_heartbeat; 1160 1171 struct i915_request *rq; 1161 1172 struct { 1162 1173 u64 power; ··· 1165 1178 if (!intel_engine_can_store_dword(engine)) 1166 1179 continue; 1167 1180 1168 - saved_heartbeat = engine_heartbeat_disable(engine); 1181 + engine_heartbeat_disable(engine); 1169 1182 1170 1183 rq = igt_spinner_create_request(&spin, 1171 1184 engine->kernel_context, 1172 1185 MI_NOOP); 1173 1186 if (IS_ERR(rq)) { 1174 - engine_heartbeat_enable(engine, saved_heartbeat); 1187 + engine_heartbeat_enable(engine); 1175 1188 err = PTR_ERR(rq); 1176 1189 break; 1177 1190 } ··· 1182 1195 pr_err("%s: RPS spinner did not start\n", 1183 1196 engine->name); 1184 1197 igt_spinner_end(&spin); 1185 - engine_heartbeat_enable(engine, saved_heartbeat); 1198 + 
engine_heartbeat_enable(engine); 1186 1199 intel_gt_set_wedged(engine->gt); 1187 1200 err = -EIO; 1188 1201 break; ··· 1195 1208 min.power = measure_power_at(rps, &min.freq); 1196 1209 1197 1210 igt_spinner_end(&spin); 1198 - engine_heartbeat_enable(engine, saved_heartbeat); 1211 + engine_heartbeat_enable(engine); 1199 1212 1200 1213 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", 1201 1214 engine->name,
+6 -9
drivers/gpu/drm/i915/gt/selftest_timeline.c
··· 751 751 return err; 752 752 } 753 753 754 - static void engine_heartbeat_disable(struct intel_engine_cs *engine, 755 - unsigned long *saved) 754 + static void engine_heartbeat_disable(struct intel_engine_cs *engine) 756 755 { 757 - *saved = engine->props.heartbeat_interval_ms; 758 756 engine->props.heartbeat_interval_ms = 0; 759 757 760 758 intel_engine_pm_get(engine); 761 759 intel_engine_park_heartbeat(engine); 762 760 } 763 761 764 - static void engine_heartbeat_enable(struct intel_engine_cs *engine, 765 - unsigned long saved) 762 + static void engine_heartbeat_enable(struct intel_engine_cs *engine) 766 763 { 767 764 intel_engine_pm_put(engine); 768 765 769 - engine->props.heartbeat_interval_ms = saved; 766 + engine->props.heartbeat_interval_ms = 767 + engine->defaults.heartbeat_interval_ms; 770 768 } 771 769 772 770 static int live_hwsp_rollover_kernel(void *arg) ··· 783 785 struct intel_context *ce = engine->kernel_context; 784 786 struct intel_timeline *tl = ce->timeline; 785 787 struct i915_request *rq[3] = {}; 786 - unsigned long heartbeat; 787 788 int i; 788 789 789 - engine_heartbeat_disable(engine, &heartbeat); 790 + engine_heartbeat_disable(engine); 790 791 if (intel_gt_wait_for_idle(gt, HZ / 2)) { 791 792 err = -EIO; 792 793 goto out; ··· 836 839 out: 837 840 for (i = 0; i < ARRAY_SIZE(rq); i++) 838 841 i915_request_put(rq[i]); 839 - engine_heartbeat_enable(engine, heartbeat); 842 + engine_heartbeat_enable(engine); 840 843 if (err) 841 844 break; 842 845 }
+2
drivers/gpu/drm/i915/gt/selftest_workarounds.c
··· 623 623 err = -EINVAL; 624 624 goto out_unpin; 625 625 } 626 + } else { 627 + rsvd = 0; 626 628 } 627 629 628 630 expect = results[0];
+1
drivers/gpu/drm/i915/i915_irq.c
··· 3125 3125 3126 3126 val = I915_READ(GEN11_DE_HPD_IMR); 3127 3127 val &= ~hotplug_irqs; 3128 + val |= ~enabled_irqs & hotplug_irqs; 3128 3129 I915_WRITE(GEN11_DE_HPD_IMR, val); 3129 3130 POSTING_READ(GEN11_DE_HPD_IMR); 3130 3131
+42 -42
drivers/gpu/drm/i915/i915_pmu.c
··· 269 269 return IS_GEN(i915, 7); 270 270 } 271 271 272 + static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) 273 + { 274 + struct intel_engine_pmu *pmu = &engine->pmu; 275 + bool busy; 276 + u32 val; 277 + 278 + val = ENGINE_READ_FW(engine, RING_CTL); 279 + if (val == 0) /* powerwell off => engine idle */ 280 + return; 281 + 282 + if (val & RING_WAIT) 283 + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); 284 + if (val & RING_WAIT_SEMAPHORE) 285 + add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); 286 + 287 + /* No need to sample when busy stats are supported. */ 288 + if (intel_engine_supports_stats(engine)) 289 + return; 290 + 291 + /* 292 + * While waiting on a semaphore or event, MI_MODE reports the 293 + * ring as idle. However, previously using the seqno, and with 294 + * execlists sampling, we account for the ring waiting as the 295 + * engine being busy. Therefore, we record the sample as being 296 + * busy if either waiting or !idle. 297 + */ 298 + busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); 299 + if (!busy) { 300 + val = ENGINE_READ_FW(engine, RING_MI_MODE); 301 + busy = !(val & MODE_IDLE); 302 + } 303 + if (busy) 304 + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); 305 + } 306 + 272 307 static void 273 308 engines_sample(struct intel_gt *gt, unsigned int period_ns) 274 309 { 275 310 struct drm_i915_private *i915 = gt->i915; 276 311 struct intel_engine_cs *engine; 277 312 enum intel_engine_id id; 313 + unsigned long flags; 278 314 279 315 if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) 280 316 return; ··· 319 283 return; 320 284 321 285 for_each_engine(engine, gt, id) { 322 - struct intel_engine_pmu *pmu = &engine->pmu; 323 - spinlock_t *mmio_lock; 324 - unsigned long flags; 325 - bool busy; 326 - u32 val; 327 - 328 286 if (!intel_engine_pm_get_if_awake(engine)) 329 287 continue; 330 288 331 - mmio_lock = NULL; 332 - if (exclusive_mmio_access(i915)) 333 - mmio_lock = &engine->uncore->lock; 334 - 335 
- if (unlikely(mmio_lock)) 336 - spin_lock_irqsave(mmio_lock, flags); 337 - 338 - val = ENGINE_READ_FW(engine, RING_CTL); 339 - if (val == 0) /* powerwell off => engine idle */ 340 - goto skip; 341 - 342 - if (val & RING_WAIT) 343 - add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); 344 - if (val & RING_WAIT_SEMAPHORE) 345 - add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); 346 - 347 - /* No need to sample when busy stats are supported. */ 348 - if (intel_engine_supports_stats(engine)) 349 - goto skip; 350 - 351 - /* 352 - * While waiting on a semaphore or event, MI_MODE reports the 353 - * ring as idle. However, previously using the seqno, and with 354 - * execlists sampling, we account for the ring waiting as the 355 - * engine being busy. Therefore, we record the sample as being 356 - * busy if either waiting or !idle. 357 - */ 358 - busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); 359 - if (!busy) { 360 - val = ENGINE_READ_FW(engine, RING_MI_MODE); 361 - busy = !(val & MODE_IDLE); 289 + if (exclusive_mmio_access(i915)) { 290 + spin_lock_irqsave(&engine->uncore->lock, flags); 291 + engine_sample(engine, period_ns); 292 + spin_unlock_irqrestore(&engine->uncore->lock, flags); 293 + } else { 294 + engine_sample(engine, period_ns); 362 295 } 363 - if (busy) 364 - add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); 365 296 366 - skip: 367 - if (unlikely(mmio_lock)) 368 - spin_unlock_irqrestore(mmio_lock, flags); 369 297 intel_engine_pm_put_async(engine); 370 298 } 371 299 }
+1 -1
drivers/gpu/drm/i915/i915_priolist_types.h
··· 42 42 * active request. 43 43 */ 44 44 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX 45 - #define I915_PRIORITY_BARRIER INT_MAX 45 + #define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) 46 46 47 47 struct i915_priolist { 48 48 struct list_head requests[I915_PRIORITY_COUNT];
+1 -1
drivers/gpu/drm/i915/i915_reg.h
··· 7896 7896 7897 7897 /* GEN7 chicken */ 7898 7898 #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) 7899 - #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1 << 10) | (1 << 26)) 7899 + #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC (1 << 10) 7900 7900 #define GEN9_RHWO_OPTIMIZATION_DISABLE (1 << 14) 7901 7901 7902 7902 #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014)
+2 -204
drivers/gpu/drm/i915/intel_pm.c
··· 6830 6830 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6831 6831 I915_READ(ILK_DISPLAY_CHICKEN2) | 6832 6832 ILK_ELPIN_409_SELECT); 6833 - I915_WRITE(_3D_CHICKEN2, 6834 - _3D_CHICKEN2_WM_READ_PIPELINED << 16 | 6835 - _3D_CHICKEN2_WM_READ_PIPELINED); 6836 - 6837 - /* WaDisableRenderCachePipelinedFlush:ilk */ 6838 - I915_WRITE(CACHE_MODE_0, 6839 - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 6840 - 6841 - /* WaDisable_RenderCache_OperationalFlush:ilk */ 6842 - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6843 6833 6844 6834 g4x_disable_trickle_feed(dev_priv); 6845 6835 ··· 6892 6902 I915_READ(ILK_DISPLAY_CHICKEN2) | 6893 6903 ILK_ELPIN_409_SELECT); 6894 6904 6895 - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ 6896 - I915_WRITE(_3D_CHICKEN, 6897 - _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); 6898 - 6899 - /* WaDisable_RenderCache_OperationalFlush:snb */ 6900 - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6901 - 6902 - /* 6903 - * BSpec recoomends 8x4 when MSAA is used, 6904 - * however in practice 16x4 seems fastest. 6905 - * 6906 - * Note that PS/WM thread counts depend on the WIZ hashing 6907 - * disable bit, which we don't touch here, but it's good 6908 - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
6909 - */ 6910 - I915_WRITE(GEN6_GT_MODE, 6911 - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6912 - 6913 - I915_WRITE(CACHE_MODE_0, 6914 - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 6915 - 6916 6905 I915_WRITE(GEN6_UCGCTL1, 6917 6906 I915_READ(GEN6_UCGCTL1) | 6918 6907 GEN6_BLBUNIT_CLOCK_GATE_DISABLE | ··· 6913 6944 I915_WRITE(GEN6_UCGCTL2, 6914 6945 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | 6915 6946 GEN6_RCCUNIT_CLOCK_GATE_DISABLE); 6916 - 6917 - /* WaStripsFansDisableFastClipPerformanceFix:snb */ 6918 - I915_WRITE(_3D_CHICKEN3, 6919 - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); 6920 - 6921 - /* 6922 - * Bspec says: 6923 - * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and 6924 - * 3DSTATE_SF number of SF output attributes is more than 16." 6925 - */ 6926 - I915_WRITE(_3D_CHICKEN3, 6927 - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); 6928 6947 6929 6948 /* 6930 6949 * According to the spec the following bits should be ··· 6941 6984 cpt_init_clock_gating(dev_priv); 6942 6985 6943 6986 gen6_check_mch_setup(dev_priv); 6944 - } 6945 - 6946 - static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) 6947 - { 6948 - u32 reg = I915_READ(GEN7_FF_THREAD_MODE); 6949 - 6950 - /* 6951 - * WaVSThreadDispatchOverride:ivb,vlv 6952 - * 6953 - * This actually overrides the dispatch 6954 - * mode for all thread types. 6955 - */ 6956 - reg &= ~GEN7_FF_SCHED_MASK; 6957 - reg |= GEN7_FF_TS_SCHED_HW; 6958 - reg |= GEN7_FF_VS_SCHED_HW; 6959 - reg |= GEN7_FF_DS_SCHED_HW; 6960 - 6961 - I915_WRITE(GEN7_FF_THREAD_MODE, reg); 6962 6987 } 6963 6988 6964 6989 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) ··· 7169 7230 7170 7231 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) 7171 7232 { 7172 - /* L3 caching of data atomics doesn't work -- disable it. 
*/ 7173 - I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 7174 - I915_WRITE(HSW_ROW_CHICKEN3, 7175 - _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); 7176 - 7177 7233 /* This is required by WaCatErrorRejectionIssue:hsw */ 7178 7234 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 7179 - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 7180 - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 7181 - 7182 - /* WaVSRefCountFullforceMissDisable:hsw */ 7183 - I915_WRITE(GEN7_FF_THREAD_MODE, 7184 - I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); 7185 - 7186 - /* WaDisable_RenderCache_OperationalFlush:hsw */ 7187 - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7188 - 7189 - /* enable HiZ Raw Stall Optimization */ 7190 - I915_WRITE(CACHE_MODE_0_GEN7, 7191 - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 7192 - 7193 - /* WaDisable4x2SubspanOptimization:hsw */ 7194 - I915_WRITE(CACHE_MODE_1, 7195 - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 7196 - 7197 - /* 7198 - * BSpec recommends 8x4 when MSAA is used, 7199 - * however in practice 16x4 seems fastest. 7200 - * 7201 - * Note that PS/WM thread counts depend on the WIZ hashing 7202 - * disable bit, which we don't touch here, but it's good 7203 - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
7204 - */ 7205 - I915_WRITE(GEN7_GT_MODE, 7206 - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 7207 - 7208 - /* WaSampleCChickenBitEnable:hsw */ 7209 - I915_WRITE(HALF_SLICE_CHICKEN3, 7210 - _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); 7235 + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 7236 + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 7211 7237 7212 7238 /* WaSwitchSolVfFArbitrationPriority:hsw */ 7213 7239 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); ··· 7186 7282 7187 7283 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); 7188 7284 7189 - /* WaDisableEarlyCull:ivb */ 7190 - I915_WRITE(_3D_CHICKEN3, 7191 - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 7192 - 7193 7285 /* WaDisableBackToBackFlipFix:ivb */ 7194 7286 I915_WRITE(IVB_CHICKEN3, 7195 7287 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 7196 7288 CHICKEN3_DGMG_DONE_FIX_DISABLE); 7197 7289 7198 - /* WaDisablePSDDualDispatchEnable:ivb */ 7199 - if (IS_IVB_GT1(dev_priv)) 7200 - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 7201 - _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 7202 - 7203 - /* WaDisable_RenderCache_OperationalFlush:ivb */ 7204 - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7205 - 7206 - /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. 
*/ 7207 - I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, 7208 - GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 7209 - 7210 - /* WaApplyL3ControlAndL3ChickenMode:ivb */ 7211 - I915_WRITE(GEN7_L3CNTLREG1, 7212 - GEN7_WA_FOR_GEN7_L3_CONTROL); 7213 - I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, 7214 - GEN7_WA_L3_CHICKEN_MODE); 7215 7290 if (IS_IVB_GT1(dev_priv)) 7216 7291 I915_WRITE(GEN7_ROW_CHICKEN2, 7217 7292 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); ··· 7201 7318 I915_WRITE(GEN7_ROW_CHICKEN2_GT2, 7202 7319 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 7203 7320 } 7204 - 7205 - /* WaForceL3Serialization:ivb */ 7206 - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 7207 - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 7208 7321 7209 7322 /* 7210 7323 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. ··· 7216 7337 7217 7338 g4x_disable_trickle_feed(dev_priv); 7218 7339 7219 - gen7_setup_fixed_func_scheduler(dev_priv); 7220 - 7221 - if (0) { /* causes HiZ corruption on ivb:gt1 */ 7222 - /* enable HiZ Raw Stall Optimization */ 7223 - I915_WRITE(CACHE_MODE_0_GEN7, 7224 - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 7225 - } 7226 - 7227 - /* WaDisable4x2SubspanOptimization:ivb */ 7228 - I915_WRITE(CACHE_MODE_1, 7229 - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 7230 - 7231 - /* 7232 - * BSpec recommends 8x4 when MSAA is used, 7233 - * however in practice 16x4 seems fastest. 7234 - * 7235 - * Note that PS/WM thread counts depend on the WIZ hashing 7236 - * disable bit, which we don't touch here, but it's good 7237 - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
7238 - */ 7239 - I915_WRITE(GEN7_GT_MODE, 7240 - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 7241 - 7242 7340 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); 7243 7341 snpcr &= ~GEN6_MBC_SNPCR_MASK; 7244 7342 snpcr |= GEN6_MBC_SNPCR_MED; ··· 7229 7373 7230 7374 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) 7231 7375 { 7232 - /* WaDisableEarlyCull:vlv */ 7233 - I915_WRITE(_3D_CHICKEN3, 7234 - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 7235 - 7236 7376 /* WaDisableBackToBackFlipFix:vlv */ 7237 7377 I915_WRITE(IVB_CHICKEN3, 7238 7378 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 7239 7379 CHICKEN3_DGMG_DONE_FIX_DISABLE); 7240 - 7241 - /* WaPsdDispatchEnable:vlv */ 7242 - /* WaDisablePSDDualDispatchEnable:vlv */ 7243 - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 7244 - _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | 7245 - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 7246 - 7247 - /* WaDisable_RenderCache_OperationalFlush:vlv */ 7248 - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7249 - 7250 - /* WaForceL3Serialization:vlv */ 7251 - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 7252 - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 7253 7380 7254 7381 /* WaDisableDopClockGating:vlv */ 7255 7382 I915_WRITE(GEN7_ROW_CHICKEN2, ··· 7242 7403 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 7243 7404 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 7244 7405 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 7245 - 7246 - gen7_setup_fixed_func_scheduler(dev_priv); 7247 7406 7248 7407 /* 7249 7408 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. ··· 7255 7418 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ 7256 7419 I915_WRITE(GEN7_UCGCTL4, 7257 7420 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); 7258 - 7259 - /* 7260 - * BSpec says this must be set, even though 7261 - * WaDisable4x2SubspanOptimization isn't listed for VLV. 
7262 - */ 7263 - I915_WRITE(CACHE_MODE_1, 7264 - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 7265 - 7266 - /* 7267 - * BSpec recommends 8x4 when MSAA is used, 7268 - * however in practice 16x4 seems fastest. 7269 - * 7270 - * Note that PS/WM thread counts depend on the WIZ hashing 7271 - * disable bit, which we don't touch here, but it's good 7272 - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 7273 - */ 7274 - I915_WRITE(GEN7_GT_MODE, 7275 - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 7276 - 7277 - /* 7278 - * WaIncreaseL3CreditsForVLVB0:vlv 7279 - * This is the hardware default actually. 7280 - */ 7281 - I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); 7282 7421 7283 7422 /* 7284 7423 * WaDisableVLVClockGating_VBIIssue:vlv ··· 7308 7495 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; 7309 7496 I915_WRITE(DSPCLK_GATE_D, dspclk_gate); 7310 7497 7311 - /* WaDisableRenderCachePipelinedFlush */ 7312 - I915_WRITE(CACHE_MODE_0, 7313 - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 7314 - 7315 - /* WaDisable_RenderCache_OperationalFlush:g4x */ 7316 - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7317 - 7318 7498 g4x_disable_trickle_feed(dev_priv); 7319 7499 } 7320 7500 ··· 7323 7517 intel_uncore_write(uncore, 7324 7518 MI_ARB_STATE, 7325 7519 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7326 - 7327 - /* WaDisable_RenderCache_OperationalFlush:gen4 */ 7328 - intel_uncore_write(uncore, 7329 - CACHE_MODE_0, 7330 - _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7331 7520 } 7332 7521 7333 7522 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) ··· 7335 7534 I915_WRITE(RENCLK_GATE_D2, 0); 7336 7535 I915_WRITE(MI_ARB_STATE, 7337 7536 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7338 - 7339 - /* WaDisable_RenderCache_OperationalFlush:gen4 */ 7340 - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7341 7537 } 7342 7538 7343 7539 static void 
gen3_init_clock_gating(struct drm_i915_private *dev_priv)
+1
drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
··· 21 21 selftest(scatterlist, scatterlist_mock_selftests) 22 22 selftest(syncmap, i915_syncmap_mock_selftests) 23 23 selftest(uncore, intel_uncore_mock_selftests) 24 + selftest(ring, intel_ring_mock_selftests) 24 25 selftest(engine, intel_engine_cs_mock_selftests) 25 26 selftest(timelines, intel_timeline_mock_selftests) 26 27 selftest(requests, i915_request_mock_selftests)