Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Prevent machine hang from Broxton's vtd w/a and error capture

Capturing the error state requires fiddling around with the GGTT to read
arbitrary buffers, and the capture is itself run under stop_machine().
When run in conjunction with Broxton's VT-d workaround to serialize GGTT
access, this recursively calls stop_machine() and deadlocks the machine
(effectively a hard hang).

v2: Store the ERR_PTR in first_error so that the error can be reported
to the user via sysfs.
v3: Mention the quirk in dmesg (using dev_info() as per usual)

Fixes: 0ef34ad6222a ("drm/i915: Serialize GTT/Aperture accesses on BXT")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: John Harrison <john.C.Harrison@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181102161232.17742-5-chris@chris-wilson.co.uk
(cherry picked from commit fb6f0b64e455b207a636346588e65bf9598d30eb)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Authored by Chris Wilson and committed by Joonas Lahtinen
8830f26b 21556350

+26 -2
+5
drivers/gpu/drm/i915/i915_gem_gtt.c
··· 3413 3413 ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; 3414 3414 if (ggtt->vm.clear_range != nop_clear_range) 3415 3415 ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; 3416 + 3417 + /* Prevent recursively calling stop_machine() and deadlocks. */ 3418 + dev_info(dev_priv->drm.dev, 3419 + "Disabling error capture for VT-d workaround\n"); 3420 + i915_disable_error_state(dev_priv, -ENODEV); 3416 3421 } 3417 3422 3418 3423 ggtt->invalidate = gen6_ggtt_invalidate;
+14 -1
drivers/gpu/drm/i915/i915_gpu_error.c
··· 648 648 return 0; 649 649 } 650 650 651 + if (IS_ERR(error)) 652 + return PTR_ERR(error); 653 + 651 654 if (*error->error_msg) 652 655 err_printf(m, "%s\n", error->error_msg); 653 656 err_printf(m, "Kernel: " UTS_RELEASE "\n"); ··· 1862 1859 error = i915_capture_gpu_state(i915); 1863 1860 if (!error) { 1864 1861 DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); 1862 + i915_disable_error_state(i915, -ENOMEM); 1865 1863 return; 1866 1864 } 1867 1865 ··· 1918 1914 i915->gpu_error.first_error = NULL; 1919 1915 spin_unlock_irq(&i915->gpu_error.lock); 1920 1916 1921 - i915_gpu_state_put(error); 1917 + if (!IS_ERR(error)) 1918 + i915_gpu_state_put(error); 1919 + } 1920 + 1921 + void i915_disable_error_state(struct drm_i915_private *i915, int err) 1922 + { 1923 + spin_lock_irq(&i915->gpu_error.lock); 1924 + if (!i915->gpu_error.first_error) 1925 + i915->gpu_error.first_error = ERR_PTR(err); 1926 + spin_unlock_irq(&i915->gpu_error.lock); 1922 1927 }
+7 -1
drivers/gpu/drm/i915/i915_gpu_error.h
··· 343 343 344 344 struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); 345 345 void i915_reset_error_state(struct drm_i915_private *i915); 346 + void i915_disable_error_state(struct drm_i915_private *i915, int err); 346 347 347 348 #else 348 349 ··· 356 355 static inline struct i915_gpu_state * 357 356 i915_first_error_state(struct drm_i915_private *i915) 358 357 { 359 - return NULL; 358 + return ERR_PTR(-ENODEV); 360 359 } 361 360 362 361 static inline void i915_reset_error_state(struct drm_i915_private *i915) 362 + { 363 + } 364 + 365 + static inline void i915_disable_error_state(struct drm_i915_private *i915, 366 + int err) 363 367 { 364 368 } 365 369