Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Split obj->cache_coherent to track r/w

Another month, another story in the cache coherency saga. This time, we
come to the realisation that i915_gem_object_is_coherent() has been
reporting whether we can read from the target without requiring a cache
invalidate; but we were using it in places for testing whether we could
write into the object without requiring a cache flush. So split the
tracking into two, one to decide before reads, one after writes.

See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
transition for CPU writes") for the previous entry in this saga.

v2: Be verbose
v3: Remove unused function (i915_gem_object_is_coherent)
v4: Fix inverted coherency check prior to execbuf (from v2)
v5: Add comment for nasty code where we are optimising on gcc's behalf.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
Testcase: igt/kms_mmap_write_crc
Testcase: igt/kms_pwrite_crc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170811111116.10373-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

+96 -32
+1
drivers/gpu/drm/i915/Makefile
··· 39 39 i915_gem_gtt.o \ 40 40 i915_gem_internal.o \ 41 41 i915_gem.o \ 42 + i915_gem_object.o \ 42 43 i915_gem_render_state.o \ 43 44 i915_gem_request.o \ 44 45 i915_gem_shrinker.o \
-6
drivers/gpu/drm/i915/i915_drv.h
··· 4322 4322 unsigned long addr, unsigned long pfn, unsigned long size, 4323 4323 struct io_mapping *iomap); 4324 4324 4325 - static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj) 4326 - { 4327 - return (obj->cache_level != I915_CACHE_NONE || 4328 - HAS_LLC(to_i915(obj->base.dev))); 4329 - } 4330 - 4331 4325 #endif
+13 -12
drivers/gpu/drm/i915/i915_gem.c
··· 52 52 if (obj->cache_dirty) 53 53 return false; 54 54 55 - if (!obj->cache_coherent) 55 + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 56 56 return true; 57 57 58 58 return obj->pin_display; ··· 253 253 254 254 if (needs_clflush && 255 255 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && 256 - !obj->cache_coherent) 256 + !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 257 257 drm_clflush_sg(pages); 258 258 259 259 __start_cpu_write(obj); ··· 800 800 if (ret) 801 801 return ret; 802 802 803 - if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) { 803 + if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 804 + !static_cpu_has(X86_FEATURE_CLFLUSH)) { 804 805 ret = i915_gem_object_set_to_cpu_domain(obj, false); 805 806 if (ret) 806 807 goto err_unpin; ··· 853 852 if (ret) 854 853 return ret; 855 854 856 - if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) { 855 + if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 856 + !static_cpu_has(X86_FEATURE_CLFLUSH)) { 857 857 ret = i915_gem_object_set_to_cpu_domain(obj, true); 858 858 if (ret) 859 859 goto err_unpin; ··· 3675 3673 3676 3674 list_for_each_entry(vma, &obj->vma_list, obj_link) 3677 3675 vma->node.color = cache_level; 3678 - obj->cache_level = cache_level; 3679 - obj->cache_coherent = i915_gem_object_is_coherent(obj); 3676 + i915_gem_object_set_cache_coherency(obj, cache_level); 3680 3677 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 3681 3678 3682 3679 return 0; ··· 4280 4279 { 4281 4280 struct drm_i915_gem_object *obj; 4282 4281 struct address_space *mapping; 4282 + unsigned int cache_level; 4283 4283 gfp_t mask; 4284 4284 int ret; 4285 4285 ··· 4319 4317 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4320 4318 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4321 4319 4322 - if (HAS_LLC(dev_priv)) { 4320 + if (HAS_LLC(dev_priv)) 4323 4321 /* On some devices, we can have the GPU use the LLC (the CPU 4324 4322 * cache) for about a 10% performance improvement 4325 4323 * compared to uncached. Graphics requests other than ··· 4332 4330 * However, we maintain the display planes as UC, and so 4333 4331 * need to rebind when first used as such. 4334 4332 */ 4335 - obj->cache_level = I915_CACHE_LLC; 4336 - } else 4337 - obj->cache_level = I915_CACHE_NONE; 4333 + cache_level = I915_CACHE_LLC; 4334 + else 4335 + cache_level = I915_CACHE_NONE; 4338 4336 4339 - obj->cache_coherent = i915_gem_object_is_coherent(obj); 4340 - obj->cache_dirty = !obj->cache_coherent; 4337 + i915_gem_object_set_cache_coherency(obj, cache_level); 4341 4338 4342 4339 trace_i915_gem_object_create(obj); 4343 4340
+2 -1
drivers/gpu/drm/i915/i915_gem_clflush.c
··· 139 139 * snooping behaviour occurs naturally as the result of our domain 140 140 * tracking. 141 141 */ 142 - if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent) 142 + if (!(flags & I915_CLFLUSH_FORCE) && 143 + obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) 143 144 return false; 144 145 145 146 trace_i915_gem_object_clflush(obj);
+13 -1
drivers/gpu/drm/i915/i915_gem_execbuffer.c
··· 1842 1842 eb->request->capture_list = capture; 1843 1843 } 1844 1844 1845 - if (unlikely(obj->cache_dirty && !obj->cache_coherent)) { 1845 + /* 1846 + * If the GPU is not _reading_ through the CPU cache, we need 1847 + * to make sure that any writes (both previous GPU writes from 1848 + * before a change in snooping levels and normal CPU writes) 1849 + * caught in that cache are flushed to main memory. 1850 + * 1851 + * We want to say 1852 + * obj->cache_dirty && 1853 + * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) 1854 + * but gcc's optimiser doesn't handle that as well and emits 1855 + * two jumps instead of one. Maybe one day... 1856 + */ 1857 + if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { 1846 1858 if (i915_gem_clflush_object(obj, 0)) 1847 1859 entry->flags &= ~EXEC_OBJECT_ASYNC; 1848 1860 }
+4 -3
drivers/gpu/drm/i915/i915_gem_internal.c
··· 174 174 phys_addr_t size) 175 175 { 176 176 struct drm_i915_gem_object *obj; 177 + unsigned int cache_level; 177 178 178 179 GEM_BUG_ON(!size); 179 180 GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); ··· 191 190 192 191 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 193 192 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 194 - obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; 195 - obj->cache_coherent = i915_gem_object_is_coherent(obj); 196 - obj->cache_dirty = !obj->cache_coherent; 193 + 194 + cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; 195 + i915_gem_object_set_cache_coherency(obj, cache_level); 197 196 198 197 return obj; 199 198 }
+48
drivers/gpu/drm/i915/i915_gem_object.c
··· 1 + /* 2 + * Copyright © 2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + * 23 + */ 24 + 25 + #include "i915_drv.h" 26 + #include "i915_gem_object.h" 27 + 28 + /** 29 + * Mark up the object's coherency levels for a given cache_level 30 + * @obj: #drm_i915_gem_object 31 + * @cache_level: cache level 32 + */ 33 + void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, 34 + unsigned int cache_level) 35 + { 36 + obj->cache_level = cache_level; 37 + 38 + if (cache_level != I915_CACHE_NONE) 39 + obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | 40 + I915_BO_CACHE_COHERENT_FOR_WRITE); 41 + else if (HAS_LLC(to_i915(obj->base.dev))) 42 + obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ; 43 + else 44 + obj->cache_coherent = 0; 45 + 46 + obj->cache_dirty = 47 + !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); 48 + }
+8 -1
drivers/gpu/drm/i915/i915_gem_object.h
··· 33 33 34 34 #include <drm/i915_drm.h> 35 35 36 + #include "i915_gem_request.h" 36 37 #include "i915_selftest.h" 38 + 39 + struct drm_i915_gem_object; 37 40 38 41 struct drm_i915_gem_object_ops { 39 42 unsigned int flags; ··· 121 118 */ 122 119 unsigned long gt_ro:1; 123 120 unsigned int cache_level:3; 121 + unsigned int cache_coherent:2; 122 + #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) 123 + #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) 124 124 unsigned int cache_dirty:1; 125 - unsigned int cache_coherent:1; 126 125 127 126 atomic_t frontbuffer_bits; 128 127 unsigned int frontbuffer_ggtt_origin; /* write once */ ··· 396 391 return engine; 397 392 } 398 393 394 + void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, 395 + unsigned int cache_level); 399 396 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); 400 397 401 398 #endif
+3 -2
drivers/gpu/drm/i915/i915_gem_stolen.c
··· 580 580 struct drm_mm_node *stolen) 581 581 { 582 582 struct drm_i915_gem_object *obj; 583 + unsigned int cache_level; 583 584 584 585 obj = i915_gem_object_alloc(dev_priv); 585 586 if (obj == NULL) ··· 591 590 592 591 obj->stolen = stolen; 593 592 obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; 594 - obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; 595 - obj->cache_coherent = true; /* assumptions! more like cache_oblivious */ 593 + cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; 594 + i915_gem_object_set_cache_coherency(obj, cache_level); 596 595 597 596 if (i915_gem_object_pin_pages(obj)) 598 597 goto cleanup;
+1 -3
drivers/gpu/drm/i915/i915_gem_userptr.c
··· 804 804 i915_gem_object_init(obj, &i915_gem_userptr_ops); 805 805 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 806 806 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 807 - obj->cache_level = I915_CACHE_LLC; 808 - obj->cache_coherent = i915_gem_object_is_coherent(obj); 809 - obj->cache_dirty = !obj->cache_coherent; 807 + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); 810 808 811 809 obj->userptr.ptr = args->user_ptr; 812 810 obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+3 -3
drivers/gpu/drm/i915/selftests/huge_gem_object.c
··· 111 111 dma_addr_t dma_size) 112 112 { 113 113 struct drm_i915_gem_object *obj; 114 + unsigned int cache_level; 114 115 115 116 GEM_BUG_ON(!phys_size || phys_size > dma_size); 116 117 GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); ··· 129 128 130 129 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 131 130 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 132 - obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; 133 - obj->cache_coherent = i915_gem_object_is_coherent(obj); 134 - obj->cache_dirty = !obj->cache_coherent; 131 + cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; 132 + i915_gem_object_set_cache_coherency(obj, cache_level); 135 133 obj->scratch = phys_size; 136 134 137 135 return obj;