Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm: move read_domains and write_domain into i915

i915 is the only driver using those fields in the drm_gem_object
structure, so they only waste memory for all other drivers.

Move the fields into drm_i915_gem_object instead and patch the i915 code
with the following sed commands:

sed -i "s/obj->base.read_domains/obj->read_domains/g" drivers/gpu/drm/i915/*.c drivers/gpu/drm/i915/*/*.c
sed -i "s/obj->base.write_domain/obj->write_domain/g" drivers/gpu/drm/i915/*.c drivers/gpu/drm/i915/*/*.c

The change is only compile-tested.

v2: move fields around as suggested by Chris.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180216124338.9087-1-christian.koenig@amd.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

authored by

Christian König and committed by
Chris Wilson
c0a51fd0 e1039626

+74 -74
+2 -2
drivers/gpu/drm/i915/gvt/dmabuf.c
··· 162 162 info->size << PAGE_SHIFT); 163 163 i915_gem_object_init(obj, &intel_vgpu_gem_ops); 164 164 165 - obj->base.read_domains = I915_GEM_DOMAIN_GTT; 166 - obj->base.write_domain = 0; 165 + obj->read_domains = I915_GEM_DOMAIN_GTT; 166 + obj->write_domain = 0; 167 167 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 168 168 unsigned int tiling_mode = 0; 169 169 unsigned int stride = 0;
+2 -2
drivers/gpu/drm/i915/i915_debugfs.c
··· 150 150 get_global_flag(obj), 151 151 get_pin_mapped_flag(obj), 152 152 obj->base.size / 1024, 153 - obj->base.read_domains, 154 - obj->base.write_domain, 153 + obj->read_domains, 154 + obj->write_domain, 155 155 i915_cache_level_str(dev_priv, obj->cache_level), 156 156 obj->mm.dirty ? " dirty" : "", 157 157 obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
+30 -30
drivers/gpu/drm/i915/i915_gem.c
··· 240 240 241 241 static void __start_cpu_write(struct drm_i915_gem_object *obj) 242 242 { 243 - obj->base.read_domains = I915_GEM_DOMAIN_CPU; 244 - obj->base.write_domain = I915_GEM_DOMAIN_CPU; 243 + obj->read_domains = I915_GEM_DOMAIN_CPU; 244 + obj->write_domain = I915_GEM_DOMAIN_CPU; 245 245 if (cpu_write_needs_clflush(obj)) 246 246 obj->cache_dirty = true; 247 247 } ··· 257 257 obj->mm.dirty = false; 258 258 259 259 if (needs_clflush && 260 - (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && 260 + (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 261 261 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 262 262 drm_clflush_sg(pages); 263 263 ··· 703 703 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 704 704 struct i915_vma *vma; 705 705 706 - if (!(obj->base.write_domain & flush_domains)) 706 + if (!(obj->write_domain & flush_domains)) 707 707 return; 708 708 709 - switch (obj->base.write_domain) { 709 + switch (obj->write_domain) { 710 710 case I915_GEM_DOMAIN_GTT: 711 711 i915_gem_flush_ggtt_writes(dev_priv); 712 712 ··· 731 731 break; 732 732 } 733 733 734 - obj->base.write_domain = 0; 734 + obj->write_domain = 0; 735 735 } 736 736 737 737 static inline int ··· 831 831 * anyway again before the next pread happens. 832 832 */ 833 833 if (!obj->cache_dirty && 834 - !(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) 834 + !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 835 835 *needs_clflush = CLFLUSH_BEFORE; 836 836 837 837 out: ··· 890 890 * Same trick applies to invalidate partially written 891 891 * cachelines read before writing. 
892 892 */ 893 - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) 893 + if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 894 894 *needs_clflush |= CLFLUSH_BEFORE; 895 895 } 896 896 ··· 2391 2391 * wasn't in the GTT, there shouldn't be any way it could have been in 2392 2392 * a GPU cache 2393 2393 */ 2394 - GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2395 - GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2394 + GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2395 + GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2396 2396 2397 2397 st = kmalloc(sizeof(*st), GFP_KERNEL); 2398 2398 if (st == NULL) ··· 3703 3703 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3704 3704 if (obj->cache_dirty) 3705 3705 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3706 - obj->base.write_domain = 0; 3706 + obj->write_domain = 0; 3707 3707 } 3708 3708 3709 3709 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) ··· 3740 3740 if (ret) 3741 3741 return ret; 3742 3742 3743 - if (obj->base.write_domain == I915_GEM_DOMAIN_WC) 3743 + if (obj->write_domain == I915_GEM_DOMAIN_WC) 3744 3744 return 0; 3745 3745 3746 3746 /* Flush and acquire obj->pages so that we are coherent through ··· 3761 3761 * coherent writes from the GPU, by effectively invalidating the 3762 3762 * WC domain upon first access. 3763 3763 */ 3764 - if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0) 3764 + if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3765 3765 mb(); 3766 3766 3767 3767 /* It should now be out of any other write domains, and we can update 3768 3768 * the domain values for our changes. 
3769 3769 */ 3770 - GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3771 - obj->base.read_domains |= I915_GEM_DOMAIN_WC; 3770 + GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3771 + obj->read_domains |= I915_GEM_DOMAIN_WC; 3772 3772 if (write) { 3773 - obj->base.read_domains = I915_GEM_DOMAIN_WC; 3774 - obj->base.write_domain = I915_GEM_DOMAIN_WC; 3773 + obj->read_domains = I915_GEM_DOMAIN_WC; 3774 + obj->write_domain = I915_GEM_DOMAIN_WC; 3775 3775 obj->mm.dirty = true; 3776 3776 } 3777 3777 ··· 3803 3803 if (ret) 3804 3804 return ret; 3805 3805 3806 - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3806 + if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3807 3807 return 0; 3808 3808 3809 3809 /* Flush and acquire obj->pages so that we are coherent through ··· 3824 3824 * coherent writes from the GPU, by effectively invalidating the 3825 3825 * GTT domain upon first access. 3826 3826 */ 3827 - if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3827 + if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3828 3828 mb(); 3829 3829 3830 3830 /* It should now be out of any other write domains, and we can update 3831 3831 * the domain values for our changes. 3832 3832 */ 3833 - GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3834 - obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3833 + GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3834 + obj->read_domains |= I915_GEM_DOMAIN_GTT; 3835 3835 if (write) { 3836 - obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3837 - obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3836 + obj->read_domains = I915_GEM_DOMAIN_GTT; 3837 + obj->write_domain = I915_GEM_DOMAIN_GTT; 3838 3838 obj->mm.dirty = true; 3839 3839 } 3840 3840 ··· 4146 4146 /* It should now be out of any other write domains, and we can update 4147 4147 * the domain values for our changes. 
4148 4148 */ 4149 - obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4149 + obj->read_domains |= I915_GEM_DOMAIN_GTT; 4150 4150 4151 4151 return vma; 4152 4152 ··· 4199 4199 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4200 4200 4201 4201 /* Flush the CPU cache if it's still invalid. */ 4202 - if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4202 + if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4203 4203 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4204 - obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4204 + obj->read_domains |= I915_GEM_DOMAIN_CPU; 4205 4205 } 4206 4206 4207 4207 /* It should now be out of any other write domains, and we can update 4208 4208 * the domain values for our changes. 4209 4209 */ 4210 - GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 4210 + GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4211 4211 4212 4212 /* If we're writing through the CPU, then the GPU read domains will 4213 4213 * need to be invalidated at next use. ··· 4643 4643 4644 4644 i915_gem_object_init(obj, &i915_gem_object_ops); 4645 4645 4646 - obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4647 - obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4646 + obj->write_domain = I915_GEM_DOMAIN_CPU; 4647 + obj->read_domains = I915_GEM_DOMAIN_CPU; 4648 4648 4649 4649 if (HAS_LLC(dev_priv)) 4650 4650 /* On some devices, we can have the GPU use the LLC (the CPU ··· 5702 5702 if (IS_ERR(obj)) 5703 5703 return obj; 5704 5704 5705 - GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); 5705 + GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 5706 5706 5707 5707 file = obj->base.filp; 5708 5708 offset = 0;
+1 -1
drivers/gpu/drm/i915/i915_gem_clflush.c
··· 177 177 } else if (obj->mm.pages) { 178 178 __i915_do_clflush(obj); 179 179 } else { 180 - GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); 180 + GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 181 181 } 182 182 183 183 obj->cache_dirty = false;
+2 -2
drivers/gpu/drm/i915/i915_gem_dmabuf.c
··· 330 330 * write-combined buffer or a delay through the chipset for GTT 331 331 * writes that do require us to treat GTT as a separate cache domain.) 332 332 */ 333 - obj->base.read_domains = I915_GEM_DOMAIN_GTT; 334 - obj->base.write_domain = 0; 333 + obj->read_domains = I915_GEM_DOMAIN_GTT; 334 + obj->write_domain = 0; 335 335 336 336 return &obj->base; 337 337
+5 -5
drivers/gpu/drm/i915/i915_gem_execbuffer.c
··· 1073 1073 u32 *cmd; 1074 1074 int err; 1075 1075 1076 - GEM_BUG_ON(vma->obj->base.write_domain & I915_GEM_DOMAIN_CPU); 1076 + GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU); 1077 1077 1078 1078 obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); 1079 1079 if (IS_ERR(obj)) ··· 1861 1861 i915_gem_active_set(&vma->last_read[idx], req); 1862 1862 list_move_tail(&vma->vm_link, &vma->vm->active_list); 1863 1863 1864 - obj->base.write_domain = 0; 1864 + obj->write_domain = 0; 1865 1865 if (flags & EXEC_OBJECT_WRITE) { 1866 - obj->base.write_domain = I915_GEM_DOMAIN_RENDER; 1866 + obj->write_domain = I915_GEM_DOMAIN_RENDER; 1867 1867 1868 1868 if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) 1869 1869 i915_gem_active_set(&obj->frontbuffer_write, req); 1870 1870 1871 - obj->base.read_domains = 0; 1871 + obj->read_domains = 0; 1872 1872 } 1873 - obj->base.read_domains |= I915_GEM_GPU_DOMAINS; 1873 + obj->read_domains |= I915_GEM_GPU_DOMAINS; 1874 1874 1875 1875 if (flags & EXEC_OBJECT_NEEDS_FENCE) 1876 1876 i915_gem_active_set(&vma->last_fence, req);
+2 -2
drivers/gpu/drm/i915/i915_gem_internal.c
··· 201 201 drm_gem_private_object_init(&i915->drm, &obj->base, size); 202 202 i915_gem_object_init(obj, &i915_gem_object_internal_ops); 203 203 204 - obj->base.read_domains = I915_GEM_DOMAIN_CPU; 205 - obj->base.write_domain = I915_GEM_DOMAIN_CPU; 204 + obj->read_domains = I915_GEM_DOMAIN_CPU; 205 + obj->write_domain = I915_GEM_DOMAIN_CPU; 206 206 207 207 cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; 208 208 i915_gem_object_set_cache_coherency(obj, cache_level);
+15
drivers/gpu/drm/i915/i915_gem_object.h
··· 148 148 #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) 149 149 unsigned int cache_dirty:1; 150 150 151 + /** 152 + * @read_domains: Read memory domains. 153 + * 154 + * These monitor which caches contain read/write data related to the 155 + * object. When transitioning from one set of domains to another, 156 + * the driver is called to ensure that caches are suitably flushed and 157 + * invalidated. 158 + */ 159 + u16 read_domains; 160 + 161 + /** 162 + * @write_domain: Corresponding unique write memory domain. 163 + */ 164 + u16 write_domain; 165 + 151 166 atomic_t frontbuffer_bits; 152 167 unsigned int frontbuffer_ggtt_origin; /* write once */ 153 168 struct i915_gem_active frontbuffer_write;
+1 -1
drivers/gpu/drm/i915/i915_gem_stolen.c
··· 516 516 i915_gem_object_init(obj, &i915_gem_object_stolen_ops); 517 517 518 518 obj->stolen = stolen; 519 - obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; 519 + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; 520 520 cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; 521 521 i915_gem_object_set_cache_coherency(obj, cache_level); 522 522
+2 -2
drivers/gpu/drm/i915/i915_gem_userptr.c
··· 798 798 799 799 drm_gem_private_object_init(dev, &obj->base, args->user_size); 800 800 i915_gem_object_init(obj, &i915_gem_userptr_ops); 801 - obj->base.read_domains = I915_GEM_DOMAIN_CPU; 802 - obj->base.write_domain = I915_GEM_DOMAIN_CPU; 801 + obj->read_domains = I915_GEM_DOMAIN_CPU; 802 + obj->write_domain = I915_GEM_DOMAIN_CPU; 803 803 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); 804 804 805 805 obj->userptr.ptr = args->user_ptr;
+2 -2
drivers/gpu/drm/i915/i915_gpu_error.c
··· 1021 1021 err->engine = __active_get_engine_id(&obj->frontbuffer_write); 1022 1022 1023 1023 err->gtt_offset = vma->node.start; 1024 - err->read_domains = obj->base.read_domains; 1025 - err->write_domain = obj->base.write_domain; 1024 + err->read_domains = obj->read_domains; 1025 + err->write_domain = obj->write_domain; 1026 1026 err->fence_reg = vma->fence ? vma->fence->id : -1; 1027 1027 err->tiling = i915_gem_object_get_tiling(obj); 1028 1028 err->dirty = obj->mm.dirty;
+2 -2
drivers/gpu/drm/i915/selftests/huge_gem_object.c
··· 129 129 drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); 130 130 i915_gem_object_init(obj, &huge_ops); 131 131 132 - obj->base.read_domains = I915_GEM_DOMAIN_CPU; 133 - obj->base.write_domain = I915_GEM_DOMAIN_CPU; 132 + obj->read_domains = I915_GEM_DOMAIN_CPU; 133 + obj->write_domain = I915_GEM_DOMAIN_CPU; 134 134 cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; 135 135 i915_gem_object_set_cache_coherency(obj, cache_level); 136 136 obj->scratch = phys_size;
+4 -4
drivers/gpu/drm/i915/selftests/huge_pages.c
··· 178 178 drm_gem_private_object_init(&i915->drm, &obj->base, size); 179 179 i915_gem_object_init(obj, &huge_page_ops); 180 180 181 - obj->base.write_domain = I915_GEM_DOMAIN_CPU; 182 - obj->base.read_domains = I915_GEM_DOMAIN_CPU; 181 + obj->write_domain = I915_GEM_DOMAIN_CPU; 182 + obj->read_domains = I915_GEM_DOMAIN_CPU; 183 183 obj->cache_level = I915_CACHE_NONE; 184 184 185 185 obj->mm.page_mask = page_mask; ··· 329 329 else 330 330 i915_gem_object_init(obj, &fake_ops); 331 331 332 - obj->base.write_domain = I915_GEM_DOMAIN_CPU; 333 - obj->base.read_domains = I915_GEM_DOMAIN_CPU; 332 + obj->write_domain = I915_GEM_DOMAIN_CPU; 333 + obj->read_domains = I915_GEM_DOMAIN_CPU; 334 334 obj->cache_level = I915_CACHE_NONE; 335 335 336 336 return obj;
+2 -2
drivers/gpu/drm/i915/selftests/i915_gem_context.c
··· 215 215 } 216 216 217 217 i915_gem_obj_finish_shmem_access(obj); 218 - obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 219 - obj->base.write_domain = 0; 218 + obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 219 + obj->write_domain = 0; 220 220 return 0; 221 221 } 222 222
+2 -2
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
··· 113 113 drm_gem_private_object_init(&i915->drm, &obj->base, size); 114 114 i915_gem_object_init(obj, &fake_ops); 115 115 116 - obj->base.write_domain = I915_GEM_DOMAIN_CPU; 117 - obj->base.read_domains = I915_GEM_DOMAIN_CPU; 116 + obj->write_domain = I915_GEM_DOMAIN_CPU; 117 + obj->read_domains = I915_GEM_DOMAIN_CPU; 118 118 obj->cache_level = I915_CACHE_NONE; 119 119 120 120 /* Preallocate the "backing storage" */
-15
include/drm/drm_gem.h
··· 116 116 int name; 117 117 118 118 /** 119 - * @read_domains: 120 - * 121 - * Read memory domains. These monitor which caches contain read/write data 122 - * related to the object. When transitioning from one set of domains 123 - * to another, the driver is called to ensure that caches are suitably 124 - * flushed and invalidated. 125 - */ 126 - uint32_t read_domains; 127 - 128 - /** 129 - * @write_domain: Corresponding unique write memory domain. 130 - */ 131 - uint32_t write_domain; 132 - 133 - /** 134 119 * @dma_buf: 135 120 * 136 121 * dma-buf associated with this GEM object.