Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/gtt/xehpsdv: move scratch page to system memory

On some platforms the hw has dropped support for 4K GTT pages when
dealing with LMEM, and due to the design of 64K GTT pages in the hw, we
can only mark the *entire* page-table as operating in 64K GTT mode,
since the enable bit is still on the pde, and not the pte. And since we
still need to allow 4K GTT pages for SMEM objects, we can't have a
"normal" 4K page-table with scratch pointing to LMEM, since that's
undefined from the hw point of view. The simplest solution is to just move the 64K
scratch page to SMEM on such platforms and call it a day, since that
should work for all configurations.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Reviewed-by: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211208141613.7251-4-ramalingam.c@intel.com

authored by

Matthew Auld and committed by
Ramalingam C
fef53be0 ca921624

+30 -3
+1
drivers/gpu/drm/i915/gt/gen6_ppgtt.c
··· 454 454 ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; 455 455 456 456 ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; 457 + ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma; 457 458 ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; 458 459 459 460 err = gen6_ppgtt_init_scratch(ppgtt);
+21 -2
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
··· 776 776 */ 777 777 ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); 778 778 779 - if (HAS_LMEM(gt->i915)) 779 + if (HAS_LMEM(gt->i915)) { 780 780 ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; 781 - else 781 + 782 + /* 783 + * On some platforms the hw has dropped support for 4K GTT pages 784 + * when dealing with LMEM, and due to the design of 64K GTT 785 + * pages in the hw, we can only mark the *entire* page-table as 786 + * operating in 64K GTT mode, since the enable bit is still on 787 + * the pde, and not the pte. And since we still need to allow 788 + * 4K GTT pages for SMEM objects, we can't have a "normal" 4K 789 + * page-table with scratch pointing to LMEM, since that's 790 + * undefined from the hw pov. The simplest solution is to just 791 + * move the 64K scratch page to SMEM on such platforms and call 792 + * it a day, since that should work for all configurations. 793 + */ 794 + if (HAS_64K_PAGES(gt->i915)) 795 + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; 796 + else 797 + ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem; 798 + } else { 782 799 ppgtt->vm.alloc_pt_dma = alloc_pt_dma; 800 + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; 801 + } 783 802 784 803 err = gen8_init_scratch(&ppgtt->vm); 785 804 if (err)
+3
drivers/gpu/drm/i915/gt/intel_ggtt.c
··· 924 924 size = gen8_get_total_gtt_size(snb_gmch_ctl); 925 925 926 926 ggtt->vm.alloc_pt_dma = alloc_pt_dma; 927 + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; 927 928 ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; 928 929 929 930 ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; ··· 1078 1077 ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; 1079 1078 1080 1079 ggtt->vm.alloc_pt_dma = alloc_pt_dma; 1080 + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; 1081 1081 1082 1082 ggtt->vm.clear_range = nop_clear_range; 1083 1083 if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) ··· 1131 1129 (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); 1132 1130 1133 1131 ggtt->vm.alloc_pt_dma = alloc_pt_dma; 1132 + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; 1134 1133 1135 1134 if (needs_idle_maps(i915)) { 1136 1135 drm_notice(&i915->drm,
+1 -1
drivers/gpu/drm/i915/gt/intel_gtt.c
··· 301 301 do { 302 302 struct drm_i915_gem_object *obj; 303 303 304 - obj = vm->alloc_pt_dma(vm, size); 304 + obj = vm->alloc_scratch_dma(vm, size); 305 305 if (IS_ERR(obj)) 306 306 goto skip; 307 307
+2
drivers/gpu/drm/i915/gt/intel_gtt.h
··· 268 268 269 269 struct drm_i915_gem_object * 270 270 (*alloc_pt_dma)(struct i915_address_space *vm, int sz); 271 + struct drm_i915_gem_object * 272 + (*alloc_scratch_dma)(struct i915_address_space *vm, int sz); 271 273 272 274 u64 (*pte_encode)(dma_addr_t addr, 273 275 enum i915_cache_level level,
+2
drivers/gpu/drm/i915/selftests/mock_gtt.c
··· 78 78 i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); 79 79 80 80 ppgtt->vm.alloc_pt_dma = alloc_pt_dma; 81 + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; 81 82 82 83 ppgtt->vm.clear_range = mock_clear_range; 83 84 ppgtt->vm.insert_page = mock_insert_page; ··· 119 118 ggtt->vm.total = 4096 * PAGE_SIZE; 120 119 121 120 ggtt->vm.alloc_pt_dma = alloc_pt_dma; 121 + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; 122 122 123 123 ggtt->vm.clear_range = mock_clear_range; 124 124 ggtt->vm.insert_page = mock_insert_page;