Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "drm/xe/lnl: Offload system clear page activity to GPU"

This optimization relied on having to clear CCS on allocations.
If there is no need to clear CCS on allocations then this would mostly
help in reducing CPU utilization.

Revert this patch for now because:
1. Currently Xe can't do clear on free and is using an invalid ttm flag,
TTM_TT_FLAG_CLEARED_ON_FREE, which could poison the global ttm pool on
multi-device setups.

2. Also, for LNL, CPU:WB doesn't require clearing CCS, as such a BO will
not be allowed to bind with a compression PTE. A subsequent patch will
disable clearing CCS for CPU:WB BOs on LNL.

This reverts commit 23683061805be368c8d1c7e7ff52abc470cac275.

Cc: Christian König <christian.koenig@amd.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240828083635.23601-1-nirmoy.das@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>

authored by

Nirmoy Das and committed by
Lucas De Marchi
7546a820 014125c6

+2 -38
+2 -24
drivers/gpu/drm/xe/xe_bo.c
··· 396 396 caching = ttm_uncached; 397 397 } 398 398 399 - /* 400 - * If the device can support gpu clear system pages then set proper ttm 401 - * flag. Zeroed pages are only required for ttm_bo_type_device so 402 - * unwanted data is not leaked to userspace. 403 - */ 404 - if (ttm_bo->type == ttm_bo_type_device && xe->mem.gpu_page_clear_sys) 405 - page_flags |= TTM_TT_FLAG_CLEARED_ON_FREE; 406 - 407 399 err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); 408 400 if (err) { 409 401 kfree(tt); ··· 416 424 */ 417 425 if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) 418 426 return 0; 419 - 420 - /* Clear TTM_TT_FLAG_ZERO_ALLOC when GPU is set to clear system pages */ 421 - if (tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE) 422 - tt->page_flags &= ~TTM_TT_FLAG_ZERO_ALLOC; 423 427 424 428 err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); 425 429 if (err) ··· 659 671 bool needs_clear; 660 672 bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && 661 673 ttm && ttm_tt_is_populated(ttm)) ? true : false; 662 - bool clear_system_pages; 663 674 int ret = 0; 664 - 665 - /* 666 - * Clear TTM_TT_FLAG_CLEARED_ON_FREE on bo creation path when 667 - * moving to system as the bo doesn't have dma_mapping. 668 - */ 669 - if (!old_mem && ttm && !ttm_tt_is_populated(ttm)) 670 - ttm->page_flags &= ~TTM_TT_FLAG_CLEARED_ON_FREE; 671 675 672 676 /* Bo creation path, moving to system or TT. */ 673 677 if ((!old_mem && ttm) && !handle_system_ccs) { ··· 683 703 move_lacks_source = handle_system_ccs ? 
(!bo->ccs_cleared) : 684 704 (!mem_type_is_vram(old_mem_type) && !tt_has_data); 685 705 686 - clear_system_pages = ttm && (ttm->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE); 687 706 needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || 688 - (!ttm && ttm_bo->type == ttm_bo_type_device) || 689 - clear_system_pages; 707 + (!ttm && ttm_bo->type == ttm_bo_type_device); 690 708 691 709 if (new_mem->mem_type == XE_PL_TT) { 692 710 ret = xe_tt_map_sg(ttm); ··· 796 818 if (move_lacks_source) { 797 819 u32 flags = 0; 798 820 799 - if (mem_type_is_vram(new_mem->mem_type) || clear_system_pages) 821 + if (mem_type_is_vram(new_mem->mem_type)) 800 822 flags |= XE_MIGRATE_CLEAR_FLAG_FULL; 801 823 else if (handle_system_ccs) 802 824 flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
-2
drivers/gpu/drm/xe/xe_device_types.h
··· 339 339 struct xe_mem_region vram; 340 340 /** @mem.sys_mgr: system TTM manager */ 341 341 struct ttm_resource_manager sys_mgr; 342 - /** @mem.gpu_page_clear_sys: clear system pages offloaded to GPU */ 343 - bool gpu_page_clear_sys; 344 342 } mem; 345 343 346 344 /** @sriov: device level virtualization data */
-12
drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
··· 117 117 ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); 118 118 ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); 119 119 ttm_resource_manager_set_used(man, true); 120 - 121 - /* 122 - * On iGFX device with flat CCS, we clear CCS metadata, let's extend that 123 - * and use GPU to clear pages as well. 124 - * 125 - * Disable this when init_on_free and/or init_on_alloc is on to avoid double 126 - * zeroing pages with CPU and GPU. 127 - */ 128 - if (xe_device_has_flat_ccs(xe) && !IS_DGFX(xe) && 129 - !want_init_on_alloc(GFP_USER) && !want_init_on_free()) 130 - xe->mem.gpu_page_clear_sys = true; 131 - 132 120 return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); 133 121 }