Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/nvm: Manage nvm aux cleanup with devres

Move nvm teardown to a devm-managed action registered from xe_nvm_init().
This ensures the auxiliary NVM device is deleted on probe failure and
device detach without requiring explicit calls from remove paths.

As part of this, drop xe_nvm_fini() from xe_device_remove() and from the
survivability sysfs teardown, and remove the public xe_nvm_fini() API from
the header.

This fixes the warning below, which is triggered when probe fails after
xe_nvm_init() has run and xe_device_probe() is then called again:
"
[ 207.318152] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:03:00.0/xe.nvm.768'
[ 207.318157] CPU: 5 UID: 0 PID: 10261 Comm: modprobe Tainted: G B W 6.19.0-rc2-lgci-xe-kernel+ #223 PREEMPT(voluntary)
[ 207.318160] Tainted: [B]=BAD_PAGE, [W]=WARN
[ 207.318161] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 0812 02/24/2023
[ 207.318163] Call Trace:
[ 207.318163] <TASK>
[ 207.318165] dump_stack_lvl+0xa0/0xc0
[ 207.318170] dump_stack+0x10/0x20
[ 207.318171] sysfs_warn_dup+0xd5/0x110
[ 207.318175] sysfs_create_dir_ns+0x1f6/0x280
[ 207.318177] ? __pfx_sysfs_create_dir_ns+0x10/0x10
[ 207.318179] ? lock_acquire+0x1a4/0x2e0
[ 207.318182] ? __kasan_check_read+0x11/0x20
[ 207.318185] ? do_raw_spin_unlock+0x5c/0x240
[ 207.318187] kobject_add_internal+0x28d/0x8e0
[ 207.318189] kobject_add+0x11f/0x1f0
[ 207.318191] ? __pfx_kobject_add+0x10/0x10
[ 207.318193] ? lockdep_init_map_type+0x4b/0x230
[ 207.318195] ? get_device_parent.isra.0+0x43/0x4c0
[ 207.318197] ? kobject_get+0x55/0xf0
[ 207.318199] device_add+0x2d7/0x1500
[ 207.318201] ? __pfx_device_add+0x10/0x10
[ 207.318203] ? lockdep_init_map_type+0x4b/0x230
[ 207.318205] __auxiliary_device_add+0x99/0x140
[ 207.318208] xe_nvm_init+0x7a2/0xef0 [xe]
[ 207.318333] ? xe_devcoredump_init+0x80/0x110 [xe]
[ 207.318452] ? __devm_add_action+0x82/0xc0
[ 207.318454] ? fs_reclaim_release+0xc0/0x110
[ 207.318457] xe_device_probe+0x17dd/0x2c40 [xe]
[ 207.318574] ? __pfx___drm_dev_dbg+0x10/0x10
[ 207.318576] ? add_dr+0x180/0x220
[ 207.318579] ? __pfx___drmm_mutex_release+0x10/0x10
[ 207.318582] ? __pfx_xe_device_probe+0x10/0x10 [xe]
[ 207.318697] ? xe_pm_init_early+0x33a/0x410 [xe]
[ 207.318850] xe_pci_probe+0x936/0x1250 [xe]
[ 207.318999] ? lock_acquire+0x1a4/0x2e0
[ 207.319003] ? __pfx_xe_pci_probe+0x10/0x10 [xe]
[ 207.319151] local_pci_probe+0xe6/0x1a0
[ 207.319154] pci_device_probe+0x523/0x840
[ 207.319157] ? __pfx_pci_device_probe+0x10/0x10
[ 207.319159] ? sysfs_do_create_link_sd.isra.0+0x8c/0x110
[ 207.319162] ? sysfs_create_link+0x48/0xc0
...
"

Fixes: c28bfb107dac ("drm/xe/nvm: add on-die non-volatile memory device")
Reviewed-by: Alexander Usyskin <alexander.usyskin@intel.com>
Reviewed-by: Brian Nguyen <brian3.nguyen@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Riana Tauro <riana.tauro@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patch.msgid.link/20260120183239.2966782-6-shuicheng.lin@intel.com
(cherry picked from commit 11035eab1b7d88daa7904440046e64d3810b1ca1)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

authored by

Shuicheng Lin and committed by
Thomas Hellström
2da8fbb8 c1ed856c

+22 -25
-2
drivers/gpu/drm/xe/xe_device.c
··· 984 984 { 985 985 xe_display_unregister(xe); 986 986 987 - xe_nvm_fini(xe); 988 - 989 987 drm_dev_unplug(&xe->drm); 990 988 991 989 xe_bo_pci_dev_remove_all(xe);
+22 -21
drivers/gpu/drm/xe/xe_nvm.c
··· 83 83 return writable_override; 84 84 } 85 85 86 + static void xe_nvm_fini(void *arg) 87 + { 88 + struct xe_device *xe = arg; 89 + struct intel_dg_nvm_dev *nvm = xe->nvm; 90 + 91 + if (!xe->info.has_gsc_nvm) 92 + return; 93 + 94 + /* No access to internal NVM from VFs */ 95 + if (IS_SRIOV_VF(xe)) 96 + return; 97 + 98 + /* Nvm pointer should not be NULL here */ 99 + if (WARN_ON(!nvm)) 100 + return; 101 + 102 + auxiliary_device_delete(&nvm->aux_dev); 103 + auxiliary_device_uninit(&nvm->aux_dev); 104 + xe->nvm = NULL; 105 + } 106 + 86 107 int xe_nvm_init(struct xe_device *xe) 87 108 { 88 109 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); ··· 162 141 auxiliary_device_uninit(aux_dev); 163 142 goto err; 164 143 } 165 - return 0; 144 + return devm_add_action_or_reset(xe->drm.dev, xe_nvm_fini, xe); 166 145 167 146 err: 168 147 kfree(nvm); 169 148 xe->nvm = NULL; 170 149 return ret; 171 - } 172 - 173 - void xe_nvm_fini(struct xe_device *xe) 174 - { 175 - struct intel_dg_nvm_dev *nvm = xe->nvm; 176 - 177 - if (!xe->info.has_gsc_nvm) 178 - return; 179 - 180 - /* No access to internal NVM from VFs */ 181 - if (IS_SRIOV_VF(xe)) 182 - return; 183 - 184 - /* Nvm pointer should not be NULL here */ 185 - if (WARN_ON(!nvm)) 186 - return; 187 - 188 - auxiliary_device_delete(&nvm->aux_dev); 189 - auxiliary_device_uninit(&nvm->aux_dev); 190 - xe->nvm = NULL; 191 150 }
-2
drivers/gpu/drm/xe/xe_nvm.h
··· 10 10 11 11 int xe_nvm_init(struct xe_device *xe); 12 12 13 - void xe_nvm_fini(struct xe_device *xe); 14 - 15 13 #endif