Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/pseries/iommu: DLPAR add doesn't completely initialize pci_controller

When a PCI device is dynamically added, the kernel oopses with a NULL
pointer dereference:

BUG: Kernel NULL pointer dereference on read at 0x00000030
Faulting instruction address: 0xc0000000006bbe5c
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
Modules linked in: rpadlpar_io rpaphp rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs xsk_diag bonding nft_compat nf_tables nfnetlink rfkill binfmt_misc dm_multipath rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_umad ib_iser libiscsi scsi_transport_iscsi ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core pseries_rng drm drm_panel_orientation_quirks xfs libcrc32c mlx5_core mlxfw sd_mod t10_pi sg tls ibmvscsi ibmveth scsi_transport_srp vmx_crypto pseries_wdt psample dm_mirror dm_region_hash dm_log dm_mod fuse
CPU: 17 PID: 2685 Comm: drmgr Not tainted 6.7.0-203405+ #66
Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_008) hv:phyp pSeries
NIP: c0000000006bbe5c LR: c000000000a13e68 CTR: c0000000000579f8
REGS: c00000009924f240 TRAP: 0300 Not tainted (6.7.0-203405+)
MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 24002220 XER: 20040006
CFAR: c000000000a13e64 DAR: 0000000000000030 DSISR: 40000000 IRQMASK: 0
...
NIP sysfs_add_link_to_group+0x34/0x94
LR iommu_device_link+0x5c/0x118
Call Trace:
iommu_init_device+0x26c/0x318 (unreliable)
iommu_device_link+0x5c/0x118
iommu_init_device+0xa8/0x318
iommu_probe_device+0xc0/0x134
iommu_bus_notifier+0x44/0x104
notifier_call_chain+0xb8/0x19c
blocking_notifier_call_chain+0x64/0x98
bus_notify+0x50/0x7c
device_add+0x640/0x918
pci_device_add+0x23c/0x298
of_create_pci_dev+0x400/0x884
of_scan_pci_dev+0x124/0x1b0
__of_scan_bus+0x78/0x18c
pcibios_scan_phb+0x2a4/0x3b0
init_phb_dynamic+0xb8/0x110
dlpar_add_slot+0x170/0x3b8 [rpadlpar_io]
add_slot_store.part.0+0xb4/0x130 [rpadlpar_io]
kobj_attr_store+0x2c/0x48
sysfs_kf_write+0x64/0x78
kernfs_fop_write_iter+0x1b0/0x290
vfs_write+0x350/0x4a0
ksys_write+0x84/0x140
system_call_exception+0x124/0x330
system_call_vectored_common+0x15c/0x2ec

Commit a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities
and allow blocking domains") broke DLPAR add of PCI devices.

That commit added an iommu_device structure to struct pci_controller.
During system boot, PCI devices are discovered and this newly added
iommu_device structure is initialized by a call to iommu_device_register().

During DLPAR add of a PCI device, a new pci_controller structure is
allocated, but iommu_device_register() is never called for it, so the
embedded iommu_device is left uninitialized.

The fix is to register the iommu device during DLPAR add as well, and to unregister it when the PHB is removed.

Fixes: a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains")
Signed-off-by: Gaurav Batra <gbatra@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240215221833.4817-1-gbatra@linux.ibm.com

authored by

Gaurav Batra and committed by
Michael Ellerman
a5c57fd2 0846dd77

+31 -6
+10
arch/powerpc/include/asm/ppc-pci.h
··· 30 30 void *data); 31 31 extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); 32 32 33 + #if defined(CONFIG_IOMMU_API) && (defined(CONFIG_PPC_PSERIES) || \ 34 + defined(CONFIG_PPC_POWERNV)) 35 + extern void ppc_iommu_register_device(struct pci_controller *phb); 36 + extern void ppc_iommu_unregister_device(struct pci_controller *phb); 37 + #else 38 + static inline void ppc_iommu_register_device(struct pci_controller *phb) { } 39 + static inline void ppc_iommu_unregister_device(struct pci_controller *phb) { } 40 + #endif 41 + 42 + 33 43 /* From rtas_pci.h */ 34 44 extern void init_pci_config_tokens (void); 35 45 extern unsigned long get_phb_buid (struct device_node *);
+17 -6
arch/powerpc/kernel/iommu.c
··· 1341 1341 struct pci_controller *hose; 1342 1342 1343 1343 if (!dev_is_pci(dev)) 1344 - return ERR_PTR(-EPERM); 1344 + return ERR_PTR(-ENODEV); 1345 1345 1346 1346 pdev = to_pci_dev(dev); 1347 1347 hose = pdev->bus->sysdata; ··· 1390 1390 NULL, 1391 1391 }; 1392 1392 1393 + void ppc_iommu_register_device(struct pci_controller *phb) 1394 + { 1395 + iommu_device_sysfs_add(&phb->iommu, phb->parent, 1396 + spapr_tce_iommu_groups, "iommu-phb%04x", 1397 + phb->global_number); 1398 + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, 1399 + phb->parent); 1400 + } 1401 + 1402 + void ppc_iommu_unregister_device(struct pci_controller *phb) 1403 + { 1404 + iommu_device_unregister(&phb->iommu); 1405 + iommu_device_sysfs_remove(&phb->iommu); 1406 + } 1407 + 1393 1408 /* 1394 1409 * This registers IOMMU devices of PHBs. This needs to happen 1395 1410 * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and ··· 1415 1400 struct pci_controller *hose; 1416 1401 1417 1402 list_for_each_entry(hose, &hose_list, list_node) { 1418 - iommu_device_sysfs_add(&hose->iommu, hose->parent, 1419 - spapr_tce_iommu_groups, "iommu-phb%04x", 1420 - hose->global_number); 1421 - iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, 1422 - hose->parent); 1403 + ppc_iommu_register_device(hose); 1423 1404 } 1424 1405 return 0; 1425 1406 }
+4
arch/powerpc/platforms/pseries/pci_dlpar.c
··· 35 35 36 36 pseries_msi_allocate_domains(phb); 37 37 38 + ppc_iommu_register_device(phb); 39 + 38 40 /* Create EEH devices for the PHB */ 39 41 eeh_phb_pe_create(phb); 40 42 ··· 77 75 return 1; 78 76 } 79 77 } 78 + 79 + ppc_iommu_unregister_device(phb); 80 80 81 81 pseries_msi_free_domains(phb); 82 82