Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: ipr: Enable/disable IRQD_NO_BALANCING during reset

A dynamic remove/add storage adapter test hits EEH on PowerPC:

EEH: [c00000000004f75c] __eeh_send_failure_event+0x7c/0x160
EEH: [c000000000048444] eeh_dev_check_failure.part.0+0x254/0x650
EEH: [c008000001650678] eeh_readl+0x60/0x90 [ipr]
EEH: [c00800000166746c] ipr_cancel_op+0x2b8/0x524 [ipr]
EEH: [c008000001656524] ipr_eh_abort+0x6c/0x130 [ipr]
EEH: [c000000000ab0d20] scmd_eh_abort_handler+0x140/0x440
EEH: [c00000000017e558] process_one_work+0x298/0x590
EEH: [c00000000017eef8] worker_thread+0xa8/0x620
EEH: [c00000000018be34] kthread+0x124/0x130
EEH: [c00000000000cd64] ret_from_kernel_thread+0x5c/0x64

A PCIe bus trace reveals that a vector of MSI-X is cleared to 0 by
irqbalance daemon. If we disable irqbalance daemon, we won't see the
issue.

With debug enabled in ipr driver:

[ 44.103071] ipr: Entering __ipr_remove
[ 44.103083] ipr: Entering ipr_initiate_ioa_bringdown
[ 44.103091] ipr: Entering ipr_reset_shutdown_ioa
[ 44.103099] ipr: Leaving ipr_reset_shutdown_ioa
[ 44.103105] ipr: Leaving ipr_initiate_ioa_bringdown
[ 44.149918] ipr: Entering ipr_reset_ucode_download
[ 44.149935] ipr: Entering ipr_reset_alert
[ 44.150032] ipr: Entering ipr_reset_start_timer
[ 44.150038] ipr: Leaving ipr_reset_alert
[ 44.244343] scsi 1:2:3:0: alua: Detached
[ 44.254300] ipr: Entering ipr_reset_start_bist
[ 44.254320] ipr: Entering ipr_reset_start_timer
[ 44.254325] ipr: Leaving ipr_reset_start_bist
[ 44.364329] scsi 1:2:4:0: alua: Detached
[ 45.134341] scsi 1:2:5:0: alua: Detached
[ 45.860949] ipr: Entering ipr_reset_shutdown_ioa
[ 45.860962] ipr: Leaving ipr_reset_shutdown_ioa
[ 45.860966] ipr: Entering ipr_reset_alert
[ 45.861028] ipr: Entering ipr_reset_start_timer
[ 45.861035] ipr: Leaving ipr_reset_alert
[ 45.964302] ipr: Entering ipr_reset_start_bist
[ 45.964309] ipr: Entering ipr_reset_start_timer
[ 45.964313] ipr: Leaving ipr_reset_start_bist
[ 46.264301] ipr: Entering ipr_reset_bist_done
[ 46.264309] ipr: Leaving ipr_reset_bist_done

During adapter reset, ipr device driver blocks config space access but
can't block MMIO access for MSI-X entries. There is very small window:
irqbalance daemon kicks in during adapter reset before ipr driver calls
pci_restore_state(pdev) to restore MSI-X table.

irqbalance daemon reads back all 0 for that MSI-X vector in
__pci_read_msi_msg().

irqbalance daemon:

msi_domain_set_affinity()
->irq_chip_set_affinity_patent()
->xive_irq_set_affinity()
->irq_chip_compose_msi_msg()
->pseries_msi_compose_msg()
->__pci_read_msi_msg(): read all 0 since didn't call pci_restore_state
->irq_chip_write_msi_msg()
-> pci_write_msg_msi(): write 0 to the msix vector entry

When ipr driver calls pci_restore_state(pdev) in
ipr_reset_restore_cfg_space(), the MSI-X vector entry has been cleared
by irqbalance daemon in pci_write_msg_msix().

pci_restore_state()
->__pci_restore_msix_state()

Below is the MSI-X table for ipr adapter after irqbalance daemon kicked
in during adapter reset:

Dump MSIx table: index=0 address_lo=c800 address_hi=10000000 msg_data=0
Dump MSIx table: index=1 address_lo=c810 address_hi=10000000 msg_data=0
Dump MSIx table: index=2 address_lo=c820 address_hi=10000000 msg_data=0
Dump MSIx table: index=3 address_lo=c830 address_hi=10000000 msg_data=0
Dump MSIx table: index=4 address_lo=c840 address_hi=10000000 msg_data=0
Dump MSIx table: index=5 address_lo=c850 address_hi=10000000 msg_data=0
Dump MSIx table: index=6 address_lo=c860 address_hi=10000000 msg_data=0
Dump MSIx table: index=7 address_lo=c870 address_hi=10000000 msg_data=0
Dump MSIx table: index=8 address_lo=0 address_hi=0 msg_data=0
---------> Hit EEH since msix vector of index=8 are 0
Dump MSIx table: index=9 address_lo=c890 address_hi=10000000 msg_data=0
Dump MSIx table: index=10 address_lo=c8a0 address_hi=10000000 msg_data=0
Dump MSIx table: index=11 address_lo=c8b0 address_hi=10000000 msg_data=0
Dump MSIx table: index=12 address_lo=c8c0 address_hi=10000000 msg_data=0
Dump MSIx table: index=13 address_lo=c8d0 address_hi=10000000 msg_data=0
Dump MSIx table: index=14 address_lo=c8e0 address_hi=10000000 msg_data=0
Dump MSIx table: index=15 address_lo=c8f0 address_hi=10000000 msg_data=0

[ 46.264312] ipr: Entering ipr_reset_restore_cfg_space
[ 46.267439] ipr: Entering ipr_fail_all_ops
[ 46.267447] ipr: Leaving ipr_fail_all_ops
[ 46.267451] ipr: Leaving ipr_reset_restore_cfg_space
[ 46.267454] ipr: Entering ipr_ioa_bringdown_done
[ 46.267458] ipr: Leaving ipr_ioa_bringdown_done
[ 46.267467] ipr: Entering ipr_worker_thread
[ 46.267470] ipr: Leaving ipr_worker_thread

IRQ balancing is not required during adapter reset.

Enable "IRQ_NO_BALANCING" flag before starting adapter reset and disable
it after calling pci_restore_state(). The irqbalance daemon is disabled
for this short period of time (~2s).

Co-developed-by: Kyle Mahlkuch <Kyle.Mahlkuch@ibm.com>
Signed-off-by: Kyle Mahlkuch <Kyle.Mahlkuch@ibm.com>
Signed-off-by: Wen Xiong <wenxiong@linux.ibm.com>
Link: https://patch.msgid.link/20251028142427.3969819-2-wenxiong@linux.ibm.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Wen Xiong and committed by
Martin K. Petersen
6ac3484f ab58153e

+27 -1
+27 -1
drivers/scsi/ipr.c
··· 61 61 #include <linux/hdreg.h> 62 62 #include <linux/reboot.h> 63 63 #include <linux/stringify.h> 64 + #include <linux/irq.h> 64 65 #include <asm/io.h> 65 - #include <asm/irq.h> 66 66 #include <asm/processor.h> 67 67 #include <scsi/scsi.h> 68 68 #include <scsi/scsi_host.h> ··· 7844 7844 } 7845 7845 7846 7846 /** 7847 + * ipr_set_affinity_nobalance 7848 + * @ioa_cfg: ipr_ioa_cfg struct for an ipr device 7849 + * @flag: bool 7850 + * true: ensable "IRQ_NO_BALANCING" bit for msix interrupt 7851 + * false: disable "IRQ_NO_BALANCING" bit for msix interrupt 7852 + * Description: This function will be called to disable/enable 7853 + * "IRQ_NO_BALANCING" to avoid irqbalance daemon 7854 + * kicking in during adapter reset. 7855 + **/ 7856 + static void ipr_set_affinity_nobalance(struct ipr_ioa_cfg *ioa_cfg, bool flag) 7857 + { 7858 + int irq, i; 7859 + 7860 + for (i = 0; i < ioa_cfg->nvectors; i++) { 7861 + irq = pci_irq_vector(ioa_cfg->pdev, i); 7862 + 7863 + if (flag) 7864 + irq_set_status_flags(irq, IRQ_NO_BALANCING); 7865 + else 7866 + irq_clear_status_flags(irq, IRQ_NO_BALANCING); 7867 + } 7868 + } 7869 + 7870 + /** 7847 7871 * ipr_reset_restore_cfg_space - Restore PCI config space. 7848 7872 * @ipr_cmd: ipr command struct 7849 7873 * ··· 7891 7867 return IPR_RC_JOB_CONTINUE; 7892 7868 } 7893 7869 7870 + ipr_set_affinity_nobalance(ioa_cfg, false); 7894 7871 ipr_fail_all_ops(ioa_cfg); 7895 7872 7896 7873 if (ioa_cfg->sis64) { ··· 7971 7946 rc = pci_write_config_byte(ioa_cfg->pdev, PCI_BIST, PCI_BIST_START); 7972 7947 7973 7948 if (rc == PCIBIOS_SUCCESSFUL) { 7949 + ipr_set_affinity_nobalance(ioa_cfg, true); 7974 7950 ipr_cmd->job_step = ipr_reset_bist_done; 7975 7951 ipr_reset_start_timer(ipr_cmd, IPR_WAIT_FOR_BIST_TIMEOUT); 7976 7952 rc = IPR_RC_JOB_RETURN;