Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/eeh: Manage EEH_PE_RECOVERING inside eeh_handle_normal_event()

Currently the EEH_PE_RECOVERING flag for a PE is managed by both the
caller and callee of eeh_handle_normal_event() (among other places not
considered here). This is complicated by the fact that the PE may
or may not have been invalidated by the call.

So move the caller's handling into eeh_handle_normal_event(), which
clarifies it and allows the return type to be changed to void (because
it no longer needs to indicate that the PE has been invalidated).

This should not change behaviour except in eeh_event_handler() where
it was previously possible to cause eeh_pe_state_clear() to be called
on an invalid PE, which is now avoided.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

authored by

Sam Bobroff and committed by
Michael Ellerman
37fd8125 68701780

+12 -21
+1 -1
arch/powerpc/include/asm/eeh_event.h
··· 34 34 int eeh_event_init(void); 35 35 int eeh_send_failure_event(struct eeh_pe *pe); 36 36 void eeh_remove_event(struct eeh_pe *pe, bool force); 37 - bool eeh_handle_normal_event(struct eeh_pe *pe); 37 + void eeh_handle_normal_event(struct eeh_pe *pe); 38 38 void eeh_handle_special_event(void); 39 39 40 40 #endif /* __KERNEL__ */
+11 -18
arch/powerpc/kernel/eeh_driver.c
··· 733 733 734 734 /** 735 735 * eeh_handle_normal_event - Handle EEH events on a specific PE 736 - * @pe: EEH PE 736 + * @pe: EEH PE - which should not be used after we return, as it may 737 + * have been invalidated. 737 738 * 738 739 * Attempts to recover the given PE. If recovery fails or the PE has failed 739 740 * too many times, remove the PE. ··· 751 750 * & devices under this slot, and then finally restarting the device 752 751 * drivers (which cause a second set of hotplug events to go out to 753 752 * userspace). 754 - * 755 - * Returns true if @pe should no longer be used, else false. 756 753 */ 757 - bool eeh_handle_normal_event(struct eeh_pe *pe) 754 + void eeh_handle_normal_event(struct eeh_pe *pe) 758 755 { 759 756 struct pci_bus *frozen_bus; 760 757 struct eeh_dev *edev, *tmp; ··· 764 765 if (!frozen_bus) { 765 766 pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", 766 767 __func__, pe->phb->global_number, pe->addr); 767 - return false; 768 + return; 768 769 } 770 + 771 + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); 769 772 770 773 eeh_pe_update_time_stamp(pe); 771 774 pe->freeze_count++; ··· 905 904 pr_info("EEH: Notify device driver to resume\n"); 906 905 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); 907 906 908 - return false; 907 + goto final; 909 908 910 909 hard_fail: 911 910 /* ··· 941 940 pci_lock_rescan_remove(); 942 941 pci_hp_remove_devices(frozen_bus); 943 942 pci_unlock_rescan_remove(); 944 - 945 943 /* The passed PE should no longer be used */ 946 - return true; 944 + return; 947 945 } 948 946 } 949 - return false; 947 + final: 948 + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 950 949 } 951 950 952 951 /** ··· 1019 1018 */ 1020 1019 if (rc == EEH_NEXT_ERR_FROZEN_PE || 1021 1020 rc == EEH_NEXT_ERR_FENCED_PHB) { 1022 - /* 1023 - * eeh_handle_normal_event() can make the PE stale if it 1024 - * determines that the PE cannot possibly be recovered. 1025 - * Don't modify the PE state if that's the case. 
1026 - */ 1027 - if (eeh_handle_normal_event(pe)) 1028 - continue; 1029 - 1030 - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 1021 + eeh_handle_normal_event(pe); 1031 1022 } else { 1032 1023 pci_lock_rescan_remove(); 1033 1024 list_for_each_entry(hose, &hose_list, list_node) {
-2
arch/powerpc/kernel/eeh_event.c
··· 73 73 /* We might have event without binding PE */ 74 74 pe = event->pe; 75 75 if (pe) { 76 - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); 77 76 if (pe->type & EEH_PE_PHB) 78 77 pr_info("EEH: Detected error on PHB#%x\n", 79 78 pe->phb->global_number); ··· 81 82 "PHB#%x-PE#%x\n", 82 83 pe->phb->global_number, pe->addr); 83 84 eeh_handle_normal_event(pe); 84 - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 85 85 } else { 86 86 eeh_handle_special_event(); 87 87 }