Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI AER: multiple error support

When a root port receives the same errors more than once before the
kernel processes them, the hardware sets the Multiple Error Messages
Received flags. Because the root port can save only one correctable
error source ID and one uncorrectable error source ID at a time, the
ID of the second message's sender is lost if the two messages are sent
from two different devices. This patch makes the kernel search all
devices under the root port when multiple messages are received.

Reviewed-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

authored by

Zhang, Yanmin and committed by
Jesse Barnes
3d5505c5 28eb27cf

+69 -23
+3 -1
drivers/pci/pcie/aer/aerdrv.h
··· 57 57 unsigned int dw3; 58 58 }; 59 59 60 + #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ 60 61 struct aer_err_info { 61 - struct pci_dev *dev; 62 + struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; 63 + int error_dev_num; 62 64 u16 id; 63 65 int severity; /* 0:NONFATAL | 1:FATAL | 2:COR */ 64 66 int flags;
+66 -22
drivers/pci/pcie/aer/aerdrv_core.c
··· 158 158 return 0; 159 159 } 160 160 161 + static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) 162 + { 163 + if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { 164 + e_info->dev[e_info->error_dev_num] = dev; 165 + e_info->error_dev_num++; 166 + return 1; 167 + } else 168 + return 0; 169 + } 170 + 171 + 161 172 #define PCI_BUS(x) (((x) >> 8) & 0xff) 162 173 163 174 static int find_device_iter(struct pci_dev *dev, void *data) ··· 187 176 if (!nosourceid && (PCI_BUS(e_info->id) != 0)) { 188 177 result = compare_device_id(dev, e_info); 189 178 if (result) 190 - e_info->dev = dev; 191 - return result; 179 + add_error_device(e_info, dev); 180 + 181 + /* 182 + * If there is no multiple error, we stop 183 + * or continue based on the id comparing. 184 + */ 185 + if (!(e_info->flags & AER_MULTI_ERROR_VALID_FLAG)) 186 + return result; 187 + 188 + /* 189 + * If there are multiple errors and id does match, 190 + * We need continue to search other devices under 191 + * the root port. Return 0 means that. 192 + */ 193 + if (result) 194 + return 0; 192 195 } 193 196 194 197 /* 195 - * Next is to check when bus id is equal to 0 or 196 - * nosourceid==y. Some ports might lose the bus 197 - * id of error source id. We check AER status 198 - * registers to find the initial reporter. 198 + * When either 199 + * 1) nosourceid==y; 200 + * 2) bus id is equal to 0. Some ports might lose the bus 201 + * id of error source id; 202 + * 3) There are multiple errors and prior id comparing fails; 203 + * We check AER status registers to find the initial reporter. 
199 204 */ 200 205 if (atomic_read(&dev->enable_cnt) == 0) 201 206 return 0; ··· 240 213 pos + PCI_ERR_COR_MASK, 241 214 &mask); 242 215 if (status & ERR_CORRECTABLE_ERROR_MASK & ~mask) { 243 - e_info->dev = dev; 244 - return 1; 216 + add_error_device(e_info, dev); 217 + goto added; 245 218 } 246 219 } else { 247 220 pci_read_config_dword(dev, ··· 251 224 pos + PCI_ERR_UNCOR_MASK, 252 225 &mask); 253 226 if (status & ERR_UNCORRECTABLE_ERROR_MASK & ~mask) { 254 - e_info->dev = dev; 255 - return 1; 227 + add_error_device(e_info, dev); 228 + goto added; 256 229 } 257 230 } 258 231 259 232 return 0; 233 + 234 + added: 235 + if (e_info->flags & AER_MULTI_ERROR_VALID_FLAG) 236 + return 0; 237 + else 238 + return 1; 260 239 } 261 240 262 241 /** ··· 742 709 return AER_SUCCESS; 743 710 } 744 711 712 + static inline void aer_process_err_devices(struct pcie_device *p_device, 713 + struct aer_err_info *e_info) 714 + { 715 + int i; 716 + 717 + if (!e_info->dev[0]) { 718 + dev_printk(KERN_DEBUG, &p_device->port->dev, 719 + "can't find device of ID%04x\n", 720 + e_info->id); 721 + } 722 + 723 + for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { 724 + if (get_device_error_info(e_info->dev[i], e_info) == 725 + AER_SUCCESS) { 726 + aer_print_error(e_info->dev[i], e_info); 727 + handle_error_source(p_device, 728 + e_info->dev[i], 729 + e_info); 730 + } 731 + } 732 + } 733 + 745 734 /** 746 735 * aer_isr_one_error - consume an error detected by root port 747 736 * @p_device: pointer to error root port service device ··· 809 754 e_info->flags |= AER_MULTI_ERROR_VALID_FLAG; 810 755 811 756 find_source_device(p_device->port, e_info); 812 - if (e_info->dev == NULL) { 813 - printk(KERN_DEBUG "%s->can't find device of ID%04x\n", 814 - __func__, e_info->id); 815 - continue; 816 - } 817 - if (get_device_error_info(e_info->dev, e_info) == 818 - AER_SUCCESS) { 819 - aer_print_error(e_info->dev, e_info); 820 - handle_error_source(p_device, 821 - e_info->dev, 822 - e_info); 823 - } 
757 + aer_process_err_devices(p_device, e_info); 824 758 } 825 759 826 760 kfree(e_info);