Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI AER: support Multiple Error Received and no error source id

According to the PCI Express AER specification, a root port may receive
multiple TLP errors, but it can save only one correctable error source
id and one uncorrectable error source id at a time. In addition, some
root port hardware may be unable to provide a correct source id; i.e.,
the source id, or the bus id part of the source id, provided by the
root port might be equal to 0.

This patchset implements support for both cases in the kernel by
searching the device tree under the root port.

Patch 1 changes the cb parameter of pci_walk_bus() to return a value.
When cb returns non-zero, pci_walk_bus() stops searching the
device tree.

Reviewed-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

authored by

Zhang, Yanmin and committed by
Jesse Barnes
70298c6e a6c0d5c6

+50 -31
+24 -14
arch/powerpc/platforms/pseries/eeh_driver.c
··· 122 122 * passed back in "userdata". 123 123 */ 124 124 125 - static void eeh_report_error(struct pci_dev *dev, void *userdata) 125 + static int eeh_report_error(struct pci_dev *dev, void *userdata) 126 126 { 127 127 enum pci_ers_result rc, *res = userdata; 128 128 struct pci_driver *driver = dev->driver; ··· 130 130 dev->error_state = pci_channel_io_frozen; 131 131 132 132 if (!driver) 133 - return; 133 + return 0; 134 134 135 135 eeh_disable_irq(dev); 136 136 137 137 if (!driver->err_handler || 138 138 !driver->err_handler->error_detected) 139 - return; 139 + return 0; 140 140 141 141 rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); 142 142 143 143 /* A driver that needs a reset trumps all others */ 144 144 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; 145 145 if (*res == PCI_ERS_RESULT_NONE) *res = rc; 146 + 147 + return 0; 146 148 } 147 149 148 150 /** ··· 155 153 * Cumulative response passed back in "userdata". 156 154 */ 157 155 158 - static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) 156 + static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) 159 157 { 160 158 enum pci_ers_result rc, *res = userdata; 161 159 struct pci_driver *driver = dev->driver; ··· 163 161 if (!driver || 164 162 !driver->err_handler || 165 163 !driver->err_handler->mmio_enabled) 166 - return; 164 + return 0; 167 165 168 166 rc = driver->err_handler->mmio_enabled (dev); 169 167 170 168 /* A driver that needs a reset trumps all others */ 171 169 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; 172 170 if (*res == PCI_ERS_RESULT_NONE) *res = rc; 171 + 172 + return 0; 173 173 } 174 174 175 175 /** 176 176 * eeh_report_reset - tell device that slot has been reset 177 177 */ 178 178 179 - static void eeh_report_reset(struct pci_dev *dev, void *userdata) 179 + static int eeh_report_reset(struct pci_dev *dev, void *userdata) 180 180 { 181 181 enum pci_ers_result rc, *res = userdata; 182 182 struct pci_driver *driver = 
dev->driver; 183 183 184 184 if (!driver) 185 - return; 185 + return 0; 186 186 187 187 dev->error_state = pci_channel_io_normal; 188 188 ··· 192 188 193 189 if (!driver->err_handler || 194 190 !driver->err_handler->slot_reset) 195 - return; 191 + return 0; 196 192 197 193 rc = driver->err_handler->slot_reset(dev); 198 194 if ((*res == PCI_ERS_RESULT_NONE) || 199 195 (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; 200 196 if (*res == PCI_ERS_RESULT_DISCONNECT && 201 197 rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; 198 + 199 + return 0; 202 200 } 203 201 204 202 /** 205 203 * eeh_report_resume - tell device to resume normal operations 206 204 */ 207 205 208 - static void eeh_report_resume(struct pci_dev *dev, void *userdata) 206 + static int eeh_report_resume(struct pci_dev *dev, void *userdata) 209 207 { 210 208 struct pci_driver *driver = dev->driver; 211 209 212 210 dev->error_state = pci_channel_io_normal; 213 211 214 212 if (!driver) 215 - return; 213 + return 0; 216 214 217 215 eeh_enable_irq(dev); 218 216 219 217 if (!driver->err_handler || 220 218 !driver->err_handler->resume) 221 - return; 219 + return 0; 222 220 223 221 driver->err_handler->resume(dev); 222 + 223 + return 0; 224 224 } 225 225 226 226 /** ··· 234 226 * dead, and that no further recovery attempts will be made on it. 
235 227 */ 236 228 237 - static void eeh_report_failure(struct pci_dev *dev, void *userdata) 229 + static int eeh_report_failure(struct pci_dev *dev, void *userdata) 238 230 { 239 231 struct pci_driver *driver = dev->driver; 240 232 241 233 dev->error_state = pci_channel_io_perm_failure; 242 234 243 235 if (!driver) 244 - return; 236 + return 0; 245 237 246 238 eeh_disable_irq(dev); 247 239 248 240 if (!driver->err_handler || 249 241 !driver->err_handler->error_detected) 250 - return; 242 + return 0; 251 243 252 244 driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); 245 + 246 + return 0; 253 247 } 254 248 255 249 /* ------------------------------------------------------- */
+9 -2
drivers/pci/bus.c
··· 206 206 * Walk the given bus, including any bridged devices 207 207 * on buses under this bus. Call the provided callback 208 208 * on each device found. 209 + * 210 + * We check the return of @cb each time. If it returns anything 211 + * other than 0, we break out. 212 + * 209 213 */ 210 - void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), 214 + void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), 211 215 void *userdata) 212 216 { 213 217 struct pci_dev *dev; 214 218 struct pci_bus *bus; 215 219 struct list_head *next; 220 + int retval; 216 221 217 222 bus = top; 218 223 down_read(&pci_bus_sem); ··· 241 236 242 237 /* Run device routines with the device locked */ 243 238 down(&dev->dev.sem); 244 - cb(dev, userdata); 239 + retval = cb(dev, userdata); 245 240 up(&dev->dev.sem); 241 + if (retval) 242 + break; 246 243 } 247 244 up_read(&pci_bus_sem); 248 245 }
+16 -14
drivers/pci/pcie/aer/aerdrv_core.c
··· 109 109 #endif /* 0 */ 110 110 111 111 112 - static void set_device_error_reporting(struct pci_dev *dev, void *data) 112 + static int set_device_error_reporting(struct pci_dev *dev, void *data) 113 113 { 114 114 bool enable = *((bool *)data); 115 115 ··· 124 124 125 125 if (enable) 126 126 pcie_set_ecrc_checking(dev); 127 + 128 + return 0; 127 129 } 128 130 129 131 /** ··· 209 207 return NULL; 210 208 } 211 209 212 - static void report_error_detected(struct pci_dev *dev, void *data) 210 + static int report_error_detected(struct pci_dev *dev, void *data) 213 211 { 214 212 pci_ers_result_t vote; 215 213 struct pci_error_handlers *err_handler; ··· 234 232 dev->driver ? 235 233 "no AER-aware driver" : "no driver"); 236 234 } 237 - return; 235 + return 0; 238 236 } 239 237 240 238 err_handler = dev->driver->err_handler; 241 239 vote = err_handler->error_detected(dev, result_data->state); 242 240 result_data->result = merge_result(result_data->result, vote); 243 - return; 241 + return 0; 244 242 } 245 243 246 - static void report_mmio_enabled(struct pci_dev *dev, void *data) 244 + static int report_mmio_enabled(struct pci_dev *dev, void *data) 247 245 { 248 246 pci_ers_result_t vote; 249 247 struct pci_error_handlers *err_handler; ··· 253 251 if (!dev->driver || 254 252 !dev->driver->err_handler || 255 253 !dev->driver->err_handler->mmio_enabled) 256 - return; 254 + return 0; 257 255 258 256 err_handler = dev->driver->err_handler; 259 257 vote = err_handler->mmio_enabled(dev); 260 258 result_data->result = merge_result(result_data->result, vote); 261 - return; 259 + return 0; 262 260 } 263 261 264 - static void report_slot_reset(struct pci_dev *dev, void *data) 262 + static int report_slot_reset(struct pci_dev *dev, void *data) 265 263 { 266 264 pci_ers_result_t vote; 267 265 struct pci_error_handlers *err_handler; ··· 271 269 if (!dev->driver || 272 270 !dev->driver->err_handler || 273 271 !dev->driver->err_handler->slot_reset) 274 - return; 272 + return 0; 275 273 
276 274 err_handler = dev->driver->err_handler; 277 275 vote = err_handler->slot_reset(dev); 278 276 result_data->result = merge_result(result_data->result, vote); 279 - return; 277 + return 0; 280 278 } 281 279 282 - static void report_resume(struct pci_dev *dev, void *data) 280 + static int report_resume(struct pci_dev *dev, void *data) 283 281 { 284 282 struct pci_error_handlers *err_handler; 285 283 ··· 288 286 if (!dev->driver || 289 287 !dev->driver->err_handler || 290 288 !dev->driver->err_handler->resume) 291 - return; 289 + return 0; 292 290 293 291 err_handler = dev->driver->err_handler; 294 292 err_handler->resume(dev); 295 - return; 293 + return 0; 296 294 } 297 295 298 296 /** ··· 309 307 static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, 310 308 enum pci_channel_state state, 311 309 char *error_mesg, 312 - void (*cb)(struct pci_dev *, void *)) 310 + int (*cb)(struct pci_dev *, void *)) 313 311 { 314 312 struct aer_broadcast_data result_data; 315 313
+1 -1
include/linux/pci.h
··· 789 789 int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, 790 790 int pass); 791 791 792 - void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), 792 + void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), 793 793 void *userdata); 794 794 int pci_cfg_space_size_ext(struct pci_dev *dev); 795 795 int pci_cfg_space_size(struct pci_dev *dev);