[PATCH] msi: Safer state caching.

There are two ways pci_save_state and pci_restore_state are used: as
helper functions during suspend/resume, and as helper functions around
a hardware reset event. When used as helper functions around a hardware
reset event there is no reason to believe the calls will be paired, nor
is there a good reason to believe that if we restore the msi state from
before the reset it will match the current msi state. Since arch
code may change the msi message without going through the driver, drivers
currently do not have enough information to even know when to call
pci_save_state to ensure they will have msi state in sync with the other
kernel irq reception data structures.

It turns out the solution is straightforward: cache the state in the
existing msi data structures (not the magic pci saved things) and
have the msi code update the cached state each time we write to the hardware.
This means we never need to read the hardware to figure out what the hardware
state should be.

By modifying the caching in this manner we get to remove our save_state
routines and only need to provide restore_state routines.

The only fields that were at all tricky to regenerate were the msi and msi-x
control registers, and the way we regenerate them currently is a bit dependent
upon assumptions about how we allow the msi registers to be configured and used,
making the code a little bit brittle. If we ever change what cases we allow
or how we configure the msi bits we can address the fragility then.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by Eric W. Biederman and committed by Linus Torvalds 392ee1e6 529284a0

+29 -134
+25 -125
drivers/pci/msi.c
··· 100 BUG(); 101 break; 102 } 103 } 104 105 void read_msi_msg(unsigned int irq, struct msi_msg *msg) ··· 180 default: 181 BUG(); 182 } 183 } 184 185 void mask_msi_irq(unsigned int irq) ··· 227 } 228 229 #ifdef CONFIG_PM 230 - static int __pci_save_msi_state(struct pci_dev *dev) 231 - { 232 - int pos, i = 0; 233 - u16 control; 234 - struct pci_cap_saved_state *save_state; 235 - u32 *cap; 236 - 237 - if (!dev->msi_enabled) 238 - return 0; 239 - 240 - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); 241 - if (pos <= 0) 242 - return 0; 243 - 244 - save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5, 245 - GFP_KERNEL); 246 - if (!save_state) { 247 - printk(KERN_ERR "Out of memory in pci_save_msi_state\n"); 248 - return -ENOMEM; 249 - } 250 - cap = &save_state->data[0]; 251 - 252 - pci_read_config_dword(dev, pos, &cap[i++]); 253 - control = cap[0] >> 16; 254 - pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, &cap[i++]); 255 - if (control & PCI_MSI_FLAGS_64BIT) { 256 - pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, &cap[i++]); 257 - pci_read_config_dword(dev, pos + PCI_MSI_DATA_64, &cap[i++]); 258 - } else 259 - pci_read_config_dword(dev, pos + PCI_MSI_DATA_32, &cap[i++]); 260 - if (control & PCI_MSI_FLAGS_MASKBIT) 261 - pci_read_config_dword(dev, pos + PCI_MSI_MASK_BIT, &cap[i++]); 262 - save_state->cap_nr = PCI_CAP_ID_MSI; 263 - pci_add_saved_cap(dev, save_state); 264 - return 0; 265 - } 266 - 267 static void __pci_restore_msi_state(struct pci_dev *dev) 268 { 269 - int i = 0, pos; 270 u16 control; 271 - struct pci_cap_saved_state *save_state; 272 - u32 *cap; 273 274 if (!dev->msi_enabled) 275 return; 276 277 - save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI); 278 - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); 279 - if (!save_state || pos <= 0) 280 - return; 281 - cap = &save_state->data[0]; 282 283 pci_intx(dev, 0); /* disable intx */ 284 - control = cap[i++] >> 16; 285 msi_set_enable(dev, 0); 286 - pci_write_config_dword(dev, pos 
+ PCI_MSI_ADDRESS_LO, cap[i++]); 287 - if (control & PCI_MSI_FLAGS_64BIT) { 288 - pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, cap[i++]); 289 - pci_write_config_dword(dev, pos + PCI_MSI_DATA_64, cap[i++]); 290 - } else 291 - pci_write_config_dword(dev, pos + PCI_MSI_DATA_32, cap[i++]); 292 - if (control & PCI_MSI_FLAGS_MASKBIT) 293 - pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, cap[i++]); 294 pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); 295 - pci_remove_saved_cap(save_state); 296 - kfree(save_state); 297 - } 298 - 299 - static int __pci_save_msix_state(struct pci_dev *dev) 300 - { 301 - int pos; 302 - int irq, head, tail = 0; 303 - u16 control; 304 - struct pci_cap_saved_state *save_state; 305 - 306 - if (!dev->msix_enabled) 307 - return 0; 308 - 309 - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); 310 - if (pos <= 0) 311 - return 0; 312 - 313 - /* save the capability */ 314 - pci_read_config_word(dev, msi_control_reg(pos), &control); 315 - save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16), 316 - GFP_KERNEL); 317 - if (!save_state) { 318 - printk(KERN_ERR "Out of memory in pci_save_msix_state\n"); 319 - return -ENOMEM; 320 - } 321 - *((u16 *)&save_state->data[0]) = control; 322 - 323 - /* save the table */ 324 - irq = head = dev->first_msi_irq; 325 - while (head != tail) { 326 - struct msi_desc *entry; 327 - 328 - entry = get_irq_msi(irq); 329 - read_msi_msg(irq, &entry->msg_save); 330 - 331 - tail = entry->link.tail; 332 - irq = tail; 333 - } 334 - 335 - save_state->cap_nr = PCI_CAP_ID_MSIX; 336 - pci_add_saved_cap(dev, save_state); 337 - return 0; 338 - } 339 - 340 - int pci_save_msi_state(struct pci_dev *dev) 341 - { 342 - int rc; 343 - 344 - rc = __pci_save_msi_state(dev); 345 - if (rc) 346 - return rc; 347 - 348 - rc = __pci_save_msix_state(dev); 349 - 350 - return rc; 351 } 352 353 static void __pci_restore_msix_state(struct pci_dev *dev) 354 { 355 - u16 save; 356 int pos; 357 int irq, head, tail = 0; 358 
struct msi_desc *entry; 359 - struct pci_cap_saved_state *save_state; 360 361 if (!dev->msix_enabled) 362 - return; 363 - 364 - save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSIX); 365 - if (!save_state) 366 - return; 367 - save = *((u16 *)&save_state->data[0]); 368 - pci_remove_saved_cap(save_state); 369 - kfree(save_state); 370 - 371 - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); 372 - if (pos <= 0) 373 return; 374 375 /* route the table */ 376 pci_intx(dev, 0); /* disable intx */ 377 msix_set_enable(dev, 0); 378 irq = head = dev->first_msi_irq; 379 while (head != tail) { 380 entry = get_irq_msi(irq); 381 - write_msi_msg(irq, &entry->msg_save); 382 383 tail = entry->link.tail; 384 irq = tail; 385 } 386 387 - pci_write_config_word(dev, msi_control_reg(pos), save); 388 } 389 390 void pci_restore_msi_state(struct pci_dev *dev) ··· 318 entry->msi_attrib.is_64 = is_64bit_address(control); 319 entry->msi_attrib.entry_nr = 0; 320 entry->msi_attrib.maskbit = is_mask_bit_support(control); 321 entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ 322 entry->msi_attrib.pos = pos; 323 if (is_mask_bit_support(control)) { ··· 406 entry->msi_attrib.is_64 = 1; 407 entry->msi_attrib.entry_nr = j; 408 entry->msi_attrib.maskbit = 1; 409 entry->msi_attrib.default_irq = dev->irq; 410 entry->msi_attrib.pos = pos; 411 entry->dev = dev;
··· 100 BUG(); 101 break; 102 } 103 + entry->msi_attrib.masked = !!flag; 104 } 105 106 void read_msi_msg(unsigned int irq, struct msi_msg *msg) ··· 179 default: 180 BUG(); 181 } 182 + entry->msg = *msg; 183 } 184 185 void mask_msi_irq(unsigned int irq) ··· 225 } 226 227 #ifdef CONFIG_PM 228 static void __pci_restore_msi_state(struct pci_dev *dev) 229 { 230 + int pos; 231 u16 control; 232 + struct msi_desc *entry; 233 234 if (!dev->msi_enabled) 235 return; 236 237 + entry = get_irq_msi(dev->irq); 238 + pos = entry->msi_attrib.pos; 239 240 pci_intx(dev, 0); /* disable intx */ 241 msi_set_enable(dev, 0); 242 + write_msi_msg(dev->irq, &entry->msg); 243 + if (entry->msi_attrib.maskbit) 244 + msi_set_mask_bit(dev->irq, entry->msi_attrib.masked); 245 + 246 + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); 247 + control &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); 248 + if (entry->msi_attrib.maskbit || !entry->msi_attrib.masked) 249 + control |= PCI_MSI_FLAGS_ENABLE; 250 pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); 251 } 252 253 static void __pci_restore_msix_state(struct pci_dev *dev) 254 { 255 int pos; 256 int irq, head, tail = 0; 257 struct msi_desc *entry; 258 + u16 control; 259 260 if (!dev->msix_enabled) 261 return; 262 263 /* route the table */ 264 pci_intx(dev, 0); /* disable intx */ 265 msix_set_enable(dev, 0); 266 irq = head = dev->first_msi_irq; 267 + entry = get_irq_msi(irq); 268 + pos = entry->msi_attrib.pos; 269 while (head != tail) { 270 entry = get_irq_msi(irq); 271 + write_msi_msg(irq, &entry->msg); 272 + msi_set_mask_bit(irq, entry->msi_attrib.masked); 273 274 tail = entry->link.tail; 275 irq = tail; 276 } 277 278 + pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); 279 + control &= ~PCI_MSIX_FLAGS_MASKALL; 280 + control |= PCI_MSIX_FLAGS_ENABLE; 281 + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); 282 } 283 284 void pci_restore_msi_state(struct pci_dev *dev) ··· 420 entry->msi_attrib.is_64 = 
is_64bit_address(control); 421 entry->msi_attrib.entry_nr = 0; 422 entry->msi_attrib.maskbit = is_mask_bit_support(control); 423 + entry->msi_attrib.masked = 1; 424 entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ 425 entry->msi_attrib.pos = pos; 426 if (is_mask_bit_support(control)) { ··· 507 entry->msi_attrib.is_64 = 1; 508 entry->msi_attrib.entry_nr = j; 509 entry->msi_attrib.maskbit = 1; 510 + entry->msi_attrib.masked = 1; 511 entry->msi_attrib.default_irq = dev->irq; 512 entry->msi_attrib.pos = pos; 513 entry->dev = dev;
-2
drivers/pci/pci.c
··· 638 /* XXX: 100% dword access ok here? */ 639 for (i = 0; i < 16; i++) 640 pci_read_config_dword(dev, i * 4,&dev->saved_config_space[i]); 641 - if ((i = pci_save_msi_state(dev)) != 0) 642 - return i; 643 if ((i = pci_save_pcie_state(dev)) != 0) 644 return i; 645 if ((i = pci_save_pcix_state(dev)) != 0)
··· 638 /* XXX: 100% dword access ok here? */ 639 for (i = 0; i < 16; i++) 640 pci_read_config_dword(dev, i * 4,&dev->saved_config_space[i]); 641 if ((i = pci_save_pcie_state(dev)) != 0) 642 return i; 643 if ((i = pci_save_pcix_state(dev)) != 0)
-2
drivers/pci/pci.h
··· 52 #endif 53 54 #if defined(CONFIG_PCI_MSI) && defined(CONFIG_PM) 55 - int pci_save_msi_state(struct pci_dev *dev); 56 void pci_restore_msi_state(struct pci_dev *dev); 57 #else 58 - static inline int pci_save_msi_state(struct pci_dev *dev) { return 0; } 59 static inline void pci_restore_msi_state(struct pci_dev *dev) {} 60 #endif 61
··· 52 #endif 53 54 #if defined(CONFIG_PCI_MSI) && defined(CONFIG_PM) 55 void pci_restore_msi_state(struct pci_dev *dev); 56 #else 57 static inline void pci_restore_msi_state(struct pci_dev *dev) {} 58 #endif 59
+3 -5
include/linux/msi.h
··· 17 struct { 18 __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */ 19 __u8 maskbit : 1; /* mask-pending bit supported ? */ 20 - __u8 unused : 1; 21 __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ 22 __u8 pos; /* Location of the msi capability */ 23 __u16 entry_nr; /* specific enabled entry */ ··· 32 void __iomem *mask_base; 33 struct pci_dev *dev; 34 35 - #ifdef CONFIG_PM 36 - /* PM save area for MSIX address/data */ 37 - struct msi_msg msg_save; 38 - #endif 39 }; 40 41 /*
··· 17 struct { 18 __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */ 19 __u8 maskbit : 1; /* mask-pending bit supported ? */ 20 + __u8 masked : 1; 21 __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ 22 __u8 pos; /* Location of the msi capability */ 23 __u16 entry_nr; /* specific enabled entry */ ··· 32 void __iomem *mask_base; 33 struct pci_dev *dev; 34 35 + /* Last set MSI message */ 36 + struct msi_msg msg; 37 }; 38 39 /*
+1
include/linux/pci_regs.h
··· 296 #define PCI_MSIX_FLAGS 2 297 #define PCI_MSIX_FLAGS_QSIZE 0x7FF 298 #define PCI_MSIX_FLAGS_ENABLE (1 << 15) 299 #define PCI_MSIX_FLAGS_BIRMASK (7 << 0) 300 #define PCI_MSIX_FLAGS_BITMASK (1 << 0) 301
··· 296 #define PCI_MSIX_FLAGS 2 297 #define PCI_MSIX_FLAGS_QSIZE 0x7FF 298 #define PCI_MSIX_FLAGS_ENABLE (1 << 15) 299 + #define PCI_MSIX_FLAGS_MASKALL (1 << 14) 300 #define PCI_MSIX_FLAGS_BIRMASK (7 << 0) 301 #define PCI_MSIX_FLAGS_BITMASK (1 << 0) 302