Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cxl: Add support for interrupts on the Mellanox CX4

The Mellanox CX4 in cxl mode uses a hybrid interrupt model, where
interrupts are routed from the networking hardware to the XSL using the
MSIX table, and from there will be transformed back into an MSIX
interrupt using the cxl style interrupts (i.e. using IVTE entries and
ranges to map a PE and AFU interrupt number to an MSIX address).

We want to hide the implementation details of cxl interrupts as much as
possible. To this end, we use a special version of the MSI setup &
teardown routines in the PHB while in cxl mode to allocate the cxl
interrupts and configure the IVTE entries in the process element.

The special MSI setup routine does not configure the MSIX table - the CX4
card uses a custom format in that table and it would not be appropriate to
fill that out in generic code. The rest of the functionality is similar to
the "Full MSI-X mode" described in the CAIA, and this could be easily
extended to support other adapters that use that mode in the future.

The interrupts will be associated with the default context. If the
maximum number of interrupts per context has been limited (e.g. by the
mlx5 driver), it will automatically allocate additional kernel contexts
to associate extra interrupts as required. These contexts will be
started using the same WED that was used to start the default context.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

authored by

Ian Munsie and committed by
Michael Ellerman
a2f67d5e cbce0917

+202
+84
arch/powerpc/platforms/powernv/pci-cxl.c
··· 8 8 */ 9 9 10 10 #include <linux/module.h> 11 + #include <linux/msi.h> 11 12 #include <asm/pci-bridge.h> 12 13 #include <asm/pnv-pci.h> 13 14 #include <asm/opal.h> ··· 281 280 282 281 cxl_pci_disable_device(dev); 283 282 cxl_afu_put(afu); 283 + } 284 + 285 + /* 286 + * This is a special version of pnv_setup_msi_irqs for cards in cxl mode. This 287 + * function handles setting up the IVTE entries for the XSL to use. 288 + * 289 + * We are currently not filling out the MSIX table, since the only currently 290 + * supported adapter (CX4) uses a custom MSIX table format in cxl mode and it 291 + * is up to their driver to fill that out. In the future we may fill out the 292 + * MSIX table (and change the IVTE entries to be an index to the MSIX table) 293 + * for adapters implementing the Full MSI-X mode described in the CAIA. 294 + */ 295 + int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) 296 + { 297 + struct pci_controller *hose = pci_bus_to_host(pdev->bus); 298 + struct pnv_phb *phb = hose->private_data; 299 + struct msi_desc *entry; 300 + struct cxl_context *ctx = NULL; 301 + unsigned int virq; 302 + int hwirq; 303 + int afu_irq = 0; 304 + int rc; 305 + 306 + if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) 307 + return -ENODEV; 308 + 309 + if (pdev->no_64bit_msi && !phb->msi32_support) 310 + return -ENODEV; 311 + 312 + rc = cxl_cx4_setup_msi_irqs(pdev, nvec, type); 313 + if (rc) 314 + return rc; 315 + 316 + for_each_pci_msi_entry(entry, pdev) { 317 + if (!entry->msi_attrib.is_64 && !phb->msi32_support) { 318 + pr_warn("%s: Supports only 64-bit MSIs\n", 319 + pci_name(pdev)); 320 + return -ENXIO; 321 + } 322 + 323 + hwirq = cxl_next_msi_hwirq(pdev, &ctx, &afu_irq); 324 + if (WARN_ON(hwirq <= 0)) 325 + return (hwirq ? 
hwirq : -ENOMEM); 326 + 327 + virq = irq_create_mapping(NULL, hwirq); 328 + if (virq == NO_IRQ) { 329 + pr_warn("%s: Failed to map cxl mode MSI to linux irq\n", 330 + pci_name(pdev)); 331 + return -ENOMEM; 332 + } 333 + 334 + rc = pnv_cxl_ioda_msi_setup(pdev, hwirq, virq); 335 + if (rc) { 336 + pr_warn("%s: Failed to setup cxl mode MSI\n", pci_name(pdev)); 337 + irq_dispose_mapping(virq); 338 + return rc; 339 + } 340 + 341 + irq_set_msi_desc(virq, entry); 342 + } 343 + 344 + return 0; 345 + } 346 + 347 + void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) 348 + { 349 + struct pci_controller *hose = pci_bus_to_host(pdev->bus); 350 + struct pnv_phb *phb = hose->private_data; 351 + struct msi_desc *entry; 352 + irq_hw_number_t hwirq; 353 + 354 + if (WARN_ON(!phb)) 355 + return; 356 + 357 + for_each_pci_msi_entry(entry, pdev) { 358 + if (entry->irq == NO_IRQ) 359 + continue; 360 + hwirq = virq_to_hw(entry->irq); 361 + irq_set_msi_desc(entry->irq, NULL); 362 + irq_dispose_mapping(entry->irq); 363 + } 364 + 365 + cxl_cx4_teardown_msi_irqs(pdev); 284 366 }
+4
arch/powerpc/platforms/powernv/pci-ioda.c
··· 3465 3465 const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { 3466 3466 .dma_dev_setup = pnv_pci_dma_dev_setup, 3467 3467 .dma_bus_setup = pnv_pci_dma_bus_setup, 3468 + #ifdef CONFIG_PCI_MSI 3469 + .setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs, 3470 + .teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs, 3471 + #endif 3468 3472 .enable_device_hook = pnv_cxl_enable_device_hook, 3469 3473 .disable_device = pnv_cxl_disable_device, 3470 3474 .release_device = pnv_pci_release_device,
+2
arch/powerpc/platforms/powernv/pci.h
··· 247 247 /* cxl functions */ 248 248 extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev); 249 249 extern void pnv_cxl_disable_device(struct pci_dev *dev); 250 + extern int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); 251 + extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); 250 252 251 253 252 254 /* phb ops (cxl switches these when enabling the kernel api on the phb) */
+71
drivers/misc/cxl/api.c
··· 14 14 #include <misc/cxl.h> 15 15 #include <linux/fs.h> 16 16 #include <asm/pnv-pci.h> 17 + #include <linux/msi.h> 17 18 18 19 #include "cxl.h" 19 20 ··· 490 489 return afu->irqs_max; 491 490 } 492 491 EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); 492 + 493 + /* 494 + * This is a special interrupt allocation routine called from the PHB's MSI 495 + * setup function. When capi interrupts are allocated in this manner they must 496 + * still be associated with a running context, but since the MSI APIs have no 497 + * way to specify this we use the default context associated with the device. 498 + * 499 + * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU 500 + * interrupt number, so in order to overcome this their driver informs us of 501 + * the restriction by setting the maximum interrupts per context, and we 502 + * allocate additional contexts as necessary so that we can keep the AFU 503 + * interrupt number within the supported range. 504 + */ 505 + int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) 506 + { 507 + struct cxl_context *ctx, *new_ctx, *default_ctx; 508 + int remaining; 509 + int rc; 510 + 511 + ctx = default_ctx = cxl_get_context(pdev); 512 + if (WARN_ON(!default_ctx)) 513 + return -ENODEV; 514 + 515 + remaining = nvec; 516 + while (remaining > 0) { 517 + rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max)); 518 + if (rc) { 519 + pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev)); 520 + return rc; 521 + } 522 + remaining -= ctx->afu->irqs_max; 523 + 524 + if (ctx != default_ctx && default_ctx->status == STARTED) { 525 + WARN_ON(cxl_start_context(ctx, 526 + be64_to_cpu(default_ctx->elem->common.wed), 527 + NULL)); 528 + } 529 + 530 + if (remaining > 0) { 531 + new_ctx = cxl_dev_context_init(pdev); 532 + if (!new_ctx) { 533 + pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev)); 534 + return -ENOSPC; 535 + } 536 + 
list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts); 537 + ctx = new_ctx; 538 + } 539 + } 540 + 541 + return 0; 542 + } 543 + /* Exported via cxl_base */ 544 + 545 + void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) 546 + { 547 + struct cxl_context *ctx, *pos, *tmp; 548 + 549 + ctx = cxl_get_context(pdev); 550 + if (WARN_ON(!ctx)) 551 + return; 552 + 553 + cxl_free_afu_irqs(ctx); 554 + list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) { 555 + cxl_stop_context(pos); 556 + cxl_free_afu_irqs(pos); 557 + list_del(&pos->extra_irq_contexts); 558 + cxl_release_context(pos); 559 + } 560 + } 561 + /* Exported via cxl_base */
+31
drivers/misc/cxl/base.c
··· 158 158 } 159 159 EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); 160 160 161 + int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) 162 + { 163 + int ret; 164 + struct cxl_calls *calls; 165 + 166 + calls = cxl_calls_get(); 167 + if (!calls) 168 + return false; 169 + 170 + ret = calls->cxl_cx4_setup_msi_irqs(pdev, nvec, type); 171 + 172 + cxl_calls_put(calls); 173 + 174 + return ret; 175 + } 176 + EXPORT_SYMBOL_GPL(cxl_cx4_setup_msi_irqs); 177 + 178 + void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) 179 + { 180 + struct cxl_calls *calls; 181 + 182 + calls = cxl_calls_get(); 183 + if (!calls) 184 + return; 185 + 186 + calls->cxl_cx4_teardown_msi_irqs(pdev); 187 + 188 + cxl_calls_put(calls); 189 + } 190 + EXPORT_SYMBOL_GPL(cxl_cx4_teardown_msi_irqs); 191 + 161 192 static int __init cxl_base_init(void) 162 193 { 163 194 struct device_node *np;
+4
drivers/misc/cxl/cxl.h
bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu);
void _cxl_pci_disable_device(struct pci_dev *dev);
int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
/* MSI setup/teardown entry points for the CX4, reached via struct cxl_calls */
int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);

/*
 * Table of function pointers into the cxl driver. Each callback has the same
 * signature as the _cxl_* function of the matching name declared above.
 */
struct cxl_calls {
	void (*cxl_slbia)(struct mm_struct *mm);
	bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu);
	void (*cxl_pci_disable_device)(struct pci_dev *dev);
	int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
	/* CX4 MSI setup: same arguments as the PHB MSI setup hook */
	int (*cxl_cx4_setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type);
	/* CX4 MSI teardown */
	void (*cxl_cx4_teardown_msi_irqs)(struct pci_dev *pdev);

	/* module providing the callbacks */
	struct module *owner;
};
+2
drivers/misc/cxl/main.c
··· 113 113 .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, 114 114 .cxl_pci_disable_device = _cxl_pci_disable_device, 115 115 .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, 116 + .cxl_cx4_setup_msi_irqs = _cxl_cx4_setup_msi_irqs, 117 + .cxl_cx4_teardown_msi_irqs = _cxl_cx4_teardown_msi_irqs, 116 118 .owner = THIS_MODULE, 117 119 }; 118 120
+4
include/misc/cxl-base.h
··· 43 43 void cxl_slbia(struct mm_struct *mm); 44 44 bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); 45 45 void cxl_pci_disable_device(struct pci_dev *dev); 46 + int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); 47 + void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); 46 48 47 49 #else /* CONFIG_CXL_BASE */ 48 50 ··· 54 52 static inline void cxl_slbia(struct mm_struct *mm) {} 55 53 static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; } 56 54 static inline void cxl_pci_disable_device(struct pci_dev *dev) {} 55 + static inline int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { return -ENODEV; } 56 + static inline void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) {} 57 57 58 58 #endif /* CONFIG_CXL_BASE */ 59 59