Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/apic: Convert to IRQCHIP_MOVE_DEFERRED

Instead of marking individual interrupts as safe to be migrated in
arbitrary contexts, mark the interrupt chips, which require the interrupt
to be moved in actual interrupt context, with the new IRQCHIP_MOVE_DEFERRED
flag. This makes more sense because this is a per interrupt chip property
and not restricted to individual interrupts.

That flips the logic from the historical opt-out to an opt-in model. This is
simpler to handle for other architectures, which default to unrestricted
affinity setting. It also allows the redundant core logic to be cleaned up
significantly.

All interrupt chips, which belong to a top-level domain sitting directly on
top of the x86 vector domain are marked accordingly, unless the related
setup code marks the interrupts with IRQ_MOVE_PCNTXT, i.e. XEN.

No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Acked-by: Wei Liu <wei.liu@kernel.org>
Link: https://lore.kernel.org/all/20241210103335.563277044@linutronix.de


+7 -23
+1
arch/x86/Kconfig
··· 173 173 select GENERIC_IRQ_RESERVATION_MODE 174 174 select GENERIC_IRQ_SHOW 175 175 select GENERIC_PENDING_IRQ if SMP 176 + select GENERIC_PENDING_IRQ_CHIPFLAGS if SMP 176 177 select GENERIC_PTDUMP 177 178 select GENERIC_SMP_IDLE_THREAD 178 179 select GENERIC_TIME_VSYSCALL
+1 -1
arch/x86/hyperv/irqdomain.c
··· 304 304 .irq_retrigger = irq_chip_retrigger_hierarchy, 305 305 .irq_compose_msi_msg = hv_irq_compose_msi_msg, 306 306 .irq_set_affinity = msi_domain_set_affinity, 307 - .flags = IRQCHIP_SKIP_SET_WAKE, 307 + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED, 308 308 }; 309 309 310 310 static struct msi_domain_ops pci_msi_domain_ops = {
+1 -1
arch/x86/kernel/apic/io_apic.c
··· 1861 1861 .irq_set_affinity = ioapic_set_affinity, 1862 1862 .irq_retrigger = irq_chip_retrigger_hierarchy, 1863 1863 .irq_get_irqchip_state = ioapic_irq_get_chip_state, 1864 - .flags = IRQCHIP_SKIP_SET_WAKE | 1864 + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED | 1865 1865 IRQCHIP_AFFINITY_PRE_STARTUP, 1866 1866 }; 1867 1867
+2 -1
arch/x86/kernel/apic/msi.c
··· 214 214 if (WARN_ON_ONCE(domain != real_parent)) 215 215 return false; 216 216 info->chip->irq_set_affinity = msi_set_affinity; 217 + info->chip->flags |= IRQCHIP_MOVE_DEFERRED; 217 218 break; 218 219 case DOMAIN_BUS_DMAR: 219 220 case DOMAIN_BUS_AMDVI: ··· 316 315 .irq_retrigger = irq_chip_retrigger_hierarchy, 317 316 .irq_compose_msi_msg = dmar_msi_compose_msg, 318 317 .irq_write_msi_msg = dmar_msi_write_msg, 319 - .flags = IRQCHIP_SKIP_SET_WAKE | 318 + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED | 320 319 IRQCHIP_AFFINITY_PRE_STARTUP, 321 320 }; 322 321
-8
arch/x86/kernel/hpet.c
··· 516 516 struct msi_domain_info *info, unsigned int virq, 517 517 irq_hw_number_t hwirq, msi_alloc_info_t *arg) 518 518 { 519 - irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); 520 519 irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL, 521 520 handle_edge_irq, arg->data, "edge"); 522 521 523 522 return 0; 524 523 } 525 524 526 - static void hpet_msi_free(struct irq_domain *domain, 527 - struct msi_domain_info *info, unsigned int virq) 528 - { 529 - irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); 530 - } 531 - 532 525 static struct msi_domain_ops hpet_msi_domain_ops = { 533 526 .msi_init = hpet_msi_init, 534 - .msi_free = hpet_msi_free, 535 527 }; 536 528 537 529 static struct msi_domain_info hpet_msi_domain_info = {
-3
arch/x86/platform/uv/uv_irq.c
··· 92 92 if (ret >= 0) { 93 93 if (info->uv.limit == UV_AFFINITY_CPU) 94 94 irq_set_status_flags(virq, IRQ_NO_BALANCING); 95 - else 96 - irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); 97 95 98 96 chip_data->pnode = uv_blade_to_pnode(info->uv.blade); 99 97 chip_data->offset = info->uv.offset; ··· 111 113 112 114 BUG_ON(nr_irqs != 1); 113 115 kfree(irq_data->chip_data); 114 - irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); 115 116 irq_clear_status_flags(virq, IRQ_NO_BALANCING); 116 117 irq_domain_free_irqs_top(domain, virq, nr_irqs); 117 118 }
+1 -1
drivers/iommu/amd/init.c
··· 2332 2332 .irq_retrigger = irq_chip_retrigger_hierarchy, 2333 2333 .irq_set_affinity = intcapxt_set_affinity, 2334 2334 .irq_set_wake = intcapxt_set_wake, 2335 - .flags = IRQCHIP_MASK_ON_SUSPEND, 2335 + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED, 2336 2336 }; 2337 2337 2338 2338 static const struct irq_domain_ops intcapxt_domain_ops = {
-1
drivers/iommu/amd/iommu.c
··· 3532 3532 irq_data->chip_data = data; 3533 3533 irq_data->chip = &amd_ir_chip; 3534 3534 irq_remapping_prepare_irte(data, cfg, info, devid, index, i); 3535 - irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT); 3536 3535 } 3537 3536 3538 3537 return 0;
-1
drivers/iommu/intel/irq_remapping.c
··· 1463 1463 else 1464 1464 irq_data->chip = &intel_ir_chip; 1465 1465 intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i); 1466 - irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT); 1467 1466 } 1468 1467 return 0; 1469 1468
+1
drivers/pci/controller/pci-hyperv.c
··· 2053 2053 .irq_set_affinity = irq_chip_set_affinity_parent, 2054 2054 #ifdef CONFIG_X86 2055 2055 .irq_ack = irq_chip_ack_parent, 2056 + .flags = IRQCHIP_MOVE_DEFERRED, 2056 2057 #elif defined(CONFIG_ARM64) 2057 2058 .irq_eoi = irq_chip_eoi_parent, 2058 2059 #endif
-6
drivers/xen/events/events_base.c
··· 722 722 INIT_RCU_WORK(&info->rwork, delayed_free_irq); 723 723 724 724 set_info_for_irq(irq, info); 725 - /* 726 - * Interrupt affinity setting can be immediate. No point 727 - * in delaying it until an interrupt is handled. 728 - */ 729 - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 730 - 731 725 INIT_LIST_HEAD(&info->eoi_list); 732 726 list_add_tail(&info->list, &xen_irq_list_head); 733 727 }