Merge tag 'irq-core-2020-12-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull irq updates from Thomas Gleixner:
"This is the second attempt after the first one failed miserably and
got zapped to unblock the rest of the interrupt related patches.

A treewide cleanup of interrupt descriptor (ab)use with all sorts of
racy accesses, inefficient and disfunctional code. The goal is to
remove the export of irq_to_desc() to prevent these things from
creeping up again"

* tag 'irq-core-2020-12-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (30 commits)
genirq: Restrict export of irq_to_desc()
xen/events: Implement irq distribution
xen/events: Reduce irq_info::spurious_cnt storage size
xen/events: Only force affinity mask for percpu interrupts
xen/events: Use immediate affinity setting
xen/events: Remove disfunct affinity spreading
xen/events: Remove unused bind_evtchn_to_irq_lateeoi()
net/mlx5: Use effective interrupt affinity
net/mlx5: Replace irq_to_desc() abuse
net/mlx4: Use effective interrupt affinity
net/mlx4: Replace irq_to_desc() abuse
PCI: mobiveil: Use irq_data_get_irq_chip_data()
PCI: xilinx-nwl: Use irq_data_get_irq_chip_data()
NTB/msi: Use irq_has_action()
mfd: ab8500-debugfs: Remove the racy fiddling with irq_desc
pinctrl: nomadik: Use irq_has_action()
drm/i915/pmu: Replace open coded kstat_irqs() copy
drm/i915/lpe_audio: Remove pointless irq_to_desc() usage
s390/irq: Use irq_desc_kstat_cpu() in show_msi_interrupt()
parisc/irq: Use irq_desc_kstat_cpu() in show_interrupts()
...
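The common thread in these patches is that random code stops dereferencing struct irq_desc and instead goes through a small set of accessors introduced or exported here: irq_has_action(), irq_desc_kstat_cpu(), irq_check_status_bit() and irq_get_effective_affinity_mask(). A minimal sketch of the resulting call pattern, assembled from the diffs below rather than lifted from any single patch (the function and variable names are illustrative only):

    /* Illustrative sketch only: names are made up, helpers are the ones added below. */
    #include <linux/interrupt.h>
    #include <linux/irqdesc.h>
    #include <linux/cpumask.h>
    #include <linux/seq_file.h>

    static void example_show_one_irq(struct seq_file *p, struct irq_desc *desc,
                                     unsigned int irq)
    {
            unsigned int cpu;

            /* Replaces peeking at desc->action, which races with free_irq() */
            if (!irq_has_action(irq))
                    return;

            seq_printf(p, "%3u: ", irq);
            /* Replaces the kstat_irqs_cpu(irq, cpu) / desc->kstat_irqs walks */
            for_each_online_cpu(cpu)
                    seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu));
            seq_putc(p, '\n');
    }

The arch show_interrupts() and sysfs changes below follow this shape, so the descriptor itself stays an irq-core implementation detail.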

+281 -228
+1 -1
arch/alpha/kernel/sys_jensen.c
···
  *
  * Code supporting the Jensen.
  */
-
+#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/mm.h>
+1 -1
arch/arm/kernel/smp.c
···
                 seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);

                 for_each_online_cpu(cpu)
-                        seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+                        seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));

                 seq_printf(p, " %s\n", ipi_types[i]);
         }
+1 -1
arch/arm64/kernel/smp.c
···
                 seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
                            prec >= 4 ? " " : "");
                 for_each_online_cpu(cpu)
-                        seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+                        seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
                 seq_printf(p, " %s\n", ipi_types[i]);
         }

+2 -5
arch/parisc/kernel/irq.c
···
         if (!action)
                 goto skip;
         seq_printf(p, "%3d: ", i);
-#ifdef CONFIG_SMP
+
         for_each_online_cpu(j)
-                seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#else
-        seq_printf(p, "%10u ", kstat_irqs(i));
-#endif
+                seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, j));

         seq_printf(p, " %14s", irq_desc_get_chip(desc)->name);
 #ifndef PARISC_IRQ_CR16_COUNTS
+1 -1
arch/s390/kernel/irq.c
···
         raw_spin_lock_irqsave(&desc->lock, flags);
         seq_printf(p, "%3d: ", irq);
         for_each_online_cpu(cpu)
-                seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+                seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu));

         if (desc->irq_data.chip)
                 seq_printf(p, " %8s", desc->irq_data.chip->name);
+1
arch/x86/kernel/topology.c
···
  *
  * Send feedback to <colpatch@us.ibm.com>
  */
+#include <linux/interrupt.h>
 #include <linux/nodemask.h>
 #include <linux/export.h>
 #include <linux/mmzone.h>
-4
drivers/gpu/drm/i915/display/intel_lpe_audio.c
···
  */
 void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
 {
-        struct irq_desc *desc;
-
         if (!HAS_LPE_AUDIO(dev_priv))
                 return;
-
-        desc = irq_to_desc(dev_priv->lpe_audio.irq);

         lpe_audio_platdev_destroy(dev_priv);

+34
drivers/gpu/drm/i915/i915_irq.c
···
  * and related files, but that will be described in separate chapters.
  */

+/*
+ * Interrupt statistic for PMU. Increments the counter only if the
+ * interrupt originated from the the GPU so interrupts from a device which
+ * shares the interrupt line are not accounted.
+ */
+static inline void pmu_irq_stats(struct drm_i915_private *i915,
+                                 irqreturn_t res)
+{
+        if (unlikely(res != IRQ_HANDLED))
+                return;
+
+        /*
+         * A clever compiler translates that into INC. A not so clever one
+         * should at least prevent store tearing.
+         */
+        WRITE_ONCE(i915->pmu.irq_count, i915->pmu.irq_count + 1);
+}
+
 typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val);
 typedef u32 (*hotplug_enables_func)(struct drm_i915_private *i915,
                                     enum hpd_pin pin);
···
                 valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
         } while (0);

+        pmu_irq_stats(dev_priv, ret);
+
         enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);

         return ret;
···

                 valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
         } while (0);
+
+        pmu_irq_stats(dev_priv, ret);

         enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);

···
         if (sde_ier)
                 raw_reg_write(regs, SDEIER, sde_ier);

+        pmu_irq_stats(i915, ret);
+
         /* IRQs are synced during runtime_suspend, we don't require a wakeref */
         enable_rpm_wakeref_asserts(&i915->runtime_pm);

···

         gen8_master_intr_enable(regs);

+        pmu_irq_stats(dev_priv, IRQ_HANDLED);
+
         return IRQ_HANDLED;
 }

···
         intr_enable(regs);

         gen11_gu_misc_irq_handler(gt, gu_misc_iir);
+
+        pmu_irq_stats(i915, IRQ_HANDLED);

         return IRQ_HANDLED;
 }
···
                 i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats);
         } while (0);

+        pmu_irq_stats(dev_priv, ret);
+
         enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);

         return ret;
···

                 i915_pipestat_irq_handler(dev_priv, iir, pipe_stats);
         } while (0);
+
+        pmu_irq_stats(dev_priv, ret);

         enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);

···

                 i965_pipestat_irq_handler(dev_priv, iir, pipe_stats);
         } while (0);
+
+        pmu_irq_stats(dev_priv, IRQ_HANDLED);

         enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);

+1 -18
drivers/gpu/drm/i915/i915_pmu.c
···
  * Copyright © 2017-2018 Intel Corporation
  */

-#include <linux/irq.h>
 #include <linux/pm_runtime.h>

 #include "gt/intel_engine.h"
···
         return HRTIMER_RESTART;
 }

-static u64 count_interrupts(struct drm_i915_private *i915)
-{
-        /* open-coded kstat_irqs() */
-        struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
-        u64 sum = 0;
-        int cpu;
-
-        if (!desc || !desc->kstat_irqs)
-                return 0;
-
-        for_each_possible_cpu(cpu)
-                sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
-
-        return sum;
-}
-
 static void i915_pmu_event_destroy(struct perf_event *event)
 {
         struct drm_i915_private *i915 =
···
                                    USEC_PER_SEC /* to MHz */);
                 break;
         case I915_PMU_INTERRUPTS:
-                val = count_interrupts(i915);
+                val = READ_ONCE(pmu->irq_count);
                 break;
         case I915_PMU_RC6_RESIDENCY:
                 val = get_rc6(&i915->gt);
+8
drivers/gpu/drm/i915/i915_pmu.h
···
          */
         ktime_t sleep_last;
         /**
+         * @irq_count: Number of interrupts
+         *
+         * Intentionally unsigned long to avoid atomics or heuristics on 32bit.
+         * 4e9 interrupts are a lot and postprocessing can really deal with an
+         * occasional wraparound easily. It's 32bit after all.
+         */
+        unsigned long irq_count;
+        /**
          * @events_attr_group: Device events attribute group.
          */
         struct attribute_group events_attr_group;
+3 -13
drivers/mfd/ab8500-debugfs.c
···
 {
         int line;

-        seq_puts(s, "name: number: number of: wake:\n");
+        seq_puts(s, "name: number: irq: number of: wake:\n");

         for (line = 0; line < num_interrupt_lines; line++) {
-                struct irq_desc *desc = irq_to_desc(line + irq_first);
-
-                seq_printf(s, "%3i: %6i %4i",
+                seq_printf(s, "%3i: %4i %6i %4i\n",
                            line,
+                           line + irq_first,
                            num_interrupts[line],
                            num_wake_interrupts[line]);
-
-                if (desc && desc->name)
-                        seq_printf(s, "-%-8s", desc->name);
-                if (desc && desc->action) {
-                        struct irqaction *action = desc->action;
-
-                        seq_printf(s, " %s", action->name);
-                        while ((action = action->next) != NULL)
-                                seq_printf(s, ", %s", action->name);
         }
         seq_putc(s, '\n');
 }
+3 -5
drivers/net/ethernet/mellanox/mlx4/en_cq.c
···
                           int cq_idx)
 {
         struct mlx4_en_dev *mdev = priv->mdev;
-        int err = 0;
+        int irq, err = 0;
         int timestamp_en = 0;
         bool assigned_eq = false;

···

                         assigned_eq = true;
                 }
-
-                cq->irq_desc =
-                        irq_to_desc(mlx4_eq_get_irq(mdev->dev,
-                                                    cq->vector));
+                irq = mlx4_eq_get_irq(mdev->dev, cq->vector);
+                cq->aff_mask = irq_get_effective_affinity_mask(irq);
         } else {
                 /* For TX we use the same irq per
                 ring we assigned for the RX */
+1 -5
drivers/net/ethernet/mellanox/mlx4/en_rx.c
···

         /* If we used up all the quota - we're probably not done yet... */
         if (done == budget || !clean_complete) {
-                const struct cpumask *aff;
-                struct irq_data *idata;
                 int cpu_curr;

                 /* in case we got here because of !clean_complete */
                 done = budget;

                 cpu_curr = smp_processor_id();
-                idata = irq_desc_get_irq_data(cq->irq_desc);
-                aff = irq_data_get_affinity_mask(idata);

-                if (likely(cpumask_test_cpu(cpu_curr, aff)))
+                if (likely(cpumask_test_cpu(cpu_curr, cq->aff_mask)))
                         return budget;

                 /* Current cpu is not according to smp_irq_affinity -
+2 -1
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
···
 #endif
 #include <linux/cpu_rmap.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/irq.h>
 #include <net/xdp.h>

 #include <linux/mlx4/device.h>
···
         struct mlx4_cqe *buf;
 #define MLX4_EN_OPCODE_ERROR 0x1e

-        struct irq_desc *irq_desc;
+        const struct cpumask *aff_mask;
 };

 struct mlx4_en_port_profile {
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en.h
···
         spinlock_t async_icosq_lock;

         /* data path - accessed per napi poll */
-        struct irq_desc *irq_desc;
+        const struct cpumask *aff_mask;
         struct mlx5e_ch_stats *stats;

         /* control */
-1
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
···
         c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
         c->num_tc = params->num_tc;
         c->stats = &priv->port_ptp_stats.ch;
-        c->irq_desc = irq_to_desc(irq);
         c->lag_port = lag_port;

         netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64);
-1
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
···
         u8 lag_port;

         /* data path - accessed per napi poll */
-        struct irq_desc *irq_desc;
         struct mlx5e_ch_stats *stats;

         /* control */
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
···
         c->num_tc = params->num_tc;
         c->xdp = !!params->xdp_prog;
         c->stats = &priv->channel_stats[ix].ch;
-        c->irq_desc = irq_to_desc(irq);
+        c->aff_mask = irq_get_effective_affinity_mask(irq);
         c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);

         netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
+1 -5
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
···
 static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
 {
         int current_cpu = smp_processor_id();
-        const struct cpumask *aff;
-        struct irq_data *idata;

-        idata = irq_desc_get_irq_data(c->irq_desc);
-        aff = irq_data_get_affinity_mask(idata);
-        return cpumask_test_cpu(current_cpu, aff);
+        return cpumask_test_cpu(current_cpu, c->aff_mask);
 }

 static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
+1 -3
drivers/ntb/msi.c
···
                        struct ntb_msi_desc *msi_desc)
 {
         struct msi_desc *entry;
-        struct irq_desc *desc;
         int ret;

         if (!ntb->msi)
                 return -EINVAL;

         for_each_pci_msi_entry(entry, ntb->pdev) {
-                desc = irq_to_desc(entry->irq);
-                if (desc->action)
+                if (irq_has_action(entry->irq))
                         continue;

                 ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler,
+2 -6
drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
···

 static void mobiveil_mask_intx_irq(struct irq_data *data)
 {
-        struct irq_desc *desc = irq_to_desc(data->irq);
-        struct mobiveil_pcie *pcie;
+        struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
         struct mobiveil_root_port *rp;
         unsigned long flags;
         u32 mask, shifted_val;

-        pcie = irq_desc_get_chip_data(desc);
         rp = &pcie->rp;
         mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
         raw_spin_lock_irqsave(&rp->intx_mask_lock, flags);
···

 static void mobiveil_unmask_intx_irq(struct irq_data *data)
 {
-        struct irq_desc *desc = irq_to_desc(data->irq);
-        struct mobiveil_pcie *pcie;
+        struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
         struct mobiveil_root_port *rp;
         unsigned long flags;
         u32 shifted_val, mask;

-        pcie = irq_desc_get_chip_data(desc);
         rp = &pcie->rp;
         mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
         raw_spin_lock_irqsave(&rp->intx_mask_lock, flags);
+2 -6
drivers/pci/controller/pcie-xilinx-nwl.c
···

 static void nwl_mask_leg_irq(struct irq_data *data)
 {
-        struct irq_desc *desc = irq_to_desc(data->irq);
-        struct nwl_pcie *pcie;
+        struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
         unsigned long flags;
         u32 mask;
         u32 val;

-        pcie = irq_desc_get_chip_data(desc);
         mask = 1 << (data->hwirq - 1);
         raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
         val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
···

 static void nwl_unmask_leg_irq(struct irq_data *data)
 {
-        struct irq_desc *desc = irq_to_desc(data->irq);
-        struct nwl_pcie *pcie;
+        struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
         unsigned long flags;
         u32 mask;
         u32 val;

-        pcie = irq_desc_get_chip_data(desc);
         mask = 1 << (data->hwirq - 1);
         raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
         val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
+6 -5
drivers/pinctrl/nomadik/pinctrl-nomadik.c
···
                            (mode < 0) ? "unknown" : modes[mode]);
         } else {
                 int irq = chip->to_irq(chip, offset);
-                struct irq_desc *desc = irq_to_desc(irq);
                 const int pullidx = pull ? 1 : 0;
+                bool wake;
                 int val;
                 static const char * const pulls[] = {
                         "none ",
···
                  * This races with request_irq(), set_irq_type(),
                  * and set_irq_wake() ... but those are "rare".
                  */
-                if (irq > 0 && desc && desc->action) {
+                if (irq > 0 && irq_has_action(irq)) {
                         char *trigger;
+                        bool wake;

                         if (nmk_chip->edge_rising & BIT(offset))
                                 trigger = "edge-rising";
···
                         else
                                 trigger = "edge-undefined";

+                        wake = !!(nmk_chip->real_wake & BIT(offset));
+
                         seq_printf(s, " irq-%d %s%s",
-                                   irq, trigger,
-                                   irqd_is_wakeup_set(&desc->irq_data)
-                                   ? " wakeup" : "");
+                                   irq, trigger, wake ? " wakeup" : "");
                 }
         }
         clk_disable(nmk_chip->clk);
+116 -66
drivers/xen/events/events_base.c
···
         struct list_head list;
         struct list_head eoi_list;
         short refcnt;
-        short spurious_cnt;
+        u8 spurious_cnt;
+        u8 is_accounted;
         enum xen_irq_type type; /* type */
         unsigned irq;
         evtchn_port_t evtchn;   /* event channel */
···

 /* IRQ <-> IPI mapping */
 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
+
+/* Event channel distribution data */
+static atomic_t channels_on_cpu[NR_CPUS];

 static int **evtchn_to_irq;
 #ifdef CONFIG_X86
···
         irq_set_chip_data(irq, info);
 }

+/* Per CPU channel accounting */
+static void channels_on_cpu_dec(struct irq_info *info)
+{
+        if (!info->is_accounted)
+                return;
+
+        info->is_accounted = 0;
+
+        if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+                return;
+
+        WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
+}
+
+static void channels_on_cpu_inc(struct irq_info *info)
+{
+        if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+                return;
+
+        if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
+                                            INT_MAX)))
+                return;
+
+        info->is_accounted = 1;
+}
+
 /* Constructors for packed IRQ information. */
 static int xen_irq_info_common_setup(struct irq_info *info,
                                      unsigned irq,
···
 {
         set_evtchn_to_irq(info->evtchn, -1);
         info->evtchn = 0;
+        channels_on_cpu_dec(info);
 }

 /*
···
         return info->u.pirq.flags & PIRQ_NEEDS_EOI;
 }

-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
+static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+                               bool force_affinity)
 {
         int irq = get_evtchn_to_irq(evtchn);
         struct irq_info *info = info_for_irq(irq);

         BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
-        cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
-#endif
+
+        if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+                cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+                cpumask_copy(irq_get_effective_affinity_mask(irq),
+                             cpumask_of(cpu));
+        }
+
         xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);

+        channels_on_cpu_dec(info);
         info->cpu = cpu;
+        channels_on_cpu_inc(info);
 }

 /**
···
                 return;

         if (spurious) {
-                if ((1 << info->spurious_cnt) < (HZ << 2))
-                        info->spurious_cnt++;
+                if ((1 << info->spurious_cnt) < (HZ << 2)) {
+                        if (info->spurious_cnt != 0xFF)
+                                info->spurious_cnt++;
+                }
                 if (info->spurious_cnt > 1) {
                         delay = 1 << (info->spurious_cnt - 2);
                         if (delay > HZ)
···
 {
         struct irq_info *info;

-#ifdef CONFIG_SMP
-        /* By default all event channels notify CPU#0. */
-        cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
-#endif
-
         info = kzalloc(sizeof(*info), GFP_KERNEL);
         if (info == NULL)
                 panic("Unable to allocate metadata for IRQ%d\n", irq);
···
         info->refcnt = -1;

         set_info_for_irq(irq, info);
+        /*
+         * Interrupt affinity setting can be immediate. No point
+         * in delaying it until an interrupt is handled.
+         */
+        irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);

         INIT_LIST_HEAD(&info->eoi_list);
         list_add_tail(&info->list, &xen_irq_list_head);
···
         if (!VALID_EVTCHN(evtchn))
                 return;

-        if (unlikely(irqd_is_setaffinity_pending(data)) &&
-            likely(!irqd_irq_disabled(data))) {
-                int masked = test_and_set_mask(evtchn);
-
-                clear_evtchn(evtchn);
-
-                irq_move_masked_irq(data);
-
-                if (!masked)
-                        unmask_evtchn(evtchn);
-        } else
-                clear_evtchn(evtchn);
+        clear_evtchn(evtchn);

         if (pirq_needs_eoi(data->irq)) {
                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
···
                 goto err;

         info->evtchn = evtchn;
-        bind_evtchn_to_cpu(evtchn, 0);
+        bind_evtchn_to_cpu(evtchn, 0, false);

         rc = xen_evtchn_port_setup(evtchn);
         if (rc)
···
                         irq = ret;
                         goto out;
                 }
-                /* New interdomain events are bound to VCPU 0. */
-                bind_evtchn_to_cpu(evtchn, 0);
+                /*
+                 * New interdomain events are initially bound to vCPU0 This
+                 * is required to setup the event channel in the first
+                 * place and also important for UP guests because the
+                 * affinity setting is not invoked on them so nothing would
+                 * bind the channel.
+                 */
+                bind_evtchn_to_cpu(evtchn, 0, false);
         } else {
                 struct irq_info *info = info_for_irq(irq);
                 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
···
         return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip);
 }
 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
-
-int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
-{
-        return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);

 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 {
···
                         irq = ret;
                         goto out;
                 }
-                bind_evtchn_to_cpu(evtchn, cpu);
+                /*
+                 * Force the affinity mask to the target CPU so proc shows
+                 * the correct target.
+                 */
+                bind_evtchn_to_cpu(evtchn, cpu, true);
         } else {
                 struct irq_info *info = info_for_irq(irq);
                 WARN_ON(info == NULL || info->type != IRQT_IPI);
···
                         goto out;
                 }

-                bind_evtchn_to_cpu(evtchn, cpu);
+                /*
+                 * Force the affinity mask for percpu interrupts so proc
+                 * shows the correct target.
+                 */
+                bind_evtchn_to_cpu(evtchn, cpu, percpu);
         } else {
                 struct irq_info *info = info_for_irq(irq);
                 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
···

         mutex_unlock(&irq_mapping_update_lock);

-        bind_evtchn_to_cpu(evtchn, info->cpu);
-        /* This will be deferred until interrupt is processed */
-        irq_set_affinity(irq, cpumask_of(info->cpu));
+        bind_evtchn_to_cpu(evtchn, info->cpu, false);

         /* Unmask the event channel. */
         enable_irq(irq);
···
          * it, but don't do the xenlinux-level rebind in that case.
          */
         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
-                bind_evtchn_to_cpu(evtchn, tcpu);
+                bind_evtchn_to_cpu(evtchn, tcpu, false);

         if (!masked)
                 unmask_evtchn(evtchn);
···
         return 0;
 }

+/*
+ * Find the CPU within @dest mask which has the least number of channels
+ * assigned. This is not precise as the per cpu counts can be modified
+ * concurrently.
+ */
+static unsigned int select_target_cpu(const struct cpumask *dest)
+{
+        unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
+
+        for_each_cpu_and(cpu, dest, cpu_online_mask) {
+                unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
+
+                if (curch < minch) {
+                        minch = curch;
+                        best_cpu = cpu;
+                }
+        }
+
+        /*
+         * Catch the unlikely case that dest contains no online CPUs. Can't
+         * recurse.
+         */
+        if (best_cpu == UINT_MAX)
+                return select_target_cpu(cpu_online_mask);
+
+        return best_cpu;
+}
+
 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
                             bool force)
 {
-        unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
-        int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
+        unsigned int tcpu = select_target_cpu(dest);
+        int ret;

+        ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
         if (!ret)
                 irq_data_update_effective_affinity(data, cpumask_of(tcpu));

         return ret;
 }
-
-/* To be called with desc->lock held. */
-int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu)
-{
-        struct irq_data *d = irq_desc_get_irq_data(desc);
-
-        return set_affinity_irq(d, cpumask_of(tcpu), false);
-}
-EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn);

 static void enable_dynirq(struct irq_data *data)
 {
···
         if (!VALID_EVTCHN(evtchn))
                 return;

-        if (unlikely(irqd_is_setaffinity_pending(data)) &&
-            likely(!irqd_irq_disabled(data))) {
-                int masked = test_and_set_mask(evtchn);
-
-                clear_evtchn(evtchn);
-
-                irq_move_masked_irq(data);
-
-                if (!masked)
-                        unmask_evtchn(evtchn);
-        } else
-                clear_evtchn(evtchn);
+        clear_evtchn(evtchn);
 }

 static void mask_ack_dynirq(struct irq_data *data)
···

                 /* Record the new mapping. */
                 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
-                bind_evtchn_to_cpu(evtchn, cpu);
+                /* The affinity mask is still valid */
+                bind_evtchn_to_cpu(evtchn, cpu, false);
         }
 }

···

                 /* Record the new mapping. */
                 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
-                bind_evtchn_to_cpu(evtchn, cpu);
+                /* The affinity mask is still valid */
+                bind_evtchn_to_cpu(evtchn, cpu, false);
         }
 }

···
         xen_evtchn_resume();

         /* No IRQ <-> event-channel mappings. */
-        list_for_each_entry(info, &xen_irq_list_head, list)
-                info->evtchn = 0; /* zap event-channel binding */
+        list_for_each_entry(info, &xen_irq_list_head, list) {
+                /* Zap event-channel binding */
+                info->evtchn = 0;
+                /* Adjust accounting */
+                channels_on_cpu_dec(info);
+        }

         clear_evtchn_to_irq_all();

+1 -33
drivers/xen/evtchn.c
···
         del_evtchn(u, evtchn);
 }

-static DEFINE_PER_CPU(int, bind_last_selected_cpu);
-
-static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn)
-{
-        unsigned int selected_cpu, irq;
-        struct irq_desc *desc;
-        unsigned long flags;
-
-        irq = irq_from_evtchn(evtchn);
-        desc = irq_to_desc(irq);
-
-        if (!desc)
-                return;
-
-        raw_spin_lock_irqsave(&desc->lock, flags);
-        selected_cpu = this_cpu_read(bind_last_selected_cpu);
-        selected_cpu = cpumask_next_and(selected_cpu,
-                        desc->irq_common_data.affinity, cpu_online_mask);
-
-        if (unlikely(selected_cpu >= nr_cpu_ids))
-                selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
-                                cpu_online_mask);
-
-        this_cpu_write(bind_last_selected_cpu, selected_cpu);
-
-        /* unmask expects irqs to be disabled */
-        xen_set_affinity_evtchn(desc, selected_cpu);
-        raw_spin_unlock_irqrestore(&desc->lock, flags);
-}
-
 static long evtchn_ioctl(struct file *file,
                          unsigned int cmd, unsigned long arg)
 {
···
                         break;

                 rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
-                if (rc == 0) {
+                if (rc == 0)
                         rc = bind_interdomain.local_port;
-                        evtchn_bind_interdom_next_vcpu(rc);
-                }
                 break;
         }

+1
include/linux/interrupt.h
···
 # define local_irq_enable_in_hardirq() local_irq_enable()
 #endif

+bool irq_has_action(unsigned int irq);
 extern void disable_irq_nosync(unsigned int irq);
 extern bool disable_hardirq(unsigned int irq);
 extern void disable_irq(unsigned int irq);
+7
include/linux/irq.h
···
 }
 #endif

+static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq)
+{
+        struct irq_data *d = irq_get_irq_data(irq);
+
+        return d ? irq_data_get_effective_affinity_mask(d) : NULL;
+}
+
 unsigned int arch_dynirq_lower_bound(unsigned int from);

 int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
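This helper is what lets the mlx4/mlx5 patches above cache the effective affinity mask instead of stashing an irq_desc pointer in the channel. A hedged sketch of that driver-side pattern, with hypothetical names (example_channel and friends), assuming the irq number is known at channel setup time:

    #include <linux/irq.h>
    #include <linux/smp.h>
    #include <linux/cpumask.h>

    struct example_channel {
            const struct cpumask *aff_mask; /* cached once at setup */
    };

    static void example_channel_setup(struct example_channel *c, unsigned int irq)
    {
            /* The effective mask reflects where the interrupt is actually routed */
            c->aff_mask = irq_get_effective_affinity_mask(irq);
    }

    /* Hot path, e.g. a NAPI poll routine deciding whether to keep polling */
    static bool example_no_affinity_change(const struct example_channel *c)
    {
            return c->aff_mask && cpumask_test_cpu(smp_processor_id(), c->aff_mask);
    }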
+16 -24
include/linux/irqdesc.h
···
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif

+static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc,
+                                              unsigned int cpu)
+{
+        return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
+}
+
 static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
 {
         return container_of(data->common, struct irq_desc, irq_common_data);
···
 /* Test to see if a driver has successfully requested an irq */
 static inline int irq_desc_has_action(struct irq_desc *desc)
 {
-        return desc->action != NULL;
-}
-
-static inline int irq_has_action(unsigned int irq)
-{
-        return irq_desc_has_action(irq_to_desc(irq));
+        return desc && desc->action != NULL;
 }

 /**
···
         data->chip = chip;
 }

+bool irq_check_status_bit(unsigned int irq, unsigned int bitmask);
+
 static inline bool irq_balancing_disabled(unsigned int irq)
 {
-        struct irq_desc *desc;
-
-        desc = irq_to_desc(irq);
-        return desc->status_use_accessors & IRQ_NO_BALANCING_MASK;
+        return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK);
 }

 static inline bool irq_is_percpu(unsigned int irq)
 {
-        struct irq_desc *desc;
-
-        desc = irq_to_desc(irq);
-        return desc->status_use_accessors & IRQ_PER_CPU;
+        return irq_check_status_bit(irq, IRQ_PER_CPU);
 }

 static inline bool irq_is_percpu_devid(unsigned int irq)
 {
-        struct irq_desc *desc;
-
-        desc = irq_to_desc(irq);
-        return desc->status_use_accessors & IRQ_PER_CPU_DEVID;
+        return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID);
 }

+void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
+                             struct lock_class_key *request_class);
 static inline void
 irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
                       struct lock_class_key *request_class)
 {
-        struct irq_desc *desc = irq_to_desc(irq);
-
-        if (desc) {
-                lockdep_set_class(&desc->lock, lock_class);
-                lockdep_set_class(&desc->request_mutex, request_class);
-        }
+        if (IS_ENABLED(CONFIG_LOCKDEP))
+                __irq_set_lockdep_class(irq, lock_class, request_class);
 }

 #endif
-1
include/linux/kernel_stat.h
···
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
-extern unsigned int kstat_irqs(unsigned int irq);
 extern unsigned int kstat_irqs_usr(unsigned int irq);

 /*
+27 -18
kernel/irq/irqdesc.c
···
                                      struct kobj_attribute *attr, char *buf)
 {
         struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
-        int cpu, irq = desc->irq_data.irq;
         ssize_t ret = 0;
         char *p = "";
+        int cpu;

         for_each_possible_cpu(cpu) {
-                unsigned int c = kstat_irqs_cpu(irq, cpu);
+                unsigned int c = irq_desc_kstat_cpu(desc, cpu);

                 ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c);
                 p = ",";
···
 {
         return radix_tree_lookup(&irq_desc_tree, irq);
 }
-EXPORT_SYMBOL(irq_to_desc);
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+EXPORT_SYMBOL_GPL(irq_to_desc);
+#endif

 static void delete_irq_desc(unsigned int irq)
 {
···
         return desc->istate & IRQS_NMI;
 }

-/**
- * kstat_irqs - Get the statistics for an interrupt
- * @irq: The interrupt number
- *
- * Returns the sum of interrupt counts on all cpus since boot for
- * @irq. The caller must ensure that the interrupt is not removed
- * concurrently.
- */
-unsigned int kstat_irqs(unsigned int irq)
+static unsigned int kstat_irqs(unsigned int irq)
 {
         struct irq_desc *desc = irq_to_desc(irq);
         unsigned int sum = 0;
···
         if (!irq_settings_is_per_cpu_devid(desc) &&
             !irq_settings_is_per_cpu(desc) &&
             !irq_is_nmi(desc))
-                return desc->tot_count;
+                return data_race(desc->tot_count);

         for_each_possible_cpu(cpu)
-                sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
+                sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu));
         return sum;
 }

 /**
- * kstat_irqs_usr - Get the statistics for an interrupt
+ * kstat_irqs_usr - Get the statistics for an interrupt from thread context
  * @irq: The interrupt number
  *
  * Returns the sum of interrupt counts on all cpus since boot for @irq.
- * Contrary to kstat_irqs() this can be called from any context.
- * It uses rcu since a concurrent removal of an interrupt descriptor is
- * observing an rcu grace period before delayed_free_desc()/irq_kobj_release().
+ *
+ * It uses rcu to protect the access since a concurrent removal of an
+ * interrupt descriptor is observing an rcu grace period before
+ * delayed_free_desc()/irq_kobj_release().
  */
 unsigned int kstat_irqs_usr(unsigned int irq)
···
         rcu_read_unlock();
         return sum;
 }
+
+#ifdef CONFIG_LOCKDEP
+void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
+                             struct lock_class_key *request_class)
+{
+        struct irq_desc *desc = irq_to_desc(irq);
+
+        if (desc) {
+                lockdep_set_class(&desc->lock, lock_class);
+                lockdep_set_class(&desc->request_mutex, request_class);
+        }
+}
+EXPORT_SYMBOL_GPL(__irq_set_lockdep_class);
+#endif
+37
kernel/irq/manage.c
···
         return err;
 }
 EXPORT_SYMBOL_GPL(irq_set_irqchip_state);
+
+/**
+ * irq_has_action - Check whether an interrupt is requested
+ * @irq: The linux irq number
+ *
+ * Returns: A snapshot of the current state
+ */
+bool irq_has_action(unsigned int irq)
+{
+        bool res;
+
+        rcu_read_lock();
+        res = irq_desc_has_action(irq_to_desc(irq));
+        rcu_read_unlock();
+        return res;
+}
+EXPORT_SYMBOL_GPL(irq_has_action);
+
+/**
+ * irq_check_status_bit - Check whether bits in the irq descriptor status are set
+ * @irq: The linux irq number
+ * @bitmask: The bitmask to evaluate
+ *
+ * Returns: True if one of the bits in @bitmask is set
+ */
+bool irq_check_status_bit(unsigned int irq, unsigned int bitmask)
+{
+        struct irq_desc *desc;
+        bool res = false;
+
+        rcu_read_lock();
+        desc = irq_to_desc(irq);
+        if (desc)
+                res = !!(desc->status_use_accessors & bitmask);
+        rcu_read_unlock();
+        return res;
+}
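Both new accessors take the RCU read lock internally, which is why the NTB and pinctrl-nomadik call sites above no longer need to look up or lock the descriptor themselves. A small usage sketch under that assumption (the wrapper function is hypothetical, not part of the series):

    #include <linux/interrupt.h>
    #include <linux/irq.h>
    #include <linux/irqdesc.h>

    /* True if nothing is requested on this irq and irq balancing is allowed. */
    static bool example_irq_is_free_and_balanced(unsigned int irq)
    {
            return !irq_has_action(irq) &&
                   !irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK);
    }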
+3 -2
kernel/irq/proc.c
···
         if (!desc || irq_settings_is_hidden(desc))
                 goto outsparse;

-        if (desc->kstat_irqs)
+        if (desc->kstat_irqs) {
                 for_each_online_cpu(j)
-                        any_count |= *per_cpu_ptr(desc->kstat_irqs, j);
+                        any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j));
+        }

         if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)
                 goto outsparse;