genirq: Prevent proc race against freeing of irq descriptors

Since the rework of the sparse interrupt code to actually free the
unused interrupt descriptors there exists a race between the /proc
interfaces to the irq subsystem and the code which frees the interrupt
descriptor.

CPU0 CPU1
show_interrupts()
desc = irq_to_desc(X);
free_desc(desc)
remove_from_radix_tree();
kfree(desc);
raw_spinlock_irq(&desc->lock);

/proc/interrupts is the only interface which can actively corrupt
kernel memory via the lock access. /proc/stat can only read from freed
memory. Extremely hard to trigger, but possible.

The interfaces in /proc/irq/N/ are not affected by this because the
removal of the proc file is serialized in procfs against concurrent
readers/writers. The removal happens before the descriptor is freed.

For architectures which have CONFIG_SPARSE_IRQ=n this is a non-issue
as the descriptor is never freed. It's merely cleared out with the irq
descriptor lock held. So any concurrent proc access will either see
the old correct value or the cleared out ones.

Protect the lookup and access to the irq descriptor in
show_interrupts() with the sparse_irq_lock.

Provide kstat_irqs_usr() which is protecting the lookup and access
with sparse_irq_lock and switch /proc/stat to use it.

Document the existing kstat_irqs interfaces so it's clear that the
caller needs to take care about protection. The users of these
interfaces are either not affected due to SPARSE_IRQ=n or already
protected against removal.

Fixes: 1f5a5b87f78f ("genirq: Implement a sane sparse_irq allocator")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org

+79 -2
+1 -1
fs/proc/stat.c
··· 159 160 /* sum again ? it could be updated? */ 161 for_each_irq_nr(j) 162 - seq_put_decimal_ull(p, ' ', kstat_irqs(j)); 163 164 seq_printf(p, 165 "\nctxt %llu\n"
··· 159 160 /* sum again ? it could be updated? */ 161 for_each_irq_nr(j) 162 + seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j)); 163 164 seq_printf(p, 165 "\nctxt %llu\n"
+1
include/linux/kernel_stat.h
··· 68 * Number of interrupts per specific IRQ source, since bootup 69 */ 70 extern unsigned int kstat_irqs(unsigned int irq); 71 72 /* 73 * Number of interrupts per cpu, since bootup
··· 68 * Number of interrupts per specific IRQ source, since bootup 69 */ 70 extern unsigned int kstat_irqs(unsigned int irq); 71 + extern unsigned int kstat_irqs_usr(unsigned int irq); 72 73 /* 74 * Number of interrupts per cpu, since bootup
+4
kernel/irq/internals.h
··· 78 79 #ifdef CONFIG_SPARSE_IRQ 80 static inline void irq_mark_irq(unsigned int irq) { } 81 #else 82 extern void irq_mark_irq(unsigned int irq); 83 #endif 84 85 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
··· 78 79 #ifdef CONFIG_SPARSE_IRQ 80 static inline void irq_mark_irq(unsigned int irq) { } 81 + extern void irq_lock_sparse(void); 82 + extern void irq_unlock_sparse(void); 83 #else 84 extern void irq_mark_irq(unsigned int irq); 85 + static inline void irq_lock_sparse(void) { } 86 + static inline void irq_unlock_sparse(void) { } 87 #endif 88 89 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
+52
kernel/irq/irqdesc.c
··· 132 static inline void free_masks(struct irq_desc *desc) { } 133 #endif 134 135 static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) 136 { 137 struct irq_desc *desc; ··· 178 179 unregister_irq_proc(irq, desc); 180 181 mutex_lock(&sparse_irq_lock); 182 delete_irq_desc(irq); 183 mutex_unlock(&sparse_irq_lock); ··· 590 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); 591 } 592 593 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) 594 { 595 struct irq_desc *desc = irq_to_desc(irq); ··· 607 *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; 608 } 609 610 unsigned int kstat_irqs(unsigned int irq) 611 { 612 struct irq_desc *desc = irq_to_desc(irq); ··· 625 return 0; 626 for_each_possible_cpu(cpu) 627 sum += *per_cpu_ptr(desc->kstat_irqs, cpu); 628 return sum; 629 }
··· 132 static inline void free_masks(struct irq_desc *desc) { } 133 #endif 134 135 + void irq_lock_sparse(void) 136 + { 137 + mutex_lock(&sparse_irq_lock); 138 + } 139 + 140 + void irq_unlock_sparse(void) 141 + { 142 + mutex_unlock(&sparse_irq_lock); 143 + } 144 + 145 static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) 146 { 147 struct irq_desc *desc; ··· 168 169 unregister_irq_proc(irq, desc); 170 171 + /* 172 + * sparse_irq_lock protects also show_interrupts() and 173 + * kstat_irq_usr(). Once we deleted the descriptor from the 174 + * sparse tree we can free it. Access in proc will fail to 175 + * lookup the descriptor. 176 + */ 177 mutex_lock(&sparse_irq_lock); 178 delete_irq_desc(irq); 179 mutex_unlock(&sparse_irq_lock); ··· 574 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); 575 } 576 577 + /** 578 + * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu 579 + * @irq: The interrupt number 580 + * @cpu: The cpu number 581 + * 582 + * Returns the sum of interrupt counts on @cpu since boot for 583 + * @irq. The caller must ensure that the interrupt is not removed 584 + * concurrently. 585 + */ 586 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) 587 { 588 struct irq_desc *desc = irq_to_desc(irq); ··· 582 *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; 583 } 584 585 + /** 586 + * kstat_irqs - Get the statistics for an interrupt 587 + * @irq: The interrupt number 588 + * 589 + * Returns the sum of interrupt counts on all cpus since boot for 590 + * @irq. The caller must ensure that the interrupt is not removed 591 + * concurrently. 
592 + */ 593 unsigned int kstat_irqs(unsigned int irq) 594 { 595 struct irq_desc *desc = irq_to_desc(irq); ··· 592 return 0; 593 for_each_possible_cpu(cpu) 594 sum += *per_cpu_ptr(desc->kstat_irqs, cpu); 595 + return sum; 596 + } 597 + 598 + /** 599 + * kstat_irqs_usr - Get the statistics for an interrupt 600 + * @irq: The interrupt number 601 + * 602 + * Returns the sum of interrupt counts on all cpus since boot for 603 + * @irq. Contrary to kstat_irqs() this can be called from any 604 + * preemptible context. It's protected against concurrent removal of 605 + * an interrupt descriptor when sparse irqs are enabled. 606 + */ 607 + unsigned int kstat_irqs_usr(unsigned int irq) 608 + { 609 + int sum; 610 + 611 + irq_lock_sparse(); 612 + sum = kstat_irqs(irq); 613 + irq_unlock_sparse(); 614 return sum; 615 }
+21 -1
kernel/irq/proc.c
··· 15 16 #include "internals.h" 17 18 static struct proc_dir_entry *root_irq_dir; 19 20 #ifdef CONFIG_SMP ··· 454 seq_putc(p, '\n'); 455 } 456 457 desc = irq_to_desc(i); 458 if (!desc) 459 - return 0; 460 461 raw_spin_lock_irqsave(&desc->lock, flags); 462 for_each_online_cpu(j) ··· 497 seq_putc(p, '\n'); 498 out: 499 raw_spin_unlock_irqrestore(&desc->lock, flags); 500 return 0; 501 } 502 #endif
··· 15 16 #include "internals.h" 17 18 + /* 19 + * Access rules: 20 + * 21 + * procfs protects read/write of /proc/irq/N/ files against a 22 + * concurrent free of the interrupt descriptor. remove_proc_entry() 23 + * immediately prevents new read/writes to happen and waits for 24 + * already running read/write functions to complete. 25 + * 26 + * We remove the proc entries first and then delete the interrupt 27 + * descriptor from the radix tree and free it. So it is guaranteed 28 + * that irq_to_desc(N) is valid as long as the read/writes are 29 + * permitted by procfs. 30 + * 31 + * The read from /proc/interrupts is a different problem because there 32 + * is no protection. So the lookup and the access to irqdesc 33 + * information must be protected by sparse_irq_lock. 34 + */ 35 static struct proc_dir_entry *root_irq_dir; 36 37 #ifdef CONFIG_SMP ··· 437 seq_putc(p, '\n'); 438 } 439 440 + irq_lock_sparse(); 441 desc = irq_to_desc(i); 442 if (!desc) 443 + goto outsparse; 444 445 raw_spin_lock_irqsave(&desc->lock, flags); 446 for_each_online_cpu(j) ··· 479 seq_putc(p, '\n'); 480 out: 481 raw_spin_unlock_irqrestore(&desc->lock, flags); 482 + outsparse: 483 + irq_unlock_sparse(); 484 return 0; 485 } 486 #endif