Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity

When handling writes to /proc/irq, the current code reprograms RTE
entries directly. This is not recommended and could potentially cause
chipsets to lock up, or cause interrupts to be missed.

CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the
interrupt is pending. The same needs to be done for /proc/irq handling as well.
Otherwise user space irq balancers are really not doing the right thing.

- Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for
lack of a generic name.
- Moved move_irq out of IRQ_BALANCE, and added the same to X86_64
- Added new proc handler for write, so we can do deferred write at irq
handling time.
- Display of /proc/irq/XX/smp_affinity used to show CPU_MASK_ALL; instead
it now shows only the active CPU mask, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating
when using generic irq framework.

Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off.
Tested UP builds as well.

MSI testing: TBD: I have cards, but need to look for an x-over cable, although I
did test an earlier version of this patch. Will test in a couple of days.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Zwane Mwaikambo <zwane@holomorphy.com>
Grudgingly-acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Coywolf Qi Hunt <coywolf@lovecn.org>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Ashok Raj and committed by Linus Torvalds
54d5d424 f63ed39c

+253 -134
+5
arch/i386/Kconfig
··· 1318 1318 bool 1319 1319 default y 1320 1320 1321 + config GENERIC_PENDING_IRQ 1322 + bool 1323 + depends on GENERIC_HARDIRQS && SMP 1324 + default y 1325 + 1321 1326 config X86_SMP 1322 1327 bool 1323 1328 depends on SMP && !X86_VOYAGER
+29 -26
arch/i386/kernel/io_apic.c
··· 33 33 #include <linux/acpi.h> 34 34 #include <linux/module.h> 35 35 #include <linux/sysdev.h> 36 + 36 37 #include <asm/io.h> 37 38 #include <asm/smp.h> 38 39 #include <asm/desc.h> ··· 223 222 clear_IO_APIC_pin(apic, pin); 224 223 } 225 224 225 + #ifdef CONFIG_SMP 226 226 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) 227 227 { 228 228 unsigned long flags; 229 229 int pin; 230 230 struct irq_pin_list *entry = irq_2_pin + irq; 231 231 unsigned int apicid_value; 232 + cpumask_t tmp; 232 233 234 + cpus_and(tmp, cpumask, cpu_online_map); 235 + if (cpus_empty(tmp)) 236 + tmp = TARGET_CPUS; 237 + 238 + cpus_and(cpumask, tmp, CPU_MASK_ALL); 239 + 233 240 apicid_value = cpu_mask_to_apicid(cpumask); 234 241 /* Prepare to do the io_apic_write */ 235 242 apicid_value = apicid_value << 24; ··· 251 242 break; 252 243 entry = irq_2_pin + entry->next; 253 244 } 245 + set_irq_info(irq, cpumask); 254 246 spin_unlock_irqrestore(&ioapic_lock, flags); 255 247 } 256 248 ··· 269 259 # define Dprintk(x...) 
270 260 # endif 271 261 272 - cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; 273 262 274 263 #define IRQBALANCE_CHECK_ARCH -999 275 264 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; ··· 337 328 cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); 338 329 new_cpu = move(cpu, allowed_mask, now, 1); 339 330 if (cpu != new_cpu) { 340 - irq_desc_t *desc = irq_desc + irq; 341 - unsigned long flags; 342 - 343 - spin_lock_irqsave(&desc->lock, flags); 344 - pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); 345 - spin_unlock_irqrestore(&desc->lock, flags); 331 + set_pending_irq(irq, cpumask_of_cpu(new_cpu)); 346 332 } 347 333 } 348 334 ··· 532 528 cpus_and(tmp, target_cpu_mask, allowed_mask); 533 529 534 530 if (!cpus_empty(tmp)) { 535 - irq_desc_t *desc = irq_desc + selected_irq; 536 - unsigned long flags; 537 531 538 532 Dprintk("irq = %d moved to cpu = %d\n", 539 533 selected_irq, min_loaded); 540 534 /* mark for change destination */ 541 - spin_lock_irqsave(&desc->lock, flags); 542 - pending_irq_balance_cpumask[selected_irq] = 543 - cpumask_of_cpu(min_loaded); 544 - spin_unlock_irqrestore(&desc->lock, flags); 535 + set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); 536 + 545 537 /* Since we made a change, come back sooner to 546 538 * check for more variation. 547 539 */ ··· 568 568 569 569 /* push everything to CPU 0 to give us a starting point. 
*/ 570 570 for (i = 0 ; i < NR_IRQS ; i++) { 571 - pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); 571 + pending_irq_cpumask[i] = cpumask_of_cpu(0); 572 + set_pending_irq(i, cpumask_of_cpu(0)); 572 573 } 573 574 574 575 for ( ; ; ) { ··· 648 647 649 648 __setup("noirqbalance", irqbalance_disable); 650 649 651 - static inline void move_irq(int irq) 652 - { 653 - /* note - we hold the desc->lock */ 654 - if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { 655 - set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); 656 - cpus_clear(pending_irq_balance_cpumask[irq]); 657 - } 658 - } 659 - 660 650 late_initcall(balanced_irq_init); 661 - 662 - #else /* !CONFIG_IRQBALANCE */ 663 - static inline void move_irq(int irq) { } 664 651 #endif /* CONFIG_IRQBALANCE */ 652 + #endif /* CONFIG_SMP */ 665 653 666 654 #ifndef CONFIG_SMP 667 655 void fastcall send_IPI_self(int vector) ··· 810 820 * we need to reprogram the ioredtbls to cater for the cpus which have come online 811 821 * so mask in all cases should simply be TARGET_CPUS 812 822 */ 823 + #ifdef CONFIG_SMP 813 824 void __init setup_ioapic_dest(void) 814 825 { 815 826 int pin, ioapic, irq, irq_entry; ··· 829 838 830 839 } 831 840 } 841 + #endif 832 842 833 843 /* 834 844 * EISA Edge/Level control register, ELCR ··· 1241 1249 spin_lock_irqsave(&ioapic_lock, flags); 1242 1250 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); 1243 1251 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); 1252 + set_native_irq_info(irq, TARGET_CPUS); 1244 1253 spin_unlock_irqrestore(&ioapic_lock, flags); 1245 1254 } 1246 1255 } ··· 1937 1944 { 1938 1945 int irq = vector_to_irq(vector); 1939 1946 1947 + move_irq(vector); 1940 1948 ack_edge_ioapic_irq(irq); 1941 1949 } 1942 1950 ··· 1952 1958 { 1953 1959 int irq = vector_to_irq(vector); 1954 1960 1961 + move_irq(vector); 1955 1962 end_level_ioapic_irq(irq); 1956 1963 } 1957 1964 ··· 1970 1975 unmask_IO_APIC_irq(irq); 1971 1976 } 1972 1977 1978 + #ifdef CONFIG_SMP 
1973 1979 static void set_ioapic_affinity_vector (unsigned int vector, 1974 1980 cpumask_t cpu_mask) 1975 1981 { 1976 1982 int irq = vector_to_irq(vector); 1977 1983 1984 + set_native_irq_info(vector, cpu_mask); 1978 1985 set_ioapic_affinity_irq(irq, cpu_mask); 1979 1986 } 1987 + #endif 1980 1988 #endif 1981 1989 1982 1990 /* ··· 1998 2000 .disable = disable_edge_ioapic, 1999 2001 .ack = ack_edge_ioapic, 2000 2002 .end = end_edge_ioapic, 2003 + #ifdef CONFIG_SMP 2001 2004 .set_affinity = set_ioapic_affinity, 2005 + #endif 2002 2006 }; 2003 2007 2004 2008 static struct hw_interrupt_type ioapic_level_type = { ··· 2011 2011 .disable = disable_level_ioapic, 2012 2012 .ack = mask_and_ack_level_ioapic, 2013 2013 .end = end_level_ioapic, 2014 + #ifdef CONFIG_SMP 2014 2015 .set_affinity = set_ioapic_affinity, 2016 + #endif 2015 2017 }; 2016 2018 2017 2019 static inline void init_IO_APIC_traps(void) ··· 2571 2569 spin_lock_irqsave(&ioapic_lock, flags); 2572 2570 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); 2573 2571 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); 2572 + set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); 2574 2573 spin_unlock_irqrestore(&ioapic_lock, flags); 2575 2574 2576 2575 return 0;
+5
arch/ia64/Kconfig
··· 434 434 bool 435 435 default y 436 436 437 + config GENERIC_PENDING_IRQ 438 + bool 439 + depends on GENERIC_HARDIRQS && SMP 440 + default y 441 + 437 442 source "arch/ia64/hp/sim/Kconfig" 438 443 439 444 source "arch/ia64/oprofile/Kconfig"
+1 -38
arch/ia64/kernel/irq.c
··· 91 91 } 92 92 93 93 #ifdef CONFIG_SMP 94 - /* 95 - * This is updated when the user sets irq affinity via /proc 96 - */ 97 - static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; 98 - static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)]; 99 - 100 94 static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; 101 - 102 - /* 103 - * Arch specific routine for deferred write to iosapic rte to reprogram 104 - * intr destination. 105 - */ 106 - void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) 107 - { 108 - pending_irq_cpumask[irq] = mask_val; 109 - } 110 95 111 96 void set_irq_affinity_info (unsigned int irq, int hwid, int redir) 112 97 { ··· 101 116 102 117 if (irq < NR_IRQS) { 103 118 irq_affinity[irq] = mask; 119 + set_irq_info(irq, mask); 104 120 irq_redir[irq] = (char) (redir & 0xff); 105 121 } 106 122 } 107 - 108 - 109 - void move_irq(int irq) 110 - { 111 - /* note - we hold desc->lock */ 112 - cpumask_t tmp; 113 - irq_desc_t *desc = irq_descp(irq); 114 - int redir = test_bit(irq, pending_irq_redir); 115 - 116 - if (unlikely(!desc->handler->set_affinity)) 117 - return; 118 - 119 - if (!cpus_empty(pending_irq_cpumask[irq])) { 120 - cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); 121 - if (unlikely(!cpus_empty(tmp))) { 122 - desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0), 123 - pending_irq_cpumask[irq]); 124 - } 125 - cpus_clear(pending_irq_cpumask[irq]); 126 - } 127 - } 128 - 129 - 130 123 #endif /* CONFIG_SMP */ 131 124 132 125 #ifdef CONFIG_HOTPLUG_CPU
+5
arch/x86_64/Kconfig
··· 441 441 bool 442 442 default y 443 443 444 + config GENERIC_PENDING_IRQ 445 + bool 446 + depends on GENERIC_HARDIRQS && SMP 447 + default y 448 + 444 449 menu "Power management options" 445 450 446 451 source kernel/power/Kconfig
+64 -38
arch/x86_64/kernel/io_apic.c
··· 78 78 #define vector_to_irq(vector) (vector) 79 79 #endif 80 80 81 + #define __DO_ACTION(R, ACTION, FINAL) \ 82 + \ 83 + { \ 84 + int pin; \ 85 + struct irq_pin_list *entry = irq_2_pin + irq; \ 86 + \ 87 + for (;;) { \ 88 + unsigned int reg; \ 89 + pin = entry->pin; \ 90 + if (pin == -1) \ 91 + break; \ 92 + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ 93 + reg ACTION; \ 94 + io_apic_modify(entry->apic, reg); \ 95 + if (!entry->next) \ 96 + break; \ 97 + entry = irq_2_pin + entry->next; \ 98 + } \ 99 + FINAL; \ 100 + } 101 + 102 + #ifdef CONFIG_SMP 103 + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 104 + { 105 + unsigned long flags; 106 + unsigned int dest; 107 + cpumask_t tmp; 108 + 109 + cpus_and(tmp, mask, cpu_online_map); 110 + if (cpus_empty(tmp)) 111 + tmp = TARGET_CPUS; 112 + 113 + cpus_and(mask, tmp, CPU_MASK_ALL); 114 + 115 + dest = cpu_mask_to_apicid(mask); 116 + 117 + /* 118 + * Only the high 8 bits are valid. 119 + */ 120 + dest = SET_APIC_LOGICAL_ID(dest); 121 + 122 + spin_lock_irqsave(&ioapic_lock, flags); 123 + __DO_ACTION(1, = dest, ) 124 + set_irq_info(irq, mask); 125 + spin_unlock_irqrestore(&ioapic_lock, flags); 126 + } 127 + #endif 128 + 81 129 /* 82 130 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 83 131 * shared ISA-space IRQs, so we have to support them. 
We are super ··· 149 101 entry->pin = pin; 150 102 } 151 103 152 - #define __DO_ACTION(R, ACTION, FINAL) \ 153 - \ 154 - { \ 155 - int pin; \ 156 - struct irq_pin_list *entry = irq_2_pin + irq; \ 157 - \ 158 - for (;;) { \ 159 - unsigned int reg; \ 160 - pin = entry->pin; \ 161 - if (pin == -1) \ 162 - break; \ 163 - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ 164 - reg ACTION; \ 165 - io_apic_modify(entry->apic, reg); \ 166 - if (!entry->next) \ 167 - break; \ 168 - entry = irq_2_pin + entry->next; \ 169 - } \ 170 - FINAL; \ 171 - } 172 104 173 105 #define DO_ACTION(name,R,ACTION, FINAL) \ 174 106 \ ··· 795 767 spin_lock_irqsave(&ioapic_lock, flags); 796 768 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); 797 769 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); 770 + set_native_irq_info(irq, TARGET_CPUS); 798 771 spin_unlock_irqrestore(&ioapic_lock, flags); 799 772 } 800 773 } ··· 1343 1314 */ 1344 1315 static void ack_edge_ioapic_irq(unsigned int irq) 1345 1316 { 1317 + move_irq(irq); 1346 1318 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) 1347 1319 == (IRQ_PENDING | IRQ_DISABLED)) 1348 1320 mask_IO_APIC_irq(irq); ··· 1373 1343 1374 1344 static void end_level_ioapic_irq (unsigned int irq) 1375 1345 { 1346 + move_irq(irq); 1376 1347 ack_APIC_irq(); 1377 - } 1378 - 1379 - static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 1380 - { 1381 - unsigned long flags; 1382 - unsigned int dest; 1383 - 1384 - dest = cpu_mask_to_apicid(mask); 1385 - 1386 - /* 1387 - * Only the high 8 bits are valid. 
1388 - */ 1389 - dest = SET_APIC_LOGICAL_ID(dest); 1390 - 1391 - spin_lock_irqsave(&ioapic_lock, flags); 1392 - __DO_ACTION(1, = dest, ) 1393 - spin_unlock_irqrestore(&ioapic_lock, flags); 1394 1348 } 1395 1349 1396 1350 #ifdef CONFIG_PCI_MSI ··· 1389 1375 { 1390 1376 int irq = vector_to_irq(vector); 1391 1377 1378 + move_native_irq(vector); 1392 1379 ack_edge_ioapic_irq(irq); 1393 1380 } 1394 1381 ··· 1404 1389 { 1405 1390 int irq = vector_to_irq(vector); 1406 1391 1392 + move_native_irq(vector); 1407 1393 end_level_ioapic_irq(irq); 1408 1394 } 1409 1395 ··· 1422 1406 unmask_IO_APIC_irq(irq); 1423 1407 } 1424 1408 1409 + #ifdef CONFIG_SMP 1425 1410 static void set_ioapic_affinity_vector (unsigned int vector, 1426 1411 cpumask_t cpu_mask) 1427 1412 { 1428 1413 int irq = vector_to_irq(vector); 1429 1414 1415 + set_native_irq_info(vector, cpu_mask); 1430 1416 set_ioapic_affinity_irq(irq, cpu_mask); 1431 1417 } 1432 - #endif 1418 + #endif // CONFIG_SMP 1419 + #endif // CONFIG_PCI_MSI 1433 1420 1434 1421 /* 1435 1422 * Level and edge triggered IO-APIC interrupts need different handling, ··· 1451 1432 .disable = disable_edge_ioapic, 1452 1433 .ack = ack_edge_ioapic, 1453 1434 .end = end_edge_ioapic, 1435 + #ifdef CONFIG_SMP 1454 1436 .set_affinity = set_ioapic_affinity, 1437 + #endif 1455 1438 }; 1456 1439 1457 1440 static struct hw_interrupt_type ioapic_level_type = { ··· 1464 1443 .disable = disable_level_ioapic, 1465 1444 .ack = mask_and_ack_level_ioapic, 1466 1445 .end = end_level_ioapic, 1446 + #ifdef CONFIG_SMP 1467 1447 .set_affinity = set_ioapic_affinity, 1448 + #endif 1468 1449 }; 1469 1450 1470 1451 static inline void init_IO_APIC_traps(void) ··· 1941 1918 spin_lock_irqsave(&ioapic_lock, flags); 1942 1919 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); 1943 1920 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); 1921 + set_native_irq_info(use_pci_vector() ? 
entry.vector : irq, TARGET_CPUS); 1944 1922 spin_unlock_irqrestore(&ioapic_lock, flags); 1945 1923 1946 1924 return 0; ··· 1955 1931 * we need to reprogram the ioredtbls to cater for the cpus which have come online 1956 1932 * so mask in all cases should simply be TARGET_CPUS 1957 1933 */ 1934 + #ifdef CONFIG_SMP 1958 1935 void __init setup_ioapic_dest(void) 1959 1936 { 1960 1937 int pin, ioapic, irq, irq_entry; ··· 1974 1949 1975 1950 } 1976 1951 } 1952 + #endif
+5 -12
drivers/pci/msi.c
··· 91 91 { 92 92 struct msi_desc *entry; 93 93 struct msg_address address; 94 + unsigned int irq = vector; 94 95 95 96 entry = (struct msi_desc *)msi_desc[vector]; 96 97 if (!entry || !entry->dev) ··· 113 112 entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); 114 113 pci_write_config_dword(entry->dev, msi_lower_address_reg(pos), 115 114 address.lo_address.value); 115 + set_native_irq_info(irq, cpu_mask); 116 116 break; 117 117 } 118 118 case PCI_CAP_ID_MSIX: ··· 127 125 MSI_TARGET_CPU_SHIFT); 128 126 entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); 129 127 writel(address.lo_address.value, entry->mask_base + offset); 128 + set_native_irq_info(irq, cpu_mask); 130 129 break; 131 130 } 132 131 default: 133 132 break; 134 133 } 135 134 } 136 - 137 - #ifdef CONFIG_IRQBALANCE 138 - static inline void move_msi(int vector) 139 - { 140 - if (!cpus_empty(pending_irq_balance_cpumask[vector])) { 141 - set_msi_affinity(vector, pending_irq_balance_cpumask[vector]); 142 - cpus_clear(pending_irq_balance_cpumask[vector]); 143 - } 144 - } 145 - #endif /* CONFIG_IRQBALANCE */ 146 135 #endif /* CONFIG_SMP */ 147 136 148 137 static void mask_MSI_irq(unsigned int vector) ··· 184 191 185 192 static void end_msi_irq_wo_maskbit(unsigned int vector) 186 193 { 187 - move_msi(vector); 194 + move_native_irq(vector); 188 195 ack_APIC_irq(); 189 196 } 190 197 191 198 static void end_msi_irq_w_maskbit(unsigned int vector) 192 199 { 193 - move_msi(vector); 200 + move_native_irq(vector); 194 201 unmask_MSI_irq(vector); 195 202 ack_APIC_irq(); 196 203 }
-5
drivers/pci/msi.h
··· 19 19 #define NR_HP_RESERVED_VECTORS 20 20 20 21 21 extern int vector_irq[NR_VECTORS]; 22 - extern cpumask_t pending_irq_balance_cpumask[NR_IRQS]; 23 22 extern void (*interrupt[NR_IRQS])(void); 24 23 extern int pci_vector_resources(int last, int nr_released); 25 24 ··· 26 27 #define set_msi_irq_affinity set_msi_affinity 27 28 #else 28 29 #define set_msi_irq_affinity NULL 29 - #endif 30 - 31 - #ifndef CONFIG_IRQBALANCE 32 - static inline void move_msi(int vector) {} 33 30 #endif 34 31 35 32 /*
-7
include/asm-ia64/hw_irq.h
··· 116 116 * and to obtain the irq descriptor for a given irq number. 117 117 */ 118 118 119 - /* Return a pointer to the irq descriptor for IRQ. */ 120 - static inline irq_desc_t * 121 - irq_descp (int irq) 122 - { 123 - return irq_desc + irq; 124 - } 125 - 126 119 /* Extract the IA-64 vector that corresponds to IRQ. */ 127 120 static inline ia64_vector 128 121 irq_to_vector (int irq)
-6
include/asm-ia64/irq.h
··· 30 30 extern void enable_irq (unsigned int); 31 31 extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); 32 32 33 - #ifdef CONFIG_SMP 34 - extern void move_irq(int irq); 35 - #else 36 - #define move_irq(irq) 37 - #endif 38 - 39 33 struct irqaction; 40 34 struct pt_regs; 41 35 int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+123
include/linux/irq.h
··· 71 71 unsigned int irq_count; /* For detecting broken interrupts */ 72 72 unsigned int irqs_unhandled; 73 73 spinlock_t lock; 74 + #if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) 75 + unsigned int move_irq; /* Flag need to re-target intr dest*/ 76 + #endif 74 77 } ____cacheline_aligned irq_desc_t; 75 78 76 79 extern irq_desc_t irq_desc [NR_IRQS]; 80 + 81 + /* Return a pointer to the irq descriptor for IRQ. */ 82 + static inline irq_desc_t * 83 + irq_descp (int irq) 84 + { 85 + return irq_desc + irq; 86 + } 77 87 78 88 #include <asm/hw_irq.h> /* the arch dependent stuff */ 79 89 ··· 91 81 92 82 #ifdef CONFIG_GENERIC_HARDIRQS 93 83 extern cpumask_t irq_affinity[NR_IRQS]; 84 + 85 + #ifdef CONFIG_SMP 86 + static inline void set_native_irq_info(int irq, cpumask_t mask) 87 + { 88 + irq_affinity[irq] = mask; 89 + } 90 + #else 91 + static inline void set_native_irq_info(int irq, cpumask_t mask) 92 + { 93 + } 94 + #endif 95 + 96 + #ifdef CONFIG_SMP 97 + 98 + #if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) 99 + extern cpumask_t pending_irq_cpumask[NR_IRQS]; 100 + 101 + static inline void set_pending_irq(unsigned int irq, cpumask_t mask) 102 + { 103 + irq_desc_t *desc = irq_desc + irq; 104 + unsigned long flags; 105 + 106 + spin_lock_irqsave(&desc->lock, flags); 107 + desc->move_irq = 1; 108 + pending_irq_cpumask[irq] = mask; 109 + spin_unlock_irqrestore(&desc->lock, flags); 110 + } 111 + 112 + static inline void 113 + move_native_irq(int irq) 114 + { 115 + cpumask_t tmp; 116 + irq_desc_t *desc = irq_descp(irq); 117 + 118 + if (likely (!desc->move_irq)) 119 + return; 120 + 121 + desc->move_irq = 0; 122 + 123 + if (likely(cpus_empty(pending_irq_cpumask[irq]))) 124 + return; 125 + 126 + if (!desc->handler->set_affinity) 127 + return; 128 + 129 + /* note - we hold the desc->lock */ 130 + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); 131 + 132 + /* 133 + * If there was a valid mask to work with, please 134 + * do the 
disable, re-program, enable sequence. 135 + * This is *not* particularly important for level triggered 136 + * but in a edge trigger case, we might be setting rte 137 + * when an active trigger is comming in. This could 138 + * cause some ioapics to mal-function. 139 + * Being paranoid i guess! 140 + */ 141 + if (unlikely(!cpus_empty(tmp))) { 142 + desc->handler->disable(irq); 143 + desc->handler->set_affinity(irq,tmp); 144 + desc->handler->enable(irq); 145 + } 146 + cpus_clear(pending_irq_cpumask[irq]); 147 + } 148 + 149 + #ifdef CONFIG_PCI_MSI 150 + /* 151 + * Wonder why these are dummies? 152 + * For e.g the set_ioapic_affinity_vector() calls the set_ioapic_affinity_irq() 153 + * counter part after translating the vector to irq info. We need to perform 154 + * this operation on the real irq, when we dont use vector, i.e when 155 + * pci_use_vector() is false. 156 + */ 157 + static inline void move_irq(int irq) 158 + { 159 + } 160 + 161 + static inline void set_irq_info(int irq, cpumask_t mask) 162 + { 163 + } 164 + 165 + #else // CONFIG_PCI_MSI 166 + 167 + static inline void move_irq(int irq) 168 + { 169 + move_native_irq(irq); 170 + } 171 + 172 + static inline void set_irq_info(int irq, cpumask_t mask) 173 + { 174 + set_native_irq_info(irq, mask); 175 + } 176 + #endif // CONFIG_PCI_MSI 177 + 178 + #else // CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE 179 + 180 + #define move_irq(x) 181 + #define move_native_irq(x) 182 + #define set_pending_irq(x,y) 183 + static inline void set_irq_info(int irq, cpumask_t mask) 184 + { 185 + set_native_irq_info(irq, mask); 186 + } 187 + 188 + #endif // CONFIG_GENERIC_PENDING_IRQ 189 + 190 + #else // CONFIG_SMP 191 + 192 + #define move_irq(x) 193 + #define move_native_irq(x) 194 + 195 + #endif // CONFIG_SMP 196 + 94 197 extern int no_irq_affinity; 95 198 extern int noirqdebug_setup(char *str); 96 199
+4
kernel/irq/manage.c
··· 18 18 19 19 cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; 20 20 21 + #if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) 22 + cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; 23 + #endif 24 + 21 25 /** 22 26 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 23 27 *
+12 -2
kernel/irq/proc.c
··· 19 19 */ 20 20 static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; 21 21 22 - void __attribute__((weak)) 23 - proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) 22 + #ifdef CONFIG_GENERIC_PENDING_IRQ 23 + void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) 24 + { 25 + /* 26 + * Save these away for later use. Re-progam when the 27 + * interrupt is pending 28 + */ 29 + set_pending_irq(irq, mask_val); 30 + } 31 + #else 32 + void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) 24 33 { 25 34 irq_affinity[irq] = mask_val; 26 35 irq_desc[irq].handler->set_affinity(irq, mask_val); 27 36 } 37 + #endif 28 38 29 39 static int irq_affinity_read_proc(char *page, char **start, off_t off, 30 40 int count, int *eof, void *data)