Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

Pull powerpc fixes from Ben Herrenschmidt:
"Here are a few things for -rc2, this time it's all written by me so it
can only be perfect .... right ? :)

So we have the fix to call irq_enter/exit on the irq stack we've been
discussing, plus a cleanup on top to remove an unused (and broken)
stack limit tracking feature (well, make it 32-bit only in fact where
it is used and works properly).

Then we have two things that I wrote over the last couple of days and
made the executive decision to include just because I can (and I'm
sure you won't object .... right ?).

They fix a couple of annoying and long standing "issues":

- We had separate zImages for when booting via Open Firmware vs.
booting via a flat device-tree, while it's trivial to make one that
deals with both

- We wasted a ton of cycles spinning secondary CPUs uselessly at boot
instead of starting them when needed on pseries, thus contributing
significantly to global warming"

* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc:
powerpc/pseries: Do not start secondaries in Open Firmware
powerpc/zImage: make the "OF" wrapper support ePAPR boot
powerpc: Remove ksp_limit on ppc64
powerpc/irq: Run softirqs off the top of the irq stack

+147 -94
+2 -2
arch/powerpc/boot/Makefile
··· 74 74 src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c 75 75 src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c 76 76 77 - src-plat-y := of.c 77 + src-plat-y := of.c epapr.c 78 78 src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ 79 79 treeboot-walnut.c cuboot-acadia.c \ 80 80 cuboot-kilauea.c simpleboot.c \ ··· 97 97 prpmc2800.c 98 98 src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c 99 99 src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c 100 - src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c 100 + src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c 101 101 102 102 src-wlib := $(sort $(src-wlib-y)) 103 103 src-plat := $(sort $(src-plat-y))
+9
arch/powerpc/boot/epapr-wrapper.c
··· 1 + extern void epapr_platform_init(unsigned long r3, unsigned long r4, 2 + unsigned long r5, unsigned long r6, 3 + unsigned long r7); 4 + 5 + void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, 6 + unsigned long r6, unsigned long r7) 7 + { 8 + epapr_platform_init(r3, r4, r5, r6, r7); 9 + }
+2 -2
arch/powerpc/boot/epapr.c
··· 48 48 fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size); 49 49 } 50 50 51 - void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, 52 - unsigned long r6, unsigned long r7) 51 + void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, 52 + unsigned long r6, unsigned long r7) 53 53 { 54 54 epapr_magic = r6; 55 55 ima_size = r7;
+15 -1
arch/powerpc/boot/of.c
··· 26 26 27 27 static unsigned long claim_base; 28 28 29 + void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, 30 + unsigned long r6, unsigned long r7); 31 + 29 32 static void *of_try_claim(unsigned long size) 30 33 { 31 34 unsigned long addr = 0; ··· 64 61 } 65 62 } 66 63 67 - void platform_init(unsigned long a1, unsigned long a2, void *promptr) 64 + static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr) 68 65 { 69 66 platform_ops.image_hdr = of_image_hdr; 70 67 platform_ops.malloc = of_try_claim; ··· 84 81 loader_info.initrd_size = a2; 85 82 } 86 83 } 84 + 85 + void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, 86 + unsigned long r6, unsigned long r7) 87 + { 88 + /* Detect OF vs. ePAPR boot */ 89 + if (r5) 90 + of_platform_init(r3, r4, (void *)r5); 91 + else 92 + epapr_platform_init(r3, r4, r5, r6, r7); 93 + } 94 +
+5 -4
arch/powerpc/boot/wrapper
··· 148 148 149 149 case "$platform" in 150 150 pseries) 151 - platformo=$object/of.o 151 + platformo="$object/of.o $object/epapr.o" 152 152 link_address='0x4000000' 153 153 ;; 154 154 maple) 155 - platformo=$object/of.o 155 + platformo="$object/of.o $object/epapr.o" 156 156 link_address='0x400000' 157 157 ;; 158 158 pmac|chrp) 159 - platformo=$object/of.o 159 + platformo="$object/of.o $object/epapr.o" 160 160 ;; 161 161 coff) 162 - platformo="$object/crt0.o $object/of.o" 162 + platformo="$object/crt0.o $object/of.o $object/epapr.o" 163 163 lds=$object/zImage.coff.lds 164 164 link_address='0x500000' 165 165 pie= ··· 253 253 platformo="$object/treeboot-iss4xx.o" 254 254 ;; 255 255 epapr) 256 + platformo="$object/epapr.o $object/epapr-wrapper.o" 256 257 link_address='0x20000000' 257 258 pie=-pie 258 259 ;;
+2 -2
arch/powerpc/include/asm/irq.h
··· 69 69 70 70 extern void irq_ctx_init(void); 71 71 extern void call_do_softirq(struct thread_info *tp); 72 - extern int call_handle_irq(int irq, void *p1, 73 - struct thread_info *tp, void *func); 72 + extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp); 74 73 extern void do_IRQ(struct pt_regs *regs); 74 + extern void __do_irq(struct pt_regs *regs); 75 75 76 76 int irq_choose_cpu(const struct cpumask *mask); 77 77
+1 -3
arch/powerpc/include/asm/processor.h
··· 149 149 150 150 struct thread_struct { 151 151 unsigned long ksp; /* Kernel stack pointer */ 152 - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ 153 - 154 152 #ifdef CONFIG_PPC64 155 153 unsigned long ksp_vsid; 156 154 #endif ··· 160 162 #endif 161 163 #ifdef CONFIG_PPC32 162 164 void *pgdir; /* root of page-table tree */ 165 + unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ 163 166 #endif 164 167 #ifdef CONFIG_PPC_ADV_DEBUG_REGS 165 168 /* ··· 320 321 #else 321 322 #define INIT_THREAD { \ 322 323 .ksp = INIT_SP, \ 323 - .ksp_limit = INIT_SP_LIMIT, \ 324 324 .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ 325 325 .fs = KERNEL_DS, \ 326 326 .fpr = {{0}}, \
+2 -1
arch/powerpc/kernel/asm-offsets.c
··· 80 80 DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); 81 81 #else 82 82 DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); 83 + DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); 84 + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); 83 85 #endif /* CONFIG_PPC64 */ 84 86 85 87 DEFINE(KSP, offsetof(struct thread_struct, ksp)); 86 - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); 87 88 DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); 88 89 #ifdef CONFIG_BOOKE 89 90 DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
+44 -56
arch/powerpc/kernel/irq.c
··· 441 441 } 442 442 #endif 443 443 444 - static inline void handle_one_irq(unsigned int irq) 445 - { 446 - struct thread_info *curtp, *irqtp; 447 - unsigned long saved_sp_limit; 448 - struct irq_desc *desc; 449 - 450 - desc = irq_to_desc(irq); 451 - if (!desc) 452 - return; 453 - 454 - /* Switch to the irq stack to handle this */ 455 - curtp = current_thread_info(); 456 - irqtp = hardirq_ctx[smp_processor_id()]; 457 - 458 - if (curtp == irqtp) { 459 - /* We're already on the irq stack, just handle it */ 460 - desc->handle_irq(irq, desc); 461 - return; 462 - } 463 - 464 - saved_sp_limit = current->thread.ksp_limit; 465 - 466 - irqtp->task = curtp->task; 467 - irqtp->flags = 0; 468 - 469 - /* Copy the softirq bits in preempt_count so that the 470 - * softirq checks work in the hardirq context. */ 471 - irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) | 472 - (curtp->preempt_count & SOFTIRQ_MASK); 473 - 474 - current->thread.ksp_limit = (unsigned long)irqtp + 475 - _ALIGN_UP(sizeof(struct thread_info), 16); 476 - 477 - call_handle_irq(irq, desc, irqtp, desc->handle_irq); 478 - current->thread.ksp_limit = saved_sp_limit; 479 - irqtp->task = NULL; 480 - 481 - /* Set any flag that may have been set on the 482 - * alternate stack 483 - */ 484 - if (irqtp->flags) 485 - set_bits(irqtp->flags, &curtp->flags); 486 - } 487 - 488 444 static inline void check_stack_overflow(void) 489 445 { 490 446 #ifdef CONFIG_DEBUG_STACKOVERFLOW ··· 457 501 #endif 458 502 } 459 503 460 - void do_IRQ(struct pt_regs *regs) 504 + void __do_irq(struct pt_regs *regs) 461 505 { 462 - struct pt_regs *old_regs = set_irq_regs(regs); 506 + struct irq_desc *desc; 463 507 unsigned int irq; 464 508 465 509 irq_enter(); ··· 475 519 */ 476 520 irq = ppc_md.get_irq(); 477 521 478 - /* We can hard enable interrupts now */ 522 + /* We can hard enable interrupts now to allow perf interrupts */ 479 523 may_hard_irq_enable(); 480 524 481 525 /* And finally process it */ 482 - if (irq != NO_IRQ) 483 - handle_one_irq(irq); 484 - else 526 + if (unlikely(irq == NO_IRQ)) 485 527 __get_cpu_var(irq_stat).spurious_irqs++; 528 + else { 529 + desc = irq_to_desc(irq); 530 + if (likely(desc)) 531 + desc->handle_irq(irq, desc); 532 + } 486 533 487 534 trace_irq_exit(regs); 488 535 489 536 irq_exit(); 537 + } 538 + 539 + void do_IRQ(struct pt_regs *regs) 540 + { 541 + struct pt_regs *old_regs = set_irq_regs(regs); 542 + struct thread_info *curtp, *irqtp; 543 + 544 + /* Switch to the irq stack to handle this */ 545 + curtp = current_thread_info(); 546 + irqtp = hardirq_ctx[raw_smp_processor_id()]; 547 + 548 + /* Already there ? */ 549 + if (unlikely(curtp == irqtp)) { 550 + __do_irq(regs); 551 + set_irq_regs(old_regs); 552 + return; 553 + } 554 + 555 + /* Prepare the thread_info in the irq stack */ 556 + irqtp->task = curtp->task; 557 + irqtp->flags = 0; 558 + 559 + /* Copy the preempt_count so that the [soft]irq checks work. */ 560 + irqtp->preempt_count = curtp->preempt_count; 561 + 562 + /* Switch stack and call */ 563 + call_do_irq(regs, irqtp); 564 + 565 + /* Restore stack limit */ 566 + irqtp->task = NULL; 567 + 568 + /* Copy back updates to the thread_info */ 569 + if (irqtp->flags) 570 + set_bits(irqtp->flags, &curtp->flags); 571 + 490 572 set_irq_regs(old_regs); 491 573 } 492 574 ··· 586 592 memset((void *)softirq_ctx[i], 0, THREAD_SIZE); 587 593 tp = softirq_ctx[i]; 588 594 tp->cpu = i; 589 - tp->preempt_count = 0; 590 595 591 596 memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); 592 597 tp = hardirq_ctx[i]; 593 598 tp->cpu = i; 594 - tp->preempt_count = HARDIRQ_OFFSET; 595 599 } 596 600 } 597 601 598 602 static inline void do_softirq_onstack(void) 599 603 { 600 604 struct thread_info *curtp, *irqtp; 601 - unsigned long saved_sp_limit = current->thread.ksp_limit; 602 605 603 606 curtp = current_thread_info(); 604 607 irqtp = softirq_ctx[smp_processor_id()]; 605 608 irqtp->task = curtp->task; 606 609 irqtp->flags = 0; 607 - current->thread.ksp_limit = (unsigned long)irqtp + 608 - _ALIGN_UP(sizeof(struct thread_info), 16); 609 610 call_do_softirq(irqtp); 610 - current->thread.ksp_limit = saved_sp_limit; 611 611 irqtp->task = NULL; 612 612 613 613 /* Set any flag that may have been set on the
+20 -5
arch/powerpc/kernel/misc_32.S
··· 36 36 37 37 .text 38 38 39 + /* 40 + * We store the saved ksp_limit in the unused part 41 + * of the STACK_FRAME_OVERHEAD 42 + */ 39 43 _GLOBAL(call_do_softirq) 40 44 mflr r0 41 45 stw r0,4(r1) 46 + lwz r10,THREAD+KSP_LIMIT(r2) 47 + addi r11,r3,THREAD_INFO_GAP 42 48 stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) 43 49 mr r1,r3 50 + stw r10,8(r1) 51 + stw r11,THREAD+KSP_LIMIT(r2) 44 52 bl __do_softirq 53 + lwz r10,8(r1) 45 54 lwz r1,0(r1) 46 55 lwz r0,4(r1) 56 + stw r10,THREAD+KSP_LIMIT(r2) 47 57 mtlr r0 48 58 blr 49 59 50 - _GLOBAL(call_handle_irq) 60 + _GLOBAL(call_do_irq) 51 61 mflr r0 52 62 stw r0,4(r1) 53 - mtctr r6 54 - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) 55 - mr r1,r5 56 - bctrl 63 + lwz r10,THREAD+KSP_LIMIT(r2) 64 + addi r11,r3,THREAD_INFO_GAP 65 + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) 66 + mr r1,r4 67 + stw r10,8(r1) 68 + stw r11,THREAD+KSP_LIMIT(r2) 69 + bl __do_irq 70 + lwz r10,8(r1) 57 71 lwz r1,0(r1) 58 72 lwz r0,4(r1) 73 + stw r10,THREAD+KSP_LIMIT(r2) 59 74 mtlr r0 60 75 blr 61 76
+4 -6
arch/powerpc/kernel/misc_64.S
··· 40 40 mtlr r0 41 41 blr 42 42 43 - _GLOBAL(call_handle_irq) 44 - ld r8,0(r6) 43 + _GLOBAL(call_do_irq) 45 44 mflr r0 46 45 std r0,16(r1) 47 - mtctr r8 48 - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) 49 - mr r1,r5 50 - bctrl 46 + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) 47 + mr r1,r4 48 + bl .__do_irq 51 49 ld r1,0(r1) 52 50 ld r0,16(r1) 53 51 mtlr r0
+2 -1
arch/powerpc/kernel/process.c
··· 1000 1000 kregs = (struct pt_regs *) sp; 1001 1001 sp -= STACK_FRAME_OVERHEAD; 1002 1002 p->thread.ksp = sp; 1003 + #ifdef CONFIG_PPC32 1003 1004 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + 1004 1005 _ALIGN_UP(sizeof(struct thread_info), 16); 1005 - 1006 + #endif 1006 1007 #ifdef CONFIG_HAVE_HW_BREAKPOINT 1007 1008 p->thread.ptrace_bps[0] = NULL; 1008 1009 #endif
+21
arch/powerpc/kernel/prom_init.c
··· 196 196 197 197 static cell_t __initdata regbuf[1024]; 198 198 199 + static bool rtas_has_query_cpu_stopped; 200 + 199 201 200 202 /* 201 203 * Error results ... some OF calls will return "-1" on error, some ··· 1576 1574 prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", 1577 1575 &val, sizeof(val)); 1578 1576 1577 + /* Check if it supports "query-cpu-stopped-state" */ 1578 + if (prom_getprop(rtas_node, "query-cpu-stopped-state", 1579 + &val, sizeof(val)) != PROM_ERROR) 1580 + rtas_has_query_cpu_stopped = true; 1581 + 1579 1582 #if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) 1580 1583 /* PowerVN takeover hack */ 1581 1584 prom_rtas_data = base; ··· 1821 1814 unsigned long *acknowledge 1822 1815 = (void *) LOW_ADDR(__secondary_hold_acknowledge); 1823 1816 unsigned long secondary_hold = LOW_ADDR(__secondary_hold); 1817 + 1818 + /* 1819 + * On pseries, if RTAS supports "query-cpu-stopped-state", 1820 + * we skip this stage, the CPUs will be started by the 1821 + * kernel using RTAS. 1822 + */ 1823 + if ((of_platform == PLATFORM_PSERIES || 1824 + of_platform == PLATFORM_PSERIES_LPAR) && 1825 + rtas_has_query_cpu_stopped) { 1826 + prom_printf("prom_hold_cpus: skipped\n"); 1827 + return; 1828 + } 1824 1829 1825 1830 prom_debug("prom_hold_cpus: start...\n"); 1826 1831 prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); ··· 3030 3011 * On non-powermacs, put all CPUs in spin-loops. 3031 3012 * 3032 3013 * PowerMacs use a different mechanism to spin CPUs 3014 + * 3015 + * (This must be done after instanciating RTAS) 3033 3016 */ 3034 3017 if (of_platform != PLATFORM_POWERMAC && 3035 3018 of_platform != PLATFORM_OPAL)
+2 -1
arch/powerpc/lib/sstep.c
··· 1505 1505 */ 1506 1506 if ((ra == 1) && !(regs->msr & MSR_PR) \ 1507 1507 && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { 1508 + #ifdef CONFIG_PPC32 1508 1509 /* 1509 1510 * Check if we will touch kernel sack overflow 1510 1511 */ ··· 1514 1513 err = -EINVAL; 1515 1514 break; 1516 1515 } 1517 - 1516 + #endif /* CONFIG_PPC32 */ 1518 1517 /* 1519 1518 * Check if we already set since that means we'll 1520 1519 * lose the previous value.
+16 -10
arch/powerpc/platforms/pseries/smp.c
··· 233 233 234 234 alloc_bootmem_cpumask_var(&of_spin_mask); 235 235 236 - /* Mark threads which are still spinning in hold loops. */ 237 - if (cpu_has_feature(CPU_FTR_SMT)) { 238 - for_each_present_cpu(i) { 239 - if (cpu_thread_in_core(i) == 0) 240 - cpumask_set_cpu(i, of_spin_mask); 241 - } 242 - } else { 243 - cpumask_copy(of_spin_mask, cpu_present_mask); 244 - } 236 + /* 237 + * Mark threads which are still spinning in hold loops 238 + * 239 + * We know prom_init will not have started them if RTAS supports 240 + * query-cpu-stopped-state. 241 + */ 242 + if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) { 243 + if (cpu_has_feature(CPU_FTR_SMT)) { 244 + for_each_present_cpu(i) { 245 + if (cpu_thread_in_core(i) == 0) 246 + cpumask_set_cpu(i, of_spin_mask); 247 + } 248 + } else 249 + cpumask_copy(of_spin_mask, cpu_present_mask); 245 250 246 - cpumask_clear_cpu(boot_cpuid, of_spin_mask); 251 + cpumask_clear_cpu(boot_cpuid, of_spin_mask); 252 + } 247 253 248 254 /* Non-lpar has additional take/give timebase */ 249 255 if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {