Merge branch 'for-2.6.25' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

* 'for-2.6.25' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc:
  [POWERPC] Add arch-specific walk_memory_remove() for 64-bit powerpc
  [POWERPC] Enable hotplug memory remove for 64-bit powerpc
  [POWERPC] Add remove_memory() for 64-bit powerpc
  [POWERPC] Make cell IOMMU fixed mapping printk more useful
  [POWERPC] Fix potential cell IOMMU bug when switching back to default DMA ops
  [POWERPC] Don't enable cell IOMMU fixed mapping if there are no dma-ranges
  [POWERPC] Fix cell IOMMU null pointer explosion on old firmwares
  [POWERPC] spufs: Fix timing dependent false return from spufs_run_spu
  [POWERPC] spufs: No need to have a runnable SPU for libassist update
  [POWERPC] spufs: Update SPU_Status[CISHP] in backing runcntl write
  [POWERPC] spufs: Fix state_mutex leaks
  [POWERPC] Disable G5 NAP mode during SMU commands on U3

+166 -60
+6
arch/powerpc/Kconfig
···
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 
+config ARCH_HAS_WALK_MEMORY
+	def_bool y
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+
 config KEXEC
 	bool "kexec system call (EXPERIMENTAL)"
 	depends on (PPC_PRPMC2800 || PPC_MULTIPLATFORM) && EXPERIMENTAL
+33
arch/powerpc/mm/mem.c
···
 	return __add_pages(zone, start_pfn, nr_pages);
 }
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn, end_pfn;
+	int ret;
+
+	start_pfn = start >> PAGE_SHIFT;
+	end_pfn = start_pfn + (size >> PAGE_SHIFT);
+	ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
+	if (ret)
+		goto out;
+	/* Arch-specific calls go here - next patch */
+out:
+	return ret;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
+/*
+ * walk_memory_resource() needs to make sure there are no holes in a given
+ * memory range.  On PPC64, since this range comes from /sysfs, it is
+ * guaranteed to be valid, non-overlapping and free of holes.  By the time
+ * we get here (memory add or remove), /proc/device-tree is updated and
+ * correct.  The only reason we would need to check against the device tree
+ * is if we allowed user-land to specify a memory range through a system
+ * call/ioctl etc. instead of doing offline/online through /sysfs.
+ */
+int
+walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
+			int (*func)(unsigned long, unsigned long, void *))
+{
+	return (*func)(start_pfn, nr_pages, arg);
+}
+
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 void show_mem(void)
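For reference, a minimal sketch of how a consumer might drive walk_memory_resource(). The callback and wrapper below are hypothetical, but they match the signature added above; on PPC64 the callback is simply invoked once over the whole range:

    /* Hypothetical callback: counts the pages in each range it is handed. */
    static int count_range_pages(unsigned long start_pfn, unsigned long nr_pages,
    			     void *arg)
    {
    	unsigned long *total = arg;

    	*total += nr_pages;
    	return 0;	/* a non-zero return would abort the walk */
    }

    /* Hypothetical wrapper: total pages backing a section. */
    static unsigned long pages_in_section(unsigned long start_pfn,
    				      unsigned long nr_pages)
    {
    	unsigned long total = 0;

    	walk_memory_resource(start_pfn, nr_pages, &total, count_range_pages);
    	return total;
    }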
+28 -20
arch/powerpc/platforms/cell/iommu.c
···
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 
 #include <asm/prom.h>
···
 static u64 cell_iommu_get_fixed_address(struct device *dev)
 {
 	u64 cpu_addr, size, best_size, pci_addr = OF_BAD_ADDR;
-	struct device_node *tmp, *np;
+	struct device_node *np;
 	const u32 *ranges = NULL;
 	int i, len, best;
 
-	np = dev->archdata.of_node;
-	of_node_get(np);
-	ranges = of_get_property(np, "dma-ranges", &len);
-	while (!ranges && np) {
-		tmp = of_get_parent(np);
-		of_node_put(np);
-		np = tmp;
+	np = of_node_get(dev->archdata.of_node);
+	while (np) {
 		ranges = of_get_property(np, "dma-ranges", &len);
+		if (ranges)
+			break;
+		np = of_get_next_parent(np);
 	}
 
 	if (!ranges) {
···
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
 		return -EIO;
 
-	if (dma_mask == DMA_BIT_MASK(64)) {
-		if (cell_iommu_get_fixed_address(dev) == OF_BAD_ADDR)
-			dev_dbg(dev, "iommu: 64-bit OK, but bad addr\n");
-		else {
-			dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
-			set_dma_ops(dev, &dma_iommu_fixed_ops);
-			cell_dma_dev_setup(dev);
-		}
+	if (dma_mask == DMA_BIT_MASK(64) &&
+	    cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
+	{
+		dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+		set_dma_ops(dev, &dma_iommu_fixed_ops);
 	} else {
 		dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
 		set_dma_ops(dev, get_pci_dma_ops());
 	}
+
+	cell_dma_dev_setup(dev);
 
 	*dev->dma_mask = dma_mask;
···
 	np = of_find_node_by_name(NULL, "axon");
 	if (!np) {
 		pr_debug("iommu: fixed mapping disabled, no axons found\n");
+		return -1;
+	}
+
+	/* We must have dma-ranges properties for fixed mapping to work */
+	for (np = NULL; (np = of_find_all_nodes(np));) {
+		if (of_find_property(np, "dma-ranges", NULL))
+			break;
+	}
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
 		return -1;
 	}
···
 		dsize = htab_size_bytes;
 	}
 
-	pr_debug("iommu: setting up %d, dynamic window %lx-%lx " \
-		"fixed window %lx-%lx\n", iommu->nid, dbase,
+	printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+	       "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
 		dbase + dsize, fbase, fbase + fsize);
 
 	cell_iommu_setup_page_tables(iommu, dbase, dsize, fbase, fsize);
···
 
 	dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch;
 	set_pci_dma_ops(&dma_iommu_ops);
-
-	printk(KERN_DEBUG "IOMMU fixed mapping established.\n");
 
 	return 0;
 }
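The rewritten lookup loop leans on of_get_next_parent(), which drops the reference on the node it is given and returns its parent with a fresh reference, so the walk stays refcount-balanced without a temporary. The same pattern in isolation, as a hedged sketch (the helper name is hypothetical):

    /* Hypothetical helper: walk up from np to the nearest node (np itself
     * or an ancestor) carrying the named property.  Returns that node with
     * a reference held - the caller must of_node_put() it - or NULL. */
    static struct device_node *find_node_with_property(struct device_node *np,
    						   const char *name)
    {
    	np = of_node_get(np);
    	while (np) {
    		if (of_get_property(np, name, NULL))
    			return np;	/* reference passes to the caller */
    		np = of_get_next_parent(np);	/* puts np, gets parent */
    	}
    	return NULL;
    }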
+6
arch/powerpc/platforms/cell/spufs/backing_ops.c
···
 	spin_lock(&ctx->csa.register_lock);
 	ctx->csa.prob.spu_runcntl_RW = val;
 	if (val & SPU_RUNCNTL_RUNNABLE) {
+		ctx->csa.prob.spu_status_R &=
+			~SPU_STATUS_STOPPED_BY_STOP &
+			~SPU_STATUS_STOPPED_BY_HALT &
+			~SPU_STATUS_SINGLE_STEP &
+			~SPU_STATUS_INVALID_INSTR &
+			~SPU_STATUS_INVALID_CH;
 		ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING;
 	} else {
 		ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING;
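The five clears amount to masking out every stop-reason bit before the context is marked running again. An equivalent single-mask form, sketched with a hypothetical constant and helper (neither exists in spufs):

    /* Hypothetical mask covering every stop-reason bit cleared above. */
    #define SPU_STATUS_STOP_REASONS	(SPU_STATUS_STOPPED_BY_STOP | \
    				 SPU_STATUS_STOPPED_BY_HALT | \
    				 SPU_STATUS_SINGLE_STEP | \
    				 SPU_STATUS_INVALID_INSTR | \
    				 SPU_STATUS_INVALID_CH)

    /* Hypothetical helper: same effect as the hunk above. */
    static void backing_mark_running(struct spu_context *ctx)
    {
    	ctx->csa.prob.spu_status_R &= ~SPU_STATUS_STOP_REASONS;
    	ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING;
    }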
+4 -8
arch/powerpc/platforms/cell/spufs/fault.c
···
 	u64 ea, dsisr, access;
 	unsigned long flags;
 	unsigned flt = 0;
-	int ret, ret2;
+	int ret;
 
 	/*
 	 * dar and dsisr get passed from the registers
···
 	ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt);
 
 	/*
-	 * If spu_acquire fails due to a pending signal we just want to return
-	 * EINTR to userspace even if that means missing the dma restart or
-	 * updating the page fault statistics.
+	 * This is nasty: we need the state_mutex for all the bookkeeping even
+	 * if the syscall was interrupted by a signal. ewww.
 	 */
-	ret2 = spu_acquire(ctx);
-	if (ret2)
-		goto out;
+	mutex_lock(&ctx->state_mutex);
 
 	/*
 	 * Clear dsisr under ctxt lock after handling the fault, so that
···
 	} else
 		spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
 
-out:
 	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
 	return ret;
 }
+29 -20
arch/powerpc/platforms/cell/spufs/file.c
···
 {
 	struct spu_context *ctx = vma->vm_file->private_data;
 	unsigned long area, offset = address - vma->vm_start;
+	int ret = 0;
 
 	spu_context_nospu_trace(spufs_ps_nopfn__enter, ctx);
···
 	if (ctx->state == SPU_STATE_SAVED) {
 		up_read(&current->mm->mmap_sem);
 		spu_context_nospu_trace(spufs_ps_nopfn__sleep, ctx);
-		spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+		ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
 		spu_context_trace(spufs_ps_nopfn__wake, ctx, ctx->spu);
 		down_read(&current->mm->mmap_sem);
 	} else {
···
 		spu_context_trace(spufs_ps_nopfn__insert, ctx, ctx->spu);
 	}
 
-	spu_release(ctx);
+	if (!ret)
+		spu_release(ctx);
 	return NOPFN_REFAULT;
 }
···
 	count = spu_acquire(ctx);
 	if (count)
-		return count;
+		goto out;
 
 	/* wait only for the first element */
 	count = 0;
 	if (file->f_flags & O_NONBLOCK) {
-		if (!spu_ibox_read(ctx, &ibox_data))
+		if (!spu_ibox_read(ctx, &ibox_data)) {
 			count = -EAGAIN;
+			goto out_unlock;
+		}
 	} else {
 		count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+		if (count)
+			goto out;
 	}
-	if (count)
-		goto out;
 
 	/* if we can't write at all, return -EFAULT */
 	count = __put_user(ibox_data, udata);
 	if (count)
-		goto out;
+		goto out_unlock;
 
 	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
 		int ret;
···
 		break;
 	}
 
-out:
+out_unlock:
 	spu_release(ctx);
-
+out:
 	return count;
 }
···
 	count = spu_acquire(ctx);
 	if (count)
-		return count;
+		goto out;
 
 	/*
 	 * make sure we can at least write one element, by waiting
···
 	count = 0;
 	if (file->f_flags & O_NONBLOCK) {
-		if (!spu_wbox_write(ctx, wbox_data))
+		if (!spu_wbox_write(ctx, wbox_data)) {
 			count = -EAGAIN;
+			goto out_unlock;
+		}
 	} else {
 		count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+		if (count)
+			goto out;
 	}
 
-	if (count)
-		goto out;
 
 	/* write as much as possible */
 	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
···
 		break;
 	}
 
-out:
+out_unlock:
 	spu_release(ctx);
+out:
 	return count;
 }
···
 	} else {
 		ret = spufs_wait(ctx->mfc_wq,
 			   spufs_read_mfc_tagstatus(ctx, &status));
+		if (ret)
+			goto out;
 	}
 	spu_release(ctx);
-
-	if (ret)
-		goto out;
 
 	ret = 4;
 	if (copy_to_user(buffer, &status, 4))
···
 		int status;
 		ret = spufs_wait(ctx->mfc_wq,
 			   spu_send_mfc_command(ctx, cmd, &status));
+		if (ret)
+			goto out;
 		if (status)
 			ret = status;
 	}
···
 
 	ret = spu_acquire(ctx);
 	if (ret)
-		return ret;
+		goto out;
 #if 0
 /* this currently hangs */
 	ret = spufs_wait(ctx->mfc_wq,
···
 		goto out;
 	ret = spufs_wait(ctx->mfc_wq,
 		 ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
-out:
+	if (ret)
+		goto out;
 #else
 	ret = 0;
 #endif
 	spu_release(ctx);
-
+out:
 	return ret;
 }
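The common thread in these file.c hunks: spu_acquire() can fail before the lock is ever taken, and spufs_wait() now returns -ERESTARTSYS with state_mutex already dropped, so only errors raised while the lock is still held may pass through spu_release(). A condensed, hedged sketch of the resulting discipline (function name and flow are illustrative, not a drop-in):

    /* Sketch of the out/out_unlock split the hunks above converge on. */
    static ssize_t spufs_sketch_read(struct spu_context *ctx, u32 __user *udata)
    {
    	ssize_t ret;
    	u32 data;

    	ret = spu_acquire(ctx);
    	if (ret)
    		goto out;	/* never acquired: nothing to release */

    	ret = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &data));
    	if (ret)
    		goto out;	/* interrupted wait already dropped the mutex */

    	ret = __put_user(data, udata);
    	if (ret)
    		goto out_unlock;	/* mutex still held: release below */

    	ret = 4;
    out_unlock:
    	spu_release(ctx);
    out:
    	return ret;
    }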
+13 -8
arch/powerpc/platforms/cell/spufs/run.c
···
 
 	stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
 		SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
-	if (*stat & stopped)
+	if (!(*stat & SPU_STATUS_RUNNING) && (*stat & stopped))
 		return 1;
 
 	dsisr = ctx->csa.dsisr;
···
 
 	do {
 		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
-		if (unlikely(ret))
+		if (unlikely(ret)) {
+			/*
+			 * This is nasty: we need the state_mutex for all the
+			 * bookkeeping even if the syscall was interrupted by
+			 * a signal. ewww.
+			 */
+			mutex_lock(&ctx->state_mutex);
 			break;
+		}
 		spu = ctx->spu;
 		if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
 						&ctx->sched_flags))) {
···
 				      SPU_STATUS_STOPPED_BY_HALT |
 				       SPU_STATUS_SINGLE_STEP)));
 
-	if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
-	    (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) &&
-	    (ctx->state == SPU_STATE_RUNNABLE))
-		ctx->stats.libassist++;
-
-
 	spu_disable_spu(ctx);
 	ret = spu_run_fini(ctx, npc, &status);
 	spu_yield(ctx);
+
+	if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	    (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+		ctx->stats.libassist++;
 
 	if ((ret == 0) ||
 	    ((ret == -ERESTARTSYS) &&
+4 -1
arch/powerpc/platforms/cell/spufs/spufs.h
···
  * Same as wait_event_interruptible(), except that here
  * we need to call spu_release(ctx) before sleeping, and
  * then spu_acquire(ctx) when awoken.
+ *
+ * Returns with state_mutex re-acquired when successful, or
+ * with -ERESTARTSYS and the state_mutex dropped when interrupted.
  */
 
 #define spufs_wait(wq, condition)					\
···
 		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
+		spu_release(ctx);					\
 		if (signal_pending(current)) {				\
 			__ret = -ERESTARTSYS;				\
 			break;						\
 		}							\
-		spu_release(ctx);					\
 		schedule();						\
 		__ret = spu_acquire(ctx);				\
 		if (__ret)						\
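Given this contract, a caller that must do cleanup under the lock even after an interrupted wait has to re-take state_mutex itself, as run.c does above. A hedged sketch of that shape (the helper is hypothetical; spufs_wait() expands against the local ctx):

    /* Hypothetical helper: returns with state_mutex held on both paths. */
    static int wait_until_stopped(struct spu_context *ctx, u32 *status)
    {
    	int ret;

    	ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, status));
    	if (ret) {
    		/* Interrupted: spufs_wait() dropped state_mutex, but our
    		 * caller's bookkeeping still expects it, so re-acquire. */
    		mutex_lock(&ctx->state_mutex);
    	}
    	return ret;
    }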
+10 -1
arch/powerpc/platforms/powermac/feature.c
···
 
 	/* Locate core99 Uni-N */
 	uninorth_node = of_find_node_by_name(NULL, "uni-n");
+	uninorth_maj = 1;
+
 	/* Locate G5 u3 */
 	if (uninorth_node == NULL) {
 		uninorth_node = of_find_node_by_name(NULL, "u3");
···
 		uninorth_node = of_find_node_by_name(NULL, "u4");
 		uninorth_maj = 4;
 	}
-	if (uninorth_node == NULL)
+	if (uninorth_node == NULL) {
+		uninorth_maj = 0;
 		return;
+	}
 
 	addrp = of_get_property(uninorth_node, "reg", NULL);
 	if (addrp == NULL)
···
 	pmac_agp_resume(pmac_agp_bridge);
 }
 EXPORT_SYMBOL(pmac_resume_agp_for_card);
+
+int pmac_get_uninorth_variant(void)
+{
+	return uninorth_maj;
+}
+24 -1
drivers/macintosh/smu.c
···
 	u32 cmd_buf_abs;	/* command buffer absolute */
 	struct list_head cmd_list;
 	struct smu_cmd *cmd_cur;	/* pending command */
+	int broken_nap;
 	struct list_head cmd_i2c_list;
 	struct smu_i2c_cmd *cmd_i2c_cur;	/* pending i2c command */
 	struct timer_list i2c_timer;
···
 	faddr = (unsigned long)smu->cmd_buf;
 	fend = faddr + smu->cmd_buf->length + 2;
 	flush_inval_dcache_range(faddr, fend);
+
+	/* We also disable NAP mode for the duration of the command
+	 * on U3 based machines.
+	 * This is slightly racy as it can be written back to 1 by a sysctl
+	 * but that never happens in practice. There seems to be an issue with
+	 * U3 based machines such as the iMac G5 where napping for the
+	 * whole duration of the command prevents the SMU from fetching it
+	 * from memory. This might be related to the strange i2c based
+	 * mechanism the SMU uses to access memory.
+	 */
+	if (smu->broken_nap)
+		powersave_nap = 0;
 
 	/* This isn't exactly a DMA mapping here, I suspect
 	 * the SMU is actually communicating with us via i2c to the
···
 	misc = cmd->misc;
 	mb();
 	cmd->status = rc;
+
+	/* Re-enable NAP mode */
+	if (smu->broken_nap)
+		powersave_nap = 1;
 bail:
 	/* Start next command if any */
 	smu_start_cmd();
···
 	if (np == NULL)
 		return -ENODEV;
 
-	printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR);
+	printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR);
 
 	if (smu_cmdbuf_abs == 0) {
 		printk(KERN_ERR "SMU: Command buffer not allocated !\n");
···
 		printk(KERN_ERR "SMU: Can't map doorbell buffer pointer !\n");
 		goto fail;
 	}
+
+	/* U3 has an issue with NAP mode when issuing SMU commands */
+	smu->broken_nap = pmac_get_uninorth_variant() < 4;
+	if (smu->broken_nap)
+		printk(KERN_INFO "SMU: using NAP mode workaround\n");
 
 	sys_ctrler = SYS_CTRLER_SMU;
 	return 0;
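The workaround simply brackets command execution: NAP is forced off when the command is started and restored when it completes. Written as a pair of helpers for clarity, these are purely illustrative (smu.c does not define them):

    /* Hypothetical helpers making the NAP bracket explicit. */
    static void smu_nap_disable(struct smu_device *smu)
    {
    	if (smu->broken_nap)
    		powersave_nap = 0;	/* keep the CPU out of NAP */
    }

    static void smu_nap_restore(struct smu_device *smu)
    {
    	if (smu->broken_nap)
    		powersave_nap = 1;
    }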
+8
include/asm-powerpc/pmac_feature.h
···
 #define UN_BIS(r,v)	(UN_OUT((r), UN_IN(r) | (v)))
 #define UN_BIC(r,v)	(UN_OUT((r), UN_IN(r) & ~(v)))
 
+/* Uninorth variant:
+ *
+ * 0 = not uninorth
+ * 1 = U1.x or U2.x
+ * 3 = U3
+ * 4 = U4
+ */
+extern int pmac_get_uninorth_variant(void);
 
 #endif /* __ASM_POWERPC_PMAC_FEATURE_H */
 #endif /* __KERNEL__ */
+1 -1
kernel/resource.c
···
 
 EXPORT_SYMBOL(release_resource);
 
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined(CONFIG_MEMORY_HOTPLUG) && !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 /*
  * Finds the lowest memory resource that exists within [res->start, res->end);
  * the caller must specify res->start, res->end, res->flags.