[POWERPC] Fix spu SLB invalidations

The SPU code doesn't properly invalidate the SPUs' SLBs when necessary,
for example when changing a segment size from the hugetlbfs code. In
addition, it saves and restores the SLB content on context switches,
which makes it harder to properly handle those invalidations.

This patch removes the saving & restoring for now; something more
efficient might be found later on. It also adds a spu_flush_all_slbs(mm)
that can be used by the core mm code to flush the SLBs of all SPEs that
are running a given mm at the time of the flush.

In order to do that, it adds a spinlock to the list of all SPEs and moves
some bits & pieces from spufs to spu_base.c.
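
For context, the intended caller-side pattern is roughly the sketch below
(illustrative only; change_segment_size() is a hypothetical placeholder for
whichever mm routine changes a segment's page size; the real call sites added
by this patch are in hash_page() and prepare_hugepage_range()):

	/* Hypothetical mm-side caller, sketched under the assumption that it
	 * runs right after the segment's page size has been changed.
	 */
	#include <asm/spu.h>

	static void change_segment_size(struct mm_struct *mm)
	{
		/* ... update mm->context / hash table state here ... */
	#ifdef CONFIG_SPE_BASE
		/* Drop the SLB entries of every SPE currently running this mm,
		 * so they re-fault their segments with the new size.
		 */
		spu_flush_all_slbs(mm);
	#endif
	}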

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Authored by Benjamin Herrenschmidt and committed by Arnd Bergmann (94b2a439, 50b520d4)

+91 -86
+6
arch/powerpc/mm/hash_utils_64.c
···
 			       "non-cacheable mapping\n");
 			psize = mmu_vmalloc_psize = MMU_PAGE_4K;
 		}
+#ifdef CONFIG_SPE_BASE
+		spu_flush_all_slbs(mm);
+#endif
 	}
 	if (user_region) {
 		if (psize != get_paca()->context.user_psize) {
···
 				mmu_psize_defs[MMU_PAGE_4K].sllp;
 			get_paca()->context = mm->context;
 			slb_flush_and_rebolt();
+#ifdef CONFIG_SPE_BASE
+			spu_flush_all_slbs(mm);
+#endif
 		}
 	}
 	if (mm->context.user_psize == MMU_PAGE_64K)
+4
arch/powerpc/mm/hugetlbpage.c
···
 #include <asm/machdep.h>
 #include <asm/cputable.h>
 #include <asm/tlb.h>
+#include <asm/spu.h>

 #include <linux/sysctl.h>

···
	if ((addr + len) > 0x100000000UL)
		err = open_high_hpage_areas(current->mm,
					    HTLB_AREA_MASK(addr, len));
+#ifdef CONFIG_SPE_BASE
+	spu_flush_all_slbs(current->mm);
+#endif
	if (err) {
		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
			" failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
+69 -12
arch/powerpc/platforms/cell/spu_base.c
···
 const struct spu_management_ops *spu_management_ops;
 const struct spu_priv1_ops *spu_priv1_ops;

+static struct list_head spu_list[MAX_NUMNODES];
+static LIST_HEAD(spu_full_list);
+static DEFINE_MUTEX(spu_mutex);
+static spinlock_t spu_list_lock = SPIN_LOCK_UNLOCKED;
+
 EXPORT_SYMBOL_GPL(spu_priv1_ops);
+
+void spu_invalidate_slbs(struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
+		out_be64(&priv2->slb_invalidate_all_W, 0UL);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/* This is called by the MM core when a segment size is changed, to
+ * request a flush of all the SPEs using a given mm
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+	struct spu *spu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_list_lock, flags);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		if (spu->mm == mm)
+			spu_invalidate_slbs(spu);
+	}
+	spin_unlock_irqrestore(&spu_list_lock, flags);
+}
+
+/* The hack below stinks... try to do something better one of
+ * these days... Does it even work properly with NR_CPUS == 1 ?
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
+	/* Global TLBIE broadcast required with SPEs. */
+	__cpus_setall(&mm->cpu_vm_mask, nr);
+}
+
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_list_lock, flags);
+	spu->mm = mm;
+	spin_unlock_irqrestore(&spu_list_lock, flags);
+	if (mm)
+		mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);

 static int __spu_trap_invalid_dma(struct spu *spu)
 {
···
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
 	struct mm_struct *mm = spu->mm;
 	u64 esid, vsid, llp;
+	int psize;

 	pr_debug("%s\n", __FUNCTION__);

···
 	case USER_REGION_ID:
 #ifdef CONFIG_HUGETLB_PAGE
 		if (in_hugepage_area(mm->context, ea))
-			llp = mmu_psize_defs[mmu_huge_psize].sllp;
+			psize = mmu_huge_psize;
 		else
 #endif
-			llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+			psize = mm->context.user_psize;
 		vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
-				SLB_VSID_USER | llp;
+				SLB_VSID_USER;
 		break;
 	case VMALLOC_REGION_ID:
-		llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+		if (ea < VMALLOC_END)
+			psize = mmu_vmalloc_psize;
+		else
+			psize = mmu_io_psize;
 		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
-			SLB_VSID_KERNEL | llp;
+			SLB_VSID_KERNEL;
 		break;
 	case KERNEL_REGION_ID:
-		llp = mmu_psize_defs[mmu_linear_psize].sllp;
+		psize = mmu_linear_psize;
 		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
-			SLB_VSID_KERNEL | llp;
+			SLB_VSID_KERNEL;
 		break;
 	default:
 		/* Future: support kernel segments so that drivers
···
 		pr_debug("invalid region access at %016lx\n", ea);
 		return 1;
 	}
+	llp = mmu_psize_defs[psize].sllp;

 	out_be64(&priv2->slb_index_W, spu->slb_replace);
-	out_be64(&priv2->slb_vsid_RW, vsid);
+	out_be64(&priv2->slb_vsid_RW, vsid | llp);
 	out_be64(&priv2->slb_esid_RW, esid);

 	spu->slb_replace++;
···
 	if (spu->irqs[2] != NO_IRQ)
 		free_irq(spu->irqs[2], spu);
 }
-
-static struct list_head spu_list[MAX_NUMNODES];
-static LIST_HEAD(spu_full_list);
-static DEFINE_MUTEX(spu_mutex);

 static void spu_init_channels(struct spu *spu)
 {
···
 	struct spu *spu;
 	int ret;
 	static int number;
+	unsigned long flags;

 	ret = -ENOMEM;
 	spu = kzalloc(sizeof (*spu), GFP_KERNEL);
···
 		goto out_free_irqs;

 	mutex_lock(&spu_mutex);
+	spin_lock_irqsave(&spu_list_lock, flags);
 	list_add(&spu->list, &spu_list[spu->node]);
 	list_add(&spu->full_list, &spu_full_list);
+	spin_unlock_irqrestore(&spu_list_lock, flags);
 	mutex_unlock(&spu_mutex);

 	goto out;
+2 -11
arch/powerpc/platforms/cell/spufs/sched.c
···
 	mutex_unlock(&spu_prio->active_mutex[node]);
 }

-static inline void mm_needs_global_tlbie(struct mm_struct *mm)
-{
-	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
-
-	/* Global TLBIE broadcast required with SPEs. */
-	__cpus_setall(&mm->cpu_vm_mask, nr);
-}
-
 static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

 static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
···
 	ctx->spu = spu;
 	ctx->ops = &spu_hw_ops;
 	spu->pid = current->pid;
-	spu->mm = ctx->owner;
-	mm_needs_global_tlbie(spu->mm);
+	spu_associate_mm(spu, ctx->owner);
 	spu->ibox_callback = spufs_ibox_callback;
 	spu->wbox_callback = spufs_wbox_callback;
 	spu->stop_callback = spufs_stop_callback;
···
 	spu->stop_callback = NULL;
 	spu->mfc_callback = NULL;
 	spu->dma_callback = NULL;
-	spu->mm = NULL;
+	spu_associate_mm(spu, NULL);
 	spu->pid = 0;
 	ctx->ops = &spu_backing_ops;
 	ctx->spu = NULL;
+3 -59
arch/powerpc/platforms/cell/spufs/switch.c
···
 					MFC_CNTL_PURGE_DMA_COMPLETE);
 }

-static inline void save_mfc_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	int i;
-
-	/* Save, Step 29:
-	 *     If MFC_SR1[R]='1', save SLBs in CSA.
-	 */
-	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
-		csa->priv2.slb_index_W = in_be64(&priv2->slb_index_W);
-		for (i = 0; i < 8; i++) {
-			out_be64(&priv2->slb_index_W, i);
-			eieio();
-			csa->slb_esid_RW[i] = in_be64(&priv2->slb_esid_RW);
-			csa->slb_vsid_RW[i] = in_be64(&priv2->slb_vsid_RW);
-			eieio();
-		}
-	}
-}
-
 static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
 {
 	/* Save, Step 30:
···
 	out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
 }

-static inline void invalidate_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-
-	/* Save, Step 45:
-	 * Restore, Step 19:
-	 *     If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All.
-	 */
-	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
-		out_be64(&priv2->slb_invalidate_all_W, 0UL);
-		eieio();
-	}
-}
-
 static inline void get_kernel_slb(u64 ea, u64 slb[2])
 {
 	u64 llp;
···
 	 * MFC_SR1[R]=1 (in other words, assume that
 	 * translation is desired by OS environment).
 	 */
-	invalidate_slbs(csa, spu);
+	spu_invalidate_slbs(spu);
 	get_kernel_slb((unsigned long)&spu_save_code[0], code_slb);
 	get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb);
 	load_mfc_slb(spu, code_slb, 0);
···
 	}
 }

-static inline void restore_mfc_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	int i;
-
-	/* Restore, Step 68:
-	 *     If MFC_SR1[R]='1', restore SLBs from CSA.
-	 */
-	if (csa->priv1.mfc_sr1_RW & MFC_STATE1_RELOCATE_MASK) {
-		for (i = 0; i < 8; i++) {
-			out_be64(&priv2->slb_index_W, i);
-			eieio();
-			out_be64(&priv2->slb_esid_RW, csa->slb_esid_RW[i]);
-			out_be64(&priv2->slb_vsid_RW, csa->slb_vsid_RW[i]);
-			eieio();
-		}
-		out_be64(&priv2->slb_index_W, csa->priv2.slb_index_W);
-		eieio();
-	}
-}
-
 static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
 {
 	/* Restore, Step 69:
···
 	set_mfc_tclass_id(prev, spu);	/* Step 26. */
 	purge_mfc_queue(prev, spu);	/* Step 27. */
 	wait_purge_complete(prev, spu);	/* Step 28. */
-	save_mfc_slbs(prev, spu);	/* Step 29. */
 	setup_mfc_sr1(prev, spu);	/* Step 30. */
 	save_spu_npc(prev, spu);	/* Step 31. */
 	save_spu_privcntl(prev, spu);	/* Step 32. */
···
 	reset_spu_privcntl(prev, spu);	/* Step 16. */
 	reset_spu_lslr(prev, spu);	/* Step 17. */
 	setup_mfc_sr1(prev, spu);	/* Step 18. */
-	invalidate_slbs(prev, spu);	/* Step 19. */
+	spu_invalidate_slbs(spu);	/* Step 19. */
 	reset_ch_part1(prev, spu);	/* Step 20. */
 	reset_ch_part2(prev, spu);	/* Step 21. */
 	enable_interrupts(prev, spu);	/* Step 22. */
···
 	restore_spu_mb(next, spu);	/* Step 65. */
 	check_ppu_mb_stat(next, spu);	/* Step 66. */
 	check_ppuint_mb_stat(next, spu);	/* Step 67. */
-	restore_mfc_slbs(next, spu);	/* Step 68. */
+	spu_invalidate_slbs(spu);	/* Modified Step 68. */
 	restore_mfc_sr1(next, spu);	/* Step 69. */
 	restore_other_spu_access(next, spu);	/* Step 70. */
 	restore_spu_runcntl(next, spu);	/* Step 71. */
+7
include/asm-powerpc/spu.h
···
 int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);

+extern void spu_invalidate_slbs(struct spu *spu);
+extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
+
+/* Calls from the memory management to the SPU */
+struct mm_struct;
+extern void spu_flush_all_slbs(struct mm_struct *mm);
+
 /* system callbacks from the SPU */
 struct spu_syscall_block {
 	u64 nr_ret;
-4
include/asm-powerpc/spu_csa.h
···
  * @spu_chnlcnt_RW: Array of saved channel counts.
  * @spu_chnldata_RW: Array of saved channel data.
  * @suspend_time: Time stamp when decrementer disabled.
- * @slb_esid_RW: Array of saved SLB esid entries.
- * @slb_vsid_RW: Array of saved SLB vsid entries.
  *
  * Structure representing the whole of the SPU
  * context save area (CSA). This struct contains
···
 	u32 spu_mailbox_data[4];
 	u32 pu_mailbox_data[1];
 	unsigned long suspend_time;
-	u64 slb_esid_RW[8];
-	u64 slb_vsid_RW[8];
 	spinlock_t register_lock;
 };