Linux kernel mirror (for testing) — git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'powerpc-4.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
"A larger batch of fixes than we'd like. Roughly 1/3 fixes for new
code, 1/3 fixes for stable and 1/3 minor things.

There are four commits fixing bugs when using 16GB huge pages on hash,
caused by some of the preparatory changes for pkeys.

Two fixes for bugs in the enhanced IRQ soft masking for local_t, one
of which broke KVM in some circumstances.

Four fixes for Power9. The most bizarre being a bug where futexes
stopped working because a NULL pointer dereference didn't trap during
early boot (it aliased the kernel mapping). A fix for memory hotplug
when using the Radix MMU, and a fix for live migration of guests using
the Radix MMU.

Two fixes for hotplug on pseries machines. One where we weren't
correctly updating NUMA info when CPUs are added and removed. And the
other fixes crashes/hangs seen when doing memory hot remove during
boot, which is apparently a thing people do.

Finally, a handful of build fixes for obscure configs and other minor
fixes.

Thanks to: Alexey Kardashevskiy, Aneesh Kumar K.V, Balbir Singh, Colin
Ian King, Daniel Henrique Barboza, Florian Weimer, Guenter Roeck,
Harish, Laurent Vivier, Madhavan Srinivasan, Mauricio Faria de
Oliveira, Nathan Fontenot, Nicholas Piggin, Sam Bobroff"

* tag 'powerpc-4.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
selftests/powerpc: Fix to use ucontext_t instead of struct ucontext
powerpc/kdump: Fix powernv build break when KEXEC_CORE=n
powerpc/pseries: Fix build break for SPLPAR=n and CPU hotplug
powerpc/mm/hash64: Zero PGD pages on allocation
powerpc/mm/hash64: Store the slot information at the right offset for hugetlb
powerpc/mm/hash64: Allocate larger PMD table if hugetlb config is enabled
powerpc/mm: Fix crashes with 16G huge pages
powerpc/mm: Flush radix process translations when setting MMU type
powerpc/vas: Don't set uses_vas for kernel windows
powerpc/pseries: Enable RAS hotplug events later
powerpc/mm/radix: Split linear mapping on hot-unplug
powerpc/64s/radix: Boot-time NULL pointer protection using a guard-PID
ocxl: fix signed comparison with less than zero
powerpc/64s: Fix may_hard_irq_enable() for PMI soft masking
powerpc/64s: Fix MASKABLE_RELON_EXCEPTION_HV_OOL macro
powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove

+231 -79
+1
arch/powerpc/include/asm/book3s/32/pgtable.h
··· 16 16 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) 17 17 18 18 #define PMD_CACHE_INDEX PMD_INDEX_SIZE 19 + #define PUD_CACHE_INDEX PUD_INDEX_SIZE 19 20 20 21 #ifndef __ASSEMBLY__ 21 22 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+2 -1
arch/powerpc/include/asm/book3s/64/hash-4k.h
··· 63 63 * keeping the prototype consistent across the two formats. 64 64 */ 65 65 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, 66 - unsigned int subpg_index, unsigned long hidx) 66 + unsigned int subpg_index, unsigned long hidx, 67 + int offset) 67 68 { 68 69 return (hidx << H_PAGE_F_GIX_SHIFT) & 69 70 (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+11 -5
arch/powerpc/include/asm/book3s/64/hash-64k.h
··· 45 45 * generic accessors and iterators here 46 46 */ 47 47 #define __real_pte __real_pte 48 - static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) 48 + static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) 49 49 { 50 50 real_pte_t rpte; 51 51 unsigned long *hidxp; ··· 59 59 */ 60 60 smp_rmb(); 61 61 62 - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); 62 + hidxp = (unsigned long *)(ptep + offset); 63 63 rpte.hidx = *hidxp; 64 64 return rpte; 65 65 } ··· 86 86 * expected to modify the PTE bits accordingly and commit the PTE to memory. 87 87 */ 88 88 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, 89 - unsigned int subpg_index, unsigned long hidx) 89 + unsigned int subpg_index, 90 + unsigned long hidx, int offset) 90 91 { 91 - unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); 92 + unsigned long *hidxp = (unsigned long *)(ptep + offset); 92 93 93 94 rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); 94 95 *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); ··· 141 140 } 142 141 143 142 #define H_PTE_TABLE_SIZE PTE_FRAG_SIZE 144 - #ifdef CONFIG_TRANSPARENT_HUGEPAGE 143 + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE) 145 144 #define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ 146 145 (sizeof(unsigned long) << PMD_INDEX_SIZE)) 147 146 #else 148 147 #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) 149 148 #endif 149 + #ifdef CONFIG_HUGETLB_PAGE 150 + #define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \ 151 + (sizeof(unsigned long) << PUD_INDEX_SIZE)) 152 + #else 150 153 #define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) 154 + #endif 151 155 #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) 152 156 153 157 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+12 -1
arch/powerpc/include/asm/book3s/64/hash.h
··· 23 23 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) 24 24 #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) 25 25 26 - #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES) 26 + #if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \ 27 + defined(CONFIG_PPC_64K_PAGES) 27 28 /* 28 29 * only with hash 64k we need to use the second half of pmd page table 29 30 * to store pointer to deposited pgtable_t ··· 32 31 #define H_PMD_CACHE_INDEX (H_PMD_INDEX_SIZE + 1) 33 32 #else 34 33 #define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE 34 + #endif 35 + /* 36 + * We store the slot details in the second half of page table. 37 + * Increase the pud level table so that hugetlb ptes can be stored 38 + * at pud level. 39 + */ 40 + #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) 41 + #define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1) 42 + #else 43 + #define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE) 35 44 #endif 36 45 /* 37 46 * Define the address range of the kernel non-linear virtual area
+11 -5
arch/powerpc/include/asm/book3s/64/pgalloc.h
··· 73 73 74 74 static inline pgd_t *pgd_alloc(struct mm_struct *mm) 75 75 { 76 + pgd_t *pgd; 77 + 76 78 if (radix_enabled()) 77 79 return radix__pgd_alloc(mm); 78 - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 79 - pgtable_gfp_flags(mm, GFP_KERNEL)); 80 + 81 + pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 82 + pgtable_gfp_flags(mm, GFP_KERNEL)); 83 + memset(pgd, 0, PGD_TABLE_SIZE); 84 + 85 + return pgd; 80 86 } 81 87 82 88 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) ··· 99 93 100 94 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 101 95 { 102 - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), 96 + return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX), 103 97 pgtable_gfp_flags(mm, GFP_KERNEL)); 104 98 } 105 99 106 100 static inline void pud_free(struct mm_struct *mm, pud_t *pud) 107 101 { 108 - kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); 102 + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); 109 103 } 110 104 111 105 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) ··· 121 115 * ahead and flush the page walk cache 122 116 */ 123 117 flush_tlb_pgtable(tlb, address); 124 - pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); 118 + pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX); 125 119 } 126 120 127 121 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+3 -1
arch/powerpc/include/asm/book3s/64/pgtable.h
··· 232 232 extern unsigned long __pud_index_size; 233 233 extern unsigned long __pgd_index_size; 234 234 extern unsigned long __pmd_cache_index; 235 + extern unsigned long __pud_cache_index; 235 236 #define PTE_INDEX_SIZE __pte_index_size 236 237 #define PMD_INDEX_SIZE __pmd_index_size 237 238 #define PUD_INDEX_SIZE __pud_index_size 238 239 #define PGD_INDEX_SIZE __pgd_index_size 239 240 #define PMD_CACHE_INDEX __pmd_cache_index 241 + #define PUD_CACHE_INDEX __pud_cache_index 240 242 /* 241 243 * Because of use of pte fragments and THP, size of page table 242 244 * are not always derived out of index size above. ··· 350 348 */ 351 349 #ifndef __real_pte 352 350 353 - #define __real_pte(e,p) ((real_pte_t){(e)}) 351 + #define __real_pte(e, p, o) ((real_pte_t){(e)}) 354 352 #define __rpte_to_pte(r) ((r).pte) 355 353 #define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) 356 354
+1 -1
arch/powerpc/include/asm/exception-64s.h
··· 645 645 EXC_HV, SOFTEN_TEST_HV, bitmask) 646 646 647 647 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ 648 - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ 648 + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ 649 649 EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) 650 650 651 651 /*
+11 -1
arch/powerpc/include/asm/hw_irq.h
··· 30 30 #define PACA_IRQ_PMI 0x40 31 31 32 32 /* 33 + * Some soft-masked interrupts must be hard masked until they are replayed 34 + * (e.g., because the soft-masked handler does not clear the exception). 35 + */ 36 + #ifdef CONFIG_PPC_BOOK3S 37 + #define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI) 38 + #else 39 + #define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) 40 + #endif 41 + 42 + /* 33 43 * flags for paca->irq_soft_mask 34 44 */ 35 45 #define IRQS_ENABLED 0 ··· 254 244 static inline void may_hard_irq_enable(void) 255 245 { 256 246 get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; 257 - if (!(get_paca()->irq_happened & PACA_IRQ_EE)) 247 + if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) 258 248 __hard_irq_enable(); 259 249 } 260 250
+6
arch/powerpc/include/asm/kexec.h
··· 140 140 return false; 141 141 } 142 142 143 + static inline void crash_ipi_callback(struct pt_regs *regs) { } 144 + 145 + static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) 146 + { 147 + } 148 + 143 149 #endif /* CONFIG_KEXEC_CORE */ 144 150 #endif /* ! __ASSEMBLY__ */ 145 151 #endif /* __KERNEL__ */
+1
arch/powerpc/include/asm/nohash/32/pgtable.h
··· 24 24 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) 25 25 26 26 #define PMD_CACHE_INDEX PMD_INDEX_SIZE 27 + #define PUD_CACHE_INDEX PUD_INDEX_SIZE 27 28 28 29 #ifndef __ASSEMBLY__ 29 30 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+1
arch/powerpc/include/asm/nohash/64/pgtable.h
··· 27 27 #else 28 28 #define PMD_CACHE_INDEX PMD_INDEX_SIZE 29 29 #endif 30 + #define PUD_CACHE_INDEX PUD_INDEX_SIZE 30 31 31 32 /* 32 33 * Define the address range of the kernel non-linear virtual area
+10
arch/powerpc/include/asm/topology.h
··· 44 44 extern void sysfs_remove_device_from_node(struct device *dev, int nid); 45 45 extern int numa_update_cpu_topology(bool cpus_locked); 46 46 47 + static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) 48 + { 49 + numa_cpu_lookup_table[cpu] = node; 50 + } 51 + 47 52 static inline int early_cpu_to_node(int cpu) 48 53 { 49 54 int nid; ··· 87 82 extern int start_topology_update(void); 88 83 extern int stop_topology_update(void); 89 84 extern int prrn_is_enabled(void); 85 + extern int find_and_online_cpu_nid(int cpu); 90 86 #else 91 87 static inline int start_topology_update(void) 92 88 { ··· 98 92 return 0; 99 93 } 100 94 static inline int prrn_is_enabled(void) 95 + { 96 + return 0; 97 + } 98 + static inline int find_and_online_cpu_nid(int cpu) 101 99 { 102 100 return 0; 103 101 }
+2
arch/powerpc/kernel/exceptions-64e.S
··· 943 943 /* 944 944 * An interrupt came in while soft-disabled; We mark paca->irq_happened 945 945 * accordingly and if the interrupt is level sensitive, we hard disable 946 + * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so 947 + * keep these in synch. 946 948 */ 947 949 948 950 .macro masked_interrupt_book3e paca_irq full_mask
+3 -3
arch/powerpc/kernel/exceptions-64s.S
··· 1426 1426 * triggered and won't automatically refire. 1427 1427 * - If it was a HMI we return immediately since we handled it in realmode 1428 1428 * and it won't refire. 1429 - * - else we hard disable and return. 1429 + * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return. 1430 1430 * This is called with r10 containing the value to OR to the paca field. 1431 1431 */ 1432 1432 #define MASKED_INTERRUPT(_H) \ ··· 1441 1441 ori r10,r10,0xffff; \ 1442 1442 mtspr SPRN_DEC,r10; \ 1443 1443 b MASKED_DEC_HANDLER_LABEL; \ 1444 - 1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \ 1445 - bne 2f; \ 1444 + 1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \ 1445 + beq 2f; \ 1446 1446 mfspr r10,SPRN_##_H##SRR1; \ 1447 1447 xori r10,r10,MSR_EE; /* clear MSR_EE */ \ 1448 1448 mtspr SPRN_##_H##SRR1,r10; \
+2 -2
arch/powerpc/mm/hash64_4k.c
··· 55 55 * need to add in 0x1 if it's a read-only user page 56 56 */ 57 57 rflags = htab_convert_pte_flags(new_pte); 58 - rpte = __real_pte(__pte(old_pte), ptep); 58 + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); 59 59 60 60 if (cpu_has_feature(CPU_FTR_NOEXECUTE) && 61 61 !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) ··· 117 117 return -1; 118 118 } 119 119 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; 120 - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); 120 + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); 121 121 } 122 122 *ptep = __pte(new_pte & ~H_PAGE_BUSY); 123 123 return 0;
+4 -4
arch/powerpc/mm/hash64_64k.c
··· 86 86 87 87 subpg_index = (ea & (PAGE_SIZE - 1)) >> shift; 88 88 vpn = hpt_vpn(ea, vsid, ssize); 89 - rpte = __real_pte(__pte(old_pte), ptep); 89 + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); 90 90 /* 91 91 *None of the sub 4k page is hashed 92 92 */ ··· 214 214 return -1; 215 215 } 216 216 217 - new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot); 217 + new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE); 218 218 new_pte |= H_PAGE_HASHPTE; 219 219 220 220 *ptep = __pte(new_pte & ~H_PAGE_BUSY); ··· 262 262 } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); 263 263 264 264 rflags = htab_convert_pte_flags(new_pte); 265 - rpte = __real_pte(__pte(old_pte), ptep); 265 + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); 266 266 267 267 if (cpu_has_feature(CPU_FTR_NOEXECUTE) && 268 268 !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) ··· 327 327 } 328 328 329 329 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; 330 - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); 330 + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); 331 331 } 332 332 *ptep = __pte(new_pte & ~H_PAGE_BUSY); 333 333 return 0;
+1
arch/powerpc/mm/hash_utils_64.c
··· 1008 1008 __pmd_index_size = H_PMD_INDEX_SIZE; 1009 1009 __pud_index_size = H_PUD_INDEX_SIZE; 1010 1010 __pgd_index_size = H_PGD_INDEX_SIZE; 1011 + __pud_cache_index = H_PUD_CACHE_INDEX; 1011 1012 __pmd_cache_index = H_PMD_CACHE_INDEX; 1012 1013 __pte_table_size = H_PTE_TABLE_SIZE; 1013 1014 __pmd_table_size = H_PMD_TABLE_SIZE;
+7 -3
arch/powerpc/mm/hugetlbpage-hash64.c
··· 27 27 unsigned long vpn; 28 28 unsigned long old_pte, new_pte; 29 29 unsigned long rflags, pa, sz; 30 - long slot; 30 + long slot, offset; 31 31 32 32 BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); 33 33 ··· 63 63 } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); 64 64 65 65 rflags = htab_convert_pte_flags(new_pte); 66 - rpte = __real_pte(__pte(old_pte), ptep); 66 + if (unlikely(mmu_psize == MMU_PAGE_16G)) 67 + offset = PTRS_PER_PUD; 68 + else 69 + offset = PTRS_PER_PMD; 70 + rpte = __real_pte(__pte(old_pte), ptep, offset); 67 71 68 72 sz = ((1UL) << shift); 69 73 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) ··· 108 104 return -1; 109 105 } 110 106 111 - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); 107 + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset); 112 108 } 113 109 114 110 /*
+2 -2
arch/powerpc/mm/init-common.c
··· 100 100 * same size as either the pgd or pmd index except with THP enabled 101 101 * on book3s 64 102 102 */ 103 - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) 104 - pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); 103 + if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX)) 104 + pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor); 105 105 }
-5
arch/powerpc/mm/numa.c
··· 143 143 numa_cpu_lookup_table[cpu] = -1; 144 144 } 145 145 146 - static void update_numa_cpu_lookup_table(unsigned int cpu, int node) 147 - { 148 - numa_cpu_lookup_table[cpu] = node; 149 - } 150 - 151 146 static void map_cpu_to_node(int cpu, int node) 152 147 { 153 148 update_numa_cpu_lookup_table(cpu, node);
+95 -22
arch/powerpc/mm/pgtable-radix.c
··· 17 17 #include <linux/of_fdt.h> 18 18 #include <linux/mm.h> 19 19 #include <linux/string_helpers.h> 20 + #include <linux/stop_machine.h> 20 21 21 22 #include <asm/pgtable.h> 22 23 #include <asm/pgalloc.h> 24 + #include <asm/mmu_context.h> 23 25 #include <asm/dma.h> 24 26 #include <asm/machdep.h> 25 27 #include <asm/mmu.h> ··· 335 333 "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); 336 334 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 337 335 trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); 336 + 337 + /* 338 + * The init_mm context is given the first available (non-zero) PID, 339 + * which is the "guard PID" and contains no page table. PIDR should 340 + * never be set to zero because that duplicates the kernel address 341 + * space at the 0x0... offset (quadrant 0)! 342 + * 343 + * An arbitrary PID that may later be allocated by the PID allocator 344 + * for userspace processes must not be used either, because that 345 + * would cause stale user mappings for that PID on CPUs outside of 346 + * the TLB invalidation scheme (because it won't be in mm_cpumask). 347 + * 348 + * So permanently carve out one PID for the purpose of a guard PID. 
349 + */ 350 + init_mm.context.id = mmu_base_pid; 351 + mmu_base_pid++; 338 352 } 339 353 340 354 static void __init radix_init_partition_table(void) ··· 553 535 __pmd_index_size = RADIX_PMD_INDEX_SIZE; 554 536 __pud_index_size = RADIX_PUD_INDEX_SIZE; 555 537 __pgd_index_size = RADIX_PGD_INDEX_SIZE; 538 + __pud_cache_index = RADIX_PUD_INDEX_SIZE; 556 539 __pmd_cache_index = RADIX_PMD_INDEX_SIZE; 557 540 __pte_table_size = RADIX_PTE_TABLE_SIZE; 558 541 __pmd_table_size = RADIX_PMD_TABLE_SIZE; ··· 598 579 599 580 radix_init_iamr(); 600 581 radix_init_pgtable(); 601 - 582 + /* Switch to the guard PID before turning on MMU */ 583 + radix__switch_mmu_context(NULL, &init_mm); 602 584 if (cpu_has_feature(CPU_FTR_HVMODE)) 603 585 tlbiel_all(); 604 586 } ··· 624 604 } 625 605 radix_init_iamr(); 626 606 607 + radix__switch_mmu_context(NULL, &init_mm); 627 608 if (cpu_has_feature(CPU_FTR_HVMODE)) 628 609 tlbiel_all(); 629 610 } ··· 687 666 pud_clear(pud); 688 667 } 689 668 669 + struct change_mapping_params { 670 + pte_t *pte; 671 + unsigned long start; 672 + unsigned long end; 673 + unsigned long aligned_start; 674 + unsigned long aligned_end; 675 + }; 676 + 677 + static int stop_machine_change_mapping(void *data) 678 + { 679 + struct change_mapping_params *params = 680 + (struct change_mapping_params *)data; 681 + 682 + if (!data) 683 + return -1; 684 + 685 + spin_unlock(&init_mm.page_table_lock); 686 + pte_clear(&init_mm, params->aligned_start, params->pte); 687 + create_physical_mapping(params->aligned_start, params->start); 688 + create_physical_mapping(params->end, params->aligned_end); 689 + spin_lock(&init_mm.page_table_lock); 690 + return 0; 691 + } 692 + 690 693 static void remove_pte_table(pte_t *pte_start, unsigned long addr, 691 694 unsigned long end) 692 695 { ··· 739 694 } 740 695 } 741 696 697 + /* 698 + * clear the pte and potentially split the mapping helper 699 + */ 700 + static void split_kernel_mapping(unsigned long addr, unsigned long end, 701 + unsigned 
long size, pte_t *pte) 702 + { 703 + unsigned long mask = ~(size - 1); 704 + unsigned long aligned_start = addr & mask; 705 + unsigned long aligned_end = addr + size; 706 + struct change_mapping_params params; 707 + bool split_region = false; 708 + 709 + if ((end - addr) < size) { 710 + /* 711 + * We're going to clear the PTE, but not flushed 712 + * the mapping, time to remap and flush. The 713 + * effects if visible outside the processor or 714 + * if we are running in code close to the 715 + * mapping we cleared, we are in trouble. 716 + */ 717 + if (overlaps_kernel_text(aligned_start, addr) || 718 + overlaps_kernel_text(end, aligned_end)) { 719 + /* 720 + * Hack, just return, don't pte_clear 721 + */ 722 + WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " 723 + "text, not splitting\n", addr, end); 724 + return; 725 + } 726 + split_region = true; 727 + } 728 + 729 + if (split_region) { 730 + params.pte = pte; 731 + params.start = addr; 732 + params.end = end; 733 + params.aligned_start = addr & ~(size - 1); 734 + params.aligned_end = min_t(unsigned long, aligned_end, 735 + (unsigned long)__va(memblock_end_of_DRAM())); 736 + stop_machine(stop_machine_change_mapping, &params, NULL); 737 + return; 738 + } 739 + 740 + pte_clear(&init_mm, addr, pte); 741 + } 742 + 742 743 static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, 743 744 unsigned long end) 744 745 { ··· 800 709 continue; 801 710 802 711 if (pmd_huge(*pmd)) { 803 - if (!IS_ALIGNED(addr, PMD_SIZE) || 804 - !IS_ALIGNED(next, PMD_SIZE)) { 805 - WARN_ONCE(1, "%s: unaligned range\n", __func__); 806 - continue; 807 - } 808 - 809 - pte_clear(&init_mm, addr, (pte_t *)pmd); 712 + split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); 810 713 continue; 811 714 } 812 715 ··· 825 740 continue; 826 741 827 742 if (pud_huge(*pud)) { 828 - if (!IS_ALIGNED(addr, PUD_SIZE) || 829 - !IS_ALIGNED(next, PUD_SIZE)) { 830 - WARN_ONCE(1, "%s: unaligned range\n", __func__); 831 - continue; 832 - } 833 - 834 - 
pte_clear(&init_mm, addr, (pte_t *)pud); 743 + split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); 835 744 continue; 836 745 } 837 746 ··· 851 772 continue; 852 773 853 774 if (pgd_huge(*pgd)) { 854 - if (!IS_ALIGNED(addr, PGDIR_SIZE) || 855 - !IS_ALIGNED(next, PGDIR_SIZE)) { 856 - WARN_ONCE(1, "%s: unaligned range\n", __func__); 857 - continue; 858 - } 859 - 860 - pte_clear(&init_mm, addr, (pte_t *)pgd); 775 + split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); 861 776 continue; 862 777 } 863 778
+4
arch/powerpc/mm/pgtable_64.c
··· 82 82 EXPORT_SYMBOL(__pgd_index_size); 83 83 unsigned long __pmd_cache_index; 84 84 EXPORT_SYMBOL(__pmd_cache_index); 85 + unsigned long __pud_cache_index; 86 + EXPORT_SYMBOL(__pud_cache_index); 85 87 unsigned long __pte_table_size; 86 88 EXPORT_SYMBOL(__pte_table_size); 87 89 unsigned long __pmd_table_size; ··· 472 470 asm volatile("ptesync" : : : "memory"); 473 471 if (old & PATB_HR) { 474 472 asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : 473 + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); 474 + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : 475 475 "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); 476 476 trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); 477 477 } else {
+7 -2
arch/powerpc/mm/tlb_hash64.c
··· 51 51 unsigned int psize; 52 52 int ssize; 53 53 real_pte_t rpte; 54 - int i; 54 + int i, offset; 55 55 56 56 i = batch->index; 57 57 ··· 67 67 psize = get_slice_psize(mm, addr); 68 68 /* Mask the address for the correct page size */ 69 69 addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); 70 + if (unlikely(psize == MMU_PAGE_16G)) 71 + offset = PTRS_PER_PUD; 72 + else 73 + offset = PTRS_PER_PMD; 70 74 #else 71 75 BUG(); 72 76 psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ ··· 82 78 * support 64k pages, this might be different from the 83 79 * hardware page size encoded in the slice table. */ 84 80 addr &= PAGE_MASK; 81 + offset = PTRS_PER_PTE; 85 82 } 86 83 87 84 ··· 96 91 } 97 92 WARN_ON(vsid == 0); 98 93 vpn = hpt_vpn(addr, vsid, ssize); 99 - rpte = __real_pte(__pte(pte), ptep); 94 + rpte = __real_pte(__pte(pte), ptep, offset); 100 95 101 96 /* 102 97 * Check if we have an active batch on this CPU. If not, just
+8 -8
arch/powerpc/platforms/powernv/vas-window.c
··· 1063 1063 rc = PTR_ERR(txwin->paste_kaddr); 1064 1064 goto free_window; 1065 1065 } 1066 + } else { 1067 + /* 1068 + * A user mapping must ensure that context switch issues 1069 + * CP_ABORT for this thread. 1070 + */ 1071 + rc = set_thread_uses_vas(); 1072 + if (rc) 1073 + goto free_window; 1066 1074 } 1067 - 1068 - /* 1069 - * Now that we have a send window, ensure context switch issues 1070 - * CP_ABORT for this thread. 1071 - */ 1072 - rc = -EINVAL; 1073 - if (set_thread_uses_vas() < 0) 1074 - goto free_window; 1075 1075 1076 1076 set_vinst_win(vinst, txwin); 1077 1077
+2 -2
arch/powerpc/platforms/pseries/hotplug-cpu.c
··· 36 36 #include <asm/xics.h> 37 37 #include <asm/xive.h> 38 38 #include <asm/plpar_wrappers.h> 39 + #include <asm/topology.h> 39 40 40 41 #include "pseries.h" 41 42 #include "offline_states.h" ··· 332 331 BUG_ON(cpu_online(cpu)); 333 332 set_cpu_present(cpu, false); 334 333 set_hard_smp_processor_id(cpu, -1); 334 + update_numa_cpu_lookup_table(cpu, -1); 335 335 break; 336 336 } 337 337 if (cpu >= nr_cpu_ids) ··· 341 339 } 342 340 cpu_maps_update_done(); 343 341 } 344 - 345 - extern int find_and_online_cpu_nid(int cpu); 346 342 347 343 static int dlpar_online_cpu(struct device_node *dn) 348 344 {
+22 -9
arch/powerpc/platforms/pseries/ras.c
··· 49 49 50 50 51 51 /* 52 + * Enable the hotplug interrupt late because processing them may touch other 53 + * devices or systems (e.g. hugepages) that have not been initialized at the 54 + * subsys stage. 55 + */ 56 + int __init init_ras_hotplug_IRQ(void) 57 + { 58 + struct device_node *np; 59 + 60 + /* Hotplug Events */ 61 + np = of_find_node_by_path("/event-sources/hot-plug-events"); 62 + if (np != NULL) { 63 + if (dlpar_workqueue_init() == 0) 64 + request_event_sources_irqs(np, ras_hotplug_interrupt, 65 + "RAS_HOTPLUG"); 66 + of_node_put(np); 67 + } 68 + 69 + return 0; 70 + } 71 + machine_late_initcall(pseries, init_ras_hotplug_IRQ); 72 + 73 + /* 52 74 * Initialize handlers for the set of interrupts caused by hardware errors 53 75 * and power system events. 54 76 */ ··· 85 63 if (np != NULL) { 86 64 request_event_sources_irqs(np, ras_error_interrupt, 87 65 "RAS_ERROR"); 88 - of_node_put(np); 89 - } 90 - 91 - /* Hotplug Events */ 92 - np = of_find_node_by_path("/event-sources/hot-plug-events"); 93 - if (np != NULL) { 94 - if (dlpar_workqueue_init() == 0) 95 - request_event_sources_irqs(np, ras_hotplug_interrupt, 96 - "RAS_HOTPLUG"); 97 66 of_node_put(np); 98 67 } 99 68
+1 -1
drivers/misc/ocxl/file.c
··· 277 277 struct ocxl_context *ctx = file->private_data; 278 278 struct ocxl_kernel_event_header header; 279 279 ssize_t rc; 280 - size_t used = 0; 280 + ssize_t used = 0; 281 281 DEFINE_WAIT(event_wait); 282 282 283 283 memset(&header, 0, sizeof(header));
+1 -1
tools/testing/selftests/powerpc/alignment/alignment_handler.c
··· 57 57 58 58 void sighandler(int sig, siginfo_t *info, void *ctx) 59 59 { 60 - struct ucontext *ucp = ctx; 60 + ucontext_t *ucp = ctx; 61 61 62 62 if (!testing) { 63 63 signal(sig, SIG_DFL);