Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'fixes' into next

Merge our fixes branch into next, this brings in a number of commits
that fix bugs we don't want to hit in next, in particular the fix for
CVE-2019-12817.

+229 -23
+30
arch/powerpc/include/asm/book3s/64/pgtable.h
··· 884 884 return false; 885 885 } 886 886 887 + static inline int pmd_is_serializing(pmd_t pmd) 888 + { 889 + /* 890 + * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear 891 + * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate). 892 + * 893 + * This condition may also occur when flushing a pmd while flushing 894 + * it (see ptep_modify_prot_start), so callers must ensure this 895 + * case is fine as well. 896 + */ 897 + if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) == 898 + cpu_to_be64(_PAGE_INVALID)) 899 + return true; 900 + 901 + return false; 902 + } 903 + 887 904 static inline int pmd_bad(pmd_t pmd) 888 905 { 889 906 if (radix_enabled()) ··· 1117 1100 #define pmd_access_permitted pmd_access_permitted 1118 1101 static inline bool pmd_access_permitted(pmd_t pmd, bool write) 1119 1102 { 1103 + /* 1104 + * pmdp_invalidate sets this combination (which is not caught by 1105 + * !pte_present() check in pte_access_permitted), to prevent 1106 + * lock-free lookups, as part of the serialize_against_pte_lookup() 1107 + * synchronisation. 1108 + * 1109 + * This also catches the case where the PTE's hardware PRESENT bit is 1110 + * cleared while TLB is flushed, which is suboptimal but should not 1111 + * be frequent. 1112 + */ 1113 + if (pmd_is_serializing(pmd)) 1114 + return false; 1115 + 1120 1116 return pte_access_permitted(pmd_pte(pmd), write); 1121 1117 } 1122 1118
+4
arch/powerpc/include/asm/btext.h
··· 13 13 int depth, int pitch); 14 14 extern void btext_setup_display(int width, int height, int depth, int pitch, 15 15 unsigned long address); 16 + #ifdef CONFIG_PPC32 16 17 extern void btext_prepare_BAT(void); 18 + #else 19 + static inline void btext_prepare_BAT(void) { } 20 + #endif 17 21 extern void btext_map(void); 18 22 extern void btext_unmap(void); 19 23
+3
arch/powerpc/include/asm/kexec.h
··· 94 94 return crashing_cpu >= 0; 95 95 } 96 96 97 + void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, 98 + unsigned long start_address) __noreturn; 99 + 97 100 #ifdef CONFIG_KEXEC_FILE 98 101 extern const struct kexec_file_ops kexec_elf64_ops; 99 102
+7
arch/powerpc/include/asm/page.h
··· 323 323 #endif /* __ASSEMBLY__ */ 324 324 #include <asm/slice.h> 325 325 326 + /* 327 + * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. 328 + */ 329 + #ifdef CONFIG_PPC32 330 + #define ARCH_ZONE_DMA_BITS 30 331 + #else 326 332 #define ARCH_ZONE_DMA_BITS 31 333 + #endif 327 334 328 335 #endif /* _ASM_POWERPC_PAGE_H */
+1 -1
arch/powerpc/kernel/exceptions-64s.S
··· 315 315 mfspr r11,SPRN_DSISR /* Save DSISR */ 316 316 std r11,_DSISR(r1) 317 317 std r9,_CCR(r1) /* Save CR in stackframe */ 318 - kuap_save_amr_and_lock r9, r10, cr1 318 + /* We don't touch AMR here, we never go to virtual mode */ 319 319 /* Save r9 through r13 from EXMC save area to stack frame. */ 320 320 EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) 321 321 mfmsr r11 /* get MSR value */
+1
arch/powerpc/kernel/head_32.S
··· 757 757 stw r0,0(r3) 758 758 759 759 /* load up the MMU */ 760 + bl load_segment_registers 760 761 bl load_up_mmu 761 762 762 763 /* ptr to phys current thread */
+5 -5
arch/powerpc/kernel/head_booke.h
··· 83 83 SAVE_4GPRS(3, r11); \ 84 84 SAVE_2GPRS(7, r11) 85 85 86 - .macro SYSCALL_ENTRY trapno intno 86 + .macro SYSCALL_ENTRY trapno intno srr1 87 87 mfspr r10, SPRN_SPRG_THREAD 88 88 #ifdef CONFIG_KVM_BOOKE_HV 89 89 BEGIN_FTR_SECTION ··· 94 94 mfspr r11, SPRN_SRR1 95 95 mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ 96 96 bf 3, 1975f 97 - b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1 97 + b kvmppc_handler_\intno\()_\srr1 98 98 1975: 99 99 mr r12, r13 100 100 lwz r13, THREAD_NORMSAVE(2)(r10) ··· 145 145 tophys(r11,r11) 146 146 addi r11,r11,global_dbcr0@l 147 147 #ifdef CONFIG_SMP 148 - lwz r9,TASK_CPU(r2) 149 - slwi r9,r9,3 150 - add r11,r11,r9 148 + lwz r10, TASK_CPU(r2) 149 + slwi r10, r10, 3 150 + add r11, r11, r10 151 151 #endif 152 152 lwz r12,0(r11) 153 153 mtspr SPRN_DBCR0,r12
+1 -1
arch/powerpc/kernel/head_fsl_booke.S
··· 413 413 414 414 /* System Call Interrupt */ 415 415 START_EXCEPTION(SystemCall) 416 - SYSCALL_ENTRY 0xc00 SYSCALL 416 + SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1 417 417 418 418 /* Auxiliary Processor Unavailable Interrupt */ 419 419 EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
+3 -1
arch/powerpc/kernel/machine_kexec_32.c
··· 30 30 */ 31 31 void default_machine_kexec(struct kimage *image) 32 32 { 33 - extern const unsigned char relocate_new_kernel[]; 34 33 extern const unsigned int relocate_new_kernel_size; 35 34 unsigned long page_list; 36 35 unsigned long reboot_code_buffer, reboot_code_buffer_phys; ··· 56 57 flush_icache_range(reboot_code_buffer, 57 58 reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); 58 59 printk(KERN_INFO "Bye!\n"); 60 + 61 + if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x)) 62 + relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start); 59 63 60 64 /* now call it */ 61 65 rnk = (relocate_new_kernel_t) reboot_code_buffer;
+1
arch/powerpc/kernel/prom_init.c
··· 2349 2349 prom_printf("W=%d H=%d LB=%d addr=0x%x\n", 2350 2350 width, height, pitch, addr); 2351 2351 btext_setup_display(width, height, 8, pitch, addr); 2352 + btext_prepare_BAT(); 2352 2353 } 2353 2354 #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ 2354 2355 }
+1 -1
arch/powerpc/kernel/prom_init_check.sh
··· 27 27 WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush 28 28 _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold 29 29 __secondary_hold_acknowledge __secondary_hold_spinloop __start 30 - logo_linux_clut224 30 + logo_linux_clut224 btext_prepare_BAT 31 31 reloc_got2 kernstart_addr memstart_addr linux_banner _stext 32 32 __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." 33 33
+1
arch/powerpc/kvm/book3s_hv_builtin.c
··· 833 833 } 834 834 } 835 835 asm volatile("ptesync": : :"memory"); 836 + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); 836 837 } 837 838 838 839 void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+13 -2
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 2507 2507 LOAD_REG_ADDR(r11, dawr_force_enable) 2508 2508 lbz r11, 0(r11) 2509 2509 cmpdi r11, 0 2510 + bne 3f 2510 2511 li r3, H_HARDWARE 2511 - beqlr 2512 + blr 2513 + 3: 2512 2514 /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ 2513 2515 rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW 2514 2516 rlwimi r5, r4, 2, DAWRX_WT 2515 2517 clrrdi r4, r4, 3 2516 2518 std r4, VCPU_DAWR(r3) 2517 2519 std r5, VCPU_DAWRX(r3) 2520 + /* 2521 + * If came in through the real mode hcall handler then it is necessary 2522 + * to write the registers since the return path won't. Otherwise it is 2523 + * sufficient to store then in the vcpu struct as they will be loaded 2524 + * next time the vcpu is run. 2525 + */ 2526 + mfmsr r6 2527 + andi. r6, r6, MSR_DR /* in real mode? */ 2528 + bne 4f 2518 2529 mtspr SPRN_DAWR, r4 2519 2530 mtspr SPRN_DAWRX, r5 2520 - li r3, 0 2531 + 4: li r3, 0 2521 2532 blr 2522 2533 2523 2534 _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
+47 -8
arch/powerpc/mm/book3s64/mmu_context.c
··· 55 55 56 56 void slb_setup_new_exec(void); 57 57 58 + static int realloc_context_ids(mm_context_t *ctx) 59 + { 60 + int i, id; 61 + 62 + /* 63 + * id 0 (aka. ctx->id) is special, we always allocate a new one, even if 64 + * there wasn't one allocated previously (which happens in the exec 65 + * case where ctx is newly allocated). 66 + * 67 + * We have to be a bit careful here. We must keep the existing ids in 68 + * the array, so that we can test if they're non-zero to decide if we 69 + * need to allocate a new one. However in case of error we must free the 70 + * ids we've allocated but *not* any of the existing ones (or risk a 71 + * UAF). That's why we decrement i at the start of the error handling 72 + * loop, to skip the id that we just tested but couldn't reallocate. 73 + */ 74 + for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) { 75 + if (i == 0 || ctx->extended_id[i]) { 76 + id = hash__alloc_context_id(); 77 + if (id < 0) 78 + goto error; 79 + 80 + ctx->extended_id[i] = id; 81 + } 82 + } 83 + 84 + /* The caller expects us to return id */ 85 + return ctx->id; 86 + 87 + error: 88 + for (i--; i >= 0; i--) { 89 + if (ctx->extended_id[i]) 90 + ida_free(&mmu_context_ida, ctx->extended_id[i]); 91 + } 92 + 93 + return id; 94 + } 95 + 58 96 static int hash__init_new_context(struct mm_struct *mm) 59 97 { 60 98 int index; 61 99 62 - index = hash__alloc_context_id(); 63 - if (index < 0) 64 - return index; 65 - 66 100 mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), 67 101 GFP_KERNEL); 68 - if (!mm->context.hash_context) { 69 - ida_free(&mmu_context_ida, index); 102 + if (!mm->context.hash_context) 70 103 return -ENOMEM; 71 - } 72 104 73 105 /* 74 106 * The old code would re-promote on fork, we don't do that when using ··· 128 96 mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), 129 97 GFP_KERNEL); 130 98 if (!mm->context.hash_context->spt) { 131 - ida_free(&mmu_context_ida, index); 132 99 kfree(mm->context.hash_context); 133 100 return -ENOMEM; 134 101 } 135 102 } 136 103 #endif 104 + } 137 105 106 + index = realloc_context_ids(&mm->context); 107 + if (index < 0) { 108 + #ifdef CONFIG_PPC_SUBPAGE_PROT 109 + kfree(mm->context.hash_context->spt); 110 + #endif 111 + kfree(mm->context.hash_context); 112 + return index; 138 113 } 139 114 140 115 pkey_mm_init(mm);
+3
arch/powerpc/mm/book3s64/pgtable.c
··· 116 116 /* 117 117 * This ensures that generic code that rely on IRQ disabling 118 118 * to prevent a parallel THP split work as expected. 119 + * 120 + * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires 121 + * a special case check in pmd_access_permitted. 119 122 */ 120 123 serialize_against_pte_lookup(vma->vm_mm); 121 124 return __pmd(old_pmd);
+2 -1
arch/powerpc/mm/mem.c
··· 253 253 (long int)((top_of_ram - total_ram) >> 20)); 254 254 255 255 #ifdef CONFIG_ZONE_DMA 256 - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT); 256 + max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 257 + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT); 257 258 #endif 258 259 max_zone_pfns[ZONE_NORMAL] = max_low_pfn; 259 260 #ifdef CONFIG_HIGHMEM
+14 -2
arch/powerpc/mm/pgtable.c
··· 372 372 pdshift = PMD_SHIFT; 373 373 pmdp = pmd_offset(&pud, ea); 374 374 pmd = READ_ONCE(*pmdp); 375 + 375 376 /* 376 - * A hugepage collapse is captured by pmd_none, because 377 - * it mark the pmd none and do a hpte invalidate. 377 + * A hugepage collapse is captured by this condition, see 378 + * pmdp_collapse_flush. 378 379 */ 379 380 if (pmd_none(pmd)) 380 381 return NULL; 382 + 383 + #ifdef CONFIG_PPC_BOOK3S_64 384 + /* 385 + * A hugepage split is captured by this condition, see 386 + * pmdp_invalidate. 387 + * 388 + * Huge page modification can be caught here too. 389 + */ 390 + if (pmd_is_serializing(pmd)) 391 + return NULL; 392 + #endif 381 393 382 394 if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { 383 395 if (is_thp)
+1
arch/powerpc/platforms/powermac/Kconfig
··· 7 7 select PPC_INDIRECT_PCI if PPC32 8 8 select PPC_MPC106 if PPC32 9 9 select PPC_NATIVE 10 + select ZONE_DMA if PPC32 10 11 default y 11 12 12 13 config PPC_PMAC64
+1
tools/testing/selftests/powerpc/mm/.gitignore
··· 4 4 prot_sao 5 5 segv_errors 6 6 wild_bctr 7 + large_vm_fork_separation
+3 -1
tools/testing/selftests/powerpc/mm/Makefile
··· 2 2 noarg: 3 3 $(MAKE) -C ../ 4 4 5 - TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr 5 + TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ 6 + large_vm_fork_separation 6 7 TEST_GEN_FILES := tempfile 7 8 8 9 top_srcdir = ../../../../.. ··· 14 13 $(OUTPUT)/prot_sao: ../utils.c 15 14 16 15 $(OUTPUT)/wild_bctr: CFLAGS += -m64 16 + $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 17 17 18 18 $(OUTPUT)/tempfile: 19 19 dd if=/dev/zero of=$@ bs=64k count=1
+87
tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + // 3 + // Copyright 2019, Michael Ellerman, IBM Corp. 4 + // 5 + // Test that allocating memory beyond the memory limit and then forking is 6 + // handled correctly, ie. the child is able to access the mappings beyond the 7 + // memory limit and the child's writes are not visible to the parent. 8 + 9 + #include <stdio.h> 10 + #include <stdlib.h> 11 + #include <sys/mman.h> 12 + #include <sys/types.h> 13 + #include <sys/wait.h> 14 + #include <unistd.h> 15 + 16 + #include "utils.h" 17 + 18 + 19 + #ifndef MAP_FIXED_NOREPLACE 20 + #define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB 21 + #endif 22 + 23 + 24 + static int test(void) 25 + { 26 + int p2c[2], c2p[2], rc, status, c, *p; 27 + unsigned long page_size; 28 + pid_t pid; 29 + 30 + page_size = sysconf(_SC_PAGESIZE); 31 + SKIP_IF(page_size != 65536); 32 + 33 + // Create a mapping at 512TB to allocate an extended_id 34 + p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE, 35 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0); 36 + if (p == MAP_FAILED) { 37 + perror("mmap"); 38 + printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n"); 39 + return 1; 40 + } 41 + 42 + printf("parent writing %p = 1\n", p); 43 + *p = 1; 44 + 45 + FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1); 46 + 47 + pid = fork(); 48 + if (pid == 0) { 49 + FAIL_IF(read(p2c[0], &c, 1) != 1); 50 + 51 + pid = getpid(); 52 + printf("child writing %p = %d\n", p, pid); 53 + *p = pid; 54 + 55 + FAIL_IF(write(c2p[1], &c, 1) != 1); 56 + FAIL_IF(read(p2c[0], &c, 1) != 1); 57 + exit(0); 58 + } 59 + 60 + c = 0; 61 + FAIL_IF(write(p2c[1], &c, 1) != 1); 62 + FAIL_IF(read(c2p[0], &c, 1) != 1); 63 + 64 + // Prevent compiler optimisation 65 + barrier(); 66 + 67 + rc = 0; 68 + printf("parent reading %p = %d\n", p, *p); 69 + if (*p != 1) { 70 + printf("Error: BUG! parent saw child's write! *p = %d\n", *p); 71 + rc = 1; 72 + } 73 + 74 + FAIL_IF(write(p2c[1], &c, 1) != 1); 75 + FAIL_IF(waitpid(pid, &status, 0) == -1); 76 + FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status)); 77 + 78 + if (rc == 0) 79 + printf("success: test completed OK\n"); 80 + 81 + return rc; 82 + } 83 + 84 + int main(void) 85 + { 86 + return test_harness(test, "large_vm_fork_separation"); 87 + }