Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'mm-stable-2025-04-02-22-07' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull more MM updates from Andrew Morton:

- The series "mm: fixes for fallouts from mem_init() cleanup" from Mike
Rapoport fixes a couple of issues with the just-merged "arch, mm:
reduce code duplication in mem_init()" series

- The series "MAINTAINERS: add my sub-entries to MM part." from Mike
Rapoport does some maintenance on MAINTAINERS

- The series "remove tlb_remove_page_ptdesc()" from Qi Zheng does some
cleanup work to the page mapping code

- The series "mseal system mappings" from Jeff Xu permits sealing of
"system mappings", such as vdso, vvar, vvar_vclock, vectors (arm
compat-mode), sigpage (arm compat-mode)

- Plus the usual shower of singleton patches

* tag 'mm-stable-2025-04-02-22-07' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (31 commits)
mseal sysmap: add arch-support txt
mseal sysmap: enable s390
selftest: test system mappings are sealed
mseal sysmap: update mseal.rst
mseal sysmap: uprobe mapping
mseal sysmap: enable arm64
mseal sysmap: enable x86-64
mseal sysmap: generic vdso vvar mapping
selftests: x86: test_mremap_vdso: skip if vdso is msealed
mseal sysmap: kernel config and header change
mm: pgtable: remove tlb_remove_page_ptdesc()
x86: pgtable: convert to use tlb_remove_ptdesc()
riscv: pgtable: unconditionally use tlb_remove_ptdesc()
mm: pgtable: convert some architectures to use tlb_remove_ptdesc()
mm: pgtable: change pt parameter of tlb_remove_ptdesc() to struct ptdesc*
mm: pgtable: make generic tlb_remove_table() use struct ptdesc
microblaze/mm: put mm_cmdline_setup() in .init.text section
mm/memory_hotplug: fix call folio_test_large with tail page in do_migrate_range
MAINTAINERS: mm: add entry for secretmem
MAINTAINERS: mm: add entry for numa memblocks and numa emulation
...

+417 -128
+30
Documentation/features/core/mseal_sys_mappings/arch-support.txt
··· 1 + # 2 + # Feature name: mseal-system-mappings 3 + # Kconfig: ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS 4 + # description: arch supports mseal system mappings 5 + # 6 + ----------------------- 7 + | arch |status| 8 + ----------------------- 9 + | alpha: | TODO | 10 + | arc: | N/A | 11 + | arm: | N/A | 12 + | arm64: | ok | 13 + | csky: | N/A | 14 + | hexagon: | N/A | 15 + | loongarch: | TODO | 16 + | m68k: | N/A | 17 + | microblaze: | N/A | 18 + | mips: | TODO | 19 + | nios2: | N/A | 20 + | openrisc: | N/A | 21 + | parisc: | TODO | 22 + | powerpc: | TODO | 23 + | riscv: | TODO | 24 + | s390: | ok | 25 + | sh: | N/A | 26 + | sparc: | TODO | 27 + | um: | TODO | 28 + | x86: | ok | 29 + | xtensa: | N/A | 30 + -----------------------
+21
Documentation/userspace-api/mseal.rst
··· 130 130 131 131 - Chrome browser: protect some security sensitive data structures. 132 132 133 + - System mappings: 134 + The system mappings are created by the kernel and includes vdso, vvar, 135 + vvar_vclock, vectors (arm compat-mode), sigpage (arm compat-mode), uprobes. 136 + 137 + Those system mappings are readonly only or execute only, memory sealing can 138 + protect them from ever changing to writable or unmmap/remapped as different 139 + attributes. This is useful to mitigate memory corruption issues where a 140 + corrupted pointer is passed to a memory management system. 141 + 142 + If supported by an architecture (CONFIG_ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS), 143 + the CONFIG_MSEAL_SYSTEM_MAPPINGS seals all system mappings of this 144 + architecture. 145 + 146 + The following architectures currently support this feature: x86-64, arm64, 147 + and s390. 148 + 149 + WARNING: This feature breaks programs which rely on relocating 150 + or unmapping system mappings. Known broken software at the time 151 + of writing includes CHECKPOINT_RESTORE, UML, gVisor, rr. Therefore 152 + this config can't be enabled universally. 153 + 133 154 When not to use mseal 134 155 ===================== 135 156 Applications can apply sealing to any virtual memory region from userspace,
+39
MAINTAINERS
··· 15487 15487 F: tools/testing/selftests/mm/ 15488 15488 N: include/linux/page[-_]* 15489 15489 15490 + MEMORY MANAGEMENT - EXECMEM 15491 + M: Andrew Morton <akpm@linux-foundation.org> 15492 + M: Mike Rapoport <rppt@kernel.org> 15493 + L: linux-mm@kvack.org 15494 + S: Maintained 15495 + F: include/linux/execmem.h 15496 + F: mm/execmem.c 15497 + 15498 + MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION 15499 + M: Andrew Morton <akpm@linux-foundation.org> 15500 + M: Mike Rapoport <rppt@kernel.org> 15501 + L: linux-mm@kvack.org 15502 + S: Maintained 15503 + F: include/linux/numa_memblks.h 15504 + F: mm/numa.c 15505 + F: mm/numa_emulation.c 15506 + F: mm/numa_memblks.c 15507 + 15508 + MEMORY MANAGEMENT - SECRETMEM 15509 + M: Andrew Morton <akpm@linux-foundation.org> 15510 + M: Mike Rapoport <rppt@kernel.org> 15511 + L: linux-mm@kvack.org 15512 + S: Maintained 15513 + F: include/linux/secretmem.h 15514 + F: mm/secretmem.c 15515 + 15516 + MEMORY MANAGEMENT - USERFAULTFD 15517 + M: Andrew Morton <akpm@linux-foundation.org> 15518 + R: Peter Xu <peterx@redhat.com> 15519 + L: linux-mm@kvack.org 15520 + S: Maintained 15521 + F: Documentation/admin-guide/mm/userfaultfd.rst 15522 + F: fs/userfaultfd.c 15523 + F: include/asm-generic/pgtable_uffd.h 15524 + F: include/linux/userfaultfd_k.h 15525 + F: include/uapi/linux/userfaultfd.h 15526 + F: mm/userfaultfd.c 15527 + F: tools/testing/selftests/mm/uffd-*.[ch] 15528 + 15490 15529 MEMORY MAPPING 15491 15530 M: Andrew Morton <akpm@linux-foundation.org> 15492 15531 M: Liam R. Howlett <Liam.Howlett@oracle.com>
+1
arch/arm64/Kconfig
··· 38 38 select ARCH_HAS_KEEPINITRD 39 39 select ARCH_HAS_MEMBARRIER_SYNC_CORE 40 40 select ARCH_HAS_MEM_ENCRYPT 41 + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS 41 42 select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS 42 43 select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 43 44 select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT
+6 -3
arch/arm64/kernel/vdso.c
··· 130 130 mm->context.vdso = (void *)vdso_base; 131 131 ret = _install_special_mapping(mm, vdso_base, vdso_text_len, 132 132 VM_READ|VM_EXEC|gp_flags| 133 - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 133 + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 134 + VM_SEALED_SYSMAP, 134 135 vdso_info[abi].cm); 135 136 if (IS_ERR(ret)) 136 137 goto up_fail; ··· 257 256 */ 258 257 ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE, 259 258 VM_READ | VM_EXEC | 260 - VM_MAYREAD | VM_MAYEXEC, 259 + VM_MAYREAD | VM_MAYEXEC | 260 + VM_SEALED_SYSMAP, 261 261 &aarch32_vdso_maps[AA32_MAP_VECTORS]); 262 262 263 263 return PTR_ERR_OR_ZERO(ret); ··· 281 279 */ 282 280 ret = _install_special_mapping(mm, addr, PAGE_SIZE, 283 281 VM_READ | VM_EXEC | VM_MAYREAD | 284 - VM_MAYWRITE | VM_MAYEXEC, 282 + VM_MAYWRITE | VM_MAYEXEC | 283 + VM_SEALED_SYSMAP, 285 284 &aarch32_vdso_maps[AA32_MAP_SIGPAGE]); 286 285 if (IS_ERR(ret)) 287 286 goto out;
+2 -5
arch/csky/include/asm/pgalloc.h
··· 61 61 return ret; 62 62 } 63 63 64 - #define __pte_free_tlb(tlb, pte, address) \ 65 - do { \ 66 - pagetable_dtor(page_ptdesc(pte)); \ 67 - tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ 68 - } while (0) 64 + #define __pte_free_tlb(tlb, pte, address) \ 65 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 69 66 70 67 extern void pagetable_init(void); 71 68 extern void mmu_init(unsigned long min_pfn, unsigned long max_pfn);
+2 -5
arch/hexagon/include/asm/pgalloc.h
··· 87 87 max_kernel_seg = pmdindex; 88 88 } 89 89 90 - #define __pte_free_tlb(tlb, pte, addr) \ 91 - do { \ 92 - pagetable_dtor((page_ptdesc(pte))); \ 93 - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ 94 - } while (0) 90 + #define __pte_free_tlb(tlb, pte, addr) \ 91 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 95 92 96 93 #endif
+2 -5
arch/loongarch/include/asm/pgalloc.h
··· 55 55 return pte; 56 56 } 57 57 58 - #define __pte_free_tlb(tlb, pte, address) \ 59 - do { \ 60 - pagetable_dtor(page_ptdesc(pte)); \ 61 - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ 62 - } while (0) 58 + #define __pte_free_tlb(tlb, pte, address) \ 59 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 63 60 64 61 #ifndef __PAGETABLE_PMD_FOLDED 65 62
+2 -5
arch/m68k/include/asm/sun3_pgalloc.h
··· 17 17 18 18 extern const char bad_pmd_string[]; 19 19 20 - #define __pte_free_tlb(tlb, pte, addr) \ 21 - do { \ 22 - pagetable_dtor(page_ptdesc(pte)); \ 23 - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ 24 - } while (0) 20 + #define __pte_free_tlb(tlb, pte, addr) \ 21 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 25 22 26 23 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) 27 24 {
+1 -1
arch/microblaze/mm/init.c
··· 118 118 /* 119 119 * Check for command-line options that affect what MMU_init will do. 120 120 */ 121 - static void mm_cmdline_setup(void) 121 + static void __init mm_cmdline_setup(void) 122 122 { 123 123 unsigned long maxmem = 0; 124 124 char *p = cmd_line;
+2 -5
arch/mips/include/asm/pgalloc.h
··· 48 48 extern void pgd_init(void *addr); 49 49 extern pgd_t *pgd_alloc(struct mm_struct *mm); 50 50 51 - #define __pte_free_tlb(tlb, pte, address) \ 52 - do { \ 53 - pagetable_dtor(page_ptdesc(pte)); \ 54 - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ 55 - } while (0) 51 + #define __pte_free_tlb(tlb, pte, address) \ 52 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 56 53 57 54 #ifndef __PAGETABLE_PMD_FOLDED 58 55
+2 -5
arch/nios2/include/asm/pgalloc.h
··· 28 28 29 29 extern pgd_t *pgd_alloc(struct mm_struct *mm); 30 30 31 - #define __pte_free_tlb(tlb, pte, addr) \ 32 - do { \ 33 - pagetable_dtor(page_ptdesc(pte)); \ 34 - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ 35 - } while (0) 31 + #define __pte_free_tlb(tlb, pte, addr) \ 32 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 36 33 37 34 #endif /* _ASM_NIOS2_PGALLOC_H */
+2 -5
arch/openrisc/include/asm/pgalloc.h
··· 64 64 65 65 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); 66 66 67 - #define __pte_free_tlb(tlb, pte, addr) \ 68 - do { \ 69 - pagetable_dtor(page_ptdesc(pte)); \ 70 - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ 71 - } while (0) 67 + #define __pte_free_tlb(tlb, pte, addr) \ 68 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 72 69 73 70 #endif
+4 -22
arch/riscv/include/asm/pgalloc.h
··· 15 15 #define __HAVE_ARCH_PUD_FREE 16 16 #include <asm-generic/pgalloc.h> 17 17 18 - /* 19 - * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to 20 - * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use 21 - * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this 22 - * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the 23 - * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h 24 - * for more details. 25 - */ 26 - static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) 27 - { 28 - if (riscv_use_sbi_for_rfence()) { 29 - tlb_remove_ptdesc(tlb, pt); 30 - } else { 31 - pagetable_dtor(pt); 32 - tlb_remove_page_ptdesc(tlb, pt); 33 - } 34 - } 35 - 36 18 static inline void pmd_populate_kernel(struct mm_struct *mm, 37 19 pmd_t *pmd, pte_t *pte) 38 20 { ··· 90 108 unsigned long addr) 91 109 { 92 110 if (pgtable_l4_enabled) 93 - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); 111 + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); 94 112 } 95 113 96 114 static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, 97 115 unsigned long addr) 98 116 { 99 117 if (pgtable_l5_enabled) 100 - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); 118 + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); 101 119 } 102 120 #endif /* __PAGETABLE_PMD_FOLDED */ 103 121 ··· 125 143 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, 126 144 unsigned long addr) 127 145 { 128 - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); 146 + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); 129 147 } 130 148 131 149 #endif /* __PAGETABLE_PMD_FOLDED */ ··· 133 151 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, 134 152 unsigned long addr) 135 153 { 136 - riscv_tlb_remove_ptdesc(tlb, page_ptdesc(pte)); 154 + tlb_remove_ptdesc(tlb, page_ptdesc(pte)); 137 155 } 138 156 #endif /* CONFIG_MMU */ 139 157
+1
arch/s390/Kconfig
··· 137 137 select ARCH_SUPPORTS_DEBUG_PAGEALLOC 138 138 select ARCH_SUPPORTS_HUGETLBFS 139 139 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG 140 + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS 140 141 select ARCH_SUPPORTS_NUMA_BALANCING 141 142 select ARCH_SUPPORTS_PER_VMA_LOCK 142 143 select ARCH_USE_BUILTIN_BSWAP
+1 -1
arch/s390/kernel/vdso.c
··· 80 80 vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; 81 81 /* VM_MAYWRITE for COW so gdb can set breakpoints */ 82 82 vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, 83 - VM_READ|VM_EXEC| 83 + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| 84 84 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 85 85 vdso_mapping); 86 86 if (IS_ERR(vma)) {
+2 -5
arch/sh/include/asm/pgalloc.h
··· 32 32 set_pmd(pmd, __pmd((unsigned long)page_address(pte))); 33 33 } 34 34 35 - #define __pte_free_tlb(tlb, pte, addr) \ 36 - do { \ 37 - pagetable_dtor(page_ptdesc(pte)); \ 38 - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ 39 - } while (0) 35 + #define __pte_free_tlb(tlb, pte, addr) \ 36 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 40 37 41 38 #endif /* __ASM_SH_PGALLOC_H */
+6 -15
arch/um/include/asm/pgalloc.h
··· 25 25 */ 26 26 extern pgd_t *pgd_alloc(struct mm_struct *); 27 27 28 - #define __pte_free_tlb(tlb, pte, address) \ 29 - do { \ 30 - pagetable_dtor(page_ptdesc(pte)); \ 31 - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ 32 - } while (0) 28 + #define __pte_free_tlb(tlb, pte, address) \ 29 + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) 33 30 34 31 #if CONFIG_PGTABLE_LEVELS > 2 35 32 36 - #define __pmd_free_tlb(tlb, pmd, address) \ 37 - do { \ 38 - pagetable_dtor(virt_to_ptdesc(pmd)); \ 39 - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ 40 - } while (0) 33 + #define __pmd_free_tlb(tlb, pmd, address) \ 34 + tlb_remove_ptdesc((tlb), virt_to_ptdesc(pmd)) 41 35 42 36 #if CONFIG_PGTABLE_LEVELS > 3 43 37 44 - #define __pud_free_tlb(tlb, pud, address) \ 45 - do { \ 46 - pagetable_dtor(virt_to_ptdesc(pud)); \ 47 - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ 48 - } while (0) 38 + #define __pud_free_tlb(tlb, pud, address) \ 39 + tlb_remove_ptdesc((tlb), virt_to_ptdesc(pud)) 49 40 50 41 #endif 51 42 #endif
+1
arch/x86/Kconfig
··· 27 27 # Options that are inherently 64-bit kernel only: 28 28 select ARCH_HAS_GIGANTIC_PAGE 29 29 select ARCH_HAS_PTDUMP 30 + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS 30 31 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 31 32 select ARCH_SUPPORTS_PER_VMA_LOCK 32 33 select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
+3 -2
arch/x86/entry/vdso/vma.c
··· 162 162 text_start, 163 163 image->size, 164 164 VM_READ|VM_EXEC| 165 - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 165 + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 166 + VM_SEALED_SYSMAP, 166 167 &vdso_mapping); 167 168 168 169 if (IS_ERR(vma)) { ··· 182 181 VDSO_VCLOCK_PAGES_START(addr), 183 182 VDSO_NR_VCLOCK_PAGES * PAGE_SIZE, 184 183 VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| 185 - VM_PFNMAP, 184 + VM_PFNMAP|VM_SEALED_SYSMAP, 186 185 &vvar_vclock_mapping); 187 186 188 187 if (IS_ERR(vma)) {
+4 -4
arch/x86/mm/pgtable.c
··· 20 20 void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) 21 21 { 22 22 paravirt_release_pte(page_to_pfn(pte)); 23 - tlb_remove_table(tlb, page_ptdesc(pte)); 23 + tlb_remove_ptdesc(tlb, page_ptdesc(pte)); 24 24 } 25 25 26 26 #if CONFIG_PGTABLE_LEVELS > 2 ··· 34 34 #ifdef CONFIG_X86_PAE 35 35 tlb->need_flush_all = 1; 36 36 #endif 37 - tlb_remove_table(tlb, virt_to_ptdesc(pmd)); 37 + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); 38 38 } 39 39 40 40 #if CONFIG_PGTABLE_LEVELS > 3 41 41 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) 42 42 { 43 43 paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); 44 - tlb_remove_table(tlb, virt_to_ptdesc(pud)); 44 + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); 45 45 } 46 46 47 47 #if CONFIG_PGTABLE_LEVELS > 4 48 48 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) 49 49 { 50 50 paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); 51 - tlb_remove_table(tlb, virt_to_ptdesc(p4d)); 51 + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); 52 52 } 53 53 #endif /* CONFIG_PGTABLE_LEVELS > 4 */ 54 54 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
+4 -10
include/asm-generic/tlb.h
··· 227 227 */ 228 228 static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) 229 229 { 230 - struct page *page = (struct page *)table; 230 + struct ptdesc *ptdesc = (struct ptdesc *)table; 231 231 232 - pagetable_dtor(page_ptdesc(page)); 233 - tlb_remove_page(tlb, page); 232 + pagetable_dtor(ptdesc); 233 + tlb_remove_page(tlb, ptdesc_page(ptdesc)); 234 234 } 235 235 #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ 236 236 ··· 493 493 return tlb_remove_page_size(tlb, page, PAGE_SIZE); 494 494 } 495 495 496 - static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) 496 + static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) 497 497 { 498 498 tlb_remove_table(tlb, pt); 499 - } 500 - 501 - /* Like tlb_remove_ptdesc, but for page-like page directories. */ 502 - static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) 503 - { 504 - tlb_remove_page(tlb, ptdesc_page(pt)); 505 499 } 506 500 507 501 static inline void tlb_change_page_size(struct mmu_gather *tlb,
+10
include/linux/mm.h
··· 4238 4238 int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); 4239 4239 int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); 4240 4240 4241 + 4242 + /* 4243 + * mseal of userspace process's system mappings. 4244 + */ 4245 + #ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS 4246 + #define VM_SEALED_SYSMAP VM_SEALED 4247 + #else 4248 + #define VM_SEALED_SYSMAP VM_NONE 4249 + #endif 4250 + 4241 4251 #endif /* _LINUX_MM_H */
+22
init/Kconfig
··· 1888 1888 config ARCH_HAS_MEMBARRIER_SYNC_CORE 1889 1889 bool 1890 1890 1891 + config ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS 1892 + bool 1893 + help 1894 + Control MSEAL_SYSTEM_MAPPINGS access based on architecture. 1895 + 1896 + A 64-bit kernel is required for the memory sealing feature. 1897 + No specific hardware features from the CPU are needed. 1898 + 1899 + To enable this feature, the architecture needs to update their 1900 + special mappings calls to include the sealing flag and confirm 1901 + that it doesn't unmap/remap system mappings during the life 1902 + time of the process. The existence of this flag for an architecture 1903 + implies that it does not require the remapping of the system 1904 + mappings during process lifetime, so sealing these mappings is safe 1905 + from a kernel perspective. 1906 + 1907 + After the architecture enables this, a distribution can set 1908 + CONFIG_MSEAL_SYSTEM_MAPPING to manage access to the feature. 1909 + 1910 + For complete descriptions of memory sealing, please see 1911 + Documentation/userspace-api/mseal.rst 1912 + 1891 1913 config HAVE_PERF_EVENTS 1892 1914 bool 1893 1915 help
+2 -1
kernel/events/uprobes.c
··· 1703 1703 } 1704 1704 1705 1705 vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE, 1706 - VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, 1706 + VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO| 1707 + VM_SEALED_SYSMAP, 1707 1708 &xol_mapping); 1708 1709 if (IS_ERR(vma)) { 1709 1710 ret = PTR_ERR(vma);
+2 -1
lib/vdso/datastore.c
··· 99 99 struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr) 100 100 { 101 101 return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, 102 - VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP | VM_PFNMAP, 102 + VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP | 103 + VM_PFNMAP | VM_SEALED_SYSMAP, 103 104 &vdso_vvar_mapping); 104 105 } 105 106
+4 -5
mm/damon/core.c
··· 76 76 77 77 if (ops->id >= NR_DAMON_OPS) 78 78 return -EINVAL; 79 + 79 80 mutex_lock(&damon_ops_lock); 80 81 /* Fail for already registered ops */ 81 - if (__damon_is_registered_ops(ops->id)) { 82 + if (__damon_is_registered_ops(ops->id)) 82 83 err = -EINVAL; 83 - goto out; 84 - } 85 - damon_registered_ops[ops->id] = *ops; 86 - out: 84 + else 85 + damon_registered_ops[ops->id] = *ops; 87 86 mutex_unlock(&damon_ops_lock); 88 87 return err; 89 88 }
+1 -4
mm/kasan/kasan_test_c.c
··· 1073 1073 kmem_cache_destroy(cache); 1074 1074 } 1075 1075 1076 - static void empty_cache_ctor(void *object) { } 1077 - 1078 1076 static void kmem_cache_double_destroy(struct kunit *test) 1079 1077 { 1080 1078 struct kmem_cache *cache; 1081 1079 1082 - /* Provide a constructor to prevent cache merging. */ 1083 - cache = kmem_cache_create("test_cache", 200, 0, 0, empty_cache_ctor); 1080 + cache = kmem_cache_create("test_cache", 200, 0, SLAB_NO_MERGE, NULL); 1084 1081 KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); 1085 1082 kmem_cache_destroy(cache); 1086 1083 KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache));
+3
mm/memblock.c
··· 2167 2167 unsigned long start_pfn = PFN_UP(start); 2168 2168 unsigned long end_pfn = PFN_DOWN(end); 2169 2169 2170 + if (!IS_ENABLED(CONFIG_HIGHMEM) && end_pfn > max_low_pfn) 2171 + end_pfn = max_low_pfn; 2172 + 2170 2173 if (start_pfn >= end_pfn) 2171 2174 return 0; 2172 2175
+3 -9
mm/memory_hotplug.c
··· 1813 1813 page = pfn_to_page(pfn); 1814 1814 folio = page_folio(page); 1815 1815 1816 - /* 1817 - * No reference or lock is held on the folio, so it might 1818 - * be modified concurrently (e.g. split). As such, 1819 - * folio_nr_pages() may read garbage. This is fine as the outer 1820 - * loop will revisit the split folio later. 1821 - */ 1822 - if (folio_test_large(folio)) 1823 - pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; 1824 - 1825 1816 if (!folio_try_get(folio)) 1826 1817 continue; 1827 1818 1828 1819 if (unlikely(page_folio(page) != folio)) 1829 1820 goto put_folio; 1821 + 1822 + if (folio_test_large(folio)) 1823 + pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; 1830 1824 1831 1825 if (folio_contain_hwpoisoned_page(folio)) { 1832 1826 if (WARN_ON(folio_test_lru(folio)))
+6 -6
mm/mm_init.c
··· 984 984 } 985 985 } 986 986 987 - #ifdef CONFIG_SPARSEMEM 988 987 /* 989 988 * Initialize the memory map for hole in the range [memory_end, 990 - * section_end]. 989 + * section_end] for SPARSEMEM and in the range [memory_end, memmap_end] 990 + * for FLATMEM. 991 991 * Append the pages in this hole to the highest zone in the last 992 992 * node. 993 - * The call to init_unavailable_range() is outside the ifdef to 994 - * silence the compiler warining about zone_id set but not used; 995 - * for FLATMEM it is a nop anyway 996 993 */ 994 + #ifdef CONFIG_SPARSEMEM 997 995 end_pfn = round_up(end_pfn, PAGES_PER_SECTION); 998 - if (hole_pfn < end_pfn) 996 + #else 997 + end_pfn = round_up(end_pfn, MAX_ORDER_NR_PAGES); 999 998 #endif 999 + if (hole_pfn < end_pfn) 1000 1000 init_unavailable_range(hole_pfn, end_pfn, zone_id, nid); 1001 1001 } 1002 1002
+2 -1
mm/mremap.c
··· 1561 1561 * adjacent to the expanded vma and otherwise 1562 1562 * compatible. 1563 1563 */ 1564 - vma = vrm->vma = vma_merge_extend(&vmi, vma, vrm->delta); 1564 + vma = vma_merge_extend(&vmi, vma, vrm->delta); 1565 1565 if (!vma) { 1566 1566 vrm_uncharge(vrm); 1567 1567 return -ENOMEM; 1568 1568 } 1569 + vrm->vma = vma; 1569 1570 1570 1571 vrm_stat_account(vrm, vrm->delta); 1571 1572
+3 -3
mm/page_alloc.c
··· 1593 1593 1594 1594 static void check_new_page_bad(struct page *page) 1595 1595 { 1596 - if (unlikely(page->flags & __PG_HWPOISON)) { 1596 + if (unlikely(PageHWPoison(page))) { 1597 1597 /* Don't complain about hwpoisoned pages */ 1598 1598 if (PageBuddy(page)) 1599 1599 __ClearPageBuddy(page); ··· 4604 4604 goto retry; 4605 4605 4606 4606 /* Reclaim/compaction failed to prevent the fallback */ 4607 - if (defrag_mode) { 4608 - alloc_flags &= ALLOC_NOFRAGMENT; 4607 + if (defrag_mode && (alloc_flags & ALLOC_NOFRAGMENT)) { 4608 + alloc_flags &= ~ALLOC_NOFRAGMENT; 4609 4609 goto retry; 4610 4610 } 4611 4611
+21
security/Kconfig
··· 51 51 52 52 endchoice 53 53 54 + config MSEAL_SYSTEM_MAPPINGS 55 + bool "mseal system mappings" 56 + depends on 64BIT 57 + depends on ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS 58 + depends on !CHECKPOINT_RESTORE 59 + help 60 + Apply mseal on system mappings. 61 + The system mappings includes vdso, vvar, vvar_vclock, 62 + vectors (arm compat-mode), sigpage (arm compat-mode), uprobes. 63 + 64 + A 64-bit kernel is required for the memory sealing feature. 65 + No specific hardware features from the CPU are needed. 66 + 67 + WARNING: This feature breaks programs which rely on relocating 68 + or unmapping system mappings. Known broken software at the time 69 + of writing includes CHECKPOINT_RESTORE, UML, gVisor, rr. Therefore 70 + this config can't be enabled universally. 71 + 72 + For complete descriptions of memory sealing, please see 73 + Documentation/userspace-api/mseal.rst 74 + 54 75 config SECURITY 55 76 bool "Enable different security models" 56 77 depends on SYSFS
+1
tools/testing/selftests/Makefile
··· 62 62 TARGETS += mount_setattr 63 63 TARGETS += move_mount_set_group 64 64 TARGETS += mqueue 65 + TARGETS += mseal_system_mappings 65 66 TARGETS += nci 66 67 TARGETS += net 67 68 TARGETS += net/af_unix
+28
tools/testing/selftests/mm/va_high_addr_switch.sh
··· 41 41 fi 42 42 } 43 43 44 + check_supported_ppc64() 45 + { 46 + local config="/proc/config.gz" 47 + [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" 48 + [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" 49 + 50 + local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) 51 + if [[ "${pg_table_levels}" -lt 5 ]]; then 52 + echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" 53 + exit $ksft_skip 54 + fi 55 + 56 + local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}') 57 + if [[ "$mmu_support" != "radix" ]]; then 58 + echo "$0: System does not use Radix MMU, required for 5-level paging" 59 + exit $ksft_skip 60 + fi 61 + 62 + local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo) 63 + if [[ "${hugepages_total}" -eq 0 ]]; then 64 + echo "$0: HugePages are not enabled, required for some tests" 65 + exit $ksft_skip 66 + fi 67 + } 68 + 44 69 check_test_requirements() 45 70 { 46 71 # The test supports x86_64 and powerpc64. We currently have no useful ··· 74 49 case `uname -m` in 75 50 "x86_64") 76 51 check_supported_x86_64 52 + ;; 53 + "ppc64le"|"ppc64") 54 + check_supported_ppc64 77 55 ;; 78 56 *) 79 57 return 0
+2
tools/testing/selftests/mseal_system_mappings/.gitignore
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + sysmap_is_sealed
+6
tools/testing/selftests/mseal_system_mappings/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES) 3 + 4 + TEST_GEN_PROGS := sysmap_is_sealed 5 + 6 + include ../lib.mk
+1
tools/testing/selftests/mseal_system_mappings/config
··· 1 + CONFIG_MSEAL_SYSTEM_MAPPINGS=y
+119
tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * test system mappings are sealed when 4 + * KCONFIG_MSEAL_SYSTEM_MAPPINGS=y 5 + */ 6 + 7 + #define _GNU_SOURCE 8 + #include <stdio.h> 9 + #include <errno.h> 10 + #include <unistd.h> 11 + #include <string.h> 12 + #include <stdbool.h> 13 + 14 + #include "../kselftest.h" 15 + #include "../kselftest_harness.h" 16 + 17 + #define VMFLAGS "VmFlags:" 18 + #define MSEAL_FLAGS "sl" 19 + #define MAX_LINE_LEN 512 20 + 21 + bool has_mapping(char *name, FILE *maps) 22 + { 23 + char line[MAX_LINE_LEN]; 24 + 25 + while (fgets(line, sizeof(line), maps)) { 26 + if (strstr(line, name)) 27 + return true; 28 + } 29 + 30 + return false; 31 + } 32 + 33 + bool mapping_is_sealed(char *name, FILE *maps) 34 + { 35 + char line[MAX_LINE_LEN]; 36 + 37 + while (fgets(line, sizeof(line), maps)) { 38 + if (!strncmp(line, VMFLAGS, strlen(VMFLAGS))) { 39 + if (strstr(line, MSEAL_FLAGS)) 40 + return true; 41 + 42 + return false; 43 + } 44 + } 45 + 46 + return false; 47 + } 48 + 49 + FIXTURE(basic) { 50 + FILE *maps; 51 + }; 52 + 53 + FIXTURE_SETUP(basic) 54 + { 55 + self->maps = fopen("/proc/self/smaps", "r"); 56 + if (!self->maps) 57 + SKIP(return, "Could not open /proc/self/smap, errno=%d", 58 + errno); 59 + }; 60 + 61 + FIXTURE_TEARDOWN(basic) 62 + { 63 + if (self->maps) 64 + fclose(self->maps); 65 + }; 66 + 67 + FIXTURE_VARIANT(basic) 68 + { 69 + char *name; 70 + bool sealed; 71 + }; 72 + 73 + FIXTURE_VARIANT_ADD(basic, vdso) { 74 + .name = "[vdso]", 75 + .sealed = true, 76 + }; 77 + 78 + FIXTURE_VARIANT_ADD(basic, vvar) { 79 + .name = "[vvar]", 80 + .sealed = true, 81 + }; 82 + 83 + FIXTURE_VARIANT_ADD(basic, vvar_vclock) { 84 + .name = "[vvar_vclock]", 85 + .sealed = true, 86 + }; 87 + 88 + FIXTURE_VARIANT_ADD(basic, sigpage) { 89 + .name = "[sigpage]", 90 + .sealed = true, 91 + }; 92 + 93 + FIXTURE_VARIANT_ADD(basic, vectors) { 94 + .name = "[vectors]", 95 + .sealed = true, 96 + }; 97 + 98 + FIXTURE_VARIANT_ADD(basic, uprobes) { 99 + 
.name = "[uprobes]", 100 + .sealed = true, 101 + }; 102 + 103 + FIXTURE_VARIANT_ADD(basic, stack) { 104 + .name = "[stack]", 105 + .sealed = false, 106 + }; 107 + 108 + TEST_F(basic, check_sealed) 109 + { 110 + if (!has_mapping(variant->name, self->maps)) { 111 + SKIP(return, "could not find the mapping, %s", 112 + variant->name); 113 + } 114 + 115 + EXPECT_EQ(variant->sealed, 116 + mapping_is_sealed(variant->name, self->maps)); 117 + }; 118 + 119 + TEST_HARNESS_MAIN
+43
tools/testing/selftests/x86/test_mremap_vdso.c
··· 14 14 #include <errno.h> 15 15 #include <unistd.h> 16 16 #include <string.h> 17 + #include <stdbool.h> 17 18 18 19 #include <sys/mman.h> 19 20 #include <sys/auxv.h> ··· 56 55 57 56 } 58 57 58 + #define VDSO_NAME "[vdso]" 59 + #define VMFLAGS "VmFlags:" 60 + #define MSEAL_FLAGS "sl" 61 + #define MAX_LINE_LEN 512 62 + 63 + bool vdso_sealed(FILE *maps) 64 + { 65 + char line[MAX_LINE_LEN]; 66 + bool has_vdso = false; 67 + 68 + while (fgets(line, sizeof(line), maps)) { 69 + if (strstr(line, VDSO_NAME)) 70 + has_vdso = true; 71 + 72 + if (has_vdso && !strncmp(line, VMFLAGS, strlen(VMFLAGS))) { 73 + if (strstr(line, MSEAL_FLAGS)) 74 + return true; 75 + 76 + return false; 77 + } 78 + } 79 + 80 + return false; 81 + } 82 + 59 83 int main(int argc, char **argv, char **envp) 60 84 { 61 85 pid_t child; 86 + FILE *maps; 62 87 63 88 ksft_print_header(); 64 89 ksft_set_plan(1); 90 + 91 + maps = fopen("/proc/self/smaps", "r"); 92 + if (!maps) { 93 + ksft_test_result_skip( 94 + "Could not open /proc/self/smaps, errno=%d\n", 95 + errno); 96 + 97 + return 0; 98 + } 99 + 100 + if (vdso_sealed(maps)) { 101 + ksft_test_result_skip("vdso is sealed\n"); 102 + return 0; 103 + } 104 + 105 + fclose(maps); 65 106 66 107 child = fork(); 67 108 if (child == -1)