Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'mm-stable-2026-02-18-19-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull more MM updates from Andrew Morton:

- "mm/vmscan: fix demotion targets checks in reclaim/demotion" fixes a
couple of issues in the demotion code - pages were failing demotion,
and pages were being demoted into disallowed nodes (Bing Jiao)

- "Remove XA_ZERO from error recovery of dup_mmap()" fixes a rare
maple tree race and performs a number of cleanups (Liam Howlett)

- "mm: add bitmap VMA flag helpers and convert all mmap_prepare to use
them" implements a lot of cleanups following on from the conversion
of the VMA flags into a bitmap (Lorenzo Stoakes)

- "support batch checking of references and unmapping for large folios"
implements batching to greatly improve the performance of reclaiming
clean file-backed large folios (Baolin Wang)

- "selftests/mm: add memory failure selftests" does as claimed (Miaohe
Lin)

* tag 'mm-stable-2026-02-18-19-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (36 commits)
mm/page_alloc: clear page->private in free_pages_prepare()
selftests/mm: add memory failure dirty pagecache test
selftests/mm: add memory failure clean pagecache test
selftests/mm: add memory failure anonymous page test
mm: rmap: support batched unmapping for file large folios
arm64: mm: implement the architecture-specific clear_flush_young_ptes()
arm64: mm: support batch clearing of the young flag for large folios
arm64: mm: factor out the address and ptep alignment into a new helper
mm: rmap: support batched checks of the references for large folios
tools/testing/vma: add VMA userland tests for VMA flag functions
tools/testing/vma: separate out vma_internal.h into logical headers
tools/testing/vma: separate VMA userland tests into separate files
mm: make vm_area_desc utilise vma_flags_t only
mm: update all remaining mmap_prepare users to use vma_flags_t
mm: update shmem_[kernel]_file_*() functions to use vma_flags_t
mm: update secretmem to use VMA flags on mmap_prepare
mm: update hugetlbfs to use VMA flags on mmap_prepare
mm: add basic VMA flag operation helper functions
tools: bitmap: add missing bitmap_[subset(), andnot()]
mm: add mk_vma_flags() bitmap flag macro helper
...

+3941 -2521
+1
MAINTAINERS
··· 11845 11845 F: include/trace/events/memory-failure.h 11846 11846 F: mm/hwpoison-inject.c 11847 11847 F: mm/memory-failure.c 11848 + F: tools/testing/selftests/mm/memory-failure.c 11848 11849 11849 11850 HYCON HY46XX TOUCHSCREEN SUPPORT 11850 11851 M: Giulio Benetti <giulio.benetti@benettiengineering.com>
+17 -6
arch/arm64/include/asm/pgtable.h
··· 1648 1648 extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm, 1649 1649 unsigned long addr, pte_t *ptep, 1650 1650 unsigned int nr, int full); 1651 - extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma, 1652 - unsigned long addr, pte_t *ptep); 1653 - extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, 1654 - unsigned long addr, pte_t *ptep); 1651 + int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma, 1652 + unsigned long addr, pte_t *ptep, unsigned int nr); 1653 + int contpte_clear_flush_young_ptes(struct vm_area_struct *vma, 1654 + unsigned long addr, pte_t *ptep, unsigned int nr); 1655 1655 extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr, 1656 1656 pte_t *ptep, unsigned int nr); 1657 1657 extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma, ··· 1823 1823 if (likely(!pte_valid_cont(orig_pte))) 1824 1824 return __ptep_test_and_clear_young(vma, addr, ptep); 1825 1825 1826 - return contpte_ptep_test_and_clear_young(vma, addr, ptep); 1826 + return contpte_test_and_clear_young_ptes(vma, addr, ptep, 1); 1827 1827 } 1828 1828 1829 1829 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH ··· 1835 1835 if (likely(!pte_valid_cont(orig_pte))) 1836 1836 return __ptep_clear_flush_young(vma, addr, ptep); 1837 1837 1838 - return contpte_ptep_clear_flush_young(vma, addr, ptep); 1838 + return contpte_clear_flush_young_ptes(vma, addr, ptep, 1); 1839 + } 1840 + 1841 + #define clear_flush_young_ptes clear_flush_young_ptes 1842 + static inline int clear_flush_young_ptes(struct vm_area_struct *vma, 1843 + unsigned long addr, pte_t *ptep, 1844 + unsigned int nr) 1845 + { 1846 + if (likely(nr == 1 && !pte_cont(__ptep_get(ptep)))) 1847 + return __ptep_clear_flush_young(vma, addr, ptep); 1848 + 1849 + return contpte_clear_flush_young_ptes(vma, addr, ptep, nr); 1839 1850 } 1840 1851 1841 1852 #define wrprotect_ptes wrprotect_ptes
+40 -22
arch/arm64/mm/contpte.c
··· 26 26 return PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES); 27 27 } 28 28 29 + static inline pte_t *contpte_align_addr_ptep(unsigned long *start, 30 + unsigned long *end, pte_t *ptep, 31 + unsigned int nr) 32 + { 33 + /* 34 + * Note: caller must ensure these nr PTEs are consecutive (present) 35 + * PTEs that map consecutive pages of the same large folio within a 36 + * single VMA and a single page table. 37 + */ 38 + if (pte_cont(__ptep_get(ptep + nr - 1))) 39 + *end = ALIGN(*end, CONT_PTE_SIZE); 40 + 41 + if (pte_cont(__ptep_get(ptep))) { 42 + *start = ALIGN_DOWN(*start, CONT_PTE_SIZE); 43 + ptep = contpte_align_down(ptep); 44 + } 45 + 46 + return ptep; 47 + } 48 + 29 49 static void contpte_try_unfold_partial(struct mm_struct *mm, unsigned long addr, 30 50 pte_t *ptep, unsigned int nr) 31 51 { ··· 508 488 } 509 489 EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes); 510 490 511 - int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma, 512 - unsigned long addr, pte_t *ptep) 491 + int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma, 492 + unsigned long addr, pte_t *ptep, 493 + unsigned int nr) 513 494 { 514 495 /* 515 496 * ptep_clear_flush_young() technically requires us to clear the access ··· 519 498 * contig range when the range is covered by a single folio, we can get 520 499 * away with clearing young for the whole contig range here, so we avoid 521 500 * having to unfold. 501 + * 502 + * The 'nr' means consecutive (present) PTEs that map consecutive pages 503 + * of the same large folio in a single VMA and a single page table. 
522 504 */ 523 505 506 + unsigned long end = addr + nr * PAGE_SIZE; 524 507 int young = 0; 525 - int i; 526 508 527 - ptep = contpte_align_down(ptep); 528 - addr = ALIGN_DOWN(addr, CONT_PTE_SIZE); 529 - 530 - for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE) 509 + ptep = contpte_align_addr_ptep(&addr, &end, ptep, nr); 510 + for (; addr != end; ptep++, addr += PAGE_SIZE) 531 511 young |= __ptep_test_and_clear_young(vma, addr, ptep); 532 512 533 513 return young; 534 514 } 535 - EXPORT_SYMBOL_GPL(contpte_ptep_test_and_clear_young); 515 + EXPORT_SYMBOL_GPL(contpte_test_and_clear_young_ptes); 536 516 537 - int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, 538 - unsigned long addr, pte_t *ptep) 517 + int contpte_clear_flush_young_ptes(struct vm_area_struct *vma, 518 + unsigned long addr, pte_t *ptep, 519 + unsigned int nr) 539 520 { 540 521 int young; 541 522 542 - young = contpte_ptep_test_and_clear_young(vma, addr, ptep); 523 + young = contpte_test_and_clear_young_ptes(vma, addr, ptep, nr); 543 524 544 525 if (young) { 526 + unsigned long end = addr + nr * PAGE_SIZE; 527 + 528 + contpte_align_addr_ptep(&addr, &end, ptep, nr); 545 529 /* 546 530 * See comment in __ptep_clear_flush_young(); same rationale for 547 531 * eliding the trailing DSB applies here. 
548 532 */ 549 - addr = ALIGN_DOWN(addr, CONT_PTE_SIZE); 550 - __flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE, 533 + __flush_tlb_range_nosync(vma->vm_mm, addr, end, 551 534 PAGE_SIZE, true, 3); 552 535 } 553 536 554 537 return young; 555 538 } 556 - EXPORT_SYMBOL_GPL(contpte_ptep_clear_flush_young); 539 + EXPORT_SYMBOL_GPL(contpte_clear_flush_young_ptes); 557 540 558 541 void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr, 559 542 pte_t *ptep, unsigned int nr) ··· 594 569 unsigned long start = addr; 595 570 unsigned long end = start + nr * PAGE_SIZE; 596 571 597 - if (pte_cont(__ptep_get(ptep + nr - 1))) 598 - end = ALIGN(end, CONT_PTE_SIZE); 599 - 600 - if (pte_cont(__ptep_get(ptep))) { 601 - start = ALIGN_DOWN(start, CONT_PTE_SIZE); 602 - ptep = contpte_align_down(ptep); 603 - } 604 - 572 + ptep = contpte_align_addr_ptep(&start, &end, ptep, nr); 605 573 __clear_young_dirty_ptes(vma, start, ptep, (end - start) / PAGE_SIZE, flags); 606 574 } 607 575 EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);
+1 -1
arch/x86/kernel/cpu/sgx/ioctl.c
··· 83 83 encl_size = secs->size + PAGE_SIZE; 84 84 85 85 backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5), 86 - VM_NORESERVE); 86 + mk_vma_flags(VMA_NORESERVE_BIT)); 87 87 if (IS_ERR(backing)) { 88 88 ret = PTR_ERR(backing); 89 89 goto err_out_shrink;
+3 -3
drivers/char/mem.c
··· 306 306 /* can't do an in-place private mapping if there's no MMU */ 307 307 static inline int private_mapping_ok(struct vm_area_desc *desc) 308 308 { 309 - return is_nommu_shared_mapping(desc->vm_flags); 309 + return is_nommu_shared_vma_flags(&desc->vma_flags); 310 310 } 311 311 #else 312 312 ··· 360 360 361 361 desc->vm_ops = &mmap_mem_ops; 362 362 363 - /* Remap-pfn-range will mark the range VM_IO. */ 363 + /* Remap-pfn-range will mark the range with the I/O flag. */ 364 364 mmap_action_remap_full(desc, desc->pgoff); 365 365 /* We filter remap errors to -EAGAIN. */ 366 366 desc->action.error_hook = mmap_filter_error; ··· 520 520 #ifndef CONFIG_MMU 521 521 return -ENOSYS; 522 522 #endif 523 - if (desc->vm_flags & VM_SHARED) 523 + if (vma_desc_test_flags(desc, VMA_SHARED_BIT)) 524 524 return shmem_zero_setup_desc(desc); 525 525 526 526 desc->action.success_hook = mmap_zero_private_success;
+5 -5
drivers/dax/device.c
··· 13 13 #include "dax-private.h" 14 14 #include "bus.h" 15 15 16 - static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags, 16 + static int __check_vma(struct dev_dax *dev_dax, vma_flags_t flags, 17 17 unsigned long start, unsigned long end, struct file *file, 18 18 const char *func) 19 19 { ··· 24 24 return -ENXIO; 25 25 26 26 /* prevent private mappings from being established */ 27 - if ((vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { 27 + if (!vma_flags_test(&flags, VMA_MAYSHARE_BIT)) { 28 28 dev_info_ratelimited(dev, 29 29 "%s: %s: fail, attempted private mapping\n", 30 30 current->comm, func); ··· 53 53 static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, 54 54 const char *func) 55 55 { 56 - return __check_vma(dev_dax, vma->vm_flags, vma->vm_start, vma->vm_end, 56 + return __check_vma(dev_dax, vma->flags, vma->vm_start, vma->vm_end, 57 57 vma->vm_file, func); 58 58 } 59 59 ··· 306 306 * fault time. 307 307 */ 308 308 id = dax_read_lock(); 309 - rc = __check_vma(dev_dax, desc->vm_flags, desc->start, desc->end, filp, 309 + rc = __check_vma(dev_dax, desc->vma_flags, desc->start, desc->end, filp, 310 310 __func__); 311 311 dax_read_unlock(id); 312 312 if (rc) 313 313 return rc; 314 314 315 315 desc->vm_ops = &dax_vm_ops; 316 - desc->vm_flags |= VM_HUGEPAGE; 316 + vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT); 317 317 return 0; 318 318 } 319 319
+3 -2
drivers/gpu/drm/drm_gem.c
··· 186 186 { 187 187 struct vfsmount *huge_mnt; 188 188 struct file *filp; 189 + const vma_flags_t flags = mk_vma_flags(VMA_NORESERVE_BIT); 189 190 190 191 drm_gem_private_object_init(dev, obj, size); 191 192 192 193 huge_mnt = drm_gem_get_huge_mnt(dev); 193 194 if (huge_mnt) 194 195 filp = shmem_file_setup_with_mnt(huge_mnt, "drm mm object", 195 - size, VM_NORESERVE); 196 + size, flags); 196 197 else 197 - filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); 198 + filp = shmem_file_setup("drm mm object", size, flags); 198 199 199 200 if (IS_ERR(filp)) 200 201 return PTR_ERR(filp);
+1 -1
drivers/gpu/drm/i915/gem/i915_gem_shmem.c
··· 499 499 resource_size_t size, 500 500 unsigned int flags) 501 501 { 502 - unsigned long shmem_flags = VM_NORESERVE; 502 + const vma_flags_t shmem_flags = mk_vma_flags(VMA_NORESERVE_BIT); 503 503 struct vfsmount *huge_mnt; 504 504 struct file *filp; 505 505
+2 -1
drivers/gpu/drm/i915/gem/i915_gem_ttm.c
··· 200 200 struct address_space *mapping; 201 201 gfp_t mask; 202 202 203 - filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE); 203 + filp = shmem_file_setup("i915-shmem-tt", size, 204 + mk_vma_flags(VMA_NORESERVE_BIT)); 204 205 if (IS_ERR(filp)) 205 206 return PTR_ERR(filp); 206 207
+2 -1
drivers/gpu/drm/i915/gt/shmem_utils.c
··· 19 19 struct file *file; 20 20 int err; 21 21 22 - file = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE); 22 + file = shmem_file_setup(name, PAGE_ALIGN(len), 23 + mk_vma_flags(VMA_NORESERVE_BIT)); 23 24 if (IS_ERR(file)) 24 25 return file; 25 26
+1 -1
drivers/gpu/drm/ttm/tests/ttm_tt_test.c
··· 143 143 err = ttm_tt_init(tt, bo, 0, caching, 0); 144 144 KUNIT_ASSERT_EQ(test, err, 0); 145 145 146 - shmem = shmem_file_setup("ttm swap", BO_SIZE, 0); 146 + shmem = shmem_file_setup("ttm swap", BO_SIZE, EMPTY_VMA_FLAGS); 147 147 tt->swap_storage = shmem; 148 148 149 149 ttm_tt_fini(tt);
+2 -1
drivers/gpu/drm/ttm/ttm_backup.c
··· 178 178 */ 179 179 struct file *ttm_backup_shmem_create(loff_t size) 180 180 { 181 - return shmem_file_setup("ttm shmem backup", size, 0); 181 + return shmem_file_setup("ttm shmem backup", size, 182 + EMPTY_VMA_FLAGS); 182 183 }
+1 -1
drivers/gpu/drm/ttm/ttm_tt.c
··· 330 330 struct page *to_page; 331 331 int i, ret; 332 332 333 - swap_storage = shmem_file_setup("ttm swap", size, 0); 333 + swap_storage = shmem_file_setup("ttm swap", size, EMPTY_VMA_FLAGS); 334 334 if (IS_ERR(swap_storage)) { 335 335 pr_err("Failed allocating swap storage\n"); 336 336 return PTR_ERR(swap_storage);
+1 -1
fs/aio.c
··· 394 394 395 395 static int aio_ring_mmap_prepare(struct vm_area_desc *desc) 396 396 { 397 - desc->vm_flags |= VM_DONTEXPAND; 397 + vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT); 398 398 desc->vm_ops = &aio_ring_vm_ops; 399 399 return 0; 400 400 }
+3 -2
fs/erofs/data.c
··· 473 473 if (!IS_DAX(file_inode(desc->file))) 474 474 return generic_file_readonly_mmap_prepare(desc); 475 475 476 - if ((desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE)) 476 + if (vma_desc_test_flags(desc, VMA_SHARED_BIT) && 477 + vma_desc_test_flags(desc, VMA_MAYWRITE_BIT)) 477 478 return -EINVAL; 478 479 479 480 desc->vm_ops = &erofs_dax_vm_ops; 480 - desc->vm_flags |= VM_HUGEPAGE; 481 + vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT); 481 482 return 0; 482 483 } 483 484 #else
+2 -2
fs/ext4/file.c
··· 818 818 * We don't support synchronous mappings for non-DAX files and 819 819 * for DAX files if underneath dax_device is not synchronous. 820 820 */ 821 - if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file), dax_dev)) 821 + if (!daxdev_mapping_supported(desc, file_inode(file), dax_dev)) 822 822 return -EOPNOTSUPP; 823 823 824 824 file_accessed(file); 825 825 if (IS_DAX(file_inode(file))) { 826 826 desc->vm_ops = &ext4_dax_vm_ops; 827 - desc->vm_flags |= VM_HUGEPAGE; 827 + vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT); 828 828 } else { 829 829 desc->vm_ops = &ext4_file_vm_ops; 830 830 }
+7 -7
fs/hugetlbfs/inode.c
··· 109 109 loff_t len, vma_len; 110 110 int ret; 111 111 struct hstate *h = hstate_file(file); 112 - vm_flags_t vm_flags; 112 + vma_flags_t vma_flags; 113 113 114 114 /* 115 115 * vma address alignment (but not the pgoff alignment) has ··· 119 119 * way when do_mmap unwinds (may be important on powerpc 120 120 * and ia64). 121 121 */ 122 - desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; 122 + vma_desc_set_flags(desc, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT); 123 123 desc->vm_ops = &hugetlb_vm_ops; 124 124 125 125 /* ··· 148 148 149 149 ret = -ENOMEM; 150 150 151 - vm_flags = desc->vm_flags; 151 + vma_flags = desc->vma_flags; 152 152 /* 153 153 * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip 154 154 * reserving here. Note: only for SHM hugetlbfs file, the inode 155 155 * flag S_PRIVATE is set. 156 156 */ 157 157 if (inode->i_flags & S_PRIVATE) 158 - vm_flags |= VM_NORESERVE; 158 + vma_flags_set(&vma_flags, VMA_NORESERVE_BIT); 159 159 160 160 if (hugetlb_reserve_pages(inode, 161 161 desc->pgoff >> huge_page_order(h), 162 162 len >> huge_page_shift(h), desc, 163 - vm_flags) < 0) 163 + vma_flags) < 0) 164 164 goto out; 165 165 166 166 ret = 0; 167 - if ((desc->vm_flags & VM_WRITE) && inode->i_size < len) 167 + if (vma_desc_test_flags(desc, VMA_WRITE_BIT) && inode->i_size < len) 168 168 i_size_write(inode, len); 169 169 out: 170 170 inode_unlock(inode); ··· 1527 1527 * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. 1528 1528 */ 1529 1529 struct file *hugetlb_file_setup(const char *name, size_t size, 1530 - vm_flags_t acctflag, int creat_flags, 1530 + vma_flags_t acctflag, int creat_flags, 1531 1531 int page_size_log) 1532 1532 { 1533 1533 struct inode *inode;
+1 -1
fs/ntfs3/file.c
··· 276 276 struct file *file = desc->file; 277 277 struct inode *inode = file_inode(file); 278 278 struct ntfs_inode *ni = ntfs_i(inode); 279 - bool rw = desc->vm_flags & VM_WRITE; 279 + const bool rw = vma_desc_test_flags(desc, VMA_WRITE_BIT); 280 280 int err; 281 281 282 282 /* Avoid any operation if inode is bad. */
+2 -2
fs/orangefs/file.c
··· 411 411 "orangefs_file_mmap: called on %pD\n", file); 412 412 413 413 /* set the sequential readahead hint */ 414 - desc->vm_flags |= VM_SEQ_READ; 415 - desc->vm_flags &= ~VM_RAND_READ; 414 + vma_desc_set_flags(desc, VMA_SEQ_READ_BIT); 415 + vma_desc_clear_flags(desc, VMA_RAND_READ_BIT); 416 416 417 417 file_accessed(file); 418 418 desc->vm_ops = &orangefs_file_vm_ops;
+1 -1
fs/ramfs/file-nommu.c
··· 264 264 */ 265 265 static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc) 266 266 { 267 - if (!is_nommu_shared_mapping(desc->vm_flags)) 267 + if (!is_nommu_shared_vma_flags(&desc->vma_flags)) 268 268 return -ENOSYS; 269 269 270 270 file_accessed(desc->file);
+1 -1
fs/resctrl/pseudo_lock.c
··· 1044 1044 * Ensure changes are carried directly to the memory being mapped, 1045 1045 * do not allow copy-on-write mapping. 1046 1046 */ 1047 - if (!(desc->vm_flags & VM_SHARED)) { 1047 + if (!vma_desc_test_flags(desc, VMA_SHARED_BIT)) { 1048 1048 mutex_unlock(&rdtgroup_mutex); 1049 1049 return -EINVAL; 1050 1050 }
+1 -1
fs/romfs/mmap-nommu.c
··· 63 63 */ 64 64 static int romfs_mmap_prepare(struct vm_area_desc *desc) 65 65 { 66 - return is_nommu_shared_mapping(desc->vm_flags) ? 0 : -ENOSYS; 66 + return is_nommu_shared_vma_flags(&desc->vma_flags) ? 0 : -ENOSYS; 67 67 } 68 68 69 69 static unsigned romfs_mmap_capabilities(struct file *file)
+2 -1
fs/xfs/scrub/xfile.c
··· 61 61 if (!xf) 62 62 return -ENOMEM; 63 63 64 - xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE); 64 + xf->file = shmem_kernel_file_setup(description, isize, 65 + mk_vma_flags(VMA_NORESERVE_BIT)); 65 66 if (IS_ERR(xf->file)) { 66 67 error = PTR_ERR(xf->file); 67 68 goto out_xfile;
+1 -1
fs/xfs/xfs_buf_mem.c
··· 62 62 if (!btp) 63 63 return -ENOMEM; 64 64 65 - file = shmem_kernel_file_setup(descr, 0, 0); 65 + file = shmem_kernel_file_setup(descr, 0, EMPTY_VMA_FLAGS); 66 66 if (IS_ERR(file)) { 67 67 error = PTR_ERR(file); 68 68 goto out_free_btp;
+2 -2
fs/xfs/xfs_file.c
··· 2010 2010 * We don't support synchronous mappings for non-DAX files and 2011 2011 * for DAX files if underneath dax_device is not synchronous. 2012 2012 */ 2013 - if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file), 2013 + if (!daxdev_mapping_supported(desc, file_inode(file), 2014 2014 target->bt_daxdev)) 2015 2015 return -EOPNOTSUPP; 2016 2016 2017 2017 file_accessed(file); 2018 2018 desc->vm_ops = &xfs_file_vm_ops; 2019 2019 if (IS_DAX(inode)) 2020 - desc->vm_flags |= VM_HUGEPAGE; 2020 + vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT); 2021 2021 return 0; 2022 2022 } 2023 2023
+2 -1
fs/zonefs/file.c
··· 333 333 * ordering between msync() and page cache writeback. 334 334 */ 335 335 if (zonefs_inode_is_seq(file_inode(file)) && 336 - (desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE)) 336 + vma_desc_test_flags(desc, VMA_SHARED_BIT) && 337 + vma_desc_test_flags(desc, VMA_MAYWRITE_BIT)) 337 338 return -EINVAL; 338 339 339 340 file_accessed(file);
+3 -3
include/linux/cpuset.h
··· 176 176 task_unlock(current); 177 177 } 178 178 179 - extern bool cpuset_node_allowed(struct cgroup *cgroup, int nid); 179 + extern void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask); 180 180 #else /* !CONFIG_CPUSETS */ 181 181 182 182 static inline bool cpusets_enabled(void) { return false; } ··· 299 299 return false; 300 300 } 301 301 302 - static inline bool cpuset_node_allowed(struct cgroup *cgroup, int nid) 302 + static inline void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask) 303 303 { 304 - return true; 304 + nodes_copy(*mask, node_states[N_MEMORY]); 305 305 } 306 306 #endif /* !CONFIG_CPUSETS */ 307 307
+4 -4
include/linux/dax.h
··· 65 65 /* 66 66 * Check if given mapping is supported by the file / underlying device. 67 67 */ 68 - static inline bool daxdev_mapping_supported(vm_flags_t vm_flags, 68 + static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc, 69 69 const struct inode *inode, 70 70 struct dax_device *dax_dev) 71 71 { 72 - if (!(vm_flags & VM_SYNC)) 72 + if (!vma_desc_test_flags(desc, VMA_SYNC_BIT)) 73 73 return true; 74 74 if (!IS_DAX(inode)) 75 75 return false; ··· 111 111 static inline void set_dax_synchronous(struct dax_device *dax_dev) 112 112 { 113 113 } 114 - static inline bool daxdev_mapping_supported(vm_flags_t vm_flags, 114 + static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc, 115 115 const struct inode *inode, 116 116 struct dax_device *dax_dev) 117 117 { 118 - return !(vm_flags & VM_SYNC); 118 + return !vma_desc_test_flags(desc, VMA_SYNC_BIT); 119 119 } 120 120 static inline size_t dax_recovery_write(struct dax_device *dax_dev, 121 121 pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
+3 -3
include/linux/hugetlb.h
··· 148 148 struct folio **foliop); 149 149 #endif /* CONFIG_USERFAULTFD */ 150 150 long hugetlb_reserve_pages(struct inode *inode, long from, long to, 151 - struct vm_area_desc *desc, vm_flags_t vm_flags); 151 + struct vm_area_desc *desc, vma_flags_t vma_flags); 152 152 long hugetlb_unreserve_pages(struct inode *inode, long start, long end, 153 153 long freed); 154 154 bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); ··· 527 527 } 528 528 529 529 extern const struct vm_operations_struct hugetlb_vm_ops; 530 - struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, 530 + struct file *hugetlb_file_setup(const char *name, size_t size, vma_flags_t acct, 531 531 int creat_flags, int page_size_log); 532 532 533 533 static inline bool is_file_hugepages(const struct file *file) ··· 543 543 544 544 #define is_file_hugepages(file) false 545 545 static inline struct file * 546 - hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, 546 + hugetlb_file_setup(const char *name, size_t size, vma_flags_t acctflag, 547 547 int creat_flags, int page_size_log) 548 548 { 549 549 return ERR_PTR(-ENOSYS);
+10
include/linux/hugetlb_inline.h
··· 11 11 return !!(vm_flags & VM_HUGETLB); 12 12 } 13 13 14 + static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags) 15 + { 16 + return vma_flags_test(flags, VMA_HUGETLB_BIT); 17 + } 18 + 14 19 #else 15 20 16 21 static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags) 22 + { 23 + return false; 24 + } 25 + 26 + static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags) 17 27 { 18 28 return false; 19 29 }
+3 -3
include/linux/memcontrol.h
··· 1758 1758 rcu_read_unlock(); 1759 1759 } 1760 1760 1761 - bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid); 1761 + void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg, nodemask_t *mask); 1762 1762 1763 1763 void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg); 1764 1764 ··· 1829 1829 return 0; 1830 1830 } 1831 1831 1832 - static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid) 1832 + static inline void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg, 1833 + nodemask_t *mask) 1833 1834 { 1834 - return true; 1835 1835 } 1836 1836 1837 1837 static inline void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg)
+3 -3
include/linux/memory-tiers.h
··· 53 53 struct list_head *memory_types); 54 54 void mt_put_memory_types(struct list_head *memory_types); 55 55 #ifdef CONFIG_MIGRATION 56 - int next_demotion_node(int node); 56 + int next_demotion_node(int node, const nodemask_t *allowed_mask); 57 57 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); 58 58 bool node_is_toptier(int node); 59 59 #else 60 - static inline int next_demotion_node(int node) 60 + static inline int next_demotion_node(int node, const nodemask_t *allowed_mask) 61 61 { 62 62 return NUMA_NO_NODE; 63 63 } ··· 101 101 102 102 } 103 103 104 - static inline int next_demotion_node(int node) 104 + static inline int next_demotion_node(int node, const nodemask_t *allowed_mask) 105 105 { 106 106 return NUMA_NO_NODE; 107 107 }
+231 -21
include/linux/mm.h
··· 2 2 #ifndef _LINUX_MM_H 3 3 #define _LINUX_MM_H 4 4 5 + #include <linux/args.h> 5 6 #include <linux/errno.h> 6 7 #include <linux/mmdebug.h> 7 8 #include <linux/gfp.h> ··· 552 551 /* 553 552 * Physically remapped pages are special. Tell the 554 553 * rest of the world about it: 555 - * VM_IO tells people not to look at these pages 554 + * IO tells people not to look at these pages 556 555 * (accesses can have side effects). 557 - * VM_PFNMAP tells the core MM that the base pages are just 556 + * PFNMAP tells the core MM that the base pages are just 558 557 * raw PFN mappings, and do not have a "struct page" associated 559 558 * with them. 560 - * VM_DONTEXPAND 559 + * DONTEXPAND 561 560 * Disable vma merging and expanding with mremap(). 562 - * VM_DONTDUMP 561 + * DONTDUMP 563 562 * Omit vma from core dump, even when VM_IO turned off. 564 563 */ 565 - #define VM_REMAP_FLAGS (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) 564 + #define VMA_REMAP_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT, \ 565 + VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT) 566 566 567 567 /* This mask prevents VMA from being scanned with khugepaged */ 568 568 #define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB) ··· 947 945 * system word. 948 946 */ 949 947 if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) { 950 - unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags); 948 + unsigned long *bitmap = vma->flags.__vma_flags; 951 949 952 950 bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG); 953 951 } ··· 991 989 __vm_flags_mod(vma, set, clear); 992 990 } 993 991 994 - static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma, 995 - vma_flag_t bit) 992 + static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_t bit) 996 993 { 997 994 const vm_flags_t mask = BIT((__force int)bit); 998 995 ··· 1006 1005 * Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific 1007 1006 * valid flags are allowed to do this. 
1008 1007 */ 1009 - static inline void vma_flag_set_atomic(struct vm_area_struct *vma, 1010 - vma_flag_t bit) 1008 + static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit) 1011 1009 { 1012 - unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags); 1010 + unsigned long *bitmap = vma->flags.__vma_flags; 1013 1011 1014 1012 vma_assert_stabilised(vma); 1015 - if (__vma_flag_atomic_valid(vma, bit)) 1013 + if (__vma_atomic_valid_flag(vma, bit)) 1016 1014 set_bit((__force int)bit, bitmap); 1017 1015 } 1018 1016 ··· 1022 1022 * This is necessarily racey, so callers must ensure that serialisation is 1023 1023 * achieved through some other means, or that races are permissible. 1024 1024 */ 1025 - static inline bool vma_flag_test_atomic(struct vm_area_struct *vma, 1026 - vma_flag_t bit) 1025 + static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit) 1027 1026 { 1028 - if (__vma_flag_atomic_valid(vma, bit)) 1027 + if (__vma_atomic_valid_flag(vma, bit)) 1029 1028 return test_bit((__force int)bit, &vma->vm_flags); 1030 1029 1031 1030 return false; 1032 1031 } 1032 + 1033 + /* Set an individual VMA flag in flags, non-atomically. */ 1034 + static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit) 1035 + { 1036 + unsigned long *bitmap = flags->__vma_flags; 1037 + 1038 + __set_bit((__force int)bit, bitmap); 1039 + } 1040 + 1041 + static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) 1042 + { 1043 + vma_flags_t flags; 1044 + int i; 1045 + 1046 + vma_flags_clear_all(&flags); 1047 + for (i = 0; i < count; i++) 1048 + vma_flag_set(&flags, bits[i]); 1049 + return flags; 1050 + } 1051 + 1052 + /* 1053 + * Helper macro which bitwise-or combines the specified input flags into a 1054 + * vma_flags_t bitmap value. 
E.g.: 1055 + * 1056 + * vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT, 1057 + * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT); 1058 + * 1059 + * The compiler cleverly optimises away all of the work and this ends up being 1060 + * equivalent to aggregating the values manually. 1061 + */ 1062 + #define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \ 1063 + (const vma_flag_t []){__VA_ARGS__}) 1064 + 1065 + /* Test each of to_test flags in flags, non-atomically. */ 1066 + static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags, 1067 + vma_flags_t to_test) 1068 + { 1069 + const unsigned long *bitmap = flags->__vma_flags; 1070 + const unsigned long *bitmap_to_test = to_test.__vma_flags; 1071 + 1072 + return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); 1073 + } 1074 + 1075 + /* 1076 + * Test whether any specified VMA flag is set, e.g.: 1077 + * 1078 + * if (vma_flags_test(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... } 1079 + */ 1080 + #define vma_flags_test(flags, ...) \ 1081 + vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__)) 1082 + 1083 + /* Test that ALL of the to_test flags are set, non-atomically. */ 1084 + static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags, 1085 + vma_flags_t to_test) 1086 + { 1087 + const unsigned long *bitmap = flags->__vma_flags; 1088 + const unsigned long *bitmap_to_test = to_test.__vma_flags; 1089 + 1090 + return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); 1091 + } 1092 + 1093 + /* 1094 + * Test whether ALL specified VMA flags are set, e.g.: 1095 + * 1096 + * if (vma_flags_test_all(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... } 1097 + */ 1098 + #define vma_flags_test_all(flags, ...) \ 1099 + vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__)) 1100 + 1101 + /* Set each of the to_set flags in flags, non-atomically. 
*/ 1102 + static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set) 1103 + { 1104 + unsigned long *bitmap = flags->__vma_flags; 1105 + const unsigned long *bitmap_to_set = to_set.__vma_flags; 1106 + 1107 + bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS); 1108 + } 1109 + 1110 + /* 1111 + * Set all specified VMA flags, e.g.: 1112 + * 1113 + * vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT); 1114 + */ 1115 + #define vma_flags_set(flags, ...) \ 1116 + vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__)) 1117 + 1118 + /* Clear all of the to-clear flags in flags, non-atomically. */ 1119 + static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear) 1120 + { 1121 + unsigned long *bitmap = flags->__vma_flags; 1122 + const unsigned long *bitmap_to_clear = to_clear.__vma_flags; 1123 + 1124 + bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS); 1125 + } 1126 + 1127 + /* 1128 + * Clear all specified individual flags, e.g.: 1129 + * 1130 + * vma_flags_clear(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT); 1131 + */ 1132 + #define vma_flags_clear(flags, ...) \ 1133 + vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__)) 1134 + 1135 + /* 1136 + * Helper to test that ALL specified flags are set in a VMA. 1137 + * 1138 + * Note: appropriate locks must be held, this function does not acquire them for 1139 + * you. 1140 + */ 1141 + static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma, 1142 + vma_flags_t flags) 1143 + { 1144 + return vma_flags_test_all_mask(&vma->flags, flags); 1145 + } 1146 + 1147 + /* 1148 + * Helper macro for checking that ALL specified flags are set in a VMA, e.g.: 1149 + * 1150 + * if (vma_test_all_flags(vma, VMA_READ_BIT, VMA_MAYREAD_BIT) { ... } 1151 + */ 1152 + #define vma_test_all_flags(vma, ...) \ 1153 + vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) 1154 + 1155 + /* 1156 + * Helper to set all VMA flags in a VMA. 
1157 + * 1158 + * Note: appropriate locks must be held, this function does not acquire them for 1159 + * you. 1160 + */ 1161 + static inline void vma_set_flags_mask(struct vm_area_struct *vma, 1162 + vma_flags_t flags) 1163 + { 1164 + vma_flags_set_mask(&vma->flags, flags); 1165 + } 1166 + 1167 + /* 1168 + * Helper macro for specifying VMA flags in a VMA, e.g.: 1169 + * 1170 + * vma_set_flags(vma, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT, 1171 + * VMA_DONTDUMP_BIT); 1172 + * 1173 + * Note: appropriate locks must be held, this function does not acquire them for 1174 + * you. 1175 + */ 1176 + #define vma_set_flags(vma, ...) \ 1177 + vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) 1178 + 1179 + /* Helper to test all VMA flags in a VMA descriptor. */ 1180 + static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc, 1181 + vma_flags_t flags) 1182 + { 1183 + return vma_flags_test_mask(&desc->vma_flags, flags); 1184 + } 1185 + 1186 + /* 1187 + * Helper macro for testing VMA flags for an input pointer to a struct 1188 + * vm_area_desc object describing a proposed VMA, e.g.: 1189 + * 1190 + * if (vma_desc_test_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, 1191 + * VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... } 1192 + */ 1193 + #define vma_desc_test_flags(desc, ...) \ 1194 + vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) 1195 + 1196 + /* Helper to set all VMA flags in a VMA descriptor. */ 1197 + static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc, 1198 + vma_flags_t flags) 1199 + { 1200 + vma_flags_set_mask(&desc->vma_flags, flags); 1201 + } 1202 + 1203 + /* 1204 + * Helper macro for specifying VMA flags for an input pointer to a struct 1205 + * vm_area_desc object describing a proposed VMA, e.g.: 1206 + * 1207 + * vma_desc_set_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT, 1208 + * VMA_DONTDUMP_BIT); 1209 + */ 1210 + #define vma_desc_set_flags(desc, ...) 
\ 1211 + vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) 1212 + 1213 + /* Helper to clear all VMA flags in a VMA descriptor. */ 1214 + static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc, 1215 + vma_flags_t flags) 1216 + { 1217 + vma_flags_clear_mask(&desc->vma_flags, flags); 1218 + } 1219 + 1220 + /* 1221 + * Helper macro for clearing VMA flags for an input pointer to a struct 1222 + * vm_area_desc object describing a proposed VMA, e.g.: 1223 + * 1224 + * vma_desc_clear_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT, 1225 + * VMA_DONTDUMP_BIT); 1226 + */ 1227 + #define vma_desc_clear_flags(desc, ...) \ 1228 + vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) 1033 1229 1034 1230 static inline void vma_set_anonymous(struct vm_area_struct *vma) 1035 1231 { ··· 1292 1096 return vma->vm_flags & VM_ACCESS_FLAGS; 1293 1097 } 1294 1098 1295 - static inline bool is_shared_maywrite(vm_flags_t vm_flags) 1099 + static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags) 1296 1100 { 1297 1101 return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == 1298 1102 (VM_SHARED | VM_MAYWRITE); 1299 1103 } 1300 1104 1105 + static inline bool is_shared_maywrite(const vma_flags_t *flags) 1106 + { 1107 + return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT); 1108 + } 1109 + 1301 1110 static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma) 1302 1111 { 1303 - return is_shared_maywrite(vma->vm_flags); 1112 + return is_shared_maywrite(&vma->flags); 1304 1113 } 1305 1114 1306 1115 static inline ··· 1933 1732 return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; 1934 1733 } 1935 1734 1735 + static inline bool vma_desc_is_cow_mapping(struct vm_area_desc *desc) 1736 + { 1737 + const vma_flags_t *flags = &desc->vma_flags; 1738 + 1739 + return vma_flags_test(flags, VMA_MAYWRITE_BIT) && 1740 + !vma_flags_test(flags, VMA_SHARED_BIT); 1741 + } 1742 + 1936 1743 #ifndef CONFIG_MMU 1937 1744 static inline bool 
is_nommu_shared_mapping(vm_flags_t flags) 1938 1745 { ··· 1953 1744 * write permissions later. 1954 1745 */ 1955 1746 return flags & (VM_MAYSHARE | VM_MAYOVERLAY); 1747 + } 1748 + 1749 + static inline bool is_nommu_shared_vma_flags(const vma_flags_t *flags) 1750 + { 1751 + return vma_flags_test(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT); 1956 1752 } 1957 1753 #endif 1958 1754 ··· 2841 2627 zap_page_range_single(vma, vma->vm_start, 2842 2628 vma->vm_end - vma->vm_start, NULL); 2843 2629 } 2844 - void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, 2845 - struct vm_area_struct *start_vma, unsigned long start, 2846 - unsigned long end, unsigned long tree_end); 2847 - 2848 2630 struct mmu_notifier_range; 2849 2631 2850 2632 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+12 -11
include/linux/mm_types.h
··· 844 844 845 845 /* 846 846 * If specified, this hook is invoked when an error occurred when 847 - * attempting the selection action. 847 + * attempting the selected action. 848 848 * 849 849 * The hook can return an error code in order to filter the error, but 850 850 * it is not valid to clear the error here. ··· 866 866 #define NUM_VMA_FLAG_BITS BITS_PER_LONG 867 867 typedef struct { 868 868 DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS); 869 - } __private vma_flags_t; 869 + } vma_flags_t; 870 + 871 + #define EMPTY_VMA_FLAGS ((vma_flags_t){ }) 870 872 871 873 /* 872 874 * Describes a VMA that is about to be mmap()'ed. Drivers may choose to ··· 887 885 /* Mutable fields. Populated with initial state. */ 888 886 pgoff_t pgoff; 889 887 struct file *vm_file; 890 - union { 891 - vm_flags_t vm_flags; 892 - vma_flags_t vma_flags; 893 - }; 888 + vma_flags_t vma_flags; 894 889 pgprot_t page_prot; 895 890 896 891 /* Write-only fields. */ ··· 1058 1059 /* Clears all bits in the VMA flags bitmap, non-atomically. */ 1059 1060 static inline void vma_flags_clear_all(vma_flags_t *flags) 1060 1061 { 1061 - bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS); 1062 + bitmap_zero(flags->__vma_flags, NUM_VMA_FLAG_BITS); 1062 1063 } 1063 1064 1064 1065 /* ··· 1069 1070 */ 1070 1071 static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value) 1071 1072 { 1072 - *ACCESS_PRIVATE(flags, __vma_flags) = value; 1073 + unsigned long *bitmap = flags->__vma_flags; 1074 + 1075 + bitmap[0] = value; 1073 1076 } 1074 1077 1075 1078 /* ··· 1082 1081 */ 1083 1082 static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value) 1084 1083 { 1085 - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 1084 + unsigned long *bitmap = flags->__vma_flags; 1086 1085 1087 1086 WRITE_ONCE(*bitmap, value); 1088 1087 } ··· 1090 1089 /* Update the first system word of VMA flags setting bits, non-atomically. 
*/ 1091 1090 static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value) 1092 1091 { 1093 - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 1092 + unsigned long *bitmap = flags->__vma_flags; 1094 1093 1095 1094 *bitmap |= value; 1096 1095 } ··· 1098 1097 /* Update the first system word of VMA flags clearing bits, non-atomically. */ 1099 1098 static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value) 1100 1099 { 1101 - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 1100 + unsigned long *bitmap = flags->__vma_flags; 1102 1101 1103 1102 *bitmap &= ~value; 1104 1103 }
+5 -4
include/linux/mmu_notifier.h
··· 515 515 range->owner = owner; 516 516 } 517 517 518 - #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ 518 + #define clear_flush_young_ptes_notify(__vma, __address, __ptep, __nr) \ 519 519 ({ \ 520 520 int __young; \ 521 521 struct vm_area_struct *___vma = __vma; \ 522 522 unsigned long ___address = __address; \ 523 - __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ 523 + unsigned int ___nr = __nr; \ 524 + __young = clear_flush_young_ptes(___vma, ___address, __ptep, ___nr); \ 524 525 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ 525 526 ___address, \ 526 527 ___address + \ 527 - PAGE_SIZE); \ 528 + ___nr * PAGE_SIZE); \ 528 529 __young; \ 529 530 }) 530 531 ··· 651 650 652 651 #define mmu_notifier_range_update_to_read_only(r) false 653 652 654 - #define ptep_clear_flush_young_notify ptep_clear_flush_young 653 + #define clear_flush_young_ptes_notify clear_flush_young_ptes 655 654 #define pmdp_clear_flush_young_notify pmdp_clear_flush_young 656 655 #define ptep_clear_young_notify ptep_test_and_clear_young 657 656 #define pmdp_clear_young_notify pmdp_test_and_clear_young
+54 -19
include/linux/pgtable.h
··· 23 23 #endif 24 24 25 25 /* 26 - * On almost all architectures and configurations, 0 can be used as the 27 - * upper ceiling to free_pgtables(): on many architectures it has the same 28 - * effect as using TASK_SIZE. However, there is one configuration which 29 - * must impose a more careful limit, to avoid freeing kernel pgtables. 30 - */ 31 - #ifndef USER_PGTABLES_CEILING 32 - #define USER_PGTABLES_CEILING 0UL 33 - #endif 34 - 35 - /* 36 - * This defines the first usable user address. Platforms 37 - * can override its value with custom FIRST_USER_ADDRESS 38 - * defined in their respective <asm/pgtable.h>. 39 - */ 40 - #ifndef FIRST_USER_ADDRESS 41 - #define FIRST_USER_ADDRESS 0UL 42 - #endif 43 - 44 - /* 45 26 * This defines the generic helper for accessing PMD page 46 27 * table page. Although platforms can still override this 47 28 * via their respective <asm/pgtable.h>. ··· 1068 1087 } 1069 1088 #endif 1070 1089 1090 + #ifndef clear_flush_young_ptes 1091 + /** 1092 + * clear_flush_young_ptes - Mark PTEs that map consecutive pages of the same 1093 + * folio as old and flush the TLB. 1094 + * @vma: The virtual memory area the pages are mapped into. 1095 + * @addr: Address the first page is mapped at. 1096 + * @ptep: Page table pointer for the first entry. 1097 + * @nr: Number of entries to clear access bit. 1098 + * 1099 + * May be overridden by the architecture; otherwise, implemented as a simple 1100 + * loop over ptep_clear_flush_young(). 1101 + * 1102 + * Note that PTE bits in the PTE range besides the PFN can differ. For example, 1103 + * some PTEs might be write-protected. 1104 + * 1105 + * Context: The caller holds the page table lock. The PTEs map consecutive 1106 + * pages that belong to the same folio. The PTEs are all in the same PMD. 
1107 + */ 1108 + static inline int clear_flush_young_ptes(struct vm_area_struct *vma, 1109 + unsigned long addr, pte_t *ptep, unsigned int nr) 1110 + { 1111 + int young = 0; 1112 + 1113 + for (;;) { 1114 + young |= ptep_clear_flush_young(vma, addr, ptep); 1115 + if (--nr == 0) 1116 + break; 1117 + ptep++; 1118 + addr += PAGE_SIZE; 1119 + } 1120 + 1121 + return young; 1122 + } 1123 + #endif 1124 + 1071 1125 /* 1072 1126 * On some architectures hardware does not set page access bit when accessing 1073 1127 * memory page, it is responsibility of software setting this bit. It brings ··· 1644 1628 void arch_sync_kernel_mappings(unsigned long start, unsigned long end); 1645 1629 1646 1630 #endif /* CONFIG_MMU */ 1631 + 1632 + /* 1633 + * On almost all architectures and configurations, 0 can be used as the 1634 + * upper ceiling to free_pgtables(): on many architectures it has the same 1635 + * effect as using TASK_SIZE. However, there is one configuration which 1636 + * must impose a more careful limit, to avoid freeing kernel pgtables. 1637 + */ 1638 + #ifndef USER_PGTABLES_CEILING 1639 + #define USER_PGTABLES_CEILING 0UL 1640 + #endif 1641 + 1642 + /* 1643 + * This defines the first usable user address. Platforms 1644 + * can override its value with custom FIRST_USER_ADDRESS 1645 + * defined in their respective <asm/pgtable.h>. 1646 + */ 1647 + #ifndef FIRST_USER_ADDRESS 1648 + #define FIRST_USER_ADDRESS 0UL 1649 + #endif 1647 1650 1648 1651 /* 1649 1652 * No-op macros that just return the current protection value. Defined here
+3 -5
include/linux/shmem_fs.h
··· 102 102 extern const struct fs_parameter_spec shmem_fs_parameters[]; 103 103 extern void shmem_init(void); 104 104 extern int shmem_init_fs_context(struct fs_context *fc); 105 - extern struct file *shmem_file_setup(const char *name, 106 - loff_t size, unsigned long flags); 107 - extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, 108 - unsigned long flags); 105 + struct file *shmem_file_setup(const char *name, loff_t size, vma_flags_t flags); 106 + struct file *shmem_kernel_file_setup(const char *name, loff_t size, vma_flags_t vma_flags); 109 107 extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, 110 - const char *name, loff_t size, unsigned long flags); 108 + const char *name, loff_t size, vma_flags_t flags); 111 109 int shmem_zero_setup(struct vm_area_struct *vma); 112 110 int shmem_zero_setup_desc(struct vm_area_desc *desc); 113 111 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
+6 -6
ipc/shm.c
··· 707 707 int error; 708 708 struct shmid_kernel *shp; 709 709 size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 710 + const bool has_no_reserve = shmflg & SHM_NORESERVE; 711 + vma_flags_t acctflag = EMPTY_VMA_FLAGS; 710 712 struct file *file; 711 713 char name[13]; 712 - vm_flags_t acctflag = 0; 713 714 714 715 if (size < SHMMIN || size > ns->shm_ctlmax) 715 716 return -EINVAL; ··· 750 749 hugesize = ALIGN(size, huge_page_size(hs)); 751 750 752 751 /* hugetlb_file_setup applies strict accounting */ 753 - if (shmflg & SHM_NORESERVE) 754 - acctflag = VM_NORESERVE; 752 + if (has_no_reserve) 753 + vma_flags_set(&acctflag, VMA_NORESERVE_BIT); 755 754 file = hugetlb_file_setup(name, hugesize, acctflag, 756 755 HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 757 756 } else { ··· 759 758 * Do not allow no accounting for OVERCOMMIT_NEVER, even 760 759 * if it's asked for. 761 760 */ 762 - if ((shmflg & SHM_NORESERVE) && 763 - sysctl_overcommit_memory != OVERCOMMIT_NEVER) 764 - acctflag = VM_NORESERVE; 761 + if (has_no_reserve && sysctl_overcommit_memory != OVERCOMMIT_NEVER) 762 + vma_flags_set(&acctflag, VMA_NORESERVE_BIT); 765 763 file = shmem_kernel_file_setup(name, size, acctflag); 766 764 } 767 765 error = PTR_ERR(file);
+36 -18
kernel/cgroup/cpuset.c
··· 4145 4145 return allowed; 4146 4146 } 4147 4147 4148 - bool cpuset_node_allowed(struct cgroup *cgroup, int nid) 4148 + /** 4149 + * cpuset_nodes_allowed - return effective_mems mask from a cgroup cpuset. 4150 + * @cgroup: pointer to struct cgroup. 4151 + * @mask: pointer to struct nodemask_t to be returned. 4152 + * 4153 + * Returns effective_mems mask from a cgroup cpuset if it is cgroup v2 and 4154 + * has cpuset subsys. Otherwise, returns node_states[N_MEMORY]. 4155 + * 4156 + * This function intentionally avoids taking the cpuset_mutex or callback_lock 4157 + * when accessing effective_mems. This is because the obtained effective_mems 4158 + * is stale immediately after the query anyway (e.g., effective_mems is updated 4159 + * immediately after releasing the lock but before returning). 4160 + * 4161 + * As a result, returned @mask may be empty because cs->effective_mems can be 4162 + * rebound during this call. Besides, nodes in @mask are not guaranteed to be 4163 + * online due to hot plugins. Callers should check the mask for validity on 4164 + * return based on its subsequent use. 4165 + **/ 4166 + void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask) 4149 4167 { 4150 4168 struct cgroup_subsys_state *css; 4151 4169 struct cpuset *cs; 4152 - bool allowed; 4153 4170 4154 4171 /* 4155 4172 * In v1, mem_cgroup and cpuset are unlikely in the same hierarchy 4156 4173 * and mems_allowed is likely to be empty even if we could get to it, 4157 - * so return true to avoid taking a global lock on the empty check. 4174 + * so return directly to avoid taking a global lock on the empty check. 
4158 4175 */ 4159 - if (!cpuset_v2()) 4160 - return true; 4176 + if (!cgroup || !cpuset_v2()) { 4177 + nodes_copy(*mask, node_states[N_MEMORY]); 4178 + return; 4179 + } 4161 4180 4162 4181 css = cgroup_get_e_css(cgroup, &cpuset_cgrp_subsys); 4163 - if (!css) 4164 - return true; 4182 + if (!css) { 4183 + nodes_copy(*mask, node_states[N_MEMORY]); 4184 + return; 4185 + } 4165 4186 4166 4187 /* 4188 + * The reference taken via cgroup_get_e_css is sufficient to 4189 + * protect css, but it does not imply safe accesses to effective_mems. 4190 + * 4167 4191 * Normally, accessing effective_mems would require the cpuset_mutex 4168 - * or callback_lock - but node_isset is atomic and the reference 4169 - * taken via cgroup_get_e_css is sufficient to protect css. 4170 - * 4171 - * Since this interface is intended for use by migration paths, we 4172 - * relax locking here to avoid taking global locks - while accepting 4173 - * there may be rare scenarios where the result may be innaccurate. 4174 - * 4175 - * Reclaim and migration are subject to these same race conditions, and 4176 - * cannot make strong isolation guarantees, so this is acceptable. 4192 + * or callback_lock - but the correctness of this information is stale 4193 + * immediately after the query anyway. We do not acquire the lock 4194 + * during this process to save lock contention in exchange for racing 4195 + * against mems_allowed rebinds. 4177 4196 */ 4178 4197 cs = container_of(css, struct cpuset, css); 4179 - allowed = node_isset(nid, cs->effective_mems); 4198 + nodes_copy(*mask, cs->effective_mems); 4180 4199 css_put(css); 4181 - return allowed; 4182 4200 } 4183 4201 4184 4202 /**
+1 -1
kernel/relay.c
··· 91 91 return -EINVAL; 92 92 93 93 desc->vm_ops = &relay_file_mmap_ops; 94 - desc->vm_flags |= VM_DONTEXPAND; 94 + vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT); 95 95 desc->private_data = buf; 96 96 97 97 return 0;
+1 -1
mm/filemap.c
··· 4012 4012 4013 4013 int generic_file_readonly_mmap_prepare(struct vm_area_desc *desc) 4014 4014 { 4015 - if (is_shared_maywrite(desc->vm_flags)) 4015 + if (is_shared_maywrite(&desc->vma_flags)) 4016 4016 return -EINVAL; 4017 4017 return generic_file_mmap_prepare(desc); 4018 4018 }
+11 -11
mm/hugetlb.c
··· 1193 1193 1194 1194 static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map) 1195 1195 { 1196 - VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags)); 1197 - VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE); 1196 + VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags)); 1197 + VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)); 1198 1198 1199 1199 desc->private_data = map; 1200 1200 } 1201 1201 1202 1202 static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags) 1203 1203 { 1204 - VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags)); 1205 - VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE); 1204 + VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags)); 1205 + VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)); 1206 1206 1207 1207 desc->private_data = (void *)((unsigned long)desc->private_data | flags); 1208 1208 } ··· 1216 1216 1217 1217 static bool is_vma_desc_resv_set(struct vm_area_desc *desc, unsigned long flag) 1218 1218 { 1219 - VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags)); 1219 + VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags)); 1220 1220 1221 1221 return ((unsigned long)desc->private_data) & flag; 1222 1222 } ··· 6571 6571 long hugetlb_reserve_pages(struct inode *inode, 6572 6572 long from, long to, 6573 6573 struct vm_area_desc *desc, 6574 - vm_flags_t vm_flags) 6574 + vma_flags_t vma_flags) 6575 6575 { 6576 6576 long chg = -1, add = -1, spool_resv, gbl_resv; 6577 6577 struct hstate *h = hstate_inode(inode); ··· 6592 6592 * attempt will be made for VM_NORESERVE to allocate a page 6593 6593 * without using reserves 6594 6594 */ 6595 - if (vm_flags & VM_NORESERVE) 6595 + if (vma_flags_test(&vma_flags, VMA_NORESERVE_BIT)) 6596 6596 return 0; 6597 6597 6598 6598 /* ··· 6601 6601 * to reserve the full area even if read-only as mprotect() may be 6602 6602 * called to make the mapping read-write. 
Assume !desc is a shm mapping 6603 6603 */ 6604 - if (!desc || desc->vm_flags & VM_MAYSHARE) { 6604 + if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) { 6605 6605 /* 6606 6606 * resv_map can not be NULL as hugetlb_reserve_pages is only 6607 6607 * called for inodes for which resv_maps were created (see ··· 6635 6635 if (err < 0) 6636 6636 goto out_err; 6637 6637 6638 - if (desc && !(desc->vm_flags & VM_MAYSHARE) && h_cg) { 6638 + if (desc && !vma_desc_test_flags(desc, VMA_MAYSHARE_BIT) && h_cg) { 6639 6639 /* For private mappings, the hugetlb_cgroup uncharge info hangs 6640 6640 * of the resv_map. 6641 6641 */ ··· 6672 6672 * consumed reservations are stored in the map. Hence, nothing 6673 6673 * else has to be done for private mappings here 6674 6674 */ 6675 - if (!desc || desc->vm_flags & VM_MAYSHARE) { 6675 + if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) { 6676 6676 add = region_add(resv_map, from, to, regions_needed, h, h_cg); 6677 6677 6678 6678 if (unlikely(add < 0)) { ··· 6736 6736 hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h), 6737 6737 chg * pages_per_huge_page(h), h_cg); 6738 6738 out_err: 6739 - if (!desc || desc->vm_flags & VM_MAYSHARE) 6739 + if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) 6740 6740 /* Only call region_abort if the region_chg succeeded but the 6741 6741 * region_add failed or didn't run. 6742 6742 */
+6 -4
mm/internal.h
··· 197 197 } 198 198 } 199 199 200 + /* unmap_vmas is in mm/memory.c */ 201 + void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap); 202 + 200 203 #ifdef CONFIG_MMU 201 204 202 205 static inline void get_anon_vma(struct anon_vma *anon_vma) ··· 512 509 void deactivate_file_folio(struct folio *folio); 513 510 void folio_activate(struct folio *folio); 514 511 515 - void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, 516 - struct vm_area_struct *start_vma, unsigned long floor, 517 - unsigned long ceiling, bool mm_wr_locked); 512 + void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *desc); 513 + 518 514 void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); 519 515 520 516 struct zap_details; ··· 1046 1044 unsigned long start, unsigned long end, int *locked); 1047 1045 extern long faultin_page_range(struct mm_struct *mm, unsigned long start, 1048 1046 unsigned long end, bool write, int *locked); 1049 - bool mlock_future_ok(const struct mm_struct *mm, vm_flags_t vm_flags, 1047 + bool mlock_future_ok(const struct mm_struct *mm, bool is_vma_locked, 1050 1048 unsigned long bytes); 1051 1049 1052 1050 /*
+1 -1
mm/khugepaged.c
··· 1732 1732 * obtained on guard region installation after the flag is set, so this 1733 1733 * check being performed under this lock excludes races. 1734 1734 */ 1735 - if (vma_flag_test_atomic(vma, VMA_MAYBE_GUARD_BIT)) 1735 + if (vma_test_atomic_flag(vma, VMA_MAYBE_GUARD_BIT)) 1736 1736 return false; 1737 1737 1738 1738 return true;
+1 -1
mm/madvise.c
··· 1140 1140 * acquire an mmap/VMA write lock to read it. All remaining readers may 1141 1141 * or may not see the flag set, but we don't care. 1142 1142 */ 1143 - vma_flag_set_atomic(vma, VMA_MAYBE_GUARD_BIT); 1143 + vma_set_atomic_flag(vma, VMA_MAYBE_GUARD_BIT); 1144 1144 1145 1145 /* 1146 1146 * If anonymous and we are establishing page tables the VMA ought to
+14 -2
mm/memcontrol.c
··· 5649 5649 5650 5650 #endif /* CONFIG_SWAP */ 5651 5651 5652 - bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid) 5652 + void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg, nodemask_t *mask) 5653 5653 { 5654 - return memcg ? cpuset_node_allowed(memcg->css.cgroup, nid) : true; 5654 + nodemask_t allowed; 5655 + 5656 + if (!memcg) 5657 + return; 5658 + 5659 + /* 5660 + * Since this interface is intended for use by migration paths, and 5661 + * reclaim and migration are subject to race conditions such as changes 5662 + * in effective_mems and hot-unplugging of nodes, inaccurate allowed 5663 + * mask is acceptable. 5664 + */ 5665 + cpuset_nodes_allowed(memcg->css.cgroup, &allowed); 5666 + nodes_and(*mask, *mask, allowed); 5655 5667 } 5656 5668 5657 5669 void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg)
+3 -3
mm/memfd.c
··· 86 86 gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE); 87 87 idx >>= huge_page_order(h); 88 88 89 - nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, 0); 89 + nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, EMPTY_VMA_FLAGS); 90 90 if (nr_resv < 0) 91 91 return ERR_PTR(nr_resv); 92 92 ··· 463 463 int err = 0; 464 464 465 465 if (flags & MFD_HUGETLB) { 466 - file = hugetlb_file_setup(name, 0, VM_NORESERVE, 466 + file = hugetlb_file_setup(name, 0, mk_vma_flags(VMA_NORESERVE_BIT), 467 467 HUGETLB_ANONHUGE_INODE, 468 468 (flags >> MFD_HUGE_SHIFT) & 469 469 MFD_HUGE_MASK); 470 470 } else { 471 - file = shmem_file_setup(name, 0, VM_NORESERVE); 471 + file = shmem_file_setup(name, 0, mk_vma_flags(VMA_NORESERVE_BIT)); 472 472 } 473 473 if (IS_ERR(file)) 474 474 return file;
+16 -5
mm/memory-tiers.c
··· 320 320 /** 321 321 * next_demotion_node() - Get the next node in the demotion path 322 322 * @node: The starting node to lookup the next node 323 + * @allowed_mask: The pointer to allowed node mask 323 324 * 324 325 * Return: node id for next memory node in the demotion path hierarchy 325 326 * from @node; NUMA_NO_NODE if @node is terminal. This does not keep 326 327 * @node online or guarantee that it *continues* to be the next demotion 327 328 * target. 328 329 */ 329 - int next_demotion_node(int node) 330 + int next_demotion_node(int node, const nodemask_t *allowed_mask) 330 331 { 331 332 struct demotion_nodes *nd; 332 - int target; 333 + nodemask_t mask; 333 334 334 335 if (!node_demotion) 335 336 return NUMA_NO_NODE; ··· 345 344 * node_demotion[] reads need to be consistent. 346 345 */ 347 346 rcu_read_lock(); 347 + /* Filter out nodes that are not in allowed_mask. */ 348 + nodes_and(mask, nd->preferred, *allowed_mask); 349 + rcu_read_unlock(); 350 + 348 351 /* 349 352 * If there are multiple target nodes, just select one 350 353 * target node randomly. ··· 361 356 * caching issue, which seems more complicated. So selecting 362 357 * target node randomly seems better until now. 363 358 */ 364 - target = node_random(&nd->preferred); 365 - rcu_read_unlock(); 359 + if (!nodes_empty(mask)) 360 + return node_random(&mask); 366 361 367 - return target; 362 + /* 363 + * Preferred nodes are not in allowed_mask. Flip bits in 364 + * allowed_mask as used node mask. Then, use it to get the 365 + * closest demotion target. 366 + */ 367 + nodes_complement(mask, *allowed_mask); 368 + return find_next_best_node(node, &mask); 368 369 } 369 370 370 371 static void disable_all_demotion_targets(void)
+54 -47
mm/memory.c
··· 370 370 } while (pgd++, addr = next, addr != end); 371 371 } 372 372 373 - void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, 374 - struct vm_area_struct *vma, unsigned long floor, 375 - unsigned long ceiling, bool mm_wr_locked) 373 + /** 374 + * free_pgtables() - Free a range of page tables 375 + * @tlb: The mmu gather 376 + * @unmap: The unmap_desc 377 + * 378 + * Note: pg_start and pg_end are provided to indicate the absolute range of the 379 + * page tables that should be removed. This can differ from the vma mappings on 380 + * some archs that may have mappings that need to be removed outside the vmas. 381 + * Note that the prev->vm_end and next->vm_start are often used. 382 + * 383 + * The vma_end differs from the pg_end when a dup_mmap() failed and the tree has 384 + * unrelated data to the mm_struct being torn down. 385 + */ 386 + void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *unmap) 376 387 { 377 388 struct unlink_vma_file_batch vb; 389 + struct ma_state *mas = unmap->mas; 390 + struct vm_area_struct *vma = unmap->first; 391 + 392 + /* 393 + * Note: USER_PGTABLES_CEILING may be passed as the value of pg_end and 394 + * may be 0. Underflow is expected in this case. Otherwise the 395 + * pagetable end is exclusive. vma_end is exclusive. The last vma 396 + * address should never be larger than the pagetable end. 397 + */ 398 + WARN_ON_ONCE(unmap->vma_end - 1 > unmap->pg_end - 1); 378 399 379 400 tlb_free_vmas(tlb); 380 401 ··· 403 382 unsigned long addr = vma->vm_start; 404 383 struct vm_area_struct *next; 405 384 406 - /* 407 - * Note: USER_PGTABLES_CEILING may be passed as ceiling and may 408 - * be 0. This will underflow and is okay. 
409 - */ 410 - next = mas_find(mas, ceiling - 1); 411 - if (unlikely(xa_is_zero(next))) 412 - next = NULL; 385 + next = mas_find(mas, unmap->tree_end - 1); 413 386 414 387 /* 415 388 * Hide vma from rmap and truncate_pagecache before freeing 416 389 * pgtables 417 390 */ 418 - if (mm_wr_locked) 391 + if (unmap->mm_wr_locked) 419 392 vma_start_write(vma); 420 393 unlink_anon_vmas(vma); 421 394 ··· 421 406 */ 422 407 while (next && next->vm_start <= vma->vm_end + PMD_SIZE) { 423 408 vma = next; 424 - next = mas_find(mas, ceiling - 1); 425 - if (unlikely(xa_is_zero(next))) 426 - next = NULL; 427 - if (mm_wr_locked) 409 + next = mas_find(mas, unmap->tree_end - 1); 410 + if (unmap->mm_wr_locked) 428 411 vma_start_write(vma); 429 412 unlink_anon_vmas(vma); 430 413 unlink_file_vma_batch_add(&vb, vma); 431 414 } 432 415 unlink_file_vma_batch_final(&vb); 433 416 434 - free_pgd_range(tlb, addr, vma->vm_end, 435 - floor, next ? next->vm_start : ceiling); 417 + free_pgd_range(tlb, addr, vma->vm_end, unmap->pg_start, 418 + next ? next->vm_start : unmap->pg_end); 436 419 vma = next; 437 420 } while (vma); 438 421 } ··· 2137 2124 /** 2138 2125 * unmap_vmas - unmap a range of memory covered by a list of vma's 2139 2126 * @tlb: address of the caller's struct mmu_gather 2140 - * @mas: the maple state 2141 - * @vma: the starting vma 2142 - * @start_addr: virtual address at which to start unmapping 2143 - * @end_addr: virtual address at which to end unmapping 2144 - * @tree_end: The maximum index to check 2127 + * @unmap: The unmap_desc 2145 2128 * 2146 2129 * Unmap all pages in the vma list. 2147 2130 * ··· 2150 2141 * ensure that any thus-far unmapped pages are flushed before unmap_vmas() 2151 2142 * drops the lock and schedules. 
2152 2143 */ 2153 - void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, 2154 - struct vm_area_struct *vma, unsigned long start_addr, 2155 - unsigned long end_addr, unsigned long tree_end) 2144 + void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) 2156 2145 { 2146 + struct vm_area_struct *vma; 2157 2147 struct mmu_notifier_range range; 2158 2148 struct zap_details details = { 2159 2149 .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, ··· 2160 2152 .even_cows = true, 2161 2153 }; 2162 2154 2155 + vma = unmap->first; 2163 2156 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, 2164 - start_addr, end_addr); 2157 + unmap->vma_start, unmap->vma_end); 2165 2158 mmu_notifier_invalidate_range_start(&range); 2166 2159 do { 2167 - unsigned long start = start_addr; 2168 - unsigned long end = end_addr; 2160 + unsigned long start = unmap->vma_start; 2161 + unsigned long end = unmap->vma_end; 2169 2162 hugetlb_zap_begin(vma, &start, &end); 2170 2163 unmap_single_vma(tlb, vma, start, end, &details); 2171 2164 hugetlb_zap_end(vma, &details); 2172 - vma = mas_find(mas, tree_end - 1); 2173 - } while (vma && likely(!xa_is_zero(vma))); 2165 + vma = mas_find(unmap->mas, unmap->tree_end - 1); 2166 + } while (vma); 2174 2167 mmu_notifier_invalidate_range_end(&range); 2175 2168 } 2176 2169 ··· 2957 2948 return 0; 2958 2949 } 2959 2950 2960 - static int get_remap_pgoff(vm_flags_t vm_flags, unsigned long addr, 2951 + static int get_remap_pgoff(bool is_cow, unsigned long addr, 2961 2952 unsigned long end, unsigned long vm_start, unsigned long vm_end, 2962 2953 unsigned long pfn, pgoff_t *vm_pgoff_p) 2963 2954 { ··· 2967 2958 * un-COW'ed pages by matching them up with "vma->vm_pgoff". 2968 2959 * See vm_normal_page() for details. 
2969 2960 */ 2970 - if (is_cow_mapping(vm_flags)) { 2961 + if (is_cow) { 2971 2962 if (addr != vm_start || end != vm_end) 2972 2963 return -EINVAL; 2973 2964 *vm_pgoff_p = pfn; ··· 2988 2979 if (WARN_ON_ONCE(!PAGE_ALIGNED(addr))) 2989 2980 return -EINVAL; 2990 2981 2991 - VM_WARN_ON_ONCE((vma->vm_flags & VM_REMAP_FLAGS) != VM_REMAP_FLAGS); 2982 + VM_WARN_ON_ONCE(!vma_test_all_flags_mask(vma, VMA_REMAP_FLAGS)); 2992 2983 2993 2984 BUG_ON(addr >= end); 2994 2985 pfn -= addr >> PAGE_SHIFT; ··· 3112 3103 * check it again on complete and will fail there if specified addr is 3113 3104 * invalid. 3114 3105 */ 3115 - get_remap_pgoff(desc->vm_flags, desc->start, desc->end, 3106 + get_remap_pgoff(vma_desc_is_cow_mapping(desc), desc->start, desc->end, 3116 3107 desc->start, desc->end, pfn, &desc->pgoff); 3117 - desc->vm_flags |= VM_REMAP_FLAGS; 3108 + vma_desc_set_flags_mask(desc, VMA_REMAP_FLAGS); 3118 3109 } 3119 3110 3120 3111 static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma, unsigned long addr, ··· 3123 3114 unsigned long end = addr + PAGE_ALIGN(size); 3124 3115 int err; 3125 3116 3126 - err = get_remap_pgoff(vma->vm_flags, addr, end, 3127 - vma->vm_start, vma->vm_end, 3128 - pfn, &vma->vm_pgoff); 3117 + err = get_remap_pgoff(is_cow_mapping(vma->vm_flags), addr, end, 3118 + vma->vm_start, vma->vm_end, pfn, &vma->vm_pgoff); 3129 3119 if (err) 3130 3120 return err; 3131 3121 3132 - vm_flags_set(vma, VM_REMAP_FLAGS); 3122 + vma_set_flags_mask(vma, VMA_REMAP_FLAGS); 3133 3123 return 0; 3134 3124 } 3135 3125 ··· 7324 7316 const unsigned long base_addr = ALIGN_DOWN(addr_hint, folio_size(folio)); 7325 7317 const long fault_idx = (addr_hint - base_addr) / PAGE_SIZE; 7326 7318 const struct range pg = DEFINE_RANGE(0, folio_nr_pages(folio) - 1); 7327 - const int radius = FOLIO_ZERO_LOCALITY_RADIUS; 7319 + const long radius = FOLIO_ZERO_LOCALITY_RADIUS; 7328 7320 struct range r[3]; 7329 7321 int i; 7330 7322 ··· 7332 7324 * Faulting page and its immediate 
neighbourhood. Will be cleared at the 7333 7325 * end to keep its cachelines hot. 7334 7326 */ 7335 - r[2] = DEFINE_RANGE(clamp_t(s64, fault_idx - radius, pg.start, pg.end), 7336 - clamp_t(s64, fault_idx + radius, pg.start, pg.end)); 7327 + r[2] = DEFINE_RANGE(fault_idx - radius < (long)pg.start ? pg.start : fault_idx - radius, 7328 + fault_idx + radius > (long)pg.end ? pg.end : fault_idx + radius); 7329 + 7337 7330 7338 7331 /* Region to the left of the fault */ 7339 - r[1] = DEFINE_RANGE(pg.start, 7340 - clamp_t(s64, r[2].start - 1, pg.start - 1, r[2].start)); 7332 + r[1] = DEFINE_RANGE(pg.start, r[2].start - 1); 7341 7333 7342 7334 /* Region to the right of the fault: always valid for the common fault_idx=0 case. */ 7343 - r[0] = DEFINE_RANGE(clamp_t(s64, r[2].end + 1, r[2].end, pg.end + 1), 7344 - pg.end); 7335 + r[0] = DEFINE_RANGE(r[2].end + 1, pg.end); 7345 7336 7346 7337 for (i = 0; i < ARRAY_SIZE(r); i++) { 7347 7338 const unsigned long addr = base_addr + r[i].start * PAGE_SIZE; 7348 - const unsigned int nr_pages = range_len(&r[i]); 7339 + const long nr_pages = (long)range_len(&r[i]); 7349 7340 struct page *page = folio_page(folio, r[i].start); 7350 7341 7351 7342 if (nr_pages > 0)
+75 -35
mm/mmap.c
··· 108 108 if (IS_ERR_VALUE(mapped_addr)) 109 109 return mapped_addr; 110 110 111 - return mlock_future_ok(current->mm, current->mm->def_flags, len) 111 + return mlock_future_ok(current->mm, 112 + current->mm->def_flags & VM_LOCKED, len) 112 113 ? 0 : -EAGAIN; 113 114 } 114 115 ··· 226 225 return hint; 227 226 } 228 227 229 - bool mlock_future_ok(const struct mm_struct *mm, vm_flags_t vm_flags, 230 - unsigned long bytes) 228 + bool mlock_future_ok(const struct mm_struct *mm, bool is_vma_locked, 229 + unsigned long bytes) 231 230 { 232 231 unsigned long locked_pages, limit_pages; 233 232 234 - if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) 233 + if (!is_vma_locked || capable(CAP_IPC_LOCK)) 235 234 return true; 236 235 237 236 locked_pages = bytes >> PAGE_SHIFT; ··· 417 416 if (!can_do_mlock()) 418 417 return -EPERM; 419 418 420 - if (!mlock_future_ok(mm, vm_flags, len)) 419 + if (!mlock_future_ok(mm, vm_flags & VM_LOCKED, len)) 421 420 return -EAGAIN; 422 421 423 422 if (file) { ··· 595 594 * taken when vm_ops->mmap() is called 596 595 */ 597 596 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, 598 - VM_NORESERVE, 597 + mk_vma_flags(VMA_NORESERVE_BIT), 599 598 HUGETLB_ANONHUGE_INODE, 600 599 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); 601 600 if (IS_ERR(file)) ··· 1248 1247 } 1249 1248 EXPORT_SYMBOL(vm_brk_flags); 1250 1249 1250 + static 1251 + unsigned long tear_down_vmas(struct mm_struct *mm, struct vma_iterator *vmi, 1252 + struct vm_area_struct *vma, unsigned long end) 1253 + { 1254 + unsigned long nr_accounted = 0; 1255 + int count = 0; 1256 + 1257 + mmap_assert_write_locked(mm); 1258 + vma_iter_set(vmi, vma->vm_end); 1259 + do { 1260 + if (vma->vm_flags & VM_ACCOUNT) 1261 + nr_accounted += vma_pages(vma); 1262 + vma_mark_detached(vma); 1263 + remove_vma(vma); 1264 + count++; 1265 + cond_resched(); 1266 + vma = vma_next(vmi); 1267 + } while (vma && vma->vm_end <= end); 1268 + 1269 + VM_WARN_ON_ONCE(count != mm->map_count); 1270 + return nr_accounted; 
1271 + } 1272 + 1251 1273 /* Release all mmaps. */ 1252 1274 void exit_mmap(struct mm_struct *mm) 1253 1275 { ··· 1278 1254 struct vm_area_struct *vma; 1279 1255 unsigned long nr_accounted = 0; 1280 1256 VMA_ITERATOR(vmi, mm, 0); 1281 - int count = 0; 1257 + struct unmap_desc unmap; 1282 1258 1283 1259 /* mm's last user has gone, and its about to be pulled down */ 1284 1260 mmu_notifier_release(mm); ··· 1287 1263 arch_exit_mmap(mm); 1288 1264 1289 1265 vma = vma_next(&vmi); 1290 - if (!vma || unlikely(xa_is_zero(vma))) { 1266 + if (!vma) { 1291 1267 /* Can happen if dup_mmap() received an OOM */ 1292 1268 mmap_read_unlock(mm); 1293 1269 mmap_write_lock(mm); 1294 1270 goto destroy; 1295 1271 } 1296 1272 1273 + unmap_all_init(&unmap, &vmi, vma); 1297 1274 flush_cache_mm(mm); 1298 1275 tlb_gather_mmu_fullmm(&tlb, mm); 1299 1276 /* update_hiwater_rss(mm) here? but nobody should be looking */ 1300 1277 /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */ 1301 - unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX); 1278 + unmap_vmas(&tlb, &unmap); 1302 1279 mmap_read_unlock(mm); 1303 1280 1304 1281 /* ··· 1308 1283 */ 1309 1284 mm_flags_set(MMF_OOM_SKIP, mm); 1310 1285 mmap_write_lock(mm); 1286 + unmap.mm_wr_locked = true; 1311 1287 mt_clear_in_rcu(&mm->mm_mt); 1312 - vma_iter_set(&vmi, vma->vm_end); 1313 - free_pgtables(&tlb, &vmi.mas, vma, FIRST_USER_ADDRESS, 1314 - USER_PGTABLES_CEILING, true); 1288 + unmap_pgtable_init(&unmap, &vmi); 1289 + free_pgtables(&tlb, &unmap); 1315 1290 tlb_finish_mmu(&tlb); 1316 1291 1317 1292 /* ··· 1319 1294 * enabled, without holding any MM locks besides the unreachable 1320 1295 * mmap_write_lock. 
1321 1296 */ 1322 - vma_iter_set(&vmi, vma->vm_end); 1323 - do { 1324 - if (vma->vm_flags & VM_ACCOUNT) 1325 - nr_accounted += vma_pages(vma); 1326 - vma_mark_detached(vma); 1327 - remove_vma(vma); 1328 - count++; 1329 - cond_resched(); 1330 - vma = vma_next(&vmi); 1331 - } while (vma && likely(!xa_is_zero(vma))); 1297 + nr_accounted = tear_down_vmas(mm, &vmi, vma, ULONG_MAX); 1332 1298 1333 - BUG_ON(count != mm->map_count); 1334 - 1335 - trace_exit_mmap(mm); 1336 1299 destroy: 1337 1300 __mt_destroy(&mm->mm_mt); 1301 + trace_exit_mmap(mm); 1338 1302 mmap_write_unlock(mm); 1339 1303 vm_unacct_memory(nr_accounted); 1340 1304 } ··· 1854 1840 ksm_fork(mm, oldmm); 1855 1841 khugepaged_fork(mm, oldmm); 1856 1842 } else { 1843 + unsigned long end; 1857 1844 1858 1845 /* 1859 - * The entire maple tree has already been duplicated. If the 1860 - * mmap duplication fails, mark the failure point with 1861 - * XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered, 1862 - * stop releasing VMAs that have not been duplicated after this 1863 - * point. 1846 + * The entire maple tree has already been duplicated, but 1847 + * replacing the vmas failed at mpnt (which could be NULL if 1848 + * all were allocated but the last vma was not fully set up). 1849 + * Use the start address of the failure point to clean up the 1850 + * partially initialized tree. 1864 1851 */ 1865 - if (mpnt) { 1866 - mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); 1867 - mas_store(&vmi.mas, XA_ZERO_ENTRY); 1868 - /* Avoid OOM iterating a broken tree */ 1869 - mm_flags_set(MMF_OOM_SKIP, mm); 1852 + if (!mm->map_count) { 1853 + /* zero vmas were written to the new tree. 
*/ 1854 + end = 0; 1855 + } else if (mpnt) { 1856 + /* partial tree failure */ 1857 + end = mpnt->vm_start; 1858 + } else { 1859 + /* All vmas were written to the new tree */ 1860 + end = ULONG_MAX; 1870 1861 } 1862 + 1863 + /* Hide mm from oom killer because the memory is being freed */ 1864 + mm_flags_set(MMF_OOM_SKIP, mm); 1865 + if (end) { 1866 + vma_iter_set(&vmi, 0); 1867 + tmp = vma_next(&vmi); 1868 + UNMAP_STATE(unmap, &vmi, /* first = */ tmp, 1869 + /* vma_start = */ 0, /* vma_end = */ end, 1870 + /* prev = */ NULL, /* next = */ NULL); 1871 + 1872 + /* 1873 + * Don't iterate over vmas beyond the failure point for 1874 + * both unmap_vma() and free_pgtables(). 1875 + */ 1876 + unmap.tree_end = end; 1877 + flush_cache_mm(mm); 1878 + unmap_region(&unmap); 1879 + charge = tear_down_vmas(mm, &vmi, tmp, end); 1880 + vm_unacct_memory(charge); 1881 + } 1882 + __mt_destroy(&mm->mm_mt); 1871 1883 /* 1872 1884 * The mm_struct is going to exit, but the locks will be dropped 1873 1885 * first. Set the mm_struct as unstable is advisable as it is
+1 -1
mm/mremap.c
··· 1740 1740 if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) 1741 1741 return -EFAULT; 1742 1742 1743 - if (!mlock_future_ok(mm, vma->vm_flags, vrm->delta)) 1743 + if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, vrm->delta)) 1744 1744 return -EAGAIN; 1745 1745 1746 1746 if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT))
+1
mm/page_alloc.c
··· 1429 1429 1430 1430 page_cpupid_reset_last(page); 1431 1431 page->flags.f &= ~PAGE_FLAGS_CHECK_AT_PREP; 1432 + page->private = 0; 1432 1433 reset_page_owner(page, order); 1433 1434 page_table_check_free(page, order); 1434 1435 pgalloc_tag_sub(page, 1 << order);
+32 -6
mm/rmap.c
··· 913 913 struct folio_referenced_arg *pra = arg; 914 914 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); 915 915 int ptes = 0, referenced = 0; 916 + unsigned int nr; 916 917 917 918 while (page_vma_mapped_walk(&pvmw)) { 918 919 address = pvmw.address; 920 + nr = 1; 919 921 920 922 if (vma->vm_flags & VM_LOCKED) { 921 923 ptes++; ··· 962 960 if (lru_gen_look_around(&pvmw)) 963 961 referenced++; 964 962 } else if (pvmw.pte) { 965 - if (ptep_clear_flush_young_notify(vma, address, 966 - pvmw.pte)) 963 + if (folio_test_large(folio)) { 964 + unsigned long end_addr = pmd_addr_end(address, vma->vm_end); 965 + unsigned int max_nr = (end_addr - address) >> PAGE_SHIFT; 966 + pte_t pteval = ptep_get(pvmw.pte); 967 + 968 + nr = folio_pte_batch(folio, pvmw.pte, 969 + pteval, max_nr); 970 + } 971 + 972 + ptes += nr; 973 + if (clear_flush_young_ptes_notify(vma, address, pvmw.pte, nr)) 967 974 referenced++; 975 + /* Skip the batched PTEs */ 976 + pvmw.pte += nr - 1; 977 + pvmw.address += (nr - 1) * PAGE_SIZE; 968 978 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 969 979 if (pmdp_clear_flush_young_notify(vma, address, 970 980 pvmw.pmd)) ··· 986 972 WARN_ON_ONCE(1); 987 973 } 988 974 989 - pra->mapcount--; 975 + pra->mapcount -= nr; 976 + /* 977 + * If we are sure that we batched the entire folio, 978 + * we can just optimize and stop right here. 979 + */ 980 + if (ptes == pvmw.nr_pages) { 981 + page_vma_mapped_walk_done(&pvmw); 982 + break; 983 + } 990 984 } 991 985 992 986 if (referenced) ··· 1945 1923 end_addr = pmd_addr_end(addr, vma->vm_end); 1946 1924 max_nr = (end_addr - addr) >> PAGE_SHIFT; 1947 1925 1948 - /* We only support lazyfree batching for now ... */ 1949 - if (!folio_test_anon(folio) || folio_test_swapbacked(folio)) 1926 + /* We only support lazyfree or file folios batching for now ... 
*/ 1927 + if (folio_test_anon(folio) && folio_test_swapbacked(folio)) 1950 1928 return 1; 1929 + 1951 1930 if (pte_unused(pte)) 1931 + return 1; 1932 + 1933 + if (userfaultfd_wp(vma)) 1952 1934 return 1; 1953 1935 1954 1936 return folio_pte_batch(folio, pvmw->pte, pte, max_nr); ··· 2317 2291 * 2318 2292 * See Documentation/mm/mmu_notifier.rst 2319 2293 */ 2320 - dec_mm_counter(mm, mm_counter_file(folio)); 2294 + add_mm_counter(mm, mm_counter_file(folio), -nr_pages); 2321 2295 } 2322 2296 discard: 2323 2297 if (unlikely(folio_test_hugetlb(folio))) {
+3 -4
mm/secretmem.c
··· 122 122 { 123 123 const unsigned long len = vma_desc_size(desc); 124 124 125 - if ((desc->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) 125 + if (!vma_desc_test_flags(desc, VMA_SHARED_BIT, VMA_MAYSHARE_BIT)) 126 126 return -EINVAL; 127 127 128 - if (!mlock_future_ok(desc->mm, desc->vm_flags | VM_LOCKED, len)) 128 + vma_desc_set_flags(desc, VMA_LOCKED_BIT, VMA_DONTDUMP_BIT); 129 + if (!mlock_future_ok(desc->mm, /*is_vma_locked=*/ true, len)) 129 130 return -EAGAIN; 130 - 131 - desc->vm_flags |= VM_LOCKED | VM_DONTDUMP; 132 131 desc->vm_ops = &secretmem_vm_ops; 133 132 134 133 return 0;
+34 -27
mm/shmem.c
··· 3062 3062 } 3063 3063 3064 3064 static struct inode *__shmem_get_inode(struct mnt_idmap *idmap, 3065 - struct super_block *sb, 3066 - struct inode *dir, umode_t mode, 3067 - dev_t dev, unsigned long flags) 3065 + struct super_block *sb, 3066 + struct inode *dir, umode_t mode, 3067 + dev_t dev, vma_flags_t flags) 3068 3068 { 3069 3069 struct inode *inode; 3070 3070 struct shmem_inode_info *info; ··· 3092 3092 spin_lock_init(&info->lock); 3093 3093 atomic_set(&info->stop_eviction, 0); 3094 3094 info->seals = F_SEAL_SEAL; 3095 - info->flags = (flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0; 3095 + info->flags = vma_flags_test(&flags, VMA_NORESERVE_BIT) 3096 + ? SHMEM_F_NORESERVE : 0; 3096 3097 info->i_crtime = inode_get_mtime(inode); 3097 3098 info->fsflags = (dir == NULL) ? 0 : 3098 3099 SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED; ··· 3146 3145 #ifdef CONFIG_TMPFS_QUOTA 3147 3146 static struct inode *shmem_get_inode(struct mnt_idmap *idmap, 3148 3147 struct super_block *sb, struct inode *dir, 3149 - umode_t mode, dev_t dev, unsigned long flags) 3148 + umode_t mode, dev_t dev, vma_flags_t flags) 3150 3149 { 3151 3150 int err; 3152 3151 struct inode *inode; ··· 3172 3171 return ERR_PTR(err); 3173 3172 } 3174 3173 #else 3175 - static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, 3174 + static struct inode *shmem_get_inode(struct mnt_idmap *idmap, 3176 3175 struct super_block *sb, struct inode *dir, 3177 - umode_t mode, dev_t dev, unsigned long flags) 3176 + umode_t mode, dev_t dev, vma_flags_t flags) 3178 3177 { 3179 3178 return __shmem_get_inode(idmap, sb, dir, mode, dev, flags); 3180 3179 } ··· 3881 3880 if (!generic_ci_validate_strict_name(dir, &dentry->d_name)) 3882 3881 return -EINVAL; 3883 3882 3884 - inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE); 3883 + inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, 3884 + mk_vma_flags(VMA_NORESERVE_BIT)); 3885 3885 if (IS_ERR(inode)) 3886 3886 return PTR_ERR(inode); 3887 
3887 ··· 3917 3915 struct inode *inode; 3918 3916 int error; 3919 3917 3920 - inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE); 3918 + inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, 3919 + mk_vma_flags(VMA_NORESERVE_BIT)); 3921 3920 if (IS_ERR(inode)) { 3922 3921 error = PTR_ERR(inode); 3923 3922 goto err_out; ··· 4115 4112 return -ENAMETOOLONG; 4116 4113 4117 4114 inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0, 4118 - VM_NORESERVE); 4115 + mk_vma_flags(VMA_NORESERVE_BIT)); 4119 4116 if (IS_ERR(inode)) 4120 4117 return PTR_ERR(inode); 4121 4118 ··· 5116 5113 #endif /* CONFIG_TMPFS_QUOTA */ 5117 5114 5118 5115 inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, 5119 - S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); 5116 + S_IFDIR | sbinfo->mode, 0, 5117 + mk_vma_flags(VMA_NORESERVE_BIT)); 5120 5118 if (IS_ERR(inode)) { 5121 5119 error = PTR_ERR(inode); 5122 5120 goto failed; ··· 5818 5814 5819 5815 static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, 5820 5816 struct super_block *sb, struct inode *dir, 5821 - umode_t mode, dev_t dev, unsigned long flags) 5817 + umode_t mode, dev_t dev, vma_flags_t flags) 5822 5818 { 5823 5819 struct inode *inode = ramfs_get_inode(sb, dir, mode, dev); 5824 5820 return inode ? inode : ERR_PTR(-ENOSPC); ··· 5829 5825 /* common code */ 5830 5826 5831 5827 static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, 5832 - loff_t size, unsigned long vm_flags, 5828 + loff_t size, vma_flags_t flags, 5833 5829 unsigned int i_flags) 5834 5830 { 5835 - unsigned long flags = (vm_flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0; 5831 + const unsigned long shmem_flags = 5832 + vma_flags_test(&flags, VMA_NORESERVE_BIT) ? 
SHMEM_F_NORESERVE : 0; 5836 5833 struct inode *inode; 5837 5834 struct file *res; 5838 5835 ··· 5846 5841 if (is_idmapped_mnt(mnt)) 5847 5842 return ERR_PTR(-EINVAL); 5848 5843 5849 - if (shmem_acct_size(flags, size)) 5844 + if (shmem_acct_size(shmem_flags, size)) 5850 5845 return ERR_PTR(-ENOMEM); 5851 5846 5852 5847 inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL, 5853 - S_IFREG | S_IRWXUGO, 0, vm_flags); 5848 + S_IFREG | S_IRWXUGO, 0, flags); 5854 5849 if (IS_ERR(inode)) { 5855 - shmem_unacct_size(flags, size); 5850 + shmem_unacct_size(shmem_flags, size); 5856 5851 return ERR_CAST(inode); 5857 5852 } 5858 5853 inode->i_flags |= i_flags; ··· 5875 5870 * checks are provided at the key or shm level rather than the inode. 5876 5871 * @name: name for dentry (to be seen in /proc/<pid>/maps) 5877 5872 * @size: size to be set for the file 5878 - * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 5873 + * @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size 5879 5874 */ 5880 - struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) 5875 + struct file *shmem_kernel_file_setup(const char *name, loff_t size, 5876 + vma_flags_t flags) 5881 5877 { 5882 5878 return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE); 5883 5879 } ··· 5888 5882 * shmem_file_setup - get an unlinked file living in tmpfs 5889 5883 * @name: name for dentry (to be seen in /proc/<pid>/maps) 5890 5884 * @size: size to be set for the file 5891 - * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 5885 + * @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size 5892 5886 */ 5893 - struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) 5887 + struct file *shmem_file_setup(const char *name, loff_t size, vma_flags_t flags) 5894 5888 { 5895 5889 return __shmem_file_setup(shm_mnt, name, size, flags, 0); 5896 5890 } ··· 5901 5895 * @mnt: the 
tmpfs mount where the file will be created 5902 5896 * @name: name for dentry (to be seen in /proc/<pid>/maps) 5903 5897 * @size: size to be set for the file 5904 - * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 5898 + * @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size 5905 5899 */ 5906 5900 struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, 5907 - loff_t size, unsigned long flags) 5901 + loff_t size, vma_flags_t flags) 5908 5902 { 5909 5903 return __shmem_file_setup(mnt, name, size, flags, 0); 5910 5904 } 5911 5905 EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); 5912 5906 5913 - static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, vm_flags_t vm_flags) 5907 + static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, 5908 + vma_flags_t flags) 5914 5909 { 5915 5910 loff_t size = end - start; 5916 5911 ··· 5921 5914 * accessible to the user through its mapping, use S_PRIVATE flag to 5922 5915 * bypass file security, in the same way as shmem_kernel_file_setup(). 5923 5916 */ 5924 - return shmem_kernel_file_setup("dev/zero", size, vm_flags); 5917 + return shmem_kernel_file_setup("dev/zero", size, flags); 5925 5918 } 5926 5919 5927 5920 /** ··· 5931 5924 */ 5932 5925 int shmem_zero_setup(struct vm_area_struct *vma) 5933 5926 { 5934 - struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->vm_flags); 5927 + struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->flags); 5935 5928 5936 5929 if (IS_ERR(file)) 5937 5930 return PTR_ERR(file); ··· 5952 5945 */ 5953 5946 int shmem_zero_setup_desc(struct vm_area_desc *desc) 5954 5947 { 5955 - struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vm_flags); 5948 + struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vma_flags); 5956 5949 5957 5950 if (IS_ERR(file)) 5958 5951 return PTR_ERR(file);
+1 -1
mm/util.c
··· 1154 1154 1155 1155 .pgoff = vma->vm_pgoff, 1156 1156 .vm_file = vma->vm_file, 1157 - .vm_flags = vma->vm_flags, 1157 + .vma_flags = vma->flags, 1158 1158 .page_prot = vma->vm_page_prot, 1159 1159 1160 1160 .action.type = MMAP_NOTHING, /* Default */
+37 -30
mm/vma.c
··· 15 15 unsigned long end; 16 16 pgoff_t pgoff; 17 17 unsigned long pglen; 18 - vm_flags_t vm_flags; 18 + union { 19 + vm_flags_t vm_flags; 20 + vma_flags_t vma_flags; 21 + }; 19 22 struct file *file; 20 23 pgprot_t page_prot; 21 24 ··· 475 472 * 476 473 * Called with the mm semaphore held. 477 474 */ 478 - void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, 479 - struct vm_area_struct *prev, struct vm_area_struct *next) 475 + void unmap_region(struct unmap_desc *unmap) 480 476 { 481 - struct mm_struct *mm = vma->vm_mm; 477 + struct mm_struct *mm = unmap->first->vm_mm; 482 478 struct mmu_gather tlb; 483 479 484 480 tlb_gather_mmu(&tlb, mm); 485 481 update_hiwater_rss(mm); 486 - unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end); 487 - mas_set(mas, vma->vm_end); 488 - free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, 489 - next ? next->vm_start : USER_PGTABLES_CEILING, 490 - /* mm_wr_locked = */ true); 482 + unmap_vmas(&tlb, unmap); 483 + mas_set(unmap->mas, unmap->tree_reset); 484 + free_pgtables(&tlb, unmap); 491 485 tlb_finish_mmu(&tlb); 492 486 } 493 487 ··· 1256 1256 static inline void vms_clear_ptes(struct vma_munmap_struct *vms, 1257 1257 struct ma_state *mas_detach, bool mm_wr_locked) 1258 1258 { 1259 - struct mmu_gather tlb; 1259 + struct unmap_desc unmap = { 1260 + .mas = mas_detach, 1261 + .first = vms->vma, 1262 + /* start and end may be different if there is no prev or next vma. */ 1263 + .pg_start = vms->unmap_start, 1264 + .pg_end = vms->unmap_end, 1265 + .vma_start = vms->start, 1266 + .vma_end = vms->end, 1267 + /* 1268 + * The tree limits and reset differ from the normal case since it's a 1269 + * side-tree 1270 + */ 1271 + .tree_reset = 1, 1272 + .tree_end = vms->vma_count, 1273 + /* 1274 + * We can free page tables without write-locking mmap_lock because VMAs 1275 + * were isolated before we downgraded mmap_lock. 
1276 + */ 1277 + .mm_wr_locked = mm_wr_locked, 1278 + }; 1260 1279 1261 1280 if (!vms->clear_ptes) /* Nothing to do */ 1262 1281 return; 1263 1282 1264 - /* 1265 - * We can free page tables without write-locking mmap_lock because VMAs 1266 - * were isolated before we downgraded mmap_lock. 1267 - */ 1268 1283 mas_set(mas_detach, 1); 1269 - tlb_gather_mmu(&tlb, vms->vma->vm_mm); 1270 - update_hiwater_rss(vms->vma->vm_mm); 1271 - unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, 1272 - vms->vma_count); 1273 - 1274 - mas_set(mas_detach, 1); 1275 - /* start and end may be different if there is no prev or next vma. */ 1276 - free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start, 1277 - vms->unmap_end, mm_wr_locked); 1278 - tlb_finish_mmu(&tlb); 1284 + unmap_region(&unmap); 1279 1285 vms->clear_ptes = false; 1280 1286 } 1281 1287 ··· 2372 2366 2373 2367 desc->pgoff = map->pgoff; 2374 2368 desc->vm_file = map->file; 2375 - desc->vm_flags = map->vm_flags; 2369 + desc->vma_flags = map->vma_flags; 2376 2370 desc->page_prot = map->page_prot; 2377 2371 } 2378 2372 ··· 2467 2461 2468 2462 error = mmap_file(vma->vm_file, vma); 2469 2463 if (error) { 2464 + UNMAP_STATE(unmap, vmi, vma, vma->vm_start, vma->vm_end, 2465 + map->prev, map->next); 2470 2466 fput(vma->vm_file); 2471 2467 vma->vm_file = NULL; 2472 2468 2473 2469 vma_iter_set(vmi, vma->vm_end); 2474 2470 /* Undo any partial mapping done by a device driver. */ 2475 - unmap_region(&vmi->mas, vma, map->prev, map->next); 2476 - 2471 + unmap_region(&unmap); 2477 2472 return error; 2478 2473 } 2479 2474 ··· 2653 2646 map->file_doesnt_need_get = true; 2654 2647 map->file = desc->vm_file; 2655 2648 } 2656 - map->vm_flags = desc->vm_flags; 2649 + map->vma_flags = desc->vma_flags; 2657 2650 map->page_prot = desc->page_prot; 2658 2651 /* User-defined fields. */ 2659 2652 map->vm_ops = desc->vm_ops; ··· 2826 2819 return -EINVAL; 2827 2820 2828 2821 /* Map writable and ensure this isn't a sealed memfd. 
*/ 2829 - if (file && is_shared_maywrite(vm_flags)) { 2822 + if (file && is_shared_maywrite_vm_flags(vm_flags)) { 2830 2823 int error = mapping_map_writable(file->f_mapping); 2831 2824 2832 2825 if (error) ··· 3056 3049 return -ENOMEM; 3057 3050 3058 3051 /* mlock limit tests */ 3059 - if (!mlock_future_ok(mm, vma->vm_flags, grow << PAGE_SHIFT)) 3052 + if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, grow << PAGE_SHIFT)) 3060 3053 return -ENOMEM; 3061 3054 3062 3055 /* Check to ensure the stack will not grow into a hugetlb-only region */
+68 -5
mm/vma.h
··· 155 155 156 156 }; 157 157 158 + struct unmap_desc { 159 + struct ma_state *mas; /* the maple state point to the first vma */ 160 + struct vm_area_struct *first; /* The first vma */ 161 + unsigned long pg_start; /* The first pagetable address to free (floor) */ 162 + unsigned long pg_end; /* The last pagetable address to free (ceiling) */ 163 + unsigned long vma_start; /* The min vma address */ 164 + unsigned long vma_end; /* The max vma address */ 165 + unsigned long tree_end; /* Maximum for the vma tree search */ 166 + unsigned long tree_reset; /* Where to reset the vma tree walk */ 167 + bool mm_wr_locked; /* If the mmap write lock is held */ 168 + }; 169 + 170 + /* 171 + * unmap_all_init() - Initialize unmap_desc to remove all vmas, point the 172 + * pg_start and pg_end to a safe location. 173 + */ 174 + static inline void unmap_all_init(struct unmap_desc *unmap, 175 + struct vma_iterator *vmi, struct vm_area_struct *vma) 176 + { 177 + unmap->mas = &vmi->mas; 178 + unmap->first = vma; 179 + unmap->pg_start = FIRST_USER_ADDRESS; 180 + unmap->pg_end = USER_PGTABLES_CEILING; 181 + unmap->vma_start = 0; 182 + unmap->vma_end = ULONG_MAX; 183 + unmap->tree_end = ULONG_MAX; 184 + unmap->tree_reset = vma->vm_end; 185 + unmap->mm_wr_locked = false; 186 + } 187 + 188 + /* 189 + * unmap_pgtable_init() - Initialize unmap_desc to remove all page tables within 190 + * the user range. 191 + * 192 + * ARM can have mappings outside of vmas. 
193 + * See: e2cdef8c847b4 ("[PATCH] freepgt: free_pgtables from FIRST_USER_ADDRESS") 194 + * 195 + * ARM LPAE uses page table mappings beyond the USER_PGTABLES_CEILING 196 + * See: CONFIG_ARM_LPAE in arch/arm/include/asm/pgtable.h 197 + */ 198 + static inline void unmap_pgtable_init(struct unmap_desc *unmap, 199 + struct vma_iterator *vmi) 200 + { 201 + vma_iter_set(vmi, unmap->tree_reset); 202 + unmap->vma_start = FIRST_USER_ADDRESS; 203 + unmap->vma_end = USER_PGTABLES_CEILING; 204 + unmap->tree_end = USER_PGTABLES_CEILING; 205 + } 206 + 207 + #define UNMAP_STATE(name, _vmi, _vma, _vma_start, _vma_end, _prev, _next) \ 208 + struct unmap_desc name = { \ 209 + .mas = &(_vmi)->mas, \ 210 + .first = _vma, \ 211 + .pg_start = _prev ? ((struct vm_area_struct *)_prev)->vm_end : \ 212 + FIRST_USER_ADDRESS, \ 213 + .pg_end = _next ? ((struct vm_area_struct *)_next)->vm_start : \ 214 + USER_PGTABLES_CEILING, \ 215 + .vma_start = _vma_start, \ 216 + .vma_end = _vma_end, \ 217 + .tree_end = _next ? \ 218 + ((struct vm_area_struct *)_next)->vm_start : \ 219 + USER_PGTABLES_CEILING, \ 220 + .tree_reset = _vma->vm_end, \ 221 + .mm_wr_locked = true, \ 222 + } 223 + 158 224 static inline bool vmg_nomem(struct vma_merge_struct *vmg) 159 225 { 160 226 return vmg->state == VMA_MERGE_ERROR_NOMEM; ··· 309 243 vma->vm_pgoff = desc->pgoff; 310 244 if (desc->vm_file != vma->vm_file) 311 245 vma_set_file(vma, desc->vm_file); 312 - if (desc->vm_flags != vma->vm_flags) 313 - vm_flags_set(vma, desc->vm_flags); 246 + vma->flags = desc->vma_flags; 314 247 vma->vm_page_prot = desc->page_prot; 315 248 316 249 /* User-defined fields. 
*/ ··· 327 262 bool unlock); 328 263 329 264 void remove_vma(struct vm_area_struct *vma); 330 - 331 - void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, 332 - struct vm_area_struct *prev, struct vm_area_struct *next); 265 + void unmap_region(struct unmap_desc *unmap); 333 266 334 267 /** 335 268 * vma_modify_flags() - Perform any necessary split/merge in preparation for
+1
mm/vma_internal.h
··· 46 46 #include <linux/swap.h> 47 47 #include <linux/uprobes.h> 48 48 #include <linux/userfaultfd_k.h> 49 + #include <linux/pgtable.h> 49 50 50 51 #include <asm/current.h> 51 52 #include <asm/tlb.h>
+21 -12
mm/vmscan.c
··· 343 343 static bool can_demote(int nid, struct scan_control *sc, 344 344 struct mem_cgroup *memcg) 345 345 { 346 - int demotion_nid; 346 + struct pglist_data *pgdat = NODE_DATA(nid); 347 + nodemask_t allowed_mask; 347 348 348 - if (!numa_demotion_enabled) 349 + if (!pgdat || !numa_demotion_enabled) 349 350 return false; 350 351 if (sc && sc->no_demotion) 351 352 return false; 352 353 353 - demotion_nid = next_demotion_node(nid); 354 - if (demotion_nid == NUMA_NO_NODE) 354 + node_get_allowed_targets(pgdat, &allowed_mask); 355 + if (nodes_empty(allowed_mask)) 355 356 return false; 356 357 357 - /* If demotion node isn't in the cgroup's mems_allowed, fall back */ 358 - return mem_cgroup_node_allowed(memcg, demotion_nid); 358 + /* Filter out nodes that are not in cgroup's mems_allowed. */ 359 + mem_cgroup_node_filter_allowed(memcg, &allowed_mask); 360 + return !nodes_empty(allowed_mask); 359 361 } 360 362 361 363 static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg, ··· 1019 1017 * Folios which are not demoted are left on @demote_folios. 
1020 1018 */ 1021 1019 static unsigned int demote_folio_list(struct list_head *demote_folios, 1022 - struct pglist_data *pgdat) 1020 + struct pglist_data *pgdat, 1021 + struct mem_cgroup *memcg) 1023 1022 { 1024 - int target_nid = next_demotion_node(pgdat->node_id); 1023 + int target_nid; 1025 1024 unsigned int nr_succeeded; 1026 1025 nodemask_t allowed_mask; 1027 1026 ··· 1034 1031 */ 1035 1032 .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | 1036 1033 __GFP_NOMEMALLOC | GFP_NOWAIT, 1037 - .nid = target_nid, 1038 1034 .nmask = &allowed_mask, 1039 1035 .reason = MR_DEMOTION, 1040 1036 }; ··· 1041 1039 if (list_empty(demote_folios)) 1042 1040 return 0; 1043 1041 1044 - if (target_nid == NUMA_NO_NODE) 1042 + node_get_allowed_targets(pgdat, &allowed_mask); 1043 + mem_cgroup_node_filter_allowed(memcg, &allowed_mask); 1044 + if (nodes_empty(allowed_mask)) 1045 1045 return 0; 1046 1046 1047 - node_get_allowed_targets(pgdat, &allowed_mask); 1047 + target_nid = next_demotion_node(pgdat->node_id, &allowed_mask); 1048 + if (target_nid == NUMA_NO_NODE) 1049 + /* No lower-tier nodes or nodes were hot-unplugged. */ 1050 + return 0; 1051 + 1052 + mtc.nid = target_nid; 1048 1053 1049 1054 /* Demotion ignores all cpuset and mempolicy settings */ 1050 1055 migrate_pages(demote_folios, alloc_demote_folio, NULL, ··· 1573 1564 /* 'folio_list' is always empty here */ 1574 1565 1575 1566 /* Migrate folios selected for demotion */ 1576 - nr_demoted = demote_folio_list(&demote_folios, pgdat); 1567 + nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg); 1577 1568 nr_reclaimed += nr_demoted; 1578 1569 stat->nr_demoted += nr_demoted; 1579 1570 /* Folios that could not be demoted are still in @demote_folios */
+1 -1
security/keys/big_key.c
··· 103 103 0, enckey); 104 104 105 105 /* save aligned data to file */ 106 - file = shmem_kernel_file_setup("", enclen, 0); 106 + file = shmem_kernel_file_setup("", enclen, EMPTY_VMA_FLAGS); 107 107 if (IS_ERR(file)) { 108 108 ret = PTR_ERR(file); 109 109 goto err_enckey;
+22
tools/include/linux/bitmap.h
··· 24 24 void __bitmap_clear(unsigned long *map, unsigned int start, int len); 25 25 bool __bitmap_intersects(const unsigned long *bitmap1, 26 26 const unsigned long *bitmap2, unsigned int bits); 27 + bool __bitmap_subset(const unsigned long *bitmap1, 28 + const unsigned long *bitmap2, unsigned int nbits); 29 + bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 30 + const unsigned long *bitmap2, unsigned int nbits); 27 31 28 32 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) 29 33 #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) ··· 83 79 *dst = *src1 | *src2; 84 80 else 85 81 __bitmap_or(dst, src1, src2, nbits); 82 + } 83 + 84 + static __always_inline 85 + bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, 86 + const unsigned long *src2, unsigned int nbits) 87 + { 88 + if (small_const_nbits(nbits)) 89 + return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; 90 + return __bitmap_andnot(dst, src1, src2, nbits); 86 91 } 87 92 88 93 static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused) ··· 168 155 return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; 169 156 else 170 157 return __bitmap_intersects(src1, src2, nbits); 158 + } 159 + 160 + static __always_inline 161 + bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) 162 + { 163 + if (small_const_nbits(nbits)) 164 + return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); 165 + else 166 + return __bitmap_subset(src1, src2, nbits); 171 167 } 172 168 173 169 static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits)
+29
tools/lib/bitmap.c
··· 140 140 *p &= ~mask_to_clear; 141 141 } 142 142 } 143 + 144 + bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 145 + const unsigned long *bitmap2, unsigned int bits) 146 + { 147 + unsigned int k; 148 + unsigned int lim = bits/BITS_PER_LONG; 149 + unsigned long result = 0; 150 + 151 + for (k = 0; k < lim; k++) 152 + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); 153 + if (bits % BITS_PER_LONG) 154 + result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & 155 + BITMAP_LAST_WORD_MASK(bits)); 156 + return result != 0; 157 + } 158 + 159 + bool __bitmap_subset(const unsigned long *bitmap1, 160 + const unsigned long *bitmap2, unsigned int bits) 161 + { 162 + unsigned int k, lim = bits/BITS_PER_LONG; 163 + for (k = 0; k < lim; ++k) 164 + if (bitmap1[k] & ~bitmap2[k]) 165 + return false; 166 + 167 + if (bits % BITS_PER_LONG) 168 + if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) 169 + return false; 170 + return true; 171 + }
+1
tools/testing/selftests/mm/.gitignore
··· 12 12 map_populate 13 13 thuge-gen 14 14 compaction_test 15 + memory-failure 15 16 migration 16 17 mlock2-tests 17 18 mrelease_test
+2
tools/testing/selftests/mm/Makefile
··· 75 75 ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64 loongarch32 loongarch64)) 76 76 TEST_GEN_FILES += memfd_secret 77 77 endif 78 + TEST_GEN_FILES += memory-failure 78 79 TEST_GEN_FILES += migration 79 80 TEST_GEN_FILES += mkdirty 80 81 TEST_GEN_FILES += mlock-random-test ··· 155 154 TEST_PROGS += ksft_madv_guard.sh 156 155 TEST_PROGS += ksft_madv_populate.sh 157 156 TEST_PROGS += ksft_memfd_secret.sh 157 + TEST_PROGS += ksft_memory_failure.sh 158 158 TEST_PROGS += ksft_migration.sh 159 159 TEST_PROGS += ksft_mkdirty.sh 160 160 TEST_PROGS += ksft_mlock.sh
+2
tools/testing/selftests/mm/config
··· 11 11 CONFIG_FTRACE=y 12 12 CONFIG_PROFILING=y 13 13 CONFIG_UPROBES=y 14 + CONFIG_MEMORY_FAILURE=y 15 + CONFIG_HWPOISON_INJECT=m
+4
tools/testing/selftests/mm/ksft_memory_failure.sh
··· 1 + #!/bin/sh -e 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + ./run_vmtests.sh -t memory-failure
+359
tools/testing/selftests/mm/memory-failure.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Memory-failure functional tests. 4 + * 5 + * Author(s): Miaohe Lin <linmiaohe@huawei.com> 6 + */ 7 + 8 + #include "../kselftest_harness.h" 9 + 10 + #include <sys/mman.h> 11 + #include <linux/mman.h> 12 + #include <linux/string.h> 13 + #include <unistd.h> 14 + #include <signal.h> 15 + #include <setjmp.h> 16 + #include <unistd.h> 17 + #include <fcntl.h> 18 + #include <sys/vfs.h> 19 + #include <linux/magic.h> 20 + #include <errno.h> 21 + 22 + #include "vm_util.h" 23 + 24 + enum inject_type { 25 + MADV_HARD, 26 + MADV_SOFT, 27 + }; 28 + 29 + enum result_type { 30 + MADV_HARD_ANON, 31 + MADV_HARD_CLEAN_PAGECACHE, 32 + MADV_HARD_DIRTY_PAGECACHE, 33 + MADV_SOFT_ANON, 34 + MADV_SOFT_CLEAN_PAGECACHE, 35 + MADV_SOFT_DIRTY_PAGECACHE, 36 + }; 37 + 38 + static jmp_buf signal_jmp_buf; 39 + static siginfo_t siginfo; 40 + const char *pagemap_proc = "/proc/self/pagemap"; 41 + const char *kpageflags_proc = "/proc/kpageflags"; 42 + 43 + FIXTURE(memory_failure) 44 + { 45 + unsigned long page_size; 46 + unsigned long corrupted_size; 47 + unsigned long pfn; 48 + int pagemap_fd; 49 + int kpageflags_fd; 50 + bool triggered; 51 + }; 52 + 53 + FIXTURE_VARIANT(memory_failure) 54 + { 55 + enum inject_type type; 56 + int (*inject)(FIXTURE_DATA(memory_failure) * self, void *vaddr); 57 + }; 58 + 59 + static int madv_hard_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr) 60 + { 61 + return madvise(vaddr, self->page_size, MADV_HWPOISON); 62 + } 63 + 64 + FIXTURE_VARIANT_ADD(memory_failure, madv_hard) 65 + { 66 + .type = MADV_HARD, 67 + .inject = madv_hard_inject, 68 + }; 69 + 70 + static int madv_soft_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr) 71 + { 72 + return madvise(vaddr, self->page_size, MADV_SOFT_OFFLINE); 73 + } 74 + 75 + FIXTURE_VARIANT_ADD(memory_failure, madv_soft) 76 + { 77 + .type = MADV_SOFT, 78 + .inject = madv_soft_inject, 79 + }; 80 + 81 + static void sigbus_action(int signo, siginfo_t *si, void *args) 
82 + { 83 + memcpy(&siginfo, si, sizeof(siginfo_t)); 84 + siglongjmp(signal_jmp_buf, 1); 85 + } 86 + 87 + static int setup_sighandler(void) 88 + { 89 + struct sigaction sa = { 90 + .sa_sigaction = sigbus_action, 91 + .sa_flags = SA_SIGINFO, 92 + }; 93 + 94 + return sigaction(SIGBUS, &sa, NULL); 95 + } 96 + 97 + FIXTURE_SETUP(memory_failure) 98 + { 99 + memset(self, 0, sizeof(*self)); 100 + 101 + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); 102 + 103 + memset(&siginfo, 0, sizeof(siginfo)); 104 + if (setup_sighandler()) 105 + SKIP(return, "setup sighandler failed.\n"); 106 + 107 + self->pagemap_fd = open(pagemap_proc, O_RDONLY); 108 + if (self->pagemap_fd == -1) 109 + SKIP(return, "open %s failed.\n", pagemap_proc); 110 + 111 + self->kpageflags_fd = open(kpageflags_proc, O_RDONLY); 112 + if (self->kpageflags_fd == -1) 113 + SKIP(return, "open %s failed.\n", kpageflags_proc); 114 + } 115 + 116 + static void teardown_sighandler(void) 117 + { 118 + struct sigaction sa = { 119 + .sa_handler = SIG_DFL, 120 + .sa_flags = SA_SIGINFO, 121 + }; 122 + 123 + sigaction(SIGBUS, &sa, NULL); 124 + } 125 + 126 + FIXTURE_TEARDOWN(memory_failure) 127 + { 128 + close(self->kpageflags_fd); 129 + close(self->pagemap_fd); 130 + teardown_sighandler(); 131 + } 132 + 133 + static void prepare(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self, 134 + void *vaddr) 135 + { 136 + self->pfn = pagemap_get_pfn(self->pagemap_fd, vaddr); 137 + ASSERT_NE(self->pfn, -1UL); 138 + 139 + ASSERT_EQ(get_hardware_corrupted_size(&self->corrupted_size), 0); 140 + } 141 + 142 + static bool check_memory(void *vaddr, unsigned long size) 143 + { 144 + char buf[64]; 145 + 146 + memset(buf, 0xce, sizeof(buf)); 147 + while (size >= sizeof(buf)) { 148 + if (memcmp(vaddr, buf, sizeof(buf))) 149 + return false; 150 + size -= sizeof(buf); 151 + vaddr += sizeof(buf); 152 + } 153 + 154 + return true; 155 + } 156 + 157 + static void check(struct __test_metadata *_metadata, 
FIXTURE_DATA(memory_failure) * self, 158 + void *vaddr, enum result_type type, int setjmp) 159 + { 160 + unsigned long size; 161 + uint64_t pfn_flags; 162 + 163 + switch (type) { 164 + case MADV_SOFT_ANON: 165 + case MADV_HARD_CLEAN_PAGECACHE: 166 + case MADV_SOFT_CLEAN_PAGECACHE: 167 + case MADV_SOFT_DIRTY_PAGECACHE: 168 + /* It is not expected to receive a SIGBUS signal. */ 169 + ASSERT_EQ(setjmp, 0); 170 + 171 + /* The page content should remain unchanged. */ 172 + ASSERT_TRUE(check_memory(vaddr, self->page_size)); 173 + 174 + /* The backing pfn of addr should have changed. */ 175 + ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn); 176 + break; 177 + case MADV_HARD_ANON: 178 + case MADV_HARD_DIRTY_PAGECACHE: 179 + /* The SIGBUS signal should have been received. */ 180 + ASSERT_EQ(setjmp, 1); 181 + 182 + /* Check if siginfo contains correct SIGBUS context. */ 183 + ASSERT_EQ(siginfo.si_signo, SIGBUS); 184 + ASSERT_EQ(siginfo.si_code, BUS_MCEERR_AR); 185 + ASSERT_EQ(1UL << siginfo.si_addr_lsb, self->page_size); 186 + ASSERT_EQ(siginfo.si_addr, vaddr); 187 + 188 + /* XXX Check backing pte is hwpoison entry when supported. */ 189 + ASSERT_TRUE(pagemap_is_swapped(self->pagemap_fd, vaddr)); 190 + break; 191 + default: 192 + SKIP(return, "unexpected inject type %d.\n", type); 193 + } 194 + 195 + /* Check if the value of HardwareCorrupted has increased. */ 196 + ASSERT_EQ(get_hardware_corrupted_size(&size), 0); 197 + ASSERT_EQ(size, self->corrupted_size + self->page_size / 1024); 198 + 199 + /* Check if HWPoison flag is set. */ 200 + ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0); 201 + ASSERT_EQ(pfn_flags & KPF_HWPOISON, KPF_HWPOISON); 202 + } 203 + 204 + static void cleanup(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self, 205 + void *vaddr) 206 + { 207 + unsigned long size; 208 + uint64_t pfn_flags; 209 + 210 + ASSERT_EQ(unpoison_memory(self->pfn), 0); 211 + 212 + /* Check if HWPoison flag is cleared. 
*/ 213 + ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0); 214 + ASSERT_NE(pfn_flags & KPF_HWPOISON, KPF_HWPOISON); 215 + 216 + /* Check if the value of HardwareCorrupted has decreased. */ 217 + ASSERT_EQ(get_hardware_corrupted_size(&size), 0); 218 + ASSERT_EQ(size, self->corrupted_size); 219 + } 220 + 221 + TEST_F(memory_failure, anon) 222 + { 223 + char *addr; 224 + int ret; 225 + 226 + addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE, 227 + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 228 + if (addr == MAP_FAILED) 229 + SKIP(return, "mmap failed, not enough memory.\n"); 230 + memset(addr, 0xce, self->page_size); 231 + 232 + prepare(_metadata, self, addr); 233 + 234 + ret = sigsetjmp(signal_jmp_buf, 1); 235 + if (!self->triggered) { 236 + self->triggered = true; 237 + ASSERT_EQ(variant->inject(self, addr), 0); 238 + FORCE_READ(*addr); 239 + } 240 + 241 + if (variant->type == MADV_HARD) 242 + check(_metadata, self, addr, MADV_HARD_ANON, ret); 243 + else 244 + check(_metadata, self, addr, MADV_SOFT_ANON, ret); 245 + 246 + cleanup(_metadata, self, addr); 247 + 248 + ASSERT_EQ(munmap(addr, self->page_size), 0); 249 + } 250 + 251 + static int prepare_file(const char *fname, unsigned long size) 252 + { 253 + int fd; 254 + 255 + fd = open(fname, O_RDWR | O_CREAT, 0664); 256 + if (fd >= 0) { 257 + unlink(fname); 258 + ftruncate(fd, size); 259 + } 260 + return fd; 261 + } 262 + 263 + /* Borrowed from mm/gup_longterm.c. */ 264 + static int get_fs_type(int fd) 265 + { 266 + struct statfs fs; 267 + int ret; 268 + 269 + do { 270 + ret = fstatfs(fd, &fs); 271 + } while (ret && errno == EINTR); 272 + 273 + return ret ? 
0 : (int)fs.f_type; 274 + } 275 + 276 + TEST_F(memory_failure, clean_pagecache) 277 + { 278 + int fd; 279 + char *addr; 280 + int ret; 281 + int fs_type; 282 + 283 + fd = prepare_file("./clean-page-cache-test-file", self->page_size); 284 + if (fd < 0) 285 + SKIP(return, "failed to open test file.\n"); 286 + fs_type = get_fs_type(fd); 287 + if (!fs_type || fs_type == TMPFS_MAGIC) 288 + SKIP(return, "unsupported filesystem :%x\n", fs_type); 289 + 290 + addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE, 291 + MAP_SHARED, fd, 0); 292 + if (addr == MAP_FAILED) 293 + SKIP(return, "mmap failed, not enough memory.\n"); 294 + memset(addr, 0xce, self->page_size); 295 + fsync(fd); 296 + 297 + prepare(_metadata, self, addr); 298 + 299 + ret = sigsetjmp(signal_jmp_buf, 1); 300 + if (!self->triggered) { 301 + self->triggered = true; 302 + ASSERT_EQ(variant->inject(self, addr), 0); 303 + FORCE_READ(*addr); 304 + } 305 + 306 + if (variant->type == MADV_HARD) 307 + check(_metadata, self, addr, MADV_HARD_CLEAN_PAGECACHE, ret); 308 + else 309 + check(_metadata, self, addr, MADV_SOFT_CLEAN_PAGECACHE, ret); 310 + 311 + cleanup(_metadata, self, addr); 312 + 313 + ASSERT_EQ(munmap(addr, self->page_size), 0); 314 + 315 + ASSERT_EQ(close(fd), 0); 316 + } 317 + 318 + TEST_F(memory_failure, dirty_pagecache) 319 + { 320 + int fd; 321 + char *addr; 322 + int ret; 323 + int fs_type; 324 + 325 + fd = prepare_file("./dirty-page-cache-test-file", self->page_size); 326 + if (fd < 0) 327 + SKIP(return, "failed to open test file.\n"); 328 + fs_type = get_fs_type(fd); 329 + if (!fs_type || fs_type == TMPFS_MAGIC) 330 + SKIP(return, "unsupported filesystem :%x\n", fs_type); 331 + 332 + addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE, 333 + MAP_SHARED, fd, 0); 334 + if (addr == MAP_FAILED) 335 + SKIP(return, "mmap failed, not enough memory.\n"); 336 + memset(addr, 0xce, self->page_size); 337 + 338 + prepare(_metadata, self, addr); 339 + 340 + ret = sigsetjmp(signal_jmp_buf, 1); 341 + if 
(!self->triggered) { 342 + self->triggered = true; 343 + ASSERT_EQ(variant->inject(self, addr), 0); 344 + FORCE_READ(*addr); 345 + } 346 + 347 + if (variant->type == MADV_HARD) 348 + check(_metadata, self, addr, MADV_HARD_DIRTY_PAGECACHE, ret); 349 + else 350 + check(_metadata, self, addr, MADV_SOFT_DIRTY_PAGECACHE, ret); 351 + 352 + cleanup(_metadata, self, addr); 353 + 354 + ASSERT_EQ(munmap(addr, self->page_size), 0); 355 + 356 + ASSERT_EQ(close(fd), 0); 357 + } 358 + 359 + TEST_HARNESS_MAIN
+21
tools/testing/selftests/mm/run_vmtests.sh
··· 91 91 test VMA merge cases behave as expected 92 92 - rmap 93 93 test rmap behaves as expected 94 + - memory-failure 95 + test memory-failure behaves as expected 94 96 95 97 example: ./run_vmtests.sh -t "hmm mmap ksm" 96 98 EOF ··· 528 526 CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned 529 527 530 528 CATEGORY="rmap" run_test ./rmap 529 + 530 + # Try to load hwpoison_inject if not present. 531 + HWPOISON_DIR=/sys/kernel/debug/hwpoison/ 532 + if [ ! -d "$HWPOISON_DIR" ]; then 533 + if ! modprobe -q -R hwpoison_inject; then 534 + echo "Module hwpoison_inject not found, skipping..." 535 + else 536 + modprobe hwpoison_inject > /dev/null 2>&1 537 + LOADED_MOD=1 538 + fi 539 + fi 540 + 541 + if [ -d "$HWPOISON_DIR" ]; then 542 + CATEGORY="memory-failure" run_test ./memory-failure 543 + fi 544 + 545 + if [ -n "${LOADED_MOD}" ]; then 546 + modprobe -r hwpoison_inject > /dev/null 2>&1 547 + fi 531 548 532 549 if [ "${HAVE_HUGEPAGES}" = 1 ]; then 533 550 echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages
+41
tools/testing/selftests/mm/vm_util.c
··· 723 723 close(ksm_fd); 724 724 return ret == 1 ? 0 : -errno; 725 725 } 726 + 727 + int get_hardware_corrupted_size(unsigned long *val) 728 + { 729 + unsigned long size; 730 + char *line = NULL; 731 + size_t linelen = 0; 732 + FILE *f = fopen("/proc/meminfo", "r"); 733 + int ret = -1; 734 + 735 + if (!f) 736 + return ret; 737 + 738 + while (getline(&line, &linelen, f) > 0) { 739 + if (sscanf(line, "HardwareCorrupted: %12lu kB", &size) == 1) { 740 + *val = size; 741 + ret = 0; 742 + break; 743 + } 744 + } 745 + 746 + free(line); 747 + fclose(f); 748 + return ret; 749 + } 750 + 751 + int unpoison_memory(unsigned long pfn) 752 + { 753 + int unpoison_fd, len; 754 + char buf[32]; 755 + ssize_t ret; 756 + 757 + unpoison_fd = open("/sys/kernel/debug/hwpoison/unpoison-pfn", O_WRONLY); 758 + if (unpoison_fd < 0) 759 + return -errno; 760 + 761 + len = sprintf(buf, "0x%lx\n", pfn); 762 + ret = write(unpoison_fd, buf, len); 763 + close(unpoison_fd); 764 + 765 + return ret > 0 ? 0 : -errno; 766 + }
+3
tools/testing/selftests/mm/vm_util.h
··· 20 20 21 21 #define KPF_COMPOUND_HEAD BIT_ULL(15) 22 22 #define KPF_COMPOUND_TAIL BIT_ULL(16) 23 + #define KPF_HWPOISON BIT_ULL(19) 23 24 #define KPF_THP BIT_ULL(22) 24 25 /* 25 26 * Ignore the checkpatch warning, we must read from x but don't want to do ··· 155 154 int ksm_use_zero_pages(void); 156 155 int ksm_start(void); 157 156 int ksm_stop(void); 157 + int get_hardware_corrupted_size(unsigned long *val); 158 + int unpoison_memory(unsigned long pfn); 158 159 159 160 /* 160 161 * On ppc64 this will only work with radix 2M hugepage size
+5 -2
tools/testing/vma/Makefile
··· 6 6 7 7 include ../shared/shared.mk 8 8 9 - OFILES = $(SHARED_OFILES) vma.o maple-shim.o 9 + OFILES = $(SHARED_OFILES) main.o shared.o maple-shim.o 10 10 TARGETS = vma 11 11 12 - vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h 12 + # These can be varied to test different sizes. 13 + CFLAGS += -DNUM_VMA_FLAG_BITS=128 -DNUM_MM_FLAG_BITS=128 14 + 15 + main.o: main.c shared.c shared.h vma_internal.h tests/merge.c tests/mmap.c tests/vma.c ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h include/custom.h include/dup.h include/stubs.h 13 16 14 17 vma: $(OFILES) 15 18 $(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
+119
tools/testing/vma/include/custom.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ */ 2 + 3 + #pragma once 4 + 5 + /* 6 + * Contains declarations that exist in the kernel which have been CUSTOMISED for 7 + * testing purposes to faciliate userland VMA testing. 8 + */ 9 + 10 + #ifdef CONFIG_MMU 11 + extern unsigned long mmap_min_addr; 12 + extern unsigned long dac_mmap_min_addr; 13 + #else 14 + #define mmap_min_addr 0UL 15 + #define dac_mmap_min_addr 0UL 16 + #endif 17 + 18 + #define VM_WARN_ON(_expr) (WARN_ON(_expr)) 19 + #define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr)) 20 + #define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr)) 21 + #define VM_BUG_ON(_expr) (BUG_ON(_expr)) 22 + #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr)) 23 + 24 + /* We hardcode this for now. */ 25 + #define sysctl_max_map_count 0x1000000UL 26 + 27 + #define TASK_SIZE ((1ul << 47)-PAGE_SIZE) 28 + 29 + /* 30 + * The shared stubs do not implement this, it amounts to an fprintf(STDERR,...) 31 + * either way :) 32 + */ 33 + #define pr_warn_once pr_err 34 + 35 + #define pgtable_supports_soft_dirty() 1 36 + 37 + struct anon_vma { 38 + struct anon_vma *root; 39 + struct rb_root_cached rb_root; 40 + 41 + /* Test fields. */ 42 + bool was_cloned; 43 + bool was_unlinked; 44 + }; 45 + 46 + static inline void unlink_anon_vmas(struct vm_area_struct *vma) 47 + { 48 + /* For testing purposes, indicate that the anon_vma was unlinked. */ 49 + vma->anon_vma->was_unlinked = true; 50 + } 51 + 52 + static inline void vma_start_write(struct vm_area_struct *vma) 53 + { 54 + /* Used to indicate to tests that a write operation has begun. */ 55 + vma->vm_lock_seq++; 56 + } 57 + 58 + static inline __must_check 59 + int vma_start_write_killable(struct vm_area_struct *vma) 60 + { 61 + /* Used to indicate to tests that a write operation has begun. 
*/ 62 + vma->vm_lock_seq++; 63 + return 0; 64 + } 65 + 66 + static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src, 67 + enum vma_operation operation) 68 + { 69 + /* For testing purposes. We indicate that an anon_vma has been cloned. */ 70 + if (src->anon_vma != NULL) { 71 + dst->anon_vma = src->anon_vma; 72 + dst->anon_vma->was_cloned = true; 73 + } 74 + 75 + return 0; 76 + } 77 + 78 + static inline int __anon_vma_prepare(struct vm_area_struct *vma) 79 + { 80 + struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma)); 81 + 82 + if (!anon_vma) 83 + return -ENOMEM; 84 + 85 + anon_vma->root = anon_vma; 86 + vma->anon_vma = anon_vma; 87 + 88 + return 0; 89 + } 90 + 91 + static inline int anon_vma_prepare(struct vm_area_struct *vma) 92 + { 93 + if (likely(vma->anon_vma)) 94 + return 0; 95 + 96 + return __anon_vma_prepare(vma); 97 + } 98 + 99 + static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) 100 + { 101 + if (reset_refcnt) 102 + refcount_set(&vma->vm_refcnt, 0); 103 + } 104 + 105 + static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits) 106 + { 107 + vma_flags_t flags; 108 + int i; 109 + 110 + /* 111 + * For testing purposes: allow invalid bit specification so we can 112 + * easily test. 113 + */ 114 + vma_flags_clear_all(&flags); 115 + for (i = 0; i < count; i++) 116 + if (bits[i] < NUM_VMA_FLAG_BITS) 117 + vma_flag_set(&flags, bits[i]); 118 + return flags; 119 + }
+1320
tools/testing/vma/include/dup.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ */ 2 + 3 + #pragma once 4 + 5 + /* Forward declarations to avoid header cycle. */ 6 + struct vm_area_struct; 7 + static inline void vma_start_write(struct vm_area_struct *vma); 8 + 9 + extern const struct vm_operations_struct vma_dummy_vm_ops; 10 + extern unsigned long stack_guard_gap; 11 + extern const struct vm_operations_struct vma_dummy_vm_ops; 12 + extern unsigned long rlimit(unsigned int limit); 13 + struct task_struct *get_current(void); 14 + 15 + #define MMF_HAS_MDWE 28 16 + #define current get_current() 17 + 18 + /* 19 + * Define the task command name length as enum, then it can be visible to 20 + * BPF programs. 21 + */ 22 + enum { 23 + TASK_COMM_LEN = 16, 24 + }; 25 + 26 + /* PARTIALLY implemented types. */ 27 + struct mm_struct { 28 + struct maple_tree mm_mt; 29 + int map_count; /* number of VMAs */ 30 + unsigned long total_vm; /* Total pages mapped */ 31 + unsigned long locked_vm; /* Pages that have PG_mlocked set */ 32 + unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ 33 + unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ 34 + unsigned long stack_vm; /* VM_STACK */ 35 + 36 + unsigned long def_flags; 37 + 38 + mm_flags_t flags; /* Must use mm_flags_* helpers to access */ 39 + }; 40 + struct address_space { 41 + struct rb_root_cached i_mmap; 42 + unsigned long flags; 43 + atomic_t i_mmap_writable; 44 + }; 45 + struct file_operations { 46 + int (*mmap)(struct file *, struct vm_area_struct *); 47 + int (*mmap_prepare)(struct vm_area_desc *); 48 + }; 49 + struct file { 50 + struct address_space *f_mapping; 51 + const struct file_operations *f_op; 52 + }; 53 + struct anon_vma_chain { 54 + struct anon_vma *anon_vma; 55 + struct list_head same_vma; 56 + }; 57 + struct task_struct { 58 + char comm[TASK_COMM_LEN]; 59 + pid_t pid; 60 + struct mm_struct *mm; 61 + 62 + /* Used for emulating ABI behavior of previous Linux versions: */ 63 + unsigned int personality; 64 + }; 65 + 66 + struct kref 
{ 67 + refcount_t refcount; 68 + }; 69 + 70 + struct anon_vma_name { 71 + struct kref kref; 72 + /* The name needs to be at the end because it is dynamically sized. */ 73 + char name[]; 74 + }; 75 + 76 + /* 77 + * Contains declarations that are DUPLICATED from kernel source in order to 78 + * faciliate userland VMA testing. 79 + * 80 + * These must be kept in sync with kernel source. 81 + */ 82 + 83 + #define VMA_LOCK_OFFSET 0x40000000 84 + 85 + typedef struct { unsigned long v; } freeptr_t; 86 + 87 + #define VM_NONE 0x00000000 88 + 89 + typedef int __bitwise vma_flag_t; 90 + 91 + #define ACCESS_PRIVATE(p, member) ((p)->member) 92 + 93 + #define DECLARE_VMA_BIT(name, bitnum) \ 94 + VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum) 95 + #define DECLARE_VMA_BIT_ALIAS(name, aliased) \ 96 + VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT 97 + enum { 98 + DECLARE_VMA_BIT(READ, 0), 99 + DECLARE_VMA_BIT(WRITE, 1), 100 + DECLARE_VMA_BIT(EXEC, 2), 101 + DECLARE_VMA_BIT(SHARED, 3), 102 + /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ 103 + DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. 
*/ 104 + DECLARE_VMA_BIT(MAYWRITE, 5), 105 + DECLARE_VMA_BIT(MAYEXEC, 6), 106 + DECLARE_VMA_BIT(MAYSHARE, 7), 107 + DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */ 108 + #ifdef CONFIG_MMU 109 + DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */ 110 + #else 111 + /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ 112 + DECLARE_VMA_BIT(MAYOVERLAY, 9), 113 + #endif /* CONFIG_MMU */ 114 + /* Page-ranges managed without "struct page", just pure PFN */ 115 + DECLARE_VMA_BIT(PFNMAP, 10), 116 + DECLARE_VMA_BIT(MAYBE_GUARD, 11), 117 + DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */ 118 + DECLARE_VMA_BIT(LOCKED, 13), 119 + DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */ 120 + DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */ 121 + DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */ 122 + DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */ 123 + DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */ 124 + DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */ 125 + DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */ 126 + DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */ 127 + DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */ 128 + DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */ 129 + DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */ 130 + DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. 
*/ 131 + DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */ 132 + DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */ 133 + DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */ 134 + DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */ 135 + DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */ 136 + DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */ 137 + /* These bits are reused, we define specific uses below. */ 138 + DECLARE_VMA_BIT(HIGH_ARCH_0, 32), 139 + DECLARE_VMA_BIT(HIGH_ARCH_1, 33), 140 + DECLARE_VMA_BIT(HIGH_ARCH_2, 34), 141 + DECLARE_VMA_BIT(HIGH_ARCH_3, 35), 142 + DECLARE_VMA_BIT(HIGH_ARCH_4, 36), 143 + DECLARE_VMA_BIT(HIGH_ARCH_5, 37), 144 + DECLARE_VMA_BIT(HIGH_ARCH_6, 38), 145 + /* 146 + * This flag is used to connect VFIO to arch specific KVM code. It 147 + * indicates that the memory under this VMA is safe for use with any 148 + * non-cachable memory type inside KVM. Some VFIO devices, on some 149 + * platforms, are thought to be unsafe and can cause machine crashes 150 + * if KVM does not lock down the memory type. 151 + */ 152 + DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39), 153 + #ifdef CONFIG_PPC32 154 + DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1), 155 + #else 156 + DECLARE_VMA_BIT(DROPPABLE, 40), 157 + #endif 158 + DECLARE_VMA_BIT(UFFD_MINOR, 41), 159 + DECLARE_VMA_BIT(SEALED, 42), 160 + /* Flags that reuse flags above. */ 161 + DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0), 162 + DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1), 163 + DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2), 164 + DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3), 165 + DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4), 166 + #if defined(CONFIG_X86_USER_SHADOW_STACK) 167 + /* 168 + * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of 169 + * support core mm. 170 + * 171 + * These VMAs will get a single end guard page. 
This helps userspace 172 + * protect itself from attacks. A single page is enough for current 173 + * shadow stack archs (x86). See the comments near alloc_shstk() in 174 + * arch/x86/kernel/shstk.c for more details on the guard size. 175 + */ 176 + DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5), 177 + #elif defined(CONFIG_ARM64_GCS) 178 + /* 179 + * arm64's Guarded Control Stack implements similar functionality and 180 + * has similar constraints to shadow stacks. 181 + */ 182 + DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6), 183 + #endif 184 + DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */ 185 + DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */ 186 + DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */ 187 + DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */ 188 + DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */ 189 + DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */ 190 + DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */ 191 + DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */ 192 + #ifdef CONFIG_STACK_GROWSUP 193 + DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP), 194 + DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN), 195 + #else 196 + DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN), 197 + #endif 198 + }; 199 + 200 + #define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT) 201 + #define VM_READ INIT_VM_FLAG(READ) 202 + #define VM_WRITE INIT_VM_FLAG(WRITE) 203 + #define VM_EXEC INIT_VM_FLAG(EXEC) 204 + #define VM_SHARED INIT_VM_FLAG(SHARED) 205 + #define VM_MAYREAD INIT_VM_FLAG(MAYREAD) 206 + #define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE) 207 + #define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC) 208 + #define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE) 209 + #define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN) 210 + #ifdef CONFIG_MMU 211 + #define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING) 212 + #else 213 + #define VM_UFFD_MISSING VM_NONE 214 + #define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY) 215 + #endif 216 + #define 
VM_PFNMAP INIT_VM_FLAG(PFNMAP) 217 + #define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD) 218 + #define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP) 219 + #define VM_LOCKED INIT_VM_FLAG(LOCKED) 220 + #define VM_IO INIT_VM_FLAG(IO) 221 + #define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ) 222 + #define VM_RAND_READ INIT_VM_FLAG(RAND_READ) 223 + #define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY) 224 + #define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND) 225 + #define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT) 226 + #define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT) 227 + #define VM_NORESERVE INIT_VM_FLAG(NORESERVE) 228 + #define VM_HUGETLB INIT_VM_FLAG(HUGETLB) 229 + #define VM_SYNC INIT_VM_FLAG(SYNC) 230 + #define VM_ARCH_1 INIT_VM_FLAG(ARCH_1) 231 + #define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK) 232 + #define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP) 233 + #ifdef CONFIG_MEM_SOFT_DIRTY 234 + #define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY) 235 + #else 236 + #define VM_SOFTDIRTY VM_NONE 237 + #endif 238 + #define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP) 239 + #define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE) 240 + #define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE) 241 + #define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE) 242 + #define VM_STACK INIT_VM_FLAG(STACK) 243 + #ifdef CONFIG_STACK_GROWS_UP 244 + #define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY) 245 + #else 246 + #define VM_STACK_EARLY VM_NONE 247 + #endif 248 + #ifdef CONFIG_ARCH_HAS_PKEYS 249 + #define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT) 250 + /* Despite the naming, these are FLAGS not bits. 
*/ 251 + #define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0) 252 + #define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1) 253 + #define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2) 254 + #if CONFIG_ARCH_PKEY_BITS > 3 255 + #define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3) 256 + #else 257 + #define VM_PKEY_BIT3 VM_NONE 258 + #endif /* CONFIG_ARCH_PKEY_BITS > 3 */ 259 + #if CONFIG_ARCH_PKEY_BITS > 4 260 + #define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4) 261 + #else 262 + #define VM_PKEY_BIT4 VM_NONE 263 + #endif /* CONFIG_ARCH_PKEY_BITS > 4 */ 264 + #endif /* CONFIG_ARCH_HAS_PKEYS */ 265 + #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) 266 + #define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK) 267 + #else 268 + #define VM_SHADOW_STACK VM_NONE 269 + #endif 270 + #if defined(CONFIG_PPC64) 271 + #define VM_SAO INIT_VM_FLAG(SAO) 272 + #elif defined(CONFIG_PARISC) 273 + #define VM_GROWSUP INIT_VM_FLAG(GROWSUP) 274 + #elif defined(CONFIG_SPARC64) 275 + #define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI) 276 + #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) 277 + #elif defined(CONFIG_ARM64) 278 + #define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI) 279 + #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) 280 + #elif !defined(CONFIG_MMU) 281 + #define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY) 282 + #endif 283 + #ifndef VM_GROWSUP 284 + #define VM_GROWSUP VM_NONE 285 + #endif 286 + #ifdef CONFIG_ARM64_MTE 287 + #define VM_MTE INIT_VM_FLAG(MTE) 288 + #define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED) 289 + #else 290 + #define VM_MTE VM_NONE 291 + #define VM_MTE_ALLOWED VM_NONE 292 + #endif 293 + #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR 294 + #define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR) 295 + #else 296 + #define VM_UFFD_MINOR VM_NONE 297 + #endif 298 + #ifdef CONFIG_64BIT 299 + #define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED) 300 + #define VM_SEALED INIT_VM_FLAG(SEALED) 301 + #else 302 + #define VM_ALLOW_ANY_UNCACHED VM_NONE 303 + #define VM_SEALED VM_NONE 304 + #endif 305 + #if 
defined(CONFIG_64BIT) || defined(CONFIG_PPC32) 306 + #define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE) 307 + #else 308 + #define VM_DROPPABLE VM_NONE 309 + #endif 310 + 311 + /* Bits set in the VMA until the stack is in its final location */ 312 + #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) 313 + 314 + #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) 315 + 316 + /* Common data flag combinations */ 317 + #define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ 318 + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 319 + #define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ 320 + VM_MAYWRITE | VM_MAYEXEC) 321 + #define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ 322 + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 323 + 324 + #ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ 325 + #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC 326 + #endif 327 + 328 + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ 329 + #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS 330 + #endif 331 + 332 + #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) 333 + 334 + #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) 335 + 336 + /* VMA basic access permission flags */ 337 + #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) 338 + 339 + /* 340 + * Special vmas that are non-mergable, non-mlock()able. 341 + */ 342 + #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) 343 + 344 + #define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) 345 + #define TASK_SIZE_LOW DEFAULT_MAP_WINDOW 346 + #define TASK_SIZE_MAX DEFAULT_MAP_WINDOW 347 + #define STACK_TOP TASK_SIZE_LOW 348 + #define STACK_TOP_MAX TASK_SIZE_MAX 349 + 350 + /* This mask represents all the VMA flag bits used by mlock */ 351 + #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) 352 + 353 + #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0) 354 + 355 + #define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ 356 + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 357 + 358 + #define RLIMIT_STACK 3 /* max stack size */ 359 + #define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ 360 + 361 + #define CAP_IPC_LOCK 14 362 + 363 + #define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) 364 + 365 + #define VM_IGNORE_MERGE VM_STICKY 366 + 367 + #define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD) 368 + 369 + #define pgprot_val(x) ((x).pgprot) 370 + #define __pgprot(x) ((pgprot_t) { (x) } ) 371 + 372 + #define for_each_vma(__vmi, __vma) \ 373 + while (((__vma) = vma_next(&(__vmi))) != NULL) 374 + 375 + /* The MM code likes to work with exclusive end addresses */ 376 + #define for_each_vma_range(__vmi, __vma, __end) \ 377 + while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) 378 + 379 + #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) 380 + 381 + #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) 382 + 383 + #define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr) 384 + #define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr) 385 + 386 + #define AS_MM_ALL_LOCKS 2 387 + 388 + #define swap(a, b) \ 389 + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) 390 + 391 + /* 392 + * Flags for bug emulation. 393 + * 394 + * These occupy the top three bytes. 
395 + */ 396 + enum { 397 + READ_IMPLIES_EXEC = 0x0400000, 398 + }; 399 + 400 + struct vma_iterator { 401 + struct ma_state mas; 402 + }; 403 + 404 + #define VMA_ITERATOR(name, __mm, __addr) \ 405 + struct vma_iterator name = { \ 406 + .mas = { \ 407 + .tree = &(__mm)->mm_mt, \ 408 + .index = __addr, \ 409 + .node = NULL, \ 410 + .status = ma_start, \ 411 + }, \ 412 + } 413 + 414 + #define DEFINE_MUTEX(mutexname) \ 415 + struct mutex mutexname = {} 416 + 417 + #define DECLARE_BITMAP(name, bits) \ 418 + unsigned long name[BITS_TO_LONGS(bits)] 419 + 420 + #define EMPTY_VMA_FLAGS ((vma_flags_t){ }) 421 + 422 + /* What action should be taken after an .mmap_prepare call is complete? */ 423 + enum mmap_action_type { 424 + MMAP_NOTHING, /* Mapping is complete, no further action. */ 425 + MMAP_REMAP_PFN, /* Remap PFN range. */ 426 + MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */ 427 + }; 428 + 429 + /* 430 + * Describes an action an mmap_prepare hook can instruct to be taken to complete 431 + * the mapping of a VMA. Specified in vm_area_desc. 432 + */ 433 + struct mmap_action { 434 + union { 435 + /* Remap range. */ 436 + struct { 437 + unsigned long start; 438 + unsigned long start_pfn; 439 + unsigned long size; 440 + pgprot_t pgprot; 441 + } remap; 442 + }; 443 + enum mmap_action_type type; 444 + 445 + /* 446 + * If specified, this hook is invoked after the selected action has been 447 + * successfully completed. Note that the VMA write lock still held. 448 + * 449 + * The absolute minimum ought to be done here. 450 + * 451 + * Returns 0 on success, or an error code. 452 + */ 453 + int (*success_hook)(const struct vm_area_struct *vma); 454 + 455 + /* 456 + * If specified, this hook is invoked when an error occurred when 457 + * attempting the selection action. 458 + * 459 + * The hook can return an error code in order to filter the error, but 460 + * it is not valid to clear the error here. 
461 + */ 462 + int (*error_hook)(int err); 463 + 464 + /* 465 + * This should be set in rare instances where the operation required 466 + * that the rmap should not be able to access the VMA until 467 + * completely set up. 468 + */ 469 + bool hide_from_rmap_until_complete :1; 470 + }; 471 + 472 + /* Operations which modify VMAs. */ 473 + enum vma_operation { 474 + VMA_OP_SPLIT, 475 + VMA_OP_MERGE_UNFAULTED, 476 + VMA_OP_REMAP, 477 + VMA_OP_FORK, 478 + }; 479 + 480 + /* 481 + * Describes a VMA that is about to be mmap()'ed. Drivers may choose to 482 + * manipulate mutable fields which will cause those fields to be updated in the 483 + * resultant VMA. 484 + * 485 + * Helper functions are not required for manipulating any field. 486 + */ 487 + struct vm_area_desc { 488 + /* Immutable state. */ 489 + const struct mm_struct *const mm; 490 + struct file *const file; /* May vary from vm_file in stacked callers. */ 491 + unsigned long start; 492 + unsigned long end; 493 + 494 + /* Mutable fields. Populated with initial state. */ 495 + pgoff_t pgoff; 496 + struct file *vm_file; 497 + union { 498 + vm_flags_t vm_flags; 499 + vma_flags_t vma_flags; 500 + }; 501 + pgprot_t page_prot; 502 + 503 + /* Write-only fields. */ 504 + const struct vm_operations_struct *vm_ops; 505 + void *private_data; 506 + 507 + /* Take further action? */ 508 + struct mmap_action action; 509 + }; 510 + 511 + struct vm_area_struct { 512 + /* The first cache line has the info for VMA tree walking. */ 513 + 514 + union { 515 + struct { 516 + /* VMA covers [vm_start; vm_end) addresses within mm */ 517 + unsigned long vm_start; 518 + unsigned long vm_end; 519 + }; 520 + freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ 521 + }; 522 + 523 + struct mm_struct *vm_mm; /* The address space we belong to. */ 524 + pgprot_t vm_page_prot; /* Access permissions of this VMA. */ 525 + 526 + /* 527 + * Flags, see mm.h. 528 + * To modify use vm_flags_{init|reset|set|clear|mod} functions. 
529 + */ 530 + union { 531 + const vm_flags_t vm_flags; 532 + vma_flags_t flags; 533 + }; 534 + 535 + #ifdef CONFIG_PER_VMA_LOCK 536 + /* 537 + * Can only be written (using WRITE_ONCE()) while holding both: 538 + * - mmap_lock (in write mode) 539 + * - vm_refcnt bit at VMA_LOCK_OFFSET is set 540 + * Can be read reliably while holding one of: 541 + * - mmap_lock (in read or write mode) 542 + * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 543 + * Can be read unreliably (using READ_ONCE()) for pessimistic bailout 544 + * while holding nothing (except RCU to keep the VMA struct allocated). 545 + * 546 + * This sequence counter is explicitly allowed to overflow; sequence 547 + * counter reuse can only lead to occasional unnecessary use of the 548 + * slowpath. 549 + */ 550 + unsigned int vm_lock_seq; 551 + #endif 552 + 553 + /* 554 + * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma 555 + * list, after a COW of one of the file pages. A MAP_SHARED vma 556 + * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack 557 + * or brk vma (with NULL file) can only be in an anon_vma list. 558 + */ 559 + struct list_head anon_vma_chain; /* Serialized by mmap_lock & 560 + * page_table_lock */ 561 + struct anon_vma *anon_vma; /* Serialized by page_table_lock */ 562 + 563 + /* Function pointers to deal with this struct. */ 564 + const struct vm_operations_struct *vm_ops; 565 + 566 + /* Information about our backing store: */ 567 + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE 568 + units */ 569 + struct file * vm_file; /* File we map to (can be NULL). 
*/ 570 + void * vm_private_data; /* was vm_pte (shared mem) */ 571 + 572 + #ifdef CONFIG_SWAP 573 + atomic_long_t swap_readahead_info; 574 + #endif 575 + #ifndef CONFIG_MMU 576 + struct vm_region *vm_region; /* NOMMU mapping region */ 577 + #endif 578 + #ifdef CONFIG_NUMA 579 + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ 580 + #endif 581 + #ifdef CONFIG_NUMA_BALANCING 582 + struct vma_numab_state *numab_state; /* NUMA Balancing state */ 583 + #endif 584 + #ifdef CONFIG_PER_VMA_LOCK 585 + /* Unstable RCU readers are allowed to read this. */ 586 + refcount_t vm_refcnt; 587 + #endif 588 + /* 589 + * For areas with an address space and backing store, 590 + * linkage into the address_space->i_mmap interval tree. 591 + * 592 + */ 593 + struct { 594 + struct rb_node rb; 595 + unsigned long rb_subtree_last; 596 + } shared; 597 + #ifdef CONFIG_ANON_VMA_NAME 598 + /* 599 + * For private and shared anonymous mappings, a pointer to a null 600 + * terminated string containing the name given to the vma, or NULL if 601 + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. 602 + */ 603 + struct anon_vma_name *anon_name; 604 + #endif 605 + struct vm_userfaultfd_ctx vm_userfaultfd_ctx; 606 + } __randomize_layout; 607 + 608 + struct vm_operations_struct { 609 + void (*open)(struct vm_area_struct * area); 610 + /** 611 + * @close: Called when the VMA is being removed from the MM. 612 + * Context: User context. May sleep. Caller holds mmap_lock. 613 + */ 614 + void (*close)(struct vm_area_struct * area); 615 + /* Called any time before splitting to check if it's allowed */ 616 + int (*may_split)(struct vm_area_struct *area, unsigned long addr); 617 + int (*mremap)(struct vm_area_struct *area); 618 + /* 619 + * Called by mprotect() to make driver-specific permission 620 + * checks before mprotect() is finalised. The VMA must not 621 + * be modified. Returns 0 if mprotect() can proceed. 
622 + */ 623 + int (*mprotect)(struct vm_area_struct *vma, unsigned long start, 624 + unsigned long end, unsigned long newflags); 625 + vm_fault_t (*fault)(struct vm_fault *vmf); 626 + vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); 627 + vm_fault_t (*map_pages)(struct vm_fault *vmf, 628 + pgoff_t start_pgoff, pgoff_t end_pgoff); 629 + unsigned long (*pagesize)(struct vm_area_struct * area); 630 + 631 + /* notification that a previously read-only page is about to become 632 + * writable, if an error is returned it will cause a SIGBUS */ 633 + vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); 634 + 635 + /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ 636 + vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); 637 + 638 + /* called by access_process_vm when get_user_pages() fails, typically 639 + * for use by special VMAs. See also generic_access_phys() for a generic 640 + * implementation useful for any iomem mapping. 641 + */ 642 + int (*access)(struct vm_area_struct *vma, unsigned long addr, 643 + void *buf, int len, int write); 644 + 645 + /* Called by the /proc/PID/maps code to ask the vma whether it 646 + * has a special name. Returning non-NULL will also cause this 647 + * vma to be dumped unconditionally. */ 648 + const char *(*name)(struct vm_area_struct *vma); 649 + 650 + #ifdef CONFIG_NUMA 651 + /* 652 + * set_policy() op must add a reference to any non-NULL @new mempolicy 653 + * to hold the policy upon return. Caller should pass NULL @new to 654 + * remove a policy and fall back to surrounding context--i.e. do not 655 + * install a MPOL_DEFAULT policy, nor the task or system default 656 + * mempolicy. 657 + */ 658 + int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); 659 + 660 + /* 661 + * get_policy() op must add reference [mpol_get()] to any policy at 662 + * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure 663 + * in mm/mempolicy.c will do this automatically. 
664 + * get_policy() must NOT add a ref if the policy at (vma,addr) is not 665 + * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. 666 + * If no [shared/vma] mempolicy exists at the addr, get_policy() op 667 + * must return NULL--i.e., do not "fallback" to task or system default 668 + * policy. 669 + */ 670 + struct mempolicy *(*get_policy)(struct vm_area_struct *vma, 671 + unsigned long addr, pgoff_t *ilx); 672 + #endif 673 + #ifdef CONFIG_FIND_NORMAL_PAGE 674 + /* 675 + * Called by vm_normal_page() for special PTEs in @vma at @addr. This 676 + * allows for returning a "normal" page from vm_normal_page() even 677 + * though the PTE indicates that the "struct page" either does not exist 678 + * or should not be touched: "special". 679 + * 680 + * Do not add new users: this really only works when a "normal" page 681 + * was mapped, but then the PTE got changed to something weird (+ 682 + * marked special) that would not make pte_pfn() identify the originally 683 + * inserted page. 684 + */ 685 + struct page *(*find_normal_page)(struct vm_area_struct *vma, 686 + unsigned long addr); 687 + #endif /* CONFIG_FIND_NORMAL_PAGE */ 688 + }; 689 + 690 + struct vm_unmapped_area_info { 691 + #define VM_UNMAPPED_AREA_TOPDOWN 1 692 + unsigned long flags; 693 + unsigned long length; 694 + unsigned long low_limit; 695 + unsigned long high_limit; 696 + unsigned long align_mask; 697 + unsigned long align_offset; 698 + unsigned long start_gap; 699 + }; 700 + 701 + struct pagetable_move_control { 702 + struct vm_area_struct *old; /* Source VMA. */ 703 + struct vm_area_struct *new; /* Destination VMA. */ 704 + unsigned long old_addr; /* Address from which the move begins. */ 705 + unsigned long old_end; /* Exclusive address at which old range ends. */ 706 + unsigned long new_addr; /* Address to move page tables to. */ 707 + unsigned long len_in; /* Bytes to remap specified by user. */ 708 + 709 + bool need_rmap_locks; /* Do rmap locks need to be taken? 
*/ 710 + bool for_stack; /* Is this an early temp stack being moved? */ 711 + }; 712 + 713 + #define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ 714 + struct pagetable_move_control name = { \ 715 + .old = old_, \ 716 + .new = new_, \ 717 + .old_addr = old_addr_, \ 718 + .old_end = (old_addr_) + (len_), \ 719 + .new_addr = new_addr_, \ 720 + .len_in = len_, \ 721 + } 722 + 723 + static inline void vma_iter_invalidate(struct vma_iterator *vmi) 724 + { 725 + mas_pause(&vmi->mas); 726 + } 727 + 728 + static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) 729 + { 730 + return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot)); 731 + } 732 + 733 + static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) 734 + { 735 + return __pgprot(vm_flags); 736 + } 737 + 738 + static inline bool mm_flags_test(int flag, const struct mm_struct *mm) 739 + { 740 + return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); 741 + } 742 + 743 + /* 744 + * Copy value to the first system word of VMA flags, non-atomically. 745 + * 746 + * IMPORTANT: This does not overwrite bytes past the first system word. The 747 + * caller must account for this. 748 + */ 749 + static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value) 750 + { 751 + *ACCESS_PRIVATE(flags, __vma_flags) = value; 752 + } 753 + 754 + /* 755 + * Copy value to the first system word of VMA flags ONCE, non-atomically. 756 + * 757 + * IMPORTANT: This does not overwrite bytes past the first system word. The 758 + * caller must account for this. 759 + */ 760 + static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value) 761 + { 762 + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 763 + 764 + WRITE_ONCE(*bitmap, value); 765 + } 766 + 767 + /* Update the first system word of VMA flags setting bits, non-atomically. 
*/ 768 + static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value) 769 + { 770 + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 771 + 772 + *bitmap |= value; 773 + } 774 + 775 + /* Update the first system word of VMA flags clearing bits, non-atomically. */ 776 + static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value) 777 + { 778 + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 779 + 780 + *bitmap &= ~value; 781 + } 782 + 783 + static inline void vma_flags_clear_all(vma_flags_t *flags) 784 + { 785 + bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS); 786 + } 787 + 788 + static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit) 789 + { 790 + unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 791 + 792 + __set_bit((__force int)bit, bitmap); 793 + } 794 + 795 + /* Use when VMA is not part of the VMA tree and needs no locking */ 796 + static inline void vm_flags_init(struct vm_area_struct *vma, 797 + vm_flags_t flags) 798 + { 799 + vma_flags_clear_all(&vma->flags); 800 + vma_flags_overwrite_word(&vma->flags, flags); 801 + } 802 + 803 + /* 804 + * Use when VMA is part of the VMA tree and modifications need coordination 805 + * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and 806 + * it should be locked explicitly beforehand. 807 + */ 808 + static inline void vm_flags_reset(struct vm_area_struct *vma, 809 + vm_flags_t flags) 810 + { 811 + vma_assert_write_locked(vma); 812 + vm_flags_init(vma, flags); 813 + } 814 + 815 + static inline void vm_flags_reset_once(struct vm_area_struct *vma, 816 + vm_flags_t flags) 817 + { 818 + vma_assert_write_locked(vma); 819 + /* 820 + * The user should only be interested in avoiding reordering of 821 + * assignment to the first word. 
822 + */ 823 + vma_flags_clear_all(&vma->flags); 824 + vma_flags_overwrite_word_once(&vma->flags, flags); 825 + } 826 + 827 + static inline void vm_flags_set(struct vm_area_struct *vma, 828 + vm_flags_t flags) 829 + { 830 + vma_start_write(vma); 831 + vma_flags_set_word(&vma->flags, flags); 832 + } 833 + 834 + static inline void vm_flags_clear(struct vm_area_struct *vma, 835 + vm_flags_t flags) 836 + { 837 + vma_start_write(vma); 838 + vma_flags_clear_word(&vma->flags, flags); 839 + } 840 + 841 + static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits); 842 + 843 + #define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \ 844 + (const vma_flag_t []){__VA_ARGS__}) 845 + 846 + static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags, 847 + vma_flags_t to_test) 848 + { 849 + const unsigned long *bitmap = flags->__vma_flags; 850 + const unsigned long *bitmap_to_test = to_test.__vma_flags; 851 + 852 + return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); 853 + } 854 + 855 + #define vma_flags_test(flags, ...) \ 856 + vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__)) 857 + 858 + static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags, 859 + vma_flags_t to_test) 860 + { 861 + const unsigned long *bitmap = flags->__vma_flags; 862 + const unsigned long *bitmap_to_test = to_test.__vma_flags; 863 + 864 + return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS); 865 + } 866 + 867 + #define vma_flags_test_all(flags, ...) \ 868 + vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__)) 869 + 870 + static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set) 871 + { 872 + unsigned long *bitmap = flags->__vma_flags; 873 + const unsigned long *bitmap_to_set = to_set.__vma_flags; 874 + 875 + bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS); 876 + } 877 + 878 + #define vma_flags_set(flags, ...) 
\ 879 + vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__)) 880 + 881 + static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear) 882 + { 883 + unsigned long *bitmap = flags->__vma_flags; 884 + const unsigned long *bitmap_to_clear = to_clear.__vma_flags; 885 + 886 + bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS); 887 + } 888 + 889 + #define vma_flags_clear(flags, ...) \ 890 + vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__)) 891 + 892 + static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma, 893 + vma_flags_t flags) 894 + { 895 + return vma_flags_test_all_mask(&vma->flags, flags); 896 + } 897 + 898 + #define vma_test_all_flags(vma, ...) \ 899 + vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) 900 + 901 + static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags) 902 + { 903 + return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == 904 + (VM_SHARED | VM_MAYWRITE); 905 + } 906 + 907 + static inline void vma_set_flags_mask(struct vm_area_struct *vma, 908 + vma_flags_t flags) 909 + { 910 + vma_flags_set_mask(&vma->flags, flags); 911 + } 912 + 913 + #define vma_set_flags(vma, ...) \ 914 + vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__)) 915 + 916 + static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc, 917 + vma_flags_t flags) 918 + { 919 + return vma_flags_test_mask(&desc->vma_flags, flags); 920 + } 921 + 922 + #define vma_desc_test_flags(desc, ...) \ 923 + vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) 924 + 925 + static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc, 926 + vma_flags_t flags) 927 + { 928 + vma_flags_set_mask(&desc->vma_flags, flags); 929 + } 930 + 931 + #define vma_desc_set_flags(desc, ...) 
\ 932 + vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) 933 + 934 + static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc, 935 + vma_flags_t flags) 936 + { 937 + vma_flags_clear_mask(&desc->vma_flags, flags); 938 + } 939 + 940 + #define vma_desc_clear_flags(desc, ...) \ 941 + vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__)) 942 + 943 + static inline bool is_shared_maywrite(const vma_flags_t *flags) 944 + { 945 + return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT); 946 + } 947 + 948 + static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) 949 + { 950 + return is_shared_maywrite(&vma->flags); 951 + } 952 + 953 + static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) 954 + { 955 + /* 956 + * Uses mas_find() to get the first VMA when the iterator starts. 957 + * Calling mas_next() could skip the first entry. 958 + */ 959 + return mas_find(&vmi->mas, ULONG_MAX); 960 + } 961 + 962 + /* 963 + * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these 964 + * assertions should be made either under mmap_write_lock or when the object 965 + * has been isolated under mmap_write_lock, ensuring no competing writers. 
966 + */ 967 + static inline void vma_assert_attached(struct vm_area_struct *vma) 968 + { 969 + WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); 970 + } 971 + 972 + static inline void vma_assert_detached(struct vm_area_struct *vma) 973 + { 974 + WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); 975 + } 976 + 977 + static inline void vma_assert_write_locked(struct vm_area_struct *); 978 + static inline void vma_mark_attached(struct vm_area_struct *vma) 979 + { 980 + vma_assert_write_locked(vma); 981 + vma_assert_detached(vma); 982 + refcount_set_release(&vma->vm_refcnt, 1); 983 + } 984 + 985 + static inline void vma_mark_detached(struct vm_area_struct *vma) 986 + { 987 + vma_assert_write_locked(vma); 988 + vma_assert_attached(vma); 989 + /* We are the only writer, so no need to use vma_refcount_put(). */ 990 + if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { 991 + /* 992 + * Reader must have temporarily raised vm_refcnt but it will 993 + * drop it without using the vma since vma is write-locked. 994 + */ 995 + } 996 + } 997 + 998 + static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) 999 + { 1000 + memset(vma, 0, sizeof(*vma)); 1001 + vma->vm_mm = mm; 1002 + vma->vm_ops = &vma_dummy_vm_ops; 1003 + INIT_LIST_HEAD(&vma->anon_vma_chain); 1004 + vma->vm_lock_seq = UINT_MAX; 1005 + } 1006 + 1007 + /* 1008 + * These are defined in vma.h, but sadly vm_stat_account() is referenced by 1009 + * kernel/fork.c, so we have to these broadly available there, and temporarily 1010 + * define them here to resolve the dependency cycle. 
1011 + */ 1012 + #define is_exec_mapping(flags) \ 1013 + ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC) 1014 + 1015 + #define is_stack_mapping(flags) \ 1016 + (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK)) 1017 + 1018 + #define is_data_mapping(flags) \ 1019 + ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE) 1020 + 1021 + static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, 1022 + long npages) 1023 + { 1024 + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); 1025 + 1026 + if (is_exec_mapping(flags)) 1027 + mm->exec_vm += npages; 1028 + else if (is_stack_mapping(flags)) 1029 + mm->stack_vm += npages; 1030 + else if (is_data_mapping(flags)) 1031 + mm->data_vm += npages; 1032 + } 1033 + 1034 + #undef is_exec_mapping 1035 + #undef is_stack_mapping 1036 + #undef is_data_mapping 1037 + 1038 + static inline void vm_unacct_memory(long pages) 1039 + { 1040 + vm_acct_memory(-pages); 1041 + } 1042 + 1043 + static inline void mapping_allow_writable(struct address_space *mapping) 1044 + { 1045 + atomic_inc(&mapping->i_mmap_writable); 1046 + } 1047 + 1048 + static inline 1049 + struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) 1050 + { 1051 + return mas_find(&vmi->mas, max - 1); 1052 + } 1053 + 1054 + static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, 1055 + unsigned long start, unsigned long end, gfp_t gfp) 1056 + { 1057 + __mas_set_range(&vmi->mas, start, end - 1); 1058 + mas_store_gfp(&vmi->mas, NULL, gfp); 1059 + if (unlikely(mas_is_err(&vmi->mas))) 1060 + return -ENOMEM; 1061 + 1062 + return 0; 1063 + } 1064 + 1065 + static inline void vma_set_anonymous(struct vm_area_struct *vma) 1066 + { 1067 + vma->vm_ops = NULL; 1068 + } 1069 + 1070 + /* Declared in vma.h. 
*/ 1071 + static inline void set_vma_from_desc(struct vm_area_struct *vma, 1072 + struct vm_area_desc *desc); 1073 + 1074 + static inline int __compat_vma_mmap(const struct file_operations *f_op, 1075 + struct file *file, struct vm_area_struct *vma) 1076 + { 1077 + struct vm_area_desc desc = { 1078 + .mm = vma->vm_mm, 1079 + .file = file, 1080 + .start = vma->vm_start, 1081 + .end = vma->vm_end, 1082 + 1083 + .pgoff = vma->vm_pgoff, 1084 + .vm_file = vma->vm_file, 1085 + .vm_flags = vma->vm_flags, 1086 + .page_prot = vma->vm_page_prot, 1087 + 1088 + .action.type = MMAP_NOTHING, /* Default */ 1089 + }; 1090 + int err; 1091 + 1092 + err = f_op->mmap_prepare(&desc); 1093 + if (err) 1094 + return err; 1095 + 1096 + mmap_action_prepare(&desc.action, &desc); 1097 + set_vma_from_desc(vma, &desc); 1098 + return mmap_action_complete(&desc.action, vma); 1099 + } 1100 + 1101 + static inline int compat_vma_mmap(struct file *file, 1102 + struct vm_area_struct *vma) 1103 + { 1104 + return __compat_vma_mmap(file->f_op, file, vma); 1105 + } 1106 + 1107 + 1108 + static inline void vma_iter_init(struct vma_iterator *vmi, 1109 + struct mm_struct *mm, unsigned long addr) 1110 + { 1111 + mas_init(&vmi->mas, &mm->mm_mt, addr); 1112 + } 1113 + 1114 + static inline unsigned long vma_pages(struct vm_area_struct *vma) 1115 + { 1116 + return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 1117 + } 1118 + 1119 + static inline void mmap_assert_locked(struct mm_struct *); 1120 + static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, 1121 + unsigned long start_addr, 1122 + unsigned long end_addr) 1123 + { 1124 + unsigned long index = start_addr; 1125 + 1126 + mmap_assert_locked(mm); 1127 + return mt_find(&mm->mm_mt, &index, end_addr - 1); 1128 + } 1129 + 1130 + static inline 1131 + struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) 1132 + { 1133 + return mtree_load(&mm->mm_mt, addr); 1134 + } 1135 + 1136 + static inline struct vm_area_struct 
*vma_prev(struct vma_iterator *vmi) 1137 + { 1138 + return mas_prev(&vmi->mas, 0); 1139 + } 1140 + 1141 + static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) 1142 + { 1143 + mas_set(&vmi->mas, addr); 1144 + } 1145 + 1146 + static inline bool vma_is_anonymous(struct vm_area_struct *vma) 1147 + { 1148 + return !vma->vm_ops; 1149 + } 1150 + 1151 + /* Defined in vma.h, so temporarily define here to avoid circular dependency. */ 1152 + #define vma_iter_load(vmi) \ 1153 + mas_walk(&(vmi)->mas) 1154 + 1155 + static inline struct vm_area_struct * 1156 + find_vma_prev(struct mm_struct *mm, unsigned long addr, 1157 + struct vm_area_struct **pprev) 1158 + { 1159 + struct vm_area_struct *vma; 1160 + VMA_ITERATOR(vmi, mm, addr); 1161 + 1162 + vma = vma_iter_load(&vmi); 1163 + *pprev = vma_prev(&vmi); 1164 + if (!vma) 1165 + vma = vma_next(&vmi); 1166 + return vma; 1167 + } 1168 + 1169 + #undef vma_iter_load 1170 + 1171 + static inline void vma_iter_free(struct vma_iterator *vmi) 1172 + { 1173 + mas_destroy(&vmi->mas); 1174 + } 1175 + 1176 + static inline 1177 + struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi) 1178 + { 1179 + return mas_next_range(&vmi->mas, ULONG_MAX); 1180 + } 1181 + 1182 + bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); 1183 + 1184 + /* Update vma->vm_page_prot to reflect vma->vm_flags. */ 1185 + static inline void vma_set_page_prot(struct vm_area_struct *vma) 1186 + { 1187 + vm_flags_t vm_flags = vma->vm_flags; 1188 + pgprot_t vm_page_prot; 1189 + 1190 + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ 1191 + vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags)); 1192 + 1193 + if (vma_wants_writenotify(vma, vm_page_prot)) { 1194 + vm_flags &= ~VM_SHARED; 1195 + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. 
*/ 1196 + vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags)); 1197 + } 1198 + /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ 1199 + WRITE_ONCE(vma->vm_page_prot, vm_page_prot); 1200 + } 1201 + 1202 + static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) 1203 + { 1204 + if (vma->vm_flags & VM_GROWSDOWN) 1205 + return stack_guard_gap; 1206 + 1207 + /* See reasoning around the VM_SHADOW_STACK definition */ 1208 + if (vma->vm_flags & VM_SHADOW_STACK) 1209 + return PAGE_SIZE; 1210 + 1211 + return 0; 1212 + } 1213 + 1214 + static inline unsigned long vm_start_gap(struct vm_area_struct *vma) 1215 + { 1216 + unsigned long gap = stack_guard_start_gap(vma); 1217 + unsigned long vm_start = vma->vm_start; 1218 + 1219 + vm_start -= gap; 1220 + if (vm_start > vma->vm_start) 1221 + vm_start = 0; 1222 + return vm_start; 1223 + } 1224 + 1225 + static inline unsigned long vm_end_gap(struct vm_area_struct *vma) 1226 + { 1227 + unsigned long vm_end = vma->vm_end; 1228 + 1229 + if (vma->vm_flags & VM_GROWSUP) { 1230 + vm_end += stack_guard_gap; 1231 + if (vm_end < vma->vm_end) 1232 + vm_end = -PAGE_SIZE; 1233 + } 1234 + return vm_end; 1235 + } 1236 + 1237 + static inline bool vma_is_accessible(struct vm_area_struct *vma) 1238 + { 1239 + return vma->vm_flags & VM_ACCESS_FLAGS; 1240 + } 1241 + 1242 + static inline bool mlock_future_ok(const struct mm_struct *mm, 1243 + vm_flags_t vm_flags, unsigned long bytes) 1244 + { 1245 + unsigned long locked_pages, limit_pages; 1246 + 1247 + if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) 1248 + return true; 1249 + 1250 + locked_pages = bytes >> PAGE_SHIFT; 1251 + locked_pages += mm->locked_vm; 1252 + 1253 + limit_pages = rlimit(RLIMIT_MEMLOCK); 1254 + limit_pages >>= PAGE_SHIFT; 1255 + 1256 + return locked_pages <= limit_pages; 1257 + } 1258 + 1259 + static inline bool map_deny_write_exec(unsigned long old, unsigned long new) 1260 + { 1261 + /* If MDWE is disabled, we have 
nothing to deny. */ 1262 + if (mm_flags_test(MMF_HAS_MDWE, current->mm)) 1263 + return false; 1264 + 1265 + /* If the new VMA is not executable, we have nothing to deny. */ 1266 + if (!(new & VM_EXEC)) 1267 + return false; 1268 + 1269 + /* Under MDWE we do not accept newly writably executable VMAs... */ 1270 + if (new & VM_WRITE) 1271 + return true; 1272 + 1273 + /* ...nor previously non-executable VMAs becoming executable. */ 1274 + if (!(old & VM_EXEC)) 1275 + return true; 1276 + 1277 + return false; 1278 + } 1279 + 1280 + static inline int mapping_map_writable(struct address_space *mapping) 1281 + { 1282 + return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? 1283 + 0 : -EPERM; 1284 + } 1285 + 1286 + /* Did the driver provide valid mmap hook configuration? */ 1287 + static inline bool can_mmap_file(struct file *file) 1288 + { 1289 + bool has_mmap = file->f_op->mmap; 1290 + bool has_mmap_prepare = file->f_op->mmap_prepare; 1291 + 1292 + /* Hooks are mutually exclusive. */ 1293 + if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) 1294 + return false; 1295 + if (!has_mmap && !has_mmap_prepare) 1296 + return false; 1297 + 1298 + return true; 1299 + } 1300 + 1301 + static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) 1302 + { 1303 + if (file->f_op->mmap_prepare) 1304 + return compat_vma_mmap(file, vma); 1305 + 1306 + return file->f_op->mmap(file, vma); 1307 + } 1308 + 1309 + static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) 1310 + { 1311 + return file->f_op->mmap_prepare(desc); 1312 + } 1313 + 1314 + static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) 1315 + { 1316 + /* Changing an anonymous vma with this is illegal */ 1317 + get_file(file); 1318 + swap(vma->vm_file, file); 1319 + fput(file); 1320 + }
+428
tools/testing/vma/include/stubs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ */ 2 + 3 + #pragma once 4 + 5 + /* 6 + * Contains declarations that are STUBBED, that is that are rendered no-ops, in 7 + * order to faciliate userland VMA testing. 8 + */ 9 + 10 + /* Forward declarations. */ 11 + struct mm_struct; 12 + struct vm_area_struct; 13 + struct vm_area_desc; 14 + struct pagetable_move_control; 15 + struct mmap_action; 16 + struct file; 17 + struct anon_vma; 18 + struct anon_vma_chain; 19 + struct address_space; 20 + struct unmap_desc; 21 + 22 + #define __bitwise 23 + #define __randomize_layout 24 + 25 + #define FIRST_USER_ADDRESS 0UL 26 + #define USER_PGTABLES_CEILING 0UL 27 + 28 + #define vma_policy(vma) NULL 29 + 30 + #define down_write_nest_lock(sem, nest_lock) 31 + 32 + #define data_race(expr) expr 33 + 34 + #define ASSERT_EXCLUSIVE_WRITER(x) 35 + 36 + struct vm_userfaultfd_ctx {}; 37 + struct mempolicy {}; 38 + struct mmu_gather {}; 39 + struct mutex {}; 40 + struct vm_fault {}; 41 + 42 + static inline void userfaultfd_unmap_complete(struct mm_struct *mm, 43 + struct list_head *uf) 44 + { 45 + } 46 + 47 + static inline unsigned long move_page_tables(struct pagetable_move_control *pmc) 48 + { 49 + return 0; 50 + } 51 + 52 + static inline void free_pgd_range(struct mmu_gather *tlb, 53 + unsigned long addr, unsigned long end, 54 + unsigned long floor, unsigned long ceiling) 55 + { 56 + } 57 + 58 + static inline int ksm_execve(struct mm_struct *mm) 59 + { 60 + return 0; 61 + } 62 + 63 + static inline void ksm_exit(struct mm_struct *mm) 64 + { 65 + } 66 + 67 + static inline void vma_numab_state_init(struct vm_area_struct *vma) 68 + { 69 + } 70 + 71 + static inline void vma_numab_state_free(struct vm_area_struct *vma) 72 + { 73 + } 74 + 75 + static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, 76 + struct vm_area_struct *new_vma) 77 + { 78 + } 79 + 80 + static inline void free_anon_vma_name(struct vm_area_struct *vma) 81 + { 82 + } 83 + 84 + static inline void 
mmap_action_prepare(struct mmap_action *action, 85 + struct vm_area_desc *desc) 86 + { 87 + } 88 + 89 + static inline int mmap_action_complete(struct mmap_action *action, 90 + struct vm_area_struct *vma) 91 + { 92 + return 0; 93 + } 94 + 95 + static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) 96 + { 97 + } 98 + 99 + static inline bool shmem_file(struct file *file) 100 + { 101 + return false; 102 + } 103 + 104 + static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, 105 + const struct file *file, vm_flags_t vm_flags) 106 + { 107 + return vm_flags; 108 + } 109 + 110 + static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn) 111 + { 112 + } 113 + 114 + static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr, 115 + unsigned long pfn, unsigned long size, pgprot_t pgprot) 116 + { 117 + return 0; 118 + } 119 + 120 + static inline int do_munmap(struct mm_struct *, unsigned long, size_t, 121 + struct list_head *uf) 122 + { 123 + return 0; 124 + } 125 + 126 + /* Currently stubbed but we may later wish to un-stub. 
*/ 127 + static inline void vm_acct_memory(long pages); 128 + 129 + static inline void mmap_assert_locked(struct mm_struct *mm) 130 + { 131 + } 132 + 133 + 134 + static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) 135 + { 136 + } 137 + 138 + static inline void i_mmap_unlock_write(struct address_space *mapping) 139 + { 140 + } 141 + 142 + static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, 143 + unsigned long start, 144 + unsigned long end, 145 + struct list_head *unmaps) 146 + { 147 + return 0; 148 + } 149 + 150 + static inline void mmap_write_downgrade(struct mm_struct *mm) 151 + { 152 + } 153 + 154 + static inline void mmap_read_unlock(struct mm_struct *mm) 155 + { 156 + } 157 + 158 + static inline void mmap_write_unlock(struct mm_struct *mm) 159 + { 160 + } 161 + 162 + static inline int mmap_write_lock_killable(struct mm_struct *mm) 163 + { 164 + return 0; 165 + } 166 + 167 + static inline bool can_modify_mm(struct mm_struct *mm, 168 + unsigned long start, 169 + unsigned long end) 170 + { 171 + return true; 172 + } 173 + 174 + static inline void arch_unmap(struct mm_struct *mm, 175 + unsigned long start, 176 + unsigned long end) 177 + { 178 + } 179 + 180 + static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) 181 + { 182 + return true; 183 + } 184 + 185 + static inline void khugepaged_enter_vma(struct vm_area_struct *vma, 186 + vm_flags_t vm_flags) 187 + { 188 + } 189 + 190 + static inline bool mapping_can_writeback(struct address_space *mapping) 191 + { 192 + return true; 193 + } 194 + 195 + static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) 196 + { 197 + return false; 198 + } 199 + 200 + static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) 201 + { 202 + return false; 203 + } 204 + 205 + static inline bool userfaultfd_wp(struct vm_area_struct *vma) 206 + { 207 + return false; 208 + } 209 + 210 + static inline void mmap_assert_write_locked(struct mm_struct *mm) 211 + { 212 + } 213 
+ 214 + static inline void mutex_lock(struct mutex *lock) 215 + { 216 + } 217 + 218 + static inline void mutex_unlock(struct mutex *lock) 219 + { 220 + } 221 + 222 + static inline bool mutex_is_locked(struct mutex *lock) 223 + { 224 + return true; 225 + } 226 + 227 + static inline bool signal_pending(void *p) 228 + { 229 + return false; 230 + } 231 + 232 + static inline bool is_file_hugepages(struct file *file) 233 + { 234 + return false; 235 + } 236 + 237 + static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) 238 + { 239 + return 0; 240 + } 241 + 242 + static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, 243 + unsigned long npages) 244 + { 245 + return true; 246 + } 247 + 248 + static inline int shmem_zero_setup(struct vm_area_struct *vma) 249 + { 250 + return 0; 251 + } 252 + 253 + 254 + static inline void vm_acct_memory(long pages) 255 + { 256 + } 257 + 258 + static inline void vma_interval_tree_insert(struct vm_area_struct *vma, 259 + struct rb_root_cached *rb) 260 + { 261 + } 262 + 263 + static inline void vma_interval_tree_remove(struct vm_area_struct *vma, 264 + struct rb_root_cached *rb) 265 + { 266 + } 267 + 268 + static inline void flush_dcache_mmap_unlock(struct address_space *mapping) 269 + { 270 + } 271 + 272 + static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc, 273 + struct rb_root_cached *rb) 274 + { 275 + } 276 + 277 + static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc, 278 + struct rb_root_cached *rb) 279 + { 280 + } 281 + 282 + static inline void uprobe_mmap(struct vm_area_struct *vma) 283 + { 284 + } 285 + 286 + static inline void uprobe_munmap(struct vm_area_struct *vma, 287 + unsigned long start, unsigned long end) 288 + { 289 + } 290 + 291 + static inline void i_mmap_lock_write(struct address_space *mapping) 292 + { 293 + } 294 + 295 + static inline void anon_vma_lock_write(struct anon_vma *anon_vma) 296 + { 297 + } 298 + 299 + static inline void 
vma_assert_write_locked(struct vm_area_struct *vma) 300 + { 301 + } 302 + 303 + static inline void ksm_add_vma(struct vm_area_struct *vma) 304 + { 305 + } 306 + 307 + static inline void perf_event_mmap(struct vm_area_struct *vma) 308 + { 309 + } 310 + 311 + static inline bool vma_is_dax(struct vm_area_struct *vma) 312 + { 313 + return false; 314 + } 315 + 316 + static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) 317 + { 318 + return NULL; 319 + } 320 + 321 + static inline bool arch_validate_flags(vm_flags_t flags) 322 + { 323 + return true; 324 + } 325 + 326 + static inline void vma_close(struct vm_area_struct *vma) 327 + { 328 + } 329 + 330 + static inline int mmap_file(struct file *file, struct vm_area_struct *vma) 331 + { 332 + return 0; 333 + } 334 + 335 + static inline int is_hugepage_only_range(struct mm_struct *mm, 336 + unsigned long addr, unsigned long len) 337 + { 338 + return 0; 339 + } 340 + 341 + static inline bool capable(int cap) 342 + { 343 + return true; 344 + } 345 + 346 + static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) 347 + { 348 + return NULL; 349 + } 350 + 351 + static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 352 + struct vm_userfaultfd_ctx vm_ctx) 353 + { 354 + return true; 355 + } 356 + 357 + static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, 358 + struct anon_vma_name *anon_name2) 359 + { 360 + return true; 361 + } 362 + 363 + static inline void might_sleep(void) 364 + { 365 + } 366 + 367 + static inline void fput(struct file *file) 368 + { 369 + } 370 + 371 + static inline void mpol_put(struct mempolicy *pol) 372 + { 373 + } 374 + 375 + static inline void lru_add_drain(void) 376 + { 377 + } 378 + 379 + static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) 380 + { 381 + } 382 + 383 + static inline void update_hiwater_rss(struct mm_struct *mm) 384 + { 385 + } 386 + 387 + static inline void update_hiwater_vm(struct 
mm_struct *mm) 388 + { 389 + } 390 + 391 + static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) 392 + { 393 + } 394 + 395 + static inline void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *unmap) 396 + { 397 + } 398 + 399 + static inline void mapping_unmap_writable(struct address_space *mapping) 400 + { 401 + } 402 + 403 + static inline void flush_dcache_mmap_lock(struct address_space *mapping) 404 + { 405 + } 406 + 407 + static inline void tlb_finish_mmu(struct mmu_gather *tlb) 408 + { 409 + } 410 + 411 + static inline struct file *get_file(struct file *f) 412 + { 413 + return f; 414 + } 415 + 416 + static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) 417 + { 418 + return 0; 419 + } 420 + 421 + static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, 422 + unsigned long start, 423 + unsigned long end, 424 + struct vm_area_struct *next) 425 + { 426 + } 427 + 428 + static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+55
tools/testing/vma/main.c
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shared.h"
/*
 * Directly import the VMA implementation here. Our vma_internal.h wrapper
 * provides userland-equivalent functionality for everything vma.c uses.
 */
#include "../../../mm/vma_init.c"
#include "../../../mm/vma_exec.c"
#include "../../../mm/vma.c"

/* Tests are included directly so they can test static functions in mm/vma.c. */
#include "tests/merge.c"
#include "tests/mmap.c"
#include "tests/vma.c"

/* Helper functions which utilise static kernel functions. */

/*
 * Wrap the static vma_merge_existing_range() so test files can drive a merge
 * of an existing range. Asserts the merged VMA is attached before returning
 * it (or NULL if no merge occurred).
 */
struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
{
	struct vm_area_struct *vma;

	vma = vma_merge_existing_range(vmg);
	if (vma)
		vma_assert_attached(vma);
	return vma;
}

/*
 * Wrap the static vma_link() so test files can attach a VMA to an mm's maple
 * tree. Returns vma_link()'s result (0 on success); asserts attachment on
 * success.
 */
int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	int res;

	res = vma_link(mm, vma);
	if (!res)
		vma_assert_attached(vma);
	return res;
}

/* Main test running which invokes tests/ *.c runners. */
int main(void)
{
	int num_tests = 0, num_fail = 0;

	/* Both the maple tree and VMA caches must be set up before any test. */
	maple_tree_init();
	vma_state_init();

	run_merge_tests(&num_tests, &num_fail);
	run_mmap_tests(&num_tests, &num_fail);
	run_vma_tests(&num_tests, &num_fail);

	printf("%d tests run, %d passed, %d failed.\n",
	       num_tests, num_tests - num_fail, num_fail);

	return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
+131
tools/testing/vma/shared.c
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shared.h"


/* Set true to force the vma_iter_prealloc() override in shared.h to fail. */
bool fail_prealloc;
unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;

const struct vm_operations_struct vma_dummy_vm_ops;
struct anon_vma dummy_anon_vma;
struct task_struct __current;

/*
 * Allocate a detached VMA for the given mm covering [start, end) at pgoff
 * with the specified flags. Returns NULL on allocation failure.
 */
struct vm_area_struct *alloc_vma(struct mm_struct *mm,
		unsigned long start, unsigned long end,
		pgoff_t pgoff, vm_flags_t vm_flags)
{
	struct vm_area_struct *vma = vm_area_alloc(mm);

	if (vma == NULL)
		return NULL;

	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_pgoff = pgoff;
	vm_flags_reset(vma, vm_flags);
	vma_assert_detached(vma);

	return vma;
}

/* Detach a VMA from its tree and free it. */
void detach_free_vma(struct vm_area_struct *vma)
{
	vma_mark_detached(vma);
	vm_area_free(vma);
}

/*
 * Allocate a VMA and link it into the mm's maple tree. Returns NULL on
 * allocation or link failure (the VMA is freed on link failure).
 */
struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
		unsigned long start, unsigned long end,
		pgoff_t pgoff, vm_flags_t vm_flags)
{
	struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags);

	if (vma == NULL)
		return NULL;

	if (attach_vma(mm, vma)) {
		detach_free_vma(vma);
		return NULL;
	}

	/*
	 * Reset this counter which we use to track whether writes have
	 * begun. Linking to the tree will have caused this to be incremented,
	 * which means we will get a false positive otherwise.
	 */
	vma->vm_lock_seq = UINT_MAX;

	return vma;
}

/* Reset the dummy anon_vma tracking state between tests. */
void reset_dummy_anon_vma(void)
{
	dummy_anon_vma.was_cloned = false;
	dummy_anon_vma.was_unlinked = false;
}

/*
 * Tear down every VMA in the mm, destroy the maple tree and reset test
 * globals. Returns the number of VMAs that were present.
 */
int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
{
	struct vm_area_struct *vma;
	int count = 0;

	fail_prealloc = false;
	reset_dummy_anon_vma();

	vma_iter_set(vmi, 0);
	for_each_vma(*vmi, vma) {
		detach_free_vma(vma);
		count++;
	}

	mtree_destroy(&mm->mm_mt);
	mm->map_count = 0;
	return count;
}

/*
 * Has vma_start_write() been invoked on this VMA since the last check?
 * Reading UINT_MAX into the signed seq yields -1, so any stubbed increment
 * makes seq > -1. The counter is reset after each check.
 */
bool vma_write_started(struct vm_area_struct *vma)
{
	int seq = vma->vm_lock_seq;

	/* We reset after each check. */
	vma->vm_lock_seq = UINT_MAX;

	/* The vma_start_write() stub simply increments this value. */
	return seq > -1;
}

/* Attach @anon_vma to @vma via the supplied anon_vma_chain. */
void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
		struct anon_vma_chain *avc, struct anon_vma *anon_vma)
{
	vma->anon_vma = anon_vma;
	INIT_LIST_HEAD(&vma->anon_vma_chain);
	list_add(&avc->same_vma, &vma->anon_vma_chain);
	avc->anon_vma = vma->anon_vma;
}

/* Attach the shared dummy anon_vma to @vma. */
void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
		struct anon_vma_chain *avc)
{
	__vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
}

/* Userland stand-in for the kernel's current task accessor. */
struct task_struct *get_current(void)
{
	return &__current;
}

/* Stubbed rlimit: report an unlimited resource limit for all resources. */
unsigned long rlimit(unsigned int limit)
{
	return (unsigned long)-1;
}

/* Set a VMA's [start, end) range and page offset. */
void vma_set_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		pgoff_t pgoff)
{
	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_pgoff = pgoff;
}
+114
tools/testing/vma/shared.h
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + 3 + #pragma once 4 + 5 + #include <stdbool.h> 6 + #include <stdio.h> 7 + #include <stdlib.h> 8 + 9 + #include "generated/bit-length.h" 10 + #include "maple-shared.h" 11 + #include "vma_internal.h" 12 + #include "../../../mm/vma.h" 13 + 14 + /* Simple test runner. Assumes local num_[fail, tests] counters. */ 15 + #define TEST(name) \ 16 + do { \ 17 + (*num_tests)++; \ 18 + if (!test_##name()) { \ 19 + (*num_fail)++; \ 20 + fprintf(stderr, "Test " #name " FAILED\n"); \ 21 + } \ 22 + } while (0) 23 + 24 + #define ASSERT_TRUE(_expr) \ 25 + do { \ 26 + if (!(_expr)) { \ 27 + fprintf(stderr, \ 28 + "Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \ 29 + __FILE__, __LINE__, __FUNCTION__, #_expr); \ 30 + return false; \ 31 + } \ 32 + } while (0) 33 + 34 + #define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr)) 35 + #define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2)) 36 + #define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2)) 37 + 38 + #define IS_SET(_val, _flags) ((_val & _flags) == _flags) 39 + 40 + extern bool fail_prealloc; 41 + 42 + /* Override vma_iter_prealloc() so we can choose to fail it. */ 43 + #define vma_iter_prealloc(vmi, vma) \ 44 + (fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL)) 45 + 46 + #define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536 47 + 48 + extern unsigned long mmap_min_addr; 49 + extern unsigned long dac_mmap_min_addr; 50 + extern unsigned long stack_guard_gap; 51 + 52 + extern const struct vm_operations_struct vma_dummy_vm_ops; 53 + extern struct anon_vma dummy_anon_vma; 54 + extern struct task_struct __current; 55 + 56 + /* 57 + * Helper function which provides a wrapper around a merge existing VMA 58 + * operation. 59 + * 60 + * Declared in main.c as uses static VMA function. 61 + */ 62 + struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg); 63 + 64 + /* 65 + * Helper function to allocate a VMA and link it to the tree. 
66 + * 67 + * Declared in main.c as uses static VMA function. 68 + */ 69 + int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma); 70 + 71 + /* Helper function providing a dummy vm_ops->close() method.*/ 72 + static inline void dummy_close(struct vm_area_struct *) 73 + { 74 + } 75 + 76 + /* Helper function to simply allocate a VMA. */ 77 + struct vm_area_struct *alloc_vma(struct mm_struct *mm, 78 + unsigned long start, unsigned long end, 79 + pgoff_t pgoff, vm_flags_t vm_flags); 80 + 81 + /* Helper function to detach and free a VMA. */ 82 + void detach_free_vma(struct vm_area_struct *vma); 83 + 84 + /* Helper function to allocate a VMA and link it to the tree. */ 85 + struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, 86 + unsigned long start, unsigned long end, 87 + pgoff_t pgoff, vm_flags_t vm_flags); 88 + 89 + /* 90 + * Helper function to reset the dummy anon_vma to indicate it has not been 91 + * duplicated. 92 + */ 93 + void reset_dummy_anon_vma(void); 94 + 95 + /* 96 + * Helper function to remove all VMAs and destroy the maple tree associated with 97 + * a virtual address space. Returns a count of VMAs in the tree. 98 + */ 99 + int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi); 100 + 101 + /* Helper function to determine if VMA has had vma_start_write() performed. */ 102 + bool vma_write_started(struct vm_area_struct *vma); 103 + 104 + void __vma_set_dummy_anon_vma(struct vm_area_struct *vma, 105 + struct anon_vma_chain *avc, struct anon_vma *anon_vma); 106 + 107 + /* Provide a simple dummy VMA/anon_vma dummy setup for testing. */ 108 + void vma_set_dummy_anon_vma(struct vm_area_struct *vma, 109 + struct anon_vma_chain *avc); 110 + 111 + /* Helper function to specify a VMA's range. */ 112 + void vma_set_range(struct vm_area_struct *vma, 113 + unsigned long start, unsigned long end, 114 + pgoff_t pgoff);
+57
tools/testing/vma/tests/mmap.c
// SPDX-License-Identifier: GPL-2.0-or-later

/*
 * Exercise __mmap_region() directly: map four regions such that the third
 * and fourth merge with existing neighbours, then verify only two VMAs
 * remain with the expected merged ranges and pgoffs.
 */
static bool test_mmap_region_basic(void)
{
	struct mm_struct mm = {};
	unsigned long addr;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, &mm, 0);

	current->mm = &mm;

	/* Map at 0x300000, length 0x3000. */
	addr = __mmap_region(NULL, 0x300000, 0x3000,
			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
			     0x300, NULL);
	ASSERT_EQ(addr, 0x300000);

	/* Map at 0x250000, length 0x3000. */
	addr = __mmap_region(NULL, 0x250000, 0x3000,
			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
			     0x250, NULL);
	ASSERT_EQ(addr, 0x250000);

	/* Map at 0x303000, merging to 0x300000 of length 0x6000. */
	addr = __mmap_region(NULL, 0x303000, 0x3000,
			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
			     0x303, NULL);
	ASSERT_EQ(addr, 0x303000);

	/* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
	addr = __mmap_region(NULL, 0x24d000, 0x3000,
			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
			     0x24d, NULL);
	ASSERT_EQ(addr, 0x24d000);

	/* Merging should have left exactly two VMAs. */
	ASSERT_EQ(mm.map_count, 2);

	/* Verify the merged ranges; any other VMA start is a failure. */
	for_each_vma(vmi, vma) {
		if (vma->vm_start == 0x300000) {
			ASSERT_EQ(vma->vm_end, 0x306000);
			ASSERT_EQ(vma->vm_pgoff, 0x300);
		} else if (vma->vm_start == 0x24d000) {
			ASSERT_EQ(vma->vm_end, 0x253000);
			ASSERT_EQ(vma->vm_pgoff, 0x24d);
		} else {
			ASSERT_FALSE(true);
		}
	}

	cleanup_mm(&mm, &vmi);
	return true;
}

/* Run all mmap tests, updating the shared test/failure counters. */
static void run_mmap_tests(int *num_tests, int *num_fail)
{
	TEST(mmap_region_basic);
}
+339
tools/testing/vma/tests/vma.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + 3 + static bool compare_legacy_flags(vm_flags_t legacy_flags, vma_flags_t flags) 4 + { 5 + const unsigned long legacy_val = legacy_flags; 6 + /* The lower word should contain the precise same value. */ 7 + const unsigned long flags_lower = flags.__vma_flags[0]; 8 + #if NUM_VMA_FLAGS > BITS_PER_LONG 9 + int i; 10 + 11 + /* All bits in higher flag values should be zero. */ 12 + for (i = 1; i < NUM_VMA_FLAGS / BITS_PER_LONG; i++) { 13 + if (flags.__vma_flags[i] != 0) 14 + return false; 15 + } 16 + #endif 17 + 18 + static_assert(sizeof(legacy_flags) == sizeof(unsigned long)); 19 + 20 + return legacy_val == flags_lower; 21 + } 22 + 23 + static bool test_copy_vma(void) 24 + { 25 + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; 26 + struct mm_struct mm = {}; 27 + bool need_locks = false; 28 + VMA_ITERATOR(vmi, &mm, 0); 29 + struct vm_area_struct *vma, *vma_new, *vma_next; 30 + 31 + /* Move backwards and do not merge. */ 32 + 33 + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); 34 + vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks); 35 + ASSERT_NE(vma_new, vma); 36 + ASSERT_EQ(vma_new->vm_start, 0); 37 + ASSERT_EQ(vma_new->vm_end, 0x2000); 38 + ASSERT_EQ(vma_new->vm_pgoff, 0); 39 + vma_assert_attached(vma_new); 40 + 41 + cleanup_mm(&mm, &vmi); 42 + 43 + /* Move a VMA into position next to another and merge the two. 
*/ 44 + 45 + vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); 46 + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags); 47 + vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks); 48 + vma_assert_attached(vma_new); 49 + 50 + ASSERT_EQ(vma_new, vma_next); 51 + 52 + cleanup_mm(&mm, &vmi); 53 + return true; 54 + } 55 + 56 + static bool test_vma_flags_unchanged(void) 57 + { 58 + vma_flags_t flags = EMPTY_VMA_FLAGS; 59 + vm_flags_t legacy_flags = 0; 60 + int bit; 61 + struct vm_area_struct vma; 62 + struct vm_area_desc desc; 63 + 64 + 65 + vma.flags = EMPTY_VMA_FLAGS; 66 + desc.vma_flags = EMPTY_VMA_FLAGS; 67 + 68 + for (bit = 0; bit < BITS_PER_LONG; bit++) { 69 + vma_flags_t mask = mk_vma_flags(bit); 70 + 71 + legacy_flags |= (1UL << bit); 72 + 73 + /* Individual flags. */ 74 + vma_flags_set(&flags, bit); 75 + ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags)); 76 + 77 + /* Via mask. */ 78 + vma_flags_set_mask(&flags, mask); 79 + ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags)); 80 + 81 + /* Same for VMA. */ 82 + vma_set_flags(&vma, bit); 83 + ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags)); 84 + vma_set_flags_mask(&vma, mask); 85 + ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags)); 86 + 87 + /* Same for VMA descriptor. */ 88 + vma_desc_set_flags(&desc, bit); 89 + ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags)); 90 + vma_desc_set_flags_mask(&desc, mask); 91 + ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags)); 92 + } 93 + 94 + return true; 95 + } 96 + 97 + static bool test_vma_flags_cleared(void) 98 + { 99 + const vma_flags_t empty = EMPTY_VMA_FLAGS; 100 + vma_flags_t flags; 101 + int i; 102 + 103 + /* Set all bits high. */ 104 + memset(&flags, 1, sizeof(flags)); 105 + /* Try to clear. */ 106 + vma_flags_clear_all(&flags); 107 + /* Equal to EMPTY_VMA_FLAGS? */ 108 + ASSERT_EQ(memcmp(&empty, &flags, sizeof(flags)), 0); 109 + /* Make sure every unsigned long entry in bitmap array zero. 
*/ 110 + for (i = 0; i < sizeof(flags) / BITS_PER_LONG; i++) { 111 + const unsigned long val = flags.__vma_flags[i]; 112 + 113 + ASSERT_EQ(val, 0); 114 + } 115 + 116 + return true; 117 + } 118 + 119 + /* 120 + * Assert that VMA flag functions that operate at the system word level function 121 + * correctly. 122 + */ 123 + static bool test_vma_flags_word(void) 124 + { 125 + vma_flags_t flags = EMPTY_VMA_FLAGS; 126 + const vma_flags_t comparison = 127 + mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, 64, 65); 128 + 129 + /* Set some custom high flags. */ 130 + vma_flags_set(&flags, 64, 65); 131 + /* Now overwrite the first word. */ 132 + vma_flags_overwrite_word(&flags, VM_READ | VM_WRITE); 133 + /* Ensure they are equal. */ 134 + ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0); 135 + 136 + flags = EMPTY_VMA_FLAGS; 137 + vma_flags_set(&flags, 64, 65); 138 + 139 + /* Do the same with the _once() equivalent. */ 140 + vma_flags_overwrite_word_once(&flags, VM_READ | VM_WRITE); 141 + ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0); 142 + 143 + flags = EMPTY_VMA_FLAGS; 144 + vma_flags_set(&flags, 64, 65); 145 + 146 + /* Make sure we can set a word without disturbing other bits. */ 147 + vma_flags_set(&flags, VMA_WRITE_BIT); 148 + vma_flags_set_word(&flags, VM_READ); 149 + ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0); 150 + 151 + flags = EMPTY_VMA_FLAGS; 152 + vma_flags_set(&flags, 64, 65); 153 + 154 + /* Make sure we can clear a word without disturbing other bits. */ 155 + vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT); 156 + vma_flags_clear_word(&flags, VM_EXEC); 157 + ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0); 158 + 159 + return true; 160 + } 161 + 162 + /* Ensure that vma_flags_test() and friends works correctly. 
*/ 163 + static bool test_vma_flags_test(void) 164 + { 165 + const vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, 166 + VMA_EXEC_BIT, 64, 65); 167 + struct vm_area_struct vma; 168 + struct vm_area_desc desc; 169 + 170 + vma.flags = flags; 171 + desc.vma_flags = flags; 172 + 173 + #define do_test(...) \ 174 + ASSERT_TRUE(vma_flags_test(&flags, __VA_ARGS__)); \ 175 + ASSERT_TRUE(vma_desc_test_flags(&desc, __VA_ARGS__)) 176 + 177 + #define do_test_all_true(...) \ 178 + ASSERT_TRUE(vma_flags_test_all(&flags, __VA_ARGS__)); \ 179 + ASSERT_TRUE(vma_test_all_flags(&vma, __VA_ARGS__)) 180 + 181 + #define do_test_all_false(...) \ 182 + ASSERT_FALSE(vma_flags_test_all(&flags, __VA_ARGS__)); \ 183 + ASSERT_FALSE(vma_test_all_flags(&vma, __VA_ARGS__)) 184 + 185 + /* 186 + * Testing for some flags that are present, some that are not - should 187 + * pass. ANY flags matching should work. 188 + */ 189 + do_test(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT); 190 + /* However, the ...test_all() variant should NOT pass. */ 191 + do_test_all_false(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT); 192 + /* But should pass for flags present. */ 193 + do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65); 194 + /* Also subsets... */ 195 + do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64); 196 + do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT); 197 + do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT); 198 + do_test_all_true(VMA_READ_BIT); 199 + /* 200 + * Check _mask variant. We don't need to test extensively as macro 201 + * helper is the equivalent. 202 + */ 203 + ASSERT_TRUE(vma_flags_test_mask(&flags, flags)); 204 + ASSERT_TRUE(vma_flags_test_all_mask(&flags, flags)); 205 + 206 + /* Single bits. */ 207 + do_test(VMA_READ_BIT); 208 + do_test(VMA_WRITE_BIT); 209 + do_test(VMA_EXEC_BIT); 210 + #if NUM_VMA_FLAG_BITS > 64 211 + do_test(64); 212 + do_test(65); 213 + #endif 214 + 215 + /* Two bits. 
*/ 216 + do_test(VMA_READ_BIT, VMA_WRITE_BIT); 217 + do_test(VMA_READ_BIT, VMA_EXEC_BIT); 218 + do_test(VMA_WRITE_BIT, VMA_EXEC_BIT); 219 + /* Ordering shouldn't matter. */ 220 + do_test(VMA_WRITE_BIT, VMA_READ_BIT); 221 + do_test(VMA_EXEC_BIT, VMA_READ_BIT); 222 + do_test(VMA_EXEC_BIT, VMA_WRITE_BIT); 223 + #if NUM_VMA_FLAG_BITS > 64 224 + do_test(VMA_READ_BIT, 64); 225 + do_test(VMA_WRITE_BIT, 64); 226 + do_test(64, VMA_READ_BIT); 227 + do_test(64, VMA_WRITE_BIT); 228 + do_test(VMA_READ_BIT, 65); 229 + do_test(VMA_WRITE_BIT, 65); 230 + do_test(65, VMA_READ_BIT); 231 + do_test(65, VMA_WRITE_BIT); 232 + #endif 233 + /* Three bits. */ 234 + do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT); 235 + #if NUM_VMA_FLAG_BITS > 64 236 + /* No need to consider every single permutation. */ 237 + do_test(VMA_READ_BIT, VMA_WRITE_BIT, 64); 238 + do_test(VMA_READ_BIT, VMA_WRITE_BIT, 65); 239 + 240 + /* Four bits. */ 241 + do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64); 242 + do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 65); 243 + 244 + /* Five bits. */ 245 + do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65); 246 + #endif 247 + 248 + #undef do_test 249 + #undef do_test_all_true 250 + #undef do_test_all_false 251 + 252 + return true; 253 + } 254 + 255 + /* Ensure that vma_flags_clear() and friends works correctly. */ 256 + static bool test_vma_flags_clear(void) 257 + { 258 + vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, 259 + VMA_EXEC_BIT, 64, 65); 260 + vma_flags_t mask = mk_vma_flags(VMA_EXEC_BIT, 64); 261 + struct vm_area_struct vma; 262 + struct vm_area_desc desc; 263 + 264 + vma.flags = flags; 265 + desc.vma_flags = flags; 266 + 267 + /* Cursory check of _mask() variant, as the helper macros imply. 
*/ 268 + vma_flags_clear_mask(&flags, mask); 269 + vma_flags_clear_mask(&vma.flags, mask); 270 + vma_desc_clear_flags_mask(&desc, mask); 271 + ASSERT_FALSE(vma_flags_test(&flags, VMA_EXEC_BIT, 64)); 272 + ASSERT_FALSE(vma_flags_test(&vma.flags, VMA_EXEC_BIT, 64)); 273 + ASSERT_FALSE(vma_desc_test_flags(&desc, VMA_EXEC_BIT, 64)); 274 + /* Reset. */ 275 + vma_flags_set(&flags, VMA_EXEC_BIT, 64); 276 + vma_set_flags(&vma, VMA_EXEC_BIT, 64); 277 + vma_desc_set_flags(&desc, VMA_EXEC_BIT, 64); 278 + 279 + /* 280 + * Clear the flags and assert clear worked, then reset flags back to 281 + * include specified flags. 282 + */ 283 + #define do_test_and_reset(...) \ 284 + vma_flags_clear(&flags, __VA_ARGS__); \ 285 + vma_flags_clear(&vma.flags, __VA_ARGS__); \ 286 + vma_desc_clear_flags(&desc, __VA_ARGS__); \ 287 + ASSERT_FALSE(vma_flags_test(&flags, __VA_ARGS__)); \ 288 + ASSERT_FALSE(vma_flags_test(&vma.flags, __VA_ARGS__)); \ 289 + ASSERT_FALSE(vma_desc_test_flags(&desc, __VA_ARGS__)); \ 290 + vma_flags_set(&flags, __VA_ARGS__); \ 291 + vma_set_flags(&vma, __VA_ARGS__); \ 292 + vma_desc_set_flags(&desc, __VA_ARGS__) 293 + 294 + /* Single flags. */ 295 + do_test_and_reset(VMA_READ_BIT); 296 + do_test_and_reset(VMA_WRITE_BIT); 297 + do_test_and_reset(VMA_EXEC_BIT); 298 + do_test_and_reset(64); 299 + do_test_and_reset(65); 300 + 301 + /* Two flags, in different orders. 
*/ 302 + do_test_and_reset(VMA_READ_BIT, VMA_WRITE_BIT); 303 + do_test_and_reset(VMA_READ_BIT, VMA_EXEC_BIT); 304 + do_test_and_reset(VMA_READ_BIT, 64); 305 + do_test_and_reset(VMA_READ_BIT, 65); 306 + do_test_and_reset(VMA_WRITE_BIT, VMA_READ_BIT); 307 + do_test_and_reset(VMA_WRITE_BIT, VMA_EXEC_BIT); 308 + do_test_and_reset(VMA_WRITE_BIT, 64); 309 + do_test_and_reset(VMA_WRITE_BIT, 65); 310 + do_test_and_reset(VMA_EXEC_BIT, VMA_READ_BIT); 311 + do_test_and_reset(VMA_EXEC_BIT, VMA_WRITE_BIT); 312 + do_test_and_reset(VMA_EXEC_BIT, 64); 313 + do_test_and_reset(VMA_EXEC_BIT, 65); 314 + do_test_and_reset(64, VMA_READ_BIT); 315 + do_test_and_reset(64, VMA_WRITE_BIT); 316 + do_test_and_reset(64, VMA_EXEC_BIT); 317 + do_test_and_reset(64, 65); 318 + do_test_and_reset(65, VMA_READ_BIT); 319 + do_test_and_reset(65, VMA_WRITE_BIT); 320 + do_test_and_reset(65, VMA_EXEC_BIT); 321 + do_test_and_reset(65, 64); 322 + 323 + /* Three flags. */ 324 + 325 + #undef do_test_some_missing 326 + #undef do_test_and_reset 327 + 328 + return true; 329 + } 330 + 331 + static void run_vma_tests(int *num_tests, int *num_fail) 332 + { 333 + TEST(copy_vma); 334 + TEST(vma_flags_unchanged); 335 + TEST(vma_flags_cleared); 336 + TEST(vma_flags_word); 337 + TEST(vma_flags_test); 338 + TEST(vma_flags_clear); 339 + }
+8 -324
tools/testing/vma/vma.c tools/testing/vma/tests/merge.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-or-later 2 2 3 - #include <stdbool.h> 4 - #include <stdio.h> 5 - #include <stdlib.h> 6 - 7 - #include "generated/bit-length.h" 8 - 9 - #include "maple-shared.h" 10 - #include "vma_internal.h" 11 - 12 - /* Include so header guard set. */ 13 - #include "../../../mm/vma.h" 14 - 15 - static bool fail_prealloc; 16 - 17 - /* Then override vma_iter_prealloc() so we can choose to fail it. */ 18 - #define vma_iter_prealloc(vmi, vma) \ 19 - (fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL)) 20 - 21 - #define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536 22 - 23 - unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; 24 - unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; 25 - unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT; 26 - 27 - /* 28 - * Directly import the VMA implementation here. Our vma_internal.h wrapper 29 - * provides userland-equivalent functionality for everything vma.c uses. 30 - */ 31 - #include "../../../mm/vma_init.c" 32 - #include "../../../mm/vma_exec.c" 33 - #include "../../../mm/vma.c" 34 - 35 - const struct vm_operations_struct vma_dummy_vm_ops; 36 - static struct anon_vma dummy_anon_vma; 37 - 38 - #define ASSERT_TRUE(_expr) \ 39 - do { \ 40 - if (!(_expr)) { \ 41 - fprintf(stderr, \ 42 - "Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \ 43 - __FILE__, __LINE__, __FUNCTION__, #_expr); \ 44 - return false; \ 45 - } \ 46 - } while (0) 47 - #define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr)) 48 - #define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2)) 49 - #define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2)) 50 - 51 - #define IS_SET(_val, _flags) ((_val & _flags) == _flags) 52 - 53 - static struct task_struct __current; 54 - 55 - struct task_struct *get_current(void) 56 - { 57 - return &__current; 58 - } 59 - 60 - unsigned long rlimit(unsigned int limit) 61 - { 62 - return (unsigned long)-1; 63 - } 64 - 65 - /* Helper function to simply allocate a VMA. 
*/ 66 - static struct vm_area_struct *alloc_vma(struct mm_struct *mm, 67 - unsigned long start, 68 - unsigned long end, 69 - pgoff_t pgoff, 70 - vm_flags_t vm_flags) 71 - { 72 - struct vm_area_struct *vma = vm_area_alloc(mm); 73 - 74 - if (vma == NULL) 75 - return NULL; 76 - 77 - vma->vm_start = start; 78 - vma->vm_end = end; 79 - vma->vm_pgoff = pgoff; 80 - vm_flags_reset(vma, vm_flags); 81 - vma_assert_detached(vma); 82 - 83 - return vma; 84 - } 85 - 86 - /* Helper function to allocate a VMA and link it to the tree. */ 87 - static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma) 88 - { 89 - int res; 90 - 91 - res = vma_link(mm, vma); 92 - if (!res) 93 - vma_assert_attached(vma); 94 - return res; 95 - } 96 - 97 - static void detach_free_vma(struct vm_area_struct *vma) 98 - { 99 - vma_mark_detached(vma); 100 - vm_area_free(vma); 101 - } 102 - 103 - /* Helper function to allocate a VMA and link it to the tree. */ 104 - static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, 105 - unsigned long start, 106 - unsigned long end, 107 - pgoff_t pgoff, 108 - vm_flags_t vm_flags) 109 - { 110 - struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags); 111 - 112 - if (vma == NULL) 113 - return NULL; 114 - 115 - if (attach_vma(mm, vma)) { 116 - detach_free_vma(vma); 117 - return NULL; 118 - } 119 - 120 - /* 121 - * Reset this counter which we use to track whether writes have 122 - * begun. Linking to the tree will have caused this to be incremented, 123 - * which means we will get a false positive otherwise. 124 - */ 125 - vma->vm_lock_seq = UINT_MAX; 126 - 127 - return vma; 128 - } 129 - 130 3 /* Helper function which provides a wrapper around a merge new VMA operation. */ 131 4 static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) 132 5 { ··· 20 147 } 21 148 22 149 /* 23 - * Helper function which provides a wrapper around a merge existing VMA 24 - * operation. 
25 - */ 26 - static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg) 27 - { 28 - struct vm_area_struct *vma; 29 - 30 - vma = vma_merge_existing_range(vmg); 31 - if (vma) 32 - vma_assert_attached(vma); 33 - return vma; 34 - } 35 - 36 - /* 37 150 * Helper function which provides a wrapper around the expansion of an existing 38 151 * VMA. 39 152 */ ··· 32 173 * Helper function to reset merge state the associated VMA iterator to a 33 174 * specified new range. 34 175 */ 35 - static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, 36 - unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags) 176 + void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, 177 + unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags) 37 178 { 38 179 vma_iter_set(vmg->vmi, start); 39 180 ··· 56 197 57 198 /* Helper function to set both the VMG range and its anon_vma. */ 58 199 static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start, 59 - unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags, 60 - struct anon_vma *anon_vma) 200 + unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags, 201 + struct anon_vma *anon_vma) 61 202 { 62 203 vmg_set_range(vmg, start, end, pgoff, vm_flags); 63 204 vmg->anon_vma = anon_vma; ··· 70 211 * VMA, link it to the maple tree and return it. 
71 212 */ 72 213 static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm, 73 - struct vma_merge_struct *vmg, 74 - unsigned long start, unsigned long end, 75 - pgoff_t pgoff, vm_flags_t vm_flags, 76 - bool *was_merged) 214 + struct vma_merge_struct *vmg, unsigned long start, 215 + unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags, 216 + bool *was_merged) 77 217 { 78 218 struct vm_area_struct *merged; 79 219 ··· 90 232 ASSERT_EQ(vmg->state, VMA_MERGE_NOMERGE); 91 233 92 234 return alloc_and_link_vma(mm, start, end, pgoff, vm_flags); 93 - } 94 - 95 - /* 96 - * Helper function to reset the dummy anon_vma to indicate it has not been 97 - * duplicated. 98 - */ 99 - static void reset_dummy_anon_vma(void) 100 - { 101 - dummy_anon_vma.was_cloned = false; 102 - dummy_anon_vma.was_unlinked = false; 103 - } 104 - 105 - /* 106 - * Helper function to remove all VMAs and destroy the maple tree associated with 107 - * a virtual address space. Returns a count of VMAs in the tree. 108 - */ 109 - static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi) 110 - { 111 - struct vm_area_struct *vma; 112 - int count = 0; 113 - 114 - fail_prealloc = false; 115 - reset_dummy_anon_vma(); 116 - 117 - vma_iter_set(vmi, 0); 118 - for_each_vma(*vmi, vma) { 119 - detach_free_vma(vma); 120 - count++; 121 - } 122 - 123 - mtree_destroy(&mm->mm_mt); 124 - mm->map_count = 0; 125 - return count; 126 - } 127 - 128 - /* Helper function to determine if VMA has had vma_start_write() performed. */ 129 - static bool vma_write_started(struct vm_area_struct *vma) 130 - { 131 - int seq = vma->vm_lock_seq; 132 - 133 - /* We reset after each check. */ 134 - vma->vm_lock_seq = UINT_MAX; 135 - 136 - /* The vma_start_write() stub simply increments this value. 
*/ 137 - return seq > -1; 138 - } 139 - 140 - /* Helper function providing a dummy vm_ops->close() method.*/ 141 - static void dummy_close(struct vm_area_struct *) 142 - { 143 - } 144 - 145 - static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma, 146 - struct anon_vma_chain *avc, 147 - struct anon_vma *anon_vma) 148 - { 149 - vma->anon_vma = anon_vma; 150 - INIT_LIST_HEAD(&vma->anon_vma_chain); 151 - list_add(&avc->same_vma, &vma->anon_vma_chain); 152 - avc->anon_vma = vma->anon_vma; 153 - } 154 - 155 - static void vma_set_dummy_anon_vma(struct vm_area_struct *vma, 156 - struct anon_vma_chain *avc) 157 - { 158 - __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma); 159 235 } 160 236 161 237 static bool test_simple_merge(void) ··· 1408 1616 return true; 1409 1617 } 1410 1618 1411 - static bool test_copy_vma(void) 1412 - { 1413 - vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; 1414 - struct mm_struct mm = {}; 1415 - bool need_locks = false; 1416 - VMA_ITERATOR(vmi, &mm, 0); 1417 - struct vm_area_struct *vma, *vma_new, *vma_next; 1418 - 1419 - /* Move backwards and do not merge. */ 1420 - 1421 - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); 1422 - vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks); 1423 - ASSERT_NE(vma_new, vma); 1424 - ASSERT_EQ(vma_new->vm_start, 0); 1425 - ASSERT_EQ(vma_new->vm_end, 0x2000); 1426 - ASSERT_EQ(vma_new->vm_pgoff, 0); 1427 - vma_assert_attached(vma_new); 1428 - 1429 - cleanup_mm(&mm, &vmi); 1430 - 1431 - /* Move a VMA into position next to another and merge the two. 
*/ 1432 - 1433 - vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); 1434 - vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags); 1435 - vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks); 1436 - vma_assert_attached(vma_new); 1437 - 1438 - ASSERT_EQ(vma_new, vma_next); 1439 - 1440 - cleanup_mm(&mm, &vmi); 1441 - return true; 1442 - } 1443 - 1444 1619 static bool test_expand_only_mode(void) 1445 1620 { 1446 1621 vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; ··· 1448 1689 return true; 1449 1690 } 1450 1691 1451 - static bool test_mmap_region_basic(void) 1692 + static void run_merge_tests(int *num_tests, int *num_fail) 1452 1693 { 1453 - struct mm_struct mm = {}; 1454 - unsigned long addr; 1455 - struct vm_area_struct *vma; 1456 - VMA_ITERATOR(vmi, &mm, 0); 1457 - 1458 - current->mm = &mm; 1459 - 1460 - /* Map at 0x300000, length 0x3000. */ 1461 - addr = __mmap_region(NULL, 0x300000, 0x3000, 1462 - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, 1463 - 0x300, NULL); 1464 - ASSERT_EQ(addr, 0x300000); 1465 - 1466 - /* Map at 0x250000, length 0x3000. */ 1467 - addr = __mmap_region(NULL, 0x250000, 0x3000, 1468 - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, 1469 - 0x250, NULL); 1470 - ASSERT_EQ(addr, 0x250000); 1471 - 1472 - /* Map at 0x303000, merging to 0x300000 of length 0x6000. */ 1473 - addr = __mmap_region(NULL, 0x303000, 0x3000, 1474 - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, 1475 - 0x303, NULL); 1476 - ASSERT_EQ(addr, 0x303000); 1477 - 1478 - /* Map at 0x24d000, merging to 0x250000 of length 0x6000. 
*/ 1479 - addr = __mmap_region(NULL, 0x24d000, 0x3000, 1480 - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, 1481 - 0x24d, NULL); 1482 - ASSERT_EQ(addr, 0x24d000); 1483 - 1484 - ASSERT_EQ(mm.map_count, 2); 1485 - 1486 - for_each_vma(vmi, vma) { 1487 - if (vma->vm_start == 0x300000) { 1488 - ASSERT_EQ(vma->vm_end, 0x306000); 1489 - ASSERT_EQ(vma->vm_pgoff, 0x300); 1490 - } else if (vma->vm_start == 0x24d000) { 1491 - ASSERT_EQ(vma->vm_end, 0x253000); 1492 - ASSERT_EQ(vma->vm_pgoff, 0x24d); 1493 - } else { 1494 - ASSERT_FALSE(true); 1495 - } 1496 - } 1497 - 1498 - cleanup_mm(&mm, &vmi); 1499 - return true; 1500 - } 1501 - 1502 - int main(void) 1503 - { 1504 - int num_tests = 0, num_fail = 0; 1505 - 1506 - maple_tree_init(); 1507 - vma_state_init(); 1508 - 1509 - #define TEST(name) \ 1510 - do { \ 1511 - num_tests++; \ 1512 - if (!test_##name()) { \ 1513 - num_fail++; \ 1514 - fprintf(stderr, "Test " #name " FAILED\n"); \ 1515 - } \ 1516 - } while (0) 1517 - 1518 1694 /* Very simple tests to kick the tyres. */ 1519 1695 TEST(simple_merge); 1520 1696 TEST(simple_modify); ··· 1465 1771 TEST(dup_anon_vma); 1466 1772 TEST(vmi_prealloc_fail); 1467 1773 TEST(merge_extend); 1468 - TEST(copy_vma); 1469 1774 TEST(expand_only_mode); 1470 - 1471 - TEST(mmap_region_basic); 1472 - 1473 - #undef TEST 1474 - 1475 - printf("%d tests run, %d passed, %d failed.\n", 1476 - num_tests, num_tests - num_fail, num_fail); 1477 - 1478 - return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE; 1479 1775 }
+20 -1829
tools/testing/vma/vma_internal.h
··· 12 12 #ifndef __MM_VMA_INTERNAL_H 13 13 #define __MM_VMA_INTERNAL_H 14 14 15 - #define __private 16 - #define __bitwise 17 - #define __randomize_layout 15 + #include <stdlib.h> 18 16 19 17 #define CONFIG_MMU 20 18 #define CONFIG_PER_VMA_LOCK 21 19 22 - #include <stdlib.h> 20 + #ifdef __CONCAT 21 + #undef __CONCAT 22 + #endif 23 23 24 + #include <linux/args.h> 24 25 #include <linux/atomic.h> 26 + #include <linux/bitmap.h> 25 27 #include <linux/list.h> 26 28 #include <linux/maple_tree.h> 27 29 #include <linux/mm.h> ··· 31 29 #include <linux/refcount.h> 32 30 #include <linux/slab.h> 33 31 34 - extern unsigned long stack_guard_gap; 35 - #ifdef CONFIG_MMU 36 - extern unsigned long mmap_min_addr; 37 - extern unsigned long dac_mmap_min_addr; 38 - #else 39 - #define mmap_min_addr 0UL 40 - #define dac_mmap_min_addr 0UL 41 - #endif 42 - 43 - #define VM_WARN_ON(_expr) (WARN_ON(_expr)) 44 - #define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr)) 45 - #define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr)) 46 - #define VM_BUG_ON(_expr) (BUG_ON(_expr)) 47 - #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr)) 48 - 49 - #define MMF_HAS_MDWE 28 50 - 51 32 /* 52 - * vm_flags in vm_area_struct, see mm_types.h. 53 - * When changing, update also include/trace/events/mmflags.h 33 + * DUPLICATE typedef definitions from kernel source that have to be declared 34 + * ahead of all other headers. 54 35 */ 55 - 56 - #define VM_NONE 0x00000000 57 - 58 - /** 59 - * typedef vma_flag_t - specifies an individual VMA flag by bit number. 60 - * 61 - * This value is made type safe by sparse to avoid passing invalid flag values 62 - * around. 
63 - */ 64 - typedef int __bitwise vma_flag_t; 65 - 66 - #define DECLARE_VMA_BIT(name, bitnum) \ 67 - VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum) 68 - #define DECLARE_VMA_BIT_ALIAS(name, aliased) \ 69 - VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT 70 - enum { 71 - DECLARE_VMA_BIT(READ, 0), 72 - DECLARE_VMA_BIT(WRITE, 1), 73 - DECLARE_VMA_BIT(EXEC, 2), 74 - DECLARE_VMA_BIT(SHARED, 3), 75 - /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ 76 - DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */ 77 - DECLARE_VMA_BIT(MAYWRITE, 5), 78 - DECLARE_VMA_BIT(MAYEXEC, 6), 79 - DECLARE_VMA_BIT(MAYSHARE, 7), 80 - DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */ 81 - #ifdef CONFIG_MMU 82 - DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */ 83 - #else 84 - /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ 85 - DECLARE_VMA_BIT(MAYOVERLAY, 9), 86 - #endif /* CONFIG_MMU */ 87 - /* Page-ranges managed without "struct page", just pure PFN */ 88 - DECLARE_VMA_BIT(PFNMAP, 10), 89 - DECLARE_VMA_BIT(MAYBE_GUARD, 11), 90 - DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */ 91 - DECLARE_VMA_BIT(LOCKED, 13), 92 - DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */ 93 - DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */ 94 - DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */ 95 - DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */ 96 - DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */ 97 - DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */ 98 - DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */ 99 - DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */ 100 - DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */ 101 - DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */ 102 - DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */ 103 - 
DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */ 104 - DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */ 105 - DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */ 106 - DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */ 107 - DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */ 108 - DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */ 109 - DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */ 110 - /* These bits are reused, we define specific uses below. */ 111 - DECLARE_VMA_BIT(HIGH_ARCH_0, 32), 112 - DECLARE_VMA_BIT(HIGH_ARCH_1, 33), 113 - DECLARE_VMA_BIT(HIGH_ARCH_2, 34), 114 - DECLARE_VMA_BIT(HIGH_ARCH_3, 35), 115 - DECLARE_VMA_BIT(HIGH_ARCH_4, 36), 116 - DECLARE_VMA_BIT(HIGH_ARCH_5, 37), 117 - DECLARE_VMA_BIT(HIGH_ARCH_6, 38), 118 - /* 119 - * This flag is used to connect VFIO to arch specific KVM code. It 120 - * indicates that the memory under this VMA is safe for use with any 121 - * non-cachable memory type inside KVM. Some VFIO devices, on some 122 - * platforms, are thought to be unsafe and can cause machine crashes 123 - * if KVM does not lock down the memory type. 124 - */ 125 - DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39), 126 - #ifdef CONFIG_PPC32 127 - DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1), 128 - #else 129 - DECLARE_VMA_BIT(DROPPABLE, 40), 130 - #endif 131 - DECLARE_VMA_BIT(UFFD_MINOR, 41), 132 - DECLARE_VMA_BIT(SEALED, 42), 133 - /* Flags that reuse flags above. */ 134 - DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0), 135 - DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1), 136 - DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2), 137 - DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3), 138 - DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4), 139 - #if defined(CONFIG_X86_USER_SHADOW_STACK) 140 - /* 141 - * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of 142 - * support core mm. 
143 - * 144 - * These VMAs will get a single end guard page. This helps userspace 145 - * protect itself from attacks. A single page is enough for current 146 - * shadow stack archs (x86). See the comments near alloc_shstk() in 147 - * arch/x86/kernel/shstk.c for more details on the guard size. 148 - */ 149 - DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5), 150 - #elif defined(CONFIG_ARM64_GCS) 151 - /* 152 - * arm64's Guarded Control Stack implements similar functionality and 153 - * has similar constraints to shadow stacks. 154 - */ 155 - DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6), 156 - #endif 157 - DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */ 158 - DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */ 159 - DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */ 160 - DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */ 161 - DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */ 162 - DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */ 163 - DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */ 164 - DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */ 165 - #ifdef CONFIG_STACK_GROWSUP 166 - DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP), 167 - DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN), 168 - #else 169 - DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN), 170 - #endif 171 - }; 172 - 173 - #define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT) 174 - #define VM_READ INIT_VM_FLAG(READ) 175 - #define VM_WRITE INIT_VM_FLAG(WRITE) 176 - #define VM_EXEC INIT_VM_FLAG(EXEC) 177 - #define VM_SHARED INIT_VM_FLAG(SHARED) 178 - #define VM_MAYREAD INIT_VM_FLAG(MAYREAD) 179 - #define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE) 180 - #define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC) 181 - #define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE) 182 - #define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN) 183 - #ifdef CONFIG_MMU 184 - #define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING) 185 - #else 186 - #define VM_UFFD_MISSING VM_NONE 187 - #define VM_MAYOVERLAY 
INIT_VM_FLAG(MAYOVERLAY) 188 - #endif 189 - #define VM_PFNMAP INIT_VM_FLAG(PFNMAP) 190 - #define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD) 191 - #define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP) 192 - #define VM_LOCKED INIT_VM_FLAG(LOCKED) 193 - #define VM_IO INIT_VM_FLAG(IO) 194 - #define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ) 195 - #define VM_RAND_READ INIT_VM_FLAG(RAND_READ) 196 - #define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY) 197 - #define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND) 198 - #define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT) 199 - #define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT) 200 - #define VM_NORESERVE INIT_VM_FLAG(NORESERVE) 201 - #define VM_HUGETLB INIT_VM_FLAG(HUGETLB) 202 - #define VM_SYNC INIT_VM_FLAG(SYNC) 203 - #define VM_ARCH_1 INIT_VM_FLAG(ARCH_1) 204 - #define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK) 205 - #define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP) 206 - #ifdef CONFIG_MEM_SOFT_DIRTY 207 - #define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY) 208 - #else 209 - #define VM_SOFTDIRTY VM_NONE 210 - #endif 211 - #define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP) 212 - #define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE) 213 - #define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE) 214 - #define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE) 215 - #define VM_STACK INIT_VM_FLAG(STACK) 216 - #ifdef CONFIG_STACK_GROWS_UP 217 - #define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY) 218 - #else 219 - #define VM_STACK_EARLY VM_NONE 220 - #endif 221 - #ifdef CONFIG_ARCH_HAS_PKEYS 222 - #define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT) 223 - /* Despite the naming, these are FLAGS not bits. 
*/ 224 - #define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0) 225 - #define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1) 226 - #define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2) 227 - #if CONFIG_ARCH_PKEY_BITS > 3 228 - #define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3) 229 - #else 230 - #define VM_PKEY_BIT3 VM_NONE 231 - #endif /* CONFIG_ARCH_PKEY_BITS > 3 */ 232 - #if CONFIG_ARCH_PKEY_BITS > 4 233 - #define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4) 234 - #else 235 - #define VM_PKEY_BIT4 VM_NONE 236 - #endif /* CONFIG_ARCH_PKEY_BITS > 4 */ 237 - #endif /* CONFIG_ARCH_HAS_PKEYS */ 238 - #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS) 239 - #define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK) 240 - #else 241 - #define VM_SHADOW_STACK VM_NONE 242 - #endif 243 - #if defined(CONFIG_PPC64) 244 - #define VM_SAO INIT_VM_FLAG(SAO) 245 - #elif defined(CONFIG_PARISC) 246 - #define VM_GROWSUP INIT_VM_FLAG(GROWSUP) 247 - #elif defined(CONFIG_SPARC64) 248 - #define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI) 249 - #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) 250 - #elif defined(CONFIG_ARM64) 251 - #define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI) 252 - #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR) 253 - #elif !defined(CONFIG_MMU) 254 - #define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY) 255 - #endif 256 - #ifndef VM_GROWSUP 257 - #define VM_GROWSUP VM_NONE 258 - #endif 259 - #ifdef CONFIG_ARM64_MTE 260 - #define VM_MTE INIT_VM_FLAG(MTE) 261 - #define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED) 262 - #else 263 - #define VM_MTE VM_NONE 264 - #define VM_MTE_ALLOWED VM_NONE 265 - #endif 266 - #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR 267 - #define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR) 268 - #else 269 - #define VM_UFFD_MINOR VM_NONE 270 - #endif 271 - #ifdef CONFIG_64BIT 272 - #define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED) 273 - #define VM_SEALED INIT_VM_FLAG(SEALED) 274 - #else 275 - #define VM_ALLOW_ANY_UNCACHED VM_NONE 276 - #define VM_SEALED VM_NONE 277 - #endif 278 - #if 
defined(CONFIG_64BIT) || defined(CONFIG_PPC32) 279 - #define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE) 280 - #else 281 - #define VM_DROPPABLE VM_NONE 282 - #endif 283 - 284 - /* Bits set in the VMA until the stack is in its final location */ 285 - #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) 286 - 287 - #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) 288 - 289 - /* Common data flag combinations */ 290 - #define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ 291 - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 292 - #define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ 293 - VM_MAYWRITE | VM_MAYEXEC) 294 - #define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ 295 - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 296 - 297 - #ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ 298 - #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC 299 - #endif 300 - 301 - #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ 302 - #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS 303 - #endif 304 - 305 - #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) 306 - 307 - #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) 308 - 309 - /* VMA basic access permission flags */ 310 - #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) 311 - 312 - /* 313 - * Special vmas that are non-mergable, non-mlock()able. 314 - */ 315 - #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) 316 - 317 - #define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) 318 - #define TASK_SIZE_LOW DEFAULT_MAP_WINDOW 319 - #define TASK_SIZE_MAX DEFAULT_MAP_WINDOW 320 - #define STACK_TOP TASK_SIZE_LOW 321 - #define STACK_TOP_MAX TASK_SIZE_MAX 322 - 323 - /* This mask represents all the VMA flag bits used by mlock */ 324 - #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) 325 - 326 - #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0) 327 - 328 - #define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ 329 - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 330 - 331 - #define RLIMIT_STACK 3 /* max stack size */ 332 - #define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ 333 - 334 - #define CAP_IPC_LOCK 14 335 - 336 - /* 337 - * Flags which should be 'sticky' on merge - that is, flags which, when one VMA 338 - * possesses it but the other does not, the merged VMA should nonetheless have 339 - * applied to it: 340 - * 341 - * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its 342 - * references cleared via /proc/$pid/clear_refs, any merged VMA 343 - * should be considered soft-dirty also as it operates at a VMA 344 - * granularity. 345 - */ 346 - #define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) 347 - 348 - /* 349 - * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one 350 - * of these flags and the other not does not preclude a merge. 351 - * 352 - * VM_STICKY - When merging VMAs, VMA flags must match, unless they are 353 - * 'sticky'. If any sticky flags exist in either VMA, we simply 354 - * set all of them on the merged VMA. 355 - */ 356 - #define VM_IGNORE_MERGE VM_STICKY 357 - 358 - /* 359 - * Flags which should result in page tables being copied on fork. These are 360 - * flags which indicate that the VMA maps page tables which cannot be 361 - * reconsistuted upon page fault, so necessitate page table copying upon 362 - * 363 - * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be 364 - * reasonably reconstructed on page fault. 365 - * 366 - * VM_UFFD_WP - Encodes metadata about an installed uffd 367 - * write protect handler, which cannot be 368 - * reconstructed on page fault. 369 - * 370 - * We always copy pgtables when dst_vma has uffd-wp 371 - * enabled even if it's file-backed 372 - * (e.g. shmem). 
Because when uffd-wp is enabled, 373 - * pgtable contains uffd-wp protection information, 374 - * that's something we can't retrieve from page cache, 375 - * and skip copying will lose those info. 376 - * 377 - * VM_MAYBE_GUARD - Could contain page guard region markers which 378 - * by design are a property of the page tables 379 - * only and thus cannot be reconstructed on page 380 - * fault. 381 - */ 382 - #define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD) 383 - 384 - #define FIRST_USER_ADDRESS 0UL 385 - #define USER_PGTABLES_CEILING 0UL 386 - 387 - #define vma_policy(vma) NULL 388 - 389 - #define down_write_nest_lock(sem, nest_lock) 390 - 391 - #define pgprot_val(x) ((x).pgprot) 392 - #define __pgprot(x) ((pgprot_t) { (x) } ) 393 - 394 - #define for_each_vma(__vmi, __vma) \ 395 - while (((__vma) = vma_next(&(__vmi))) != NULL) 396 - 397 - /* The MM code likes to work with exclusive end addresses */ 398 - #define for_each_vma_range(__vmi, __vma, __end) \ 399 - while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) 400 - 401 - #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) 402 - 403 - #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) 404 - 405 - #define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr) 406 - #define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr) 407 - 408 - #define TASK_SIZE ((1ul << 47)-PAGE_SIZE) 409 - 410 - #define AS_MM_ALL_LOCKS 2 411 - 412 - /* We hardcode this for now. */ 413 - #define sysctl_max_map_count 0x1000000UL 414 - 415 - #define pgoff_t unsigned long 416 - typedef unsigned long pgprotval_t; 417 - typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; 418 - typedef unsigned long vm_flags_t; 419 - typedef __bitwise unsigned int vm_fault_t; 420 - 421 - /* 422 - * The shared stubs do not implement this, it amounts to an fprintf(STDERR,...) 
423 - * either way :) 424 - */ 425 - #define pr_warn_once pr_err 426 - 427 - #define data_race(expr) expr 428 - 429 - #define ASSERT_EXCLUSIVE_WRITER(x) 430 - 431 - #define pgtable_supports_soft_dirty() 1 432 - 433 - /** 434 - * swap - swap values of @a and @b 435 - * @a: first value 436 - * @b: second value 437 - */ 438 - #define swap(a, b) \ 439 - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) 440 - 441 - struct kref { 442 - refcount_t refcount; 443 - }; 444 - 445 - /* 446 - * Define the task command name length as enum, then it can be visible to 447 - * BPF programs. 448 - */ 449 - enum { 450 - TASK_COMM_LEN = 16, 451 - }; 452 - 453 - /* 454 - * Flags for bug emulation. 455 - * 456 - * These occupy the top three bytes. 457 - */ 458 - enum { 459 - READ_IMPLIES_EXEC = 0x0400000, 460 - }; 461 - 462 - struct task_struct { 463 - char comm[TASK_COMM_LEN]; 464 - pid_t pid; 465 - struct mm_struct *mm; 466 - 467 - /* Used for emulating ABI behavior of previous Linux versions: */ 468 - unsigned int personality; 469 - }; 470 - 471 - struct task_struct *get_current(void); 472 - #define current get_current() 473 - 474 - struct anon_vma { 475 - struct anon_vma *root; 476 - struct rb_root_cached rb_root; 477 - 478 - /* Test fields. */ 479 - bool was_cloned; 480 - bool was_unlinked; 481 - }; 482 - 483 - struct anon_vma_chain { 484 - struct anon_vma *anon_vma; 485 - struct list_head same_vma; 486 - }; 487 - 488 - struct anon_vma_name { 489 - struct kref kref; 490 - /* The name needs to be at the end because it is dynamically sized. 
*/ 491 - char name[]; 492 - }; 493 - 494 - struct vma_iterator { 495 - struct ma_state mas; 496 - }; 497 - 498 - #define VMA_ITERATOR(name, __mm, __addr) \ 499 - struct vma_iterator name = { \ 500 - .mas = { \ 501 - .tree = &(__mm)->mm_mt, \ 502 - .index = __addr, \ 503 - .node = NULL, \ 504 - .status = ma_start, \ 505 - }, \ 506 - } 507 - 508 - struct address_space { 509 - struct rb_root_cached i_mmap; 510 - unsigned long flags; 511 - atomic_t i_mmap_writable; 512 - }; 513 - 514 - struct vm_userfaultfd_ctx {}; 515 - struct mempolicy {}; 516 - struct mmu_gather {}; 517 - struct mutex {}; 518 - #define DEFINE_MUTEX(mutexname) \ 519 - struct mutex mutexname = {} 520 - 521 - #define DECLARE_BITMAP(name, bits) \ 522 - unsigned long name[BITS_TO_LONGS(bits)] 523 - 524 - #define NUM_MM_FLAG_BITS (64) 36 + #define __private 37 + /* NUM_MM_FLAG_BITS defined by test code. */ 525 38 typedef struct { 526 39 __private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); 527 40 } mm_flags_t; 528 - 529 - /* 530 - * Opaque type representing current VMA (vm_area_struct) flag state. Must be 531 - * accessed via vma_flags_xxx() helper functions. 532 - */ 533 - #define NUM_VMA_FLAG_BITS BITS_PER_LONG 41 + /* NUM_VMA_FLAG_BITS defined by test code. 
*/ 534 42 typedef struct { 535 43 DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS); 536 44 } __private vma_flags_t; 537 45 538 - struct mm_struct { 539 - struct maple_tree mm_mt; 540 - int map_count; /* number of VMAs */ 541 - unsigned long total_vm; /* Total pages mapped */ 542 - unsigned long locked_vm; /* Pages that have PG_mlocked set */ 543 - unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ 544 - unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ 545 - unsigned long stack_vm; /* VM_STACK */ 546 - 547 - unsigned long def_flags; 548 - 549 - mm_flags_t flags; /* Must use mm_flags_* helpers to access */ 550 - }; 551 - 552 - struct vm_area_struct; 553 - 554 - 555 - /* What action should be taken after an .mmap_prepare call is complete? */ 556 - enum mmap_action_type { 557 - MMAP_NOTHING, /* Mapping is complete, no further action. */ 558 - MMAP_REMAP_PFN, /* Remap PFN range. */ 559 - MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */ 560 - }; 561 - 562 - /* 563 - * Describes an action an mmap_prepare hook can instruct to be taken to complete 564 - * the mapping of a VMA. Specified in vm_area_desc. 565 - */ 566 - struct mmap_action { 567 - union { 568 - /* Remap range. */ 569 - struct { 570 - unsigned long start; 571 - unsigned long start_pfn; 572 - unsigned long size; 573 - pgprot_t pgprot; 574 - } remap; 575 - }; 576 - enum mmap_action_type type; 577 - 578 - /* 579 - * If specified, this hook is invoked after the selected action has been 580 - * successfully completed. Note that the VMA write lock still held. 581 - * 582 - * The absolute minimum ought to be done here. 583 - * 584 - * Returns 0 on success, or an error code. 585 - */ 586 - int (*success_hook)(const struct vm_area_struct *vma); 587 - 588 - /* 589 - * If specified, this hook is invoked when an error occurred when 590 - * attempting the selection action. 591 - * 592 - * The hook can return an error code in order to filter the error, but 593 - * it is not valid to clear the error here. 
594 - */ 595 - int (*error_hook)(int err); 596 - 597 - /* 598 - * This should be set in rare instances where the operation required 599 - * that the rmap should not be able to access the VMA until 600 - * completely set up. 601 - */ 602 - bool hide_from_rmap_until_complete :1; 603 - }; 604 - 605 - /* Operations which modify VMAs. */ 606 - enum vma_operation { 607 - VMA_OP_SPLIT, 608 - VMA_OP_MERGE_UNFAULTED, 609 - VMA_OP_REMAP, 610 - VMA_OP_FORK, 611 - }; 612 - 613 - /* 614 - * Describes a VMA that is about to be mmap()'ed. Drivers may choose to 615 - * manipulate mutable fields which will cause those fields to be updated in the 616 - * resultant VMA. 617 - * 618 - * Helper functions are not required for manipulating any field. 619 - */ 620 - struct vm_area_desc { 621 - /* Immutable state. */ 622 - const struct mm_struct *const mm; 623 - struct file *const file; /* May vary from vm_file in stacked callers. */ 624 - unsigned long start; 625 - unsigned long end; 626 - 627 - /* Mutable fields. Populated with initial state. */ 628 - pgoff_t pgoff; 629 - struct file *vm_file; 630 - union { 631 - vm_flags_t vm_flags; 632 - vma_flags_t vma_flags; 633 - }; 634 - pgprot_t page_prot; 635 - 636 - /* Write-only fields. */ 637 - const struct vm_operations_struct *vm_ops; 638 - void *private_data; 639 - 640 - /* Take further action? */ 641 - struct mmap_action action; 642 - }; 643 - 644 - struct file_operations { 645 - int (*mmap)(struct file *, struct vm_area_struct *); 646 - int (*mmap_prepare)(struct vm_area_desc *); 647 - }; 648 - 649 - struct file { 650 - struct address_space *f_mapping; 651 - const struct file_operations *f_op; 652 - }; 653 - 654 - #define VMA_LOCK_OFFSET 0x40000000 655 - 656 - typedef struct { unsigned long v; } freeptr_t; 657 - 658 - struct vm_area_struct { 659 - /* The first cache line has the info for VMA tree walking. 
*/ 660 - 661 - union { 662 - struct { 663 - /* VMA covers [vm_start; vm_end) addresses within mm */ 664 - unsigned long vm_start; 665 - unsigned long vm_end; 666 - }; 667 - freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ 668 - }; 669 - 670 - struct mm_struct *vm_mm; /* The address space we belong to. */ 671 - pgprot_t vm_page_prot; /* Access permissions of this VMA. */ 672 - 673 - /* 674 - * Flags, see mm.h. 675 - * To modify use vm_flags_{init|reset|set|clear|mod} functions. 676 - */ 677 - union { 678 - const vm_flags_t vm_flags; 679 - vma_flags_t flags; 680 - }; 681 - 682 - #ifdef CONFIG_PER_VMA_LOCK 683 - /* 684 - * Can only be written (using WRITE_ONCE()) while holding both: 685 - * - mmap_lock (in write mode) 686 - * - vm_refcnt bit at VMA_LOCK_OFFSET is set 687 - * Can be read reliably while holding one of: 688 - * - mmap_lock (in read or write mode) 689 - * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 690 - * Can be read unreliably (using READ_ONCE()) for pessimistic bailout 691 - * while holding nothing (except RCU to keep the VMA struct allocated). 692 - * 693 - * This sequence counter is explicitly allowed to overflow; sequence 694 - * counter reuse can only lead to occasional unnecessary use of the 695 - * slowpath. 696 - */ 697 - unsigned int vm_lock_seq; 698 - #endif 699 - 700 - /* 701 - * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma 702 - * list, after a COW of one of the file pages. A MAP_SHARED vma 703 - * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack 704 - * or brk vma (with NULL file) can only be in an anon_vma list. 705 - */ 706 - struct list_head anon_vma_chain; /* Serialized by mmap_lock & 707 - * page_table_lock */ 708 - struct anon_vma *anon_vma; /* Serialized by page_table_lock */ 709 - 710 - /* Function pointers to deal with this struct. 
*/ 711 - const struct vm_operations_struct *vm_ops; 712 - 713 - /* Information about our backing store: */ 714 - unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE 715 - units */ 716 - struct file * vm_file; /* File we map to (can be NULL). */ 717 - void * vm_private_data; /* was vm_pte (shared mem) */ 718 - 719 - #ifdef CONFIG_SWAP 720 - atomic_long_t swap_readahead_info; 721 - #endif 722 - #ifndef CONFIG_MMU 723 - struct vm_region *vm_region; /* NOMMU mapping region */ 724 - #endif 725 - #ifdef CONFIG_NUMA 726 - struct mempolicy *vm_policy; /* NUMA policy for the VMA */ 727 - #endif 728 - #ifdef CONFIG_NUMA_BALANCING 729 - struct vma_numab_state *numab_state; /* NUMA Balancing state */ 730 - #endif 731 - #ifdef CONFIG_PER_VMA_LOCK 732 - /* Unstable RCU readers are allowed to read this. */ 733 - refcount_t vm_refcnt; 734 - #endif 735 - /* 736 - * For areas with an address space and backing store, 737 - * linkage into the address_space->i_mmap interval tree. 738 - * 739 - */ 740 - struct { 741 - struct rb_node rb; 742 - unsigned long rb_subtree_last; 743 - } shared; 744 - #ifdef CONFIG_ANON_VMA_NAME 745 - /* 746 - * For private and shared anonymous mappings, a pointer to a null 747 - * terminated string containing the name given to the vma, or NULL if 748 - * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. 749 - */ 750 - struct anon_vma_name *anon_name; 751 - #endif 752 - struct vm_userfaultfd_ctx vm_userfaultfd_ctx; 753 - } __randomize_layout; 754 - 755 - struct vm_fault {}; 756 - 757 - struct vm_operations_struct { 758 - void (*open)(struct vm_area_struct * area); 759 - /** 760 - * @close: Called when the VMA is being removed from the MM. 761 - * Context: User context. May sleep. Caller holds mmap_lock. 
762 - */ 763 - void (*close)(struct vm_area_struct * area); 764 - /* Called any time before splitting to check if it's allowed */ 765 - int (*may_split)(struct vm_area_struct *area, unsigned long addr); 766 - int (*mremap)(struct vm_area_struct *area); 767 - /* 768 - * Called by mprotect() to make driver-specific permission 769 - * checks before mprotect() is finalised. The VMA must not 770 - * be modified. Returns 0 if mprotect() can proceed. 771 - */ 772 - int (*mprotect)(struct vm_area_struct *vma, unsigned long start, 773 - unsigned long end, unsigned long newflags); 774 - vm_fault_t (*fault)(struct vm_fault *vmf); 775 - vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); 776 - vm_fault_t (*map_pages)(struct vm_fault *vmf, 777 - pgoff_t start_pgoff, pgoff_t end_pgoff); 778 - unsigned long (*pagesize)(struct vm_area_struct * area); 779 - 780 - /* notification that a previously read-only page is about to become 781 - * writable, if an error is returned it will cause a SIGBUS */ 782 - vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); 783 - 784 - /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ 785 - vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); 786 - 787 - /* called by access_process_vm when get_user_pages() fails, typically 788 - * for use by special VMAs. See also generic_access_phys() for a generic 789 - * implementation useful for any iomem mapping. 790 - */ 791 - int (*access)(struct vm_area_struct *vma, unsigned long addr, 792 - void *buf, int len, int write); 793 - 794 - /* Called by the /proc/PID/maps code to ask the vma whether it 795 - * has a special name. Returning non-NULL will also cause this 796 - * vma to be dumped unconditionally. */ 797 - const char *(*name)(struct vm_area_struct *vma); 798 - 799 - #ifdef CONFIG_NUMA 800 - /* 801 - * set_policy() op must add a reference to any non-NULL @new mempolicy 802 - * to hold the policy upon return. 
Caller should pass NULL @new to 803 - * remove a policy and fall back to surrounding context--i.e. do not 804 - * install a MPOL_DEFAULT policy, nor the task or system default 805 - * mempolicy. 806 - */ 807 - int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); 808 - 809 - /* 810 - * get_policy() op must add reference [mpol_get()] to any policy at 811 - * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure 812 - * in mm/mempolicy.c will do this automatically. 813 - * get_policy() must NOT add a ref if the policy at (vma,addr) is not 814 - * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. 815 - * If no [shared/vma] mempolicy exists at the addr, get_policy() op 816 - * must return NULL--i.e., do not "fallback" to task or system default 817 - * policy. 818 - */ 819 - struct mempolicy *(*get_policy)(struct vm_area_struct *vma, 820 - unsigned long addr, pgoff_t *ilx); 821 - #endif 822 - #ifdef CONFIG_FIND_NORMAL_PAGE 823 - /* 824 - * Called by vm_normal_page() for special PTEs in @vma at @addr. This 825 - * allows for returning a "normal" page from vm_normal_page() even 826 - * though the PTE indicates that the "struct page" either does not exist 827 - * or should not be touched: "special". 828 - * 829 - * Do not add new users: this really only works when a "normal" page 830 - * was mapped, but then the PTE got changed to something weird (+ 831 - * marked special) that would not make pte_pfn() identify the originally 832 - * inserted page. 
833 - */ 834 - struct page *(*find_normal_page)(struct vm_area_struct *vma, 835 - unsigned long addr); 836 - #endif /* CONFIG_FIND_NORMAL_PAGE */ 837 - }; 838 - 839 - struct vm_unmapped_area_info { 840 - #define VM_UNMAPPED_AREA_TOPDOWN 1 841 - unsigned long flags; 842 - unsigned long length; 843 - unsigned long low_limit; 844 - unsigned long high_limit; 845 - unsigned long align_mask; 846 - unsigned long align_offset; 847 - unsigned long start_gap; 848 - }; 849 - 850 - struct pagetable_move_control { 851 - struct vm_area_struct *old; /* Source VMA. */ 852 - struct vm_area_struct *new; /* Destination VMA. */ 853 - unsigned long old_addr; /* Address from which the move begins. */ 854 - unsigned long old_end; /* Exclusive address at which old range ends. */ 855 - unsigned long new_addr; /* Address to move page tables to. */ 856 - unsigned long len_in; /* Bytes to remap specified by user. */ 857 - 858 - bool need_rmap_locks; /* Do rmap locks need to be taken? */ 859 - bool for_stack; /* Is this an early temp stack being moved? 
*/ 860 - }; 861 - 862 - #define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ 863 - struct pagetable_move_control name = { \ 864 - .old = old_, \ 865 - .new = new_, \ 866 - .old_addr = old_addr_, \ 867 - .old_end = (old_addr_) + (len_), \ 868 - .new_addr = new_addr_, \ 869 - .len_in = len_, \ 870 - } 871 - 872 - static inline void vma_iter_invalidate(struct vma_iterator *vmi) 873 - { 874 - mas_pause(&vmi->mas); 875 - } 876 - 877 - static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) 878 - { 879 - return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot)); 880 - } 881 - 882 - static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) 883 - { 884 - return __pgprot(vm_flags); 885 - } 886 - 887 - static inline bool is_shared_maywrite(vm_flags_t vm_flags) 888 - { 889 - return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == 890 - (VM_SHARED | VM_MAYWRITE); 891 - } 892 - 893 - static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) 894 - { 895 - return is_shared_maywrite(vma->vm_flags); 896 - } 897 - 898 - static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) 899 - { 900 - /* 901 - * Uses mas_find() to get the first VMA when the iterator starts. 902 - * Calling mas_next() could skip the first entry. 903 - */ 904 - return mas_find(&vmi->mas, ULONG_MAX); 905 - } 906 - 907 - /* 908 - * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these 909 - * assertions should be made either under mmap_write_lock or when the object 910 - * has been isolated under mmap_write_lock, ensuring no competing writers. 
911 - */ 912 - static inline void vma_assert_attached(struct vm_area_struct *vma) 913 - { 914 - WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); 915 - } 916 - 917 - static inline void vma_assert_detached(struct vm_area_struct *vma) 918 - { 919 - WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); 920 - } 921 - 922 - static inline void vma_assert_write_locked(struct vm_area_struct *); 923 - static inline void vma_mark_attached(struct vm_area_struct *vma) 924 - { 925 - vma_assert_write_locked(vma); 926 - vma_assert_detached(vma); 927 - refcount_set_release(&vma->vm_refcnt, 1); 928 - } 929 - 930 - static inline void vma_mark_detached(struct vm_area_struct *vma) 931 - { 932 - vma_assert_write_locked(vma); 933 - vma_assert_attached(vma); 934 - /* We are the only writer, so no need to use vma_refcount_put(). */ 935 - if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { 936 - /* 937 - * Reader must have temporarily raised vm_refcnt but it will 938 - * drop it without using the vma since vma is write-locked. 939 - */ 940 - } 941 - } 942 - 943 - extern const struct vm_operations_struct vma_dummy_vm_ops; 944 - 945 - extern unsigned long rlimit(unsigned int limit); 946 - 947 - static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) 948 - { 949 - memset(vma, 0, sizeof(*vma)); 950 - vma->vm_mm = mm; 951 - vma->vm_ops = &vma_dummy_vm_ops; 952 - INIT_LIST_HEAD(&vma->anon_vma_chain); 953 - vma->vm_lock_seq = UINT_MAX; 954 - } 955 - 956 - /* 957 - * These are defined in vma.h, but sadly vm_stat_account() is referenced by 958 - * kernel/fork.c, so we have to these broadly available there, and temporarily 959 - * define them here to resolve the dependency cycle. 
960 - */ 961 - 962 - #define is_exec_mapping(flags) \ 963 - ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC) 964 - 965 - #define is_stack_mapping(flags) \ 966 - (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK)) 967 - 968 - #define is_data_mapping(flags) \ 969 - ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE) 970 - 971 - static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, 972 - long npages) 973 - { 974 - WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); 975 - 976 - if (is_exec_mapping(flags)) 977 - mm->exec_vm += npages; 978 - else if (is_stack_mapping(flags)) 979 - mm->stack_vm += npages; 980 - else if (is_data_mapping(flags)) 981 - mm->data_vm += npages; 982 - } 983 - 984 - #undef is_exec_mapping 985 - #undef is_stack_mapping 986 - #undef is_data_mapping 987 - 988 - /* Currently stubbed but we may later wish to un-stub. */ 989 - static inline void vm_acct_memory(long pages); 990 - static inline void vm_unacct_memory(long pages) 991 - { 992 - vm_acct_memory(-pages); 993 - } 994 - 995 - static inline void mapping_allow_writable(struct address_space *mapping) 996 - { 997 - atomic_inc(&mapping->i_mmap_writable); 998 - } 999 - 1000 - static inline void vma_set_range(struct vm_area_struct *vma, 1001 - unsigned long start, unsigned long end, 1002 - pgoff_t pgoff) 1003 - { 1004 - vma->vm_start = start; 1005 - vma->vm_end = end; 1006 - vma->vm_pgoff = pgoff; 1007 - } 1008 - 1009 - static inline 1010 - struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) 1011 - { 1012 - return mas_find(&vmi->mas, max - 1); 1013 - } 1014 - 1015 - static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, 1016 - unsigned long start, unsigned long end, gfp_t gfp) 1017 - { 1018 - __mas_set_range(&vmi->mas, start, end - 1); 1019 - mas_store_gfp(&vmi->mas, NULL, gfp); 1020 - if (unlikely(mas_is_err(&vmi->mas))) 1021 - return -ENOMEM; 1022 - 1023 - return 0; 1024 - } 1025 - 1026 - static inline void 
mmap_assert_locked(struct mm_struct *); 1027 - static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, 1028 - unsigned long start_addr, 1029 - unsigned long end_addr) 1030 - { 1031 - unsigned long index = start_addr; 1032 - 1033 - mmap_assert_locked(mm); 1034 - return mt_find(&mm->mm_mt, &index, end_addr - 1); 1035 - } 1036 - 1037 - static inline 1038 - struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) 1039 - { 1040 - return mtree_load(&mm->mm_mt, addr); 1041 - } 1042 - 1043 - static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) 1044 - { 1045 - return mas_prev(&vmi->mas, 0); 1046 - } 1047 - 1048 - static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) 1049 - { 1050 - mas_set(&vmi->mas, addr); 1051 - } 1052 - 1053 - static inline bool vma_is_anonymous(struct vm_area_struct *vma) 1054 - { 1055 - return !vma->vm_ops; 1056 - } 1057 - 1058 - /* Defined in vma.h, so temporarily define here to avoid circular dependency. */ 1059 - #define vma_iter_load(vmi) \ 1060 - mas_walk(&(vmi)->mas) 1061 - 1062 - static inline struct vm_area_struct * 1063 - find_vma_prev(struct mm_struct *mm, unsigned long addr, 1064 - struct vm_area_struct **pprev) 1065 - { 1066 - struct vm_area_struct *vma; 1067 - VMA_ITERATOR(vmi, mm, addr); 1068 - 1069 - vma = vma_iter_load(&vmi); 1070 - *pprev = vma_prev(&vmi); 1071 - if (!vma) 1072 - vma = vma_next(&vmi); 1073 - return vma; 1074 - } 1075 - 1076 - #undef vma_iter_load 1077 - 1078 - static inline void vma_iter_init(struct vma_iterator *vmi, 1079 - struct mm_struct *mm, unsigned long addr) 1080 - { 1081 - mas_init(&vmi->mas, &mm->mm_mt, addr); 1082 - } 1083 - 1084 - /* Stubbed functions. 
*/ 1085 - 1086 - static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) 1087 - { 1088 - return NULL; 1089 - } 1090 - 1091 - static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 1092 - struct vm_userfaultfd_ctx vm_ctx) 1093 - { 1094 - return true; 1095 - } 1096 - 1097 - static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, 1098 - struct anon_vma_name *anon_name2) 1099 - { 1100 - return true; 1101 - } 1102 - 1103 - static inline void might_sleep(void) 1104 - { 1105 - } 1106 - 1107 - static inline unsigned long vma_pages(struct vm_area_struct *vma) 1108 - { 1109 - return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 1110 - } 1111 - 1112 - static inline void fput(struct file *file) 1113 - { 1114 - } 1115 - 1116 - static inline void mpol_put(struct mempolicy *pol) 1117 - { 1118 - } 1119 - 1120 - static inline void lru_add_drain(void) 1121 - { 1122 - } 1123 - 1124 - static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) 1125 - { 1126 - } 1127 - 1128 - static inline void update_hiwater_rss(struct mm_struct *mm) 1129 - { 1130 - } 1131 - 1132 - static inline void update_hiwater_vm(struct mm_struct *mm) 1133 - { 1134 - } 1135 - 1136 - static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, 1137 - struct vm_area_struct *vma, unsigned long start_addr, 1138 - unsigned long end_addr, unsigned long tree_end) 1139 - { 1140 - } 1141 - 1142 - static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, 1143 - struct vm_area_struct *vma, unsigned long floor, 1144 - unsigned long ceiling, bool mm_wr_locked) 1145 - { 1146 - } 1147 - 1148 - static inline void mapping_unmap_writable(struct address_space *mapping) 1149 - { 1150 - } 1151 - 1152 - static inline void flush_dcache_mmap_lock(struct address_space *mapping) 1153 - { 1154 - } 1155 - 1156 - static inline void tlb_finish_mmu(struct mmu_gather *tlb) 1157 - { 1158 - } 1159 - 1160 - static inline struct file 
*get_file(struct file *f) 1161 - { 1162 - return f; 1163 - } 1164 - 1165 - static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) 1166 - { 1167 - return 0; 1168 - } 1169 - 1170 - static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src, 1171 - enum vma_operation operation) 1172 - { 1173 - /* For testing purposes. We indicate that an anon_vma has been cloned. */ 1174 - if (src->anon_vma != NULL) { 1175 - dst->anon_vma = src->anon_vma; 1176 - dst->anon_vma->was_cloned = true; 1177 - } 1178 - 1179 - return 0; 1180 - } 1181 - 1182 - static inline void vma_start_write(struct vm_area_struct *vma) 1183 - { 1184 - /* Used to indicate to tests that a write operation has begun. */ 1185 - vma->vm_lock_seq++; 1186 - } 1187 - 1188 - static inline __must_check 1189 - int vma_start_write_killable(struct vm_area_struct *vma) 1190 - { 1191 - /* Used to indicate to tests that a write operation has begun. */ 1192 - vma->vm_lock_seq++; 1193 - return 0; 1194 - } 1195 - 1196 - static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, 1197 - unsigned long start, 1198 - unsigned long end, 1199 - struct vm_area_struct *next) 1200 - { 1201 - } 1202 - 1203 - static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} 1204 - 1205 - static inline void vma_iter_free(struct vma_iterator *vmi) 1206 - { 1207 - mas_destroy(&vmi->mas); 1208 - } 1209 - 1210 - static inline 1211 - struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi) 1212 - { 1213 - return mas_next_range(&vmi->mas, ULONG_MAX); 1214 - } 1215 - 1216 - static inline void vm_acct_memory(long pages) 1217 - { 1218 - } 1219 - 1220 - static inline void vma_interval_tree_insert(struct vm_area_struct *vma, 1221 - struct rb_root_cached *rb) 1222 - { 1223 - } 1224 - 1225 - static inline void vma_interval_tree_remove(struct vm_area_struct *vma, 1226 - struct rb_root_cached *rb) 1227 - { 1228 - } 1229 - 1230 - static inline void 
flush_dcache_mmap_unlock(struct address_space *mapping) 1231 - { 1232 - } 1233 - 1234 - static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc, 1235 - struct rb_root_cached *rb) 1236 - { 1237 - } 1238 - 1239 - static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc, 1240 - struct rb_root_cached *rb) 1241 - { 1242 - } 1243 - 1244 - static inline void uprobe_mmap(struct vm_area_struct *vma) 1245 - { 1246 - } 1247 - 1248 - static inline void uprobe_munmap(struct vm_area_struct *vma, 1249 - unsigned long start, unsigned long end) 1250 - { 1251 - } 1252 - 1253 - static inline void i_mmap_lock_write(struct address_space *mapping) 1254 - { 1255 - } 1256 - 1257 - static inline void anon_vma_lock_write(struct anon_vma *anon_vma) 1258 - { 1259 - } 1260 - 1261 - static inline void vma_assert_write_locked(struct vm_area_struct *vma) 1262 - { 1263 - } 1264 - 1265 - static inline void unlink_anon_vmas(struct vm_area_struct *vma) 1266 - { 1267 - /* For testing purposes, indicate that the anon_vma was unlinked. 
*/ 1268 - vma->anon_vma->was_unlinked = true; 1269 - } 1270 - 1271 - static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) 1272 - { 1273 - } 1274 - 1275 - static inline void i_mmap_unlock_write(struct address_space *mapping) 1276 - { 1277 - } 1278 - 1279 - static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, 1280 - unsigned long start, 1281 - unsigned long end, 1282 - struct list_head *unmaps) 1283 - { 1284 - return 0; 1285 - } 1286 - 1287 - static inline void mmap_write_downgrade(struct mm_struct *mm) 1288 - { 1289 - } 1290 - 1291 - static inline void mmap_read_unlock(struct mm_struct *mm) 1292 - { 1293 - } 1294 - 1295 - static inline void mmap_write_unlock(struct mm_struct *mm) 1296 - { 1297 - } 1298 - 1299 - static inline int mmap_write_lock_killable(struct mm_struct *mm) 1300 - { 1301 - return 0; 1302 - } 1303 - 1304 - static inline bool can_modify_mm(struct mm_struct *mm, 1305 - unsigned long start, 1306 - unsigned long end) 1307 - { 1308 - return true; 1309 - } 1310 - 1311 - static inline void arch_unmap(struct mm_struct *mm, 1312 - unsigned long start, 1313 - unsigned long end) 1314 - { 1315 - } 1316 - 1317 - static inline void mmap_assert_locked(struct mm_struct *mm) 1318 - { 1319 - } 1320 - 1321 - static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) 1322 - { 1323 - return true; 1324 - } 1325 - 1326 - static inline void khugepaged_enter_vma(struct vm_area_struct *vma, 1327 - vm_flags_t vm_flags) 1328 - { 1329 - } 1330 - 1331 - static inline bool mapping_can_writeback(struct address_space *mapping) 1332 - { 1333 - return true; 1334 - } 1335 - 1336 - static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) 1337 - { 1338 - return false; 1339 - } 1340 - 1341 - static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) 1342 - { 1343 - return false; 1344 - } 1345 - 1346 - static inline bool userfaultfd_wp(struct vm_area_struct *vma) 1347 - { 1348 - return false; 1349 - } 1350 - 1351 - static 
inline void mmap_assert_write_locked(struct mm_struct *mm) 1352 - { 1353 - } 1354 - 1355 - static inline void mutex_lock(struct mutex *lock) 1356 - { 1357 - } 1358 - 1359 - static inline void mutex_unlock(struct mutex *lock) 1360 - { 1361 - } 1362 - 1363 - static inline bool mutex_is_locked(struct mutex *lock) 1364 - { 1365 - return true; 1366 - } 1367 - 1368 - static inline bool signal_pending(void *p) 1369 - { 1370 - return false; 1371 - } 1372 - 1373 - static inline bool is_file_hugepages(struct file *file) 1374 - { 1375 - return false; 1376 - } 1377 - 1378 - static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) 1379 - { 1380 - return 0; 1381 - } 1382 - 1383 - static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, 1384 - unsigned long npages) 1385 - { 1386 - return true; 1387 - } 1388 - 1389 - static inline int shmem_zero_setup(struct vm_area_struct *vma) 1390 - { 1391 - return 0; 1392 - } 1393 - 1394 - static inline void vma_set_anonymous(struct vm_area_struct *vma) 1395 - { 1396 - vma->vm_ops = NULL; 1397 - } 1398 - 1399 - static inline void ksm_add_vma(struct vm_area_struct *vma) 1400 - { 1401 - } 1402 - 1403 - static inline void perf_event_mmap(struct vm_area_struct *vma) 1404 - { 1405 - } 1406 - 1407 - static inline bool vma_is_dax(struct vm_area_struct *vma) 1408 - { 1409 - return false; 1410 - } 1411 - 1412 - static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) 1413 - { 1414 - return NULL; 1415 - } 1416 - 1417 - bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); 1418 - 1419 - /* Update vma->vm_page_prot to reflect vma->vm_flags. */ 1420 - static inline void vma_set_page_prot(struct vm_area_struct *vma) 1421 - { 1422 - vm_flags_t vm_flags = vma->vm_flags; 1423 - pgprot_t vm_page_prot; 1424 - 1425 - /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. 
*/ 1426 - vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags)); 1427 - 1428 - if (vma_wants_writenotify(vma, vm_page_prot)) { 1429 - vm_flags &= ~VM_SHARED; 1430 - /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ 1431 - vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags)); 1432 - } 1433 - /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ 1434 - WRITE_ONCE(vma->vm_page_prot, vm_page_prot); 1435 - } 1436 - 1437 - static inline bool arch_validate_flags(vm_flags_t flags) 1438 - { 1439 - return true; 1440 - } 1441 - 1442 - static inline void vma_close(struct vm_area_struct *vma) 1443 - { 1444 - } 1445 - 1446 - static inline int mmap_file(struct file *file, struct vm_area_struct *vma) 1447 - { 1448 - return 0; 1449 - } 1450 - 1451 - static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) 1452 - { 1453 - if (vma->vm_flags & VM_GROWSDOWN) 1454 - return stack_guard_gap; 1455 - 1456 - /* See reasoning around the VM_SHADOW_STACK definition */ 1457 - if (vma->vm_flags & VM_SHADOW_STACK) 1458 - return PAGE_SIZE; 1459 - 1460 - return 0; 1461 - } 1462 - 1463 - static inline unsigned long vm_start_gap(struct vm_area_struct *vma) 1464 - { 1465 - unsigned long gap = stack_guard_start_gap(vma); 1466 - unsigned long vm_start = vma->vm_start; 1467 - 1468 - vm_start -= gap; 1469 - if (vm_start > vma->vm_start) 1470 - vm_start = 0; 1471 - return vm_start; 1472 - } 1473 - 1474 - static inline unsigned long vm_end_gap(struct vm_area_struct *vma) 1475 - { 1476 - unsigned long vm_end = vma->vm_end; 1477 - 1478 - if (vma->vm_flags & VM_GROWSUP) { 1479 - vm_end += stack_guard_gap; 1480 - if (vm_end < vma->vm_end) 1481 - vm_end = -PAGE_SIZE; 1482 - } 1483 - return vm_end; 1484 - } 1485 - 1486 - static inline int is_hugepage_only_range(struct mm_struct *mm, 1487 - unsigned long addr, unsigned long len) 1488 - { 1489 - return 0; 1490 - } 1491 - 1492 - static inline bool 
vma_is_accessible(struct vm_area_struct *vma) 1493 - { 1494 - return vma->vm_flags & VM_ACCESS_FLAGS; 1495 - } 1496 - 1497 - static inline bool capable(int cap) 1498 - { 1499 - return true; 1500 - } 1501 - 1502 - static inline bool mlock_future_ok(const struct mm_struct *mm, 1503 - vm_flags_t vm_flags, unsigned long bytes) 1504 - { 1505 - unsigned long locked_pages, limit_pages; 1506 - 1507 - if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) 1508 - return true; 1509 - 1510 - locked_pages = bytes >> PAGE_SHIFT; 1511 - locked_pages += mm->locked_vm; 1512 - 1513 - limit_pages = rlimit(RLIMIT_MEMLOCK); 1514 - limit_pages >>= PAGE_SHIFT; 1515 - 1516 - return locked_pages <= limit_pages; 1517 - } 1518 - 1519 - static inline int __anon_vma_prepare(struct vm_area_struct *vma) 1520 - { 1521 - struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma)); 1522 - 1523 - if (!anon_vma) 1524 - return -ENOMEM; 1525 - 1526 - anon_vma->root = anon_vma; 1527 - vma->anon_vma = anon_vma; 1528 - 1529 - return 0; 1530 - } 1531 - 1532 - static inline int anon_vma_prepare(struct vm_area_struct *vma) 1533 - { 1534 - if (likely(vma->anon_vma)) 1535 - return 0; 1536 - 1537 - return __anon_vma_prepare(vma); 1538 - } 1539 - 1540 - static inline void userfaultfd_unmap_complete(struct mm_struct *mm, 1541 - struct list_head *uf) 1542 - { 1543 - } 1544 - 1545 - #define ACCESS_PRIVATE(p, member) ((p)->member) 1546 - 1547 - #define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) 1548 - 1549 - static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits) 1550 - { 1551 - unsigned int len = bitmap_size(nbits); 1552 - 1553 - if (small_const_nbits(nbits)) 1554 - *dst = 0; 1555 - else 1556 - memset(dst, 0, len); 1557 - } 1558 - 1559 - static inline bool mm_flags_test(int flag, const struct mm_struct *mm) 1560 - { 1561 - return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); 1562 - } 1563 - 1564 - /* Clears all bits in the VMA flags bitmap, non-atomically. 
*/ 1565 - static inline void vma_flags_clear_all(vma_flags_t *flags) 1566 - { 1567 - bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS); 1568 - } 1569 - 1570 - /* 1571 - * Copy value to the first system word of VMA flags, non-atomically. 1572 - * 1573 - * IMPORTANT: This does not overwrite bytes past the first system word. The 1574 - * caller must account for this. 1575 - */ 1576 - static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value) 1577 - { 1578 - *ACCESS_PRIVATE(flags, __vma_flags) = value; 1579 - } 1580 - 1581 - /* 1582 - * Copy value to the first system word of VMA flags ONCE, non-atomically. 1583 - * 1584 - * IMPORTANT: This does not overwrite bytes past the first system word. The 1585 - * caller must account for this. 1586 - */ 1587 - static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value) 1588 - { 1589 - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 1590 - 1591 - WRITE_ONCE(*bitmap, value); 1592 - } 1593 - 1594 - /* Update the first system word of VMA flags setting bits, non-atomically. */ 1595 - static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value) 1596 - { 1597 - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 1598 - 1599 - *bitmap |= value; 1600 - } 1601 - 1602 - /* Update the first system word of VMA flags clearing bits, non-atomically. 
*/ 1603 - static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value) 1604 - { 1605 - unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags); 1606 - 1607 - *bitmap &= ~value; 1608 - } 1609 - 1610 - 1611 - /* Use when VMA is not part of the VMA tree and needs no locking */ 1612 - static inline void vm_flags_init(struct vm_area_struct *vma, 1613 - vm_flags_t flags) 1614 - { 1615 - vma_flags_clear_all(&vma->flags); 1616 - vma_flags_overwrite_word(&vma->flags, flags); 1617 - } 1618 - 1619 - /* 1620 - * Use when VMA is part of the VMA tree and modifications need coordination 1621 - * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and 1622 - * it should be locked explicitly beforehand. 1623 - */ 1624 - static inline void vm_flags_reset(struct vm_area_struct *vma, 1625 - vm_flags_t flags) 1626 - { 1627 - vma_assert_write_locked(vma); 1628 - vm_flags_init(vma, flags); 1629 - } 1630 - 1631 - static inline void vm_flags_reset_once(struct vm_area_struct *vma, 1632 - vm_flags_t flags) 1633 - { 1634 - vma_assert_write_locked(vma); 1635 - /* 1636 - * The user should only be interested in avoiding reordering of 1637 - * assignment to the first word. 1638 - */ 1639 - vma_flags_clear_all(&vma->flags); 1640 - vma_flags_overwrite_word_once(&vma->flags, flags); 1641 - } 1642 - 1643 - static inline void vm_flags_set(struct vm_area_struct *vma, 1644 - vm_flags_t flags) 1645 - { 1646 - vma_start_write(vma); 1647 - vma_flags_set_word(&vma->flags, flags); 1648 - } 1649 - 1650 - static inline void vm_flags_clear(struct vm_area_struct *vma, 1651 - vm_flags_t flags) 1652 - { 1653 - vma_start_write(vma); 1654 - vma_flags_clear_word(&vma->flags, flags); 1655 - } 1656 - 1657 - /* 1658 - * Denies creating a writable executable mapping or gaining executable permissions. 
1659 - * 1660 - * This denies the following: 1661 - * 1662 - * a) mmap(PROT_WRITE | PROT_EXEC) 1663 - * 1664 - * b) mmap(PROT_WRITE) 1665 - * mprotect(PROT_EXEC) 1666 - * 1667 - * c) mmap(PROT_WRITE) 1668 - * mprotect(PROT_READ) 1669 - * mprotect(PROT_EXEC) 1670 - * 1671 - * But allows the following: 1672 - * 1673 - * d) mmap(PROT_READ | PROT_EXEC) 1674 - * mmap(PROT_READ | PROT_EXEC | PROT_BTI) 1675 - * 1676 - * This is only applicable if the user has set the Memory-Deny-Write-Execute 1677 - * (MDWE) protection mask for the current process. 1678 - * 1679 - * @old specifies the VMA flags the VMA originally possessed, and @new the ones 1680 - * we propose to set. 1681 - * 1682 - * Return: false if proposed change is OK, true if not ok and should be denied. 1683 - */ 1684 - static inline bool map_deny_write_exec(unsigned long old, unsigned long new) 1685 - { 1686 - /* If MDWE is disabled, we have nothing to deny. */ 1687 - if (mm_flags_test(MMF_HAS_MDWE, current->mm)) 1688 - return false; 1689 - 1690 - /* If the new VMA is not executable, we have nothing to deny. */ 1691 - if (!(new & VM_EXEC)) 1692 - return false; 1693 - 1694 - /* Under MDWE we do not accept newly writably executable VMAs... */ 1695 - if (new & VM_WRITE) 1696 - return true; 1697 - 1698 - /* ...nor previously non-executable VMAs becoming executable. */ 1699 - if (!(old & VM_EXEC)) 1700 - return true; 1701 - 1702 - return false; 1703 - } 1704 - 1705 - static inline int mapping_map_writable(struct address_space *mapping) 1706 - { 1707 - return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? 
1708 - 0 : -EPERM; 1709 - } 1710 - 1711 - static inline unsigned long move_page_tables(struct pagetable_move_control *pmc) 1712 - { 1713 - return 0; 1714 - } 1715 - 1716 - static inline void free_pgd_range(struct mmu_gather *tlb, 1717 - unsigned long addr, unsigned long end, 1718 - unsigned long floor, unsigned long ceiling) 1719 - { 1720 - } 1721 - 1722 - static inline int ksm_execve(struct mm_struct *mm) 1723 - { 1724 - return 0; 1725 - } 1726 - 1727 - static inline void ksm_exit(struct mm_struct *mm) 1728 - { 1729 - } 1730 - 1731 - static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) 1732 - { 1733 - if (reset_refcnt) 1734 - refcount_set(&vma->vm_refcnt, 0); 1735 - } 1736 - 1737 - static inline void vma_numab_state_init(struct vm_area_struct *vma) 1738 - { 1739 - } 1740 - 1741 - static inline void vma_numab_state_free(struct vm_area_struct *vma) 1742 - { 1743 - } 1744 - 1745 - static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, 1746 - struct vm_area_struct *new_vma) 1747 - { 1748 - } 1749 - 1750 - static inline void free_anon_vma_name(struct vm_area_struct *vma) 1751 - { 1752 - } 1753 - 1754 - /* Declared in vma.h. 
*/ 1755 - static inline void set_vma_from_desc(struct vm_area_struct *vma, 1756 - struct vm_area_desc *desc); 1757 - 1758 - static inline void mmap_action_prepare(struct mmap_action *action, 1759 - struct vm_area_desc *desc) 1760 - { 1761 - } 1762 - 1763 - static inline int mmap_action_complete(struct mmap_action *action, 1764 - struct vm_area_struct *vma) 1765 - { 1766 - return 0; 1767 - } 1768 - 1769 - static inline int __compat_vma_mmap(const struct file_operations *f_op, 1770 - struct file *file, struct vm_area_struct *vma) 1771 - { 1772 - struct vm_area_desc desc = { 1773 - .mm = vma->vm_mm, 1774 - .file = file, 1775 - .start = vma->vm_start, 1776 - .end = vma->vm_end, 1777 - 1778 - .pgoff = vma->vm_pgoff, 1779 - .vm_file = vma->vm_file, 1780 - .vm_flags = vma->vm_flags, 1781 - .page_prot = vma->vm_page_prot, 1782 - 1783 - .action.type = MMAP_NOTHING, /* Default */ 1784 - }; 1785 - int err; 1786 - 1787 - err = f_op->mmap_prepare(&desc); 1788 - if (err) 1789 - return err; 1790 - 1791 - mmap_action_prepare(&desc.action, &desc); 1792 - set_vma_from_desc(vma, &desc); 1793 - return mmap_action_complete(&desc.action, vma); 1794 - } 1795 - 1796 - static inline int compat_vma_mmap(struct file *file, 1797 - struct vm_area_struct *vma) 1798 - { 1799 - return __compat_vma_mmap(file->f_op, file, vma); 1800 - } 1801 - 1802 - /* Did the driver provide valid mmap hook configuration? */ 1803 - static inline bool can_mmap_file(struct file *file) 1804 - { 1805 - bool has_mmap = file->f_op->mmap; 1806 - bool has_mmap_prepare = file->f_op->mmap_prepare; 1807 - 1808 - /* Hooks are mutually exclusive. 
*/ 1809 - if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) 1810 - return false; 1811 - if (!has_mmap && !has_mmap_prepare) 1812 - return false; 1813 - 1814 - return true; 1815 - } 1816 - 1817 - static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) 1818 - { 1819 - if (file->f_op->mmap_prepare) 1820 - return compat_vma_mmap(file, vma); 1821 - 1822 - return file->f_op->mmap(file, vma); 1823 - } 1824 - 1825 - static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) 1826 - { 1827 - return file->f_op->mmap_prepare(desc); 1828 - } 1829 - 1830 - static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) 1831 - { 1832 - } 1833 - 1834 - static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) 1835 - { 1836 - /* Changing an anonymous vma with this is illegal */ 1837 - get_file(file); 1838 - swap(vma->vm_file, file); 1839 - fput(file); 1840 - } 1841 - 1842 - static inline bool shmem_file(struct file *file) 1843 - { 1844 - return false; 1845 - } 1846 - 1847 - static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, 1848 - const struct file *file, vm_flags_t vm_flags) 1849 - { 1850 - return vm_flags; 1851 - } 1852 - 1853 - static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn) 1854 - { 1855 - } 1856 - 1857 - static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr, 1858 - unsigned long pfn, unsigned long size, pgprot_t pgprot) 1859 - { 1860 - return 0; 1861 - } 1862 - 1863 - static inline int do_munmap(struct mm_struct *, unsigned long, size_t, 1864 - struct list_head *uf) 1865 - { 1866 - return 0; 1867 - } 46 + typedef unsigned long vm_flags_t; 47 + #define pgoff_t unsigned long 48 + typedef unsigned long pgprotval_t; 49 + typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; 50 + typedef __bitwise unsigned int vm_fault_t; 51 + 52 + #include "include/stubs.h" 53 + #include "include/dup.h" 54 + #include "include/custom.h" 
1868 55 1869 56 #endif /* __MM_VMA_INTERNAL_H */