Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"8 patches.

Subsystems affected by this patch series: mm (hugetlb, pagemap, and
userfaultfd), memfd, selftests, and kconfig"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
configs/debug: set CONFIG_DEBUG_INFO=y properly
proc: fix documentation and description of pagemap
kselftest/vm: fix tests build with old libc
memfd: fix F_SEAL_WRITE after shmem huge page allocated
mm: fix use-after-free when anon vma name is used after vma is freed
mm: prevent vm_area_struct::anon_name refcount saturation
mm: refactor vm_area_struct::anon_vma_name usage code
selftests/vm: cleanup hugetlb file after mremap test

Changed files: +196 -139
+1 -1
Documentation/admin-guide/mm/pagemap.rst
 * Bit  56    page exclusively mapped (since 4.2)
 * Bit  57    pte is uffd-wp write-protected (since 5.13) (see
   :ref:`Documentation/admin-guide/mm/userfaultfd.rst <userfaultfd>`)
-* Bits 57-60 zero
+* Bits 58-60 zero
 * Bit  61    page is file-page or shared-anon (since 3.5)
 * Bit  62    page swapped
 * Bit  63    page present
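For reference, a minimal userspace sketch of how these pagemap bits are read back (not part of this series; the bit positions follow the table above, and the probed stack variable is only an example):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Read the pagemap entry covering one virtual address of this process. */
    static int pagemap_entry(const void *addr, uint64_t *entry)
    {
            size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
            off_t off = (off_t)((uintptr_t)addr / pagesize) * sizeof(uint64_t);
            int fd = open("/proc/self/pagemap", O_RDONLY);
            int ok;

            if (fd < 0)
                    return -1;
            ok = pread(fd, entry, sizeof(*entry), off) == sizeof(*entry);
            close(fd);
            return ok ? 0 : -1;
    }

    int main(void)
    {
            int probe = 42;         /* written to, so its page is present */
            uint64_t e;

            if (pagemap_entry(&probe, &e))
                    return 1;
            printf("present=%llu swapped=%llu file/shared-anon=%llu uffd-wp=%llu\n",
                   (unsigned long long)(e >> 63 & 1),
                   (unsigned long long)(e >> 62 & 1),
                   (unsigned long long)(e >> 61 & 1),
                   (unsigned long long)(e >> 57 & 1));
            return 0;
    }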
+5 -4
fs/proc/task_mmu.c
 	name = arch_vma_name(vma);
 	if (!name) {
-		const char *anon_name;
+		struct anon_vma_name *anon_name;
 
 		if (!mm) {
 			name = "[vdso]";
···
 			goto done;
 		}
 
-		anon_name = vma_anon_name(vma);
+		anon_name = anon_vma_name(vma);
 		if (anon_name) {
 			seq_pad(m, ' ');
-			seq_printf(m, "[anon:%s]", anon_name);
+			seq_printf(m, "[anon:%s]", anon_name->name);
 		}
 	}
···
  * Bits 5-54  swap offset if swapped
  * Bit  55    pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
  * Bit  56    page exclusively mapped
- * Bits 57-60 zero
+ * Bit  57    pte is uffd-wp write-protected
+ * Bits 58-60 zero
  * Bit  61    page is file-page or shared-anon
  * Bit  62    page swapped
  * Bit  63    page present
+3 -3
fs/userfaultfd.c
 				 new_flags, vma->anon_vma,
 				 vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX, vma_anon_name(vma));
+				 NULL_VM_UFFD_CTX, anon_vma_name(vma));
 		if (prev)
 			vma = prev;
 		else
···
 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
 				 ((struct vm_userfaultfd_ctx){ ctx }),
-				 vma_anon_name(vma));
+				 anon_vma_name(vma));
 		if (prev) {
 			vma = prev;
 			goto next;
···
 		prev = vma_merge(mm, prev, start, vma_end, new_flags,
 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX, vma_anon_name(vma));
+				 NULL_VM_UFFD_CTX, anon_vma_name(vma));
 		if (prev) {
 			vma = prev;
 			goto next;
+4 -3
include/linux/mm.h
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-	struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
+	struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
 	unsigned long addr, int new_below);
···
 #ifdef CONFIG_ANON_VMA_NAME
 int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
-			  unsigned long len_in, const char *name);
+			  unsigned long len_in,
+			  struct anon_vma_name *anon_name);
 #else
 static inline int
 madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
-		      unsigned long len_in, const char *name) {
+		      unsigned long len_in, struct anon_vma_name *anon_name) {
 	return 0;
 }
 #endif
+70 -29
include/linux/mm_inline.h
 #ifdef CONFIG_ANON_VMA_NAME
 /*
- * mmap_lock should be read-locked when calling vma_anon_name() and while using
- * the returned pointer.
+ * mmap_lock should be read-locked when calling anon_vma_name(). Caller should
+ * either keep holding the lock while using the returned pointer or it should
+ * raise anon_vma_name refcount before releasing the lock.
  */
-extern const char *vma_anon_name(struct vm_area_struct *vma);
-
-/*
- * mmap_lock should be read-locked for orig_vma->vm_mm.
- * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
- * isolated.
- */
-extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
-			      struct vm_area_struct *new_vma);
-
-/*
- * mmap_lock should be write-locked or vma should have been isolated under
- * write-locked mmap_lock protection.
- */
-extern void free_vma_anon_name(struct vm_area_struct *vma);
+extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma);
+extern struct anon_vma_name *anon_vma_name_alloc(const char *name);
+extern void anon_vma_name_free(struct kref *kref);
 
 /* mmap_lock should be read-locked */
-static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
-					 const char *name)
+static inline void anon_vma_name_get(struct anon_vma_name *anon_name)
 {
-	const char *vma_name = vma_anon_name(vma);
+	if (anon_name)
+		kref_get(&anon_name->kref);
+}
 
-	/* either both NULL, or pointers to same string */
-	if (vma_name == name)
+static inline void anon_vma_name_put(struct anon_vma_name *anon_name)
+{
+	if (anon_name)
+		kref_put(&anon_name->kref, anon_vma_name_free);
+}
+
+static inline
+struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name)
+{
+	/* Prevent anon_name refcount saturation early on */
+	if (kref_read(&anon_name->kref) < REFCOUNT_MAX) {
+		anon_vma_name_get(anon_name);
+		return anon_name;
+	}
+	return anon_vma_name_alloc(anon_name->name);
+}
+
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+				     struct vm_area_struct *new_vma)
+{
+	struct anon_vma_name *anon_name = anon_vma_name(orig_vma);
+
+	if (anon_name)
+		new_vma->anon_name = anon_vma_name_reuse(anon_name);
+}
+
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+	/*
+	 * Not using anon_vma_name because it generates a warning if mmap_lock
+	 * is not held, which might be the case here.
+	 */
+	if (!vma->vm_file)
+		anon_vma_name_put(vma->anon_name);
+}
+
+static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
+				    struct anon_vma_name *anon_name2)
+{
+	if (anon_name1 == anon_name2)
 		return true;
 
-	return name && vma_name && !strcmp(name, vma_name);
+	return anon_name1 && anon_name2 &&
+	       !strcmp(anon_name1->name, anon_name2->name);
 }
+
 #else /* CONFIG_ANON_VMA_NAME */
-static inline const char *vma_anon_name(struct vm_area_struct *vma)
+static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
 {
 	return NULL;
 }
-static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
-			      struct vm_area_struct *new_vma) {}
-static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
-static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
-					 const char *name)
+
+static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
+{
+	return NULL;
+}
+
+static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {}
+static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {}
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+				     struct vm_area_struct *new_vma) {}
+static inline void free_anon_vma_name(struct vm_area_struct *vma) {}
+
+static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
+				    struct anon_vma_name *anon_name2)
 {
 	return true;
 }
+
 #endif /* CONFIG_ANON_VMA_NAME */
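The saturation fix above boils down to one decision in anon_vma_name_reuse(): take another reference only while the kref is still below REFCOUNT_MAX, and otherwise allocate a private copy of the name so that the shared counter can never saturate. A standalone sketch of that reuse-or-copy pattern, with REF_MAX and struct named_buf made up for the example:

    #include <stdlib.h>
    #include <string.h>

    #define REF_MAX 0x7fffffffu     /* stand-in for the kernel's REFCOUNT_MAX */

    struct named_buf {
            unsigned int refs;      /* stand-in for struct kref */
            char name[80];
    };

    static struct named_buf *buf_alloc(const char *name)
    {
            struct named_buf *b = calloc(1, sizeof(*b));

            if (b) {
                    b->refs = 1;
                    strncpy(b->name, name, sizeof(b->name) - 1);
            }
            return b;
    }

    /* Share the object while that is safe, else fall back to a private copy. */
    static struct named_buf *buf_reuse(struct named_buf *b)
    {
            if (b->refs < REF_MAX) {
                    b->refs++;              /* kernel: anon_vma_name_get() */
                    return b;
            }
            return buf_alloc(b->name);      /* kernel: anon_vma_name_alloc() */
    }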
+4 -1
include/linux/mm_types.h
 			struct rb_node rb;
 			unsigned long rb_subtree_last;
 		} shared;
-		/* Serialized by mmap_sem. */
+		/*
+		 * Serialized by mmap_sem. Never use directly because it is
+		 * valid only when vm_file is NULL. Use anon_vma_name instead.
+		 */
 		struct anon_vma_name *anon_name;
 	};
+1 -1
kernel/configs/debug.config
 #
 # Compile-time checks and compiler options
 #
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_FRAME_WARN=2048
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+2 -2
kernel/fork.c
 		*new = data_race(*orig);
 		INIT_LIST_HEAD(&new->anon_vma_chain);
 		new->vm_next = new->vm_prev = NULL;
-		dup_vma_anon_name(orig, new);
+		dup_anon_vma_name(orig, new);
 	}
 	return new;
 }
 
 void vm_area_free(struct vm_area_struct *vma)
 {
-	free_vma_anon_name(vma);
+	free_anon_vma_name(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 }
+12 -7
kernel/sys.c
 #include <linux/export.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/utsname.h>
 #include <linux/mman.h>
 #include <linux/reboot.h>
···
 {
 	struct mm_struct *mm = current->mm;
 	const char __user *uname;
-	char *name, *pch;
+	struct anon_vma_name *anon_name = NULL;
 	int error;
 
 	switch (opt) {
 	case PR_SET_VMA_ANON_NAME:
 		uname = (const char __user *)arg;
 		if (uname) {
-			name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
+			char *name, *pch;
 
+			name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
 			if (IS_ERR(name))
 				return PTR_ERR(name);
 
···
 					return -EINVAL;
 				}
 			}
-		} else {
-			/* Reset the name */
-			name = NULL;
+			/* anon_vma has its own copy */
+			anon_name = anon_vma_name_alloc(name);
+			kfree(name);
+			if (!anon_name)
+				return -ENOMEM;
+
 		}
 
 		mmap_write_lock(mm);
-		error = madvise_set_anon_name(mm, addr, size, name);
+		error = madvise_set_anon_name(mm, addr, size, anon_name);
 		mmap_write_unlock(mm);
-		kfree(name);
+		anon_vma_name_put(anon_name);
 		break;
 	default:
 		error = -EINVAL;
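From userspace the whole feature is driven through prctl(). A minimal sketch of naming an anonymous mapping, assuming a kernel built with CONFIG_ANON_VMA_NAME and uapi headers new enough to define PR_SET_VMA; the region then shows up in /proc/<pid>/maps tagged [anon:myheap], and passing a NULL name clears the tag again:

    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/prctl.h>
    #include <linux/prctl.h>        /* PR_SET_VMA, PR_SET_VMA_ANON_NAME */

    int main(void)
    {
            size_t len = 16 * 4096;
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return 1;
            if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
                      (unsigned long)p, len, (unsigned long)"myheap"))
                    perror("PR_SET_VMA_ANON_NAME");
            return 0;
    }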
+34 -58
mm/madvise.c
 }
 
 #ifdef CONFIG_ANON_VMA_NAME
-static struct anon_vma_name *anon_vma_name_alloc(const char *name)
+struct anon_vma_name *anon_vma_name_alloc(const char *name)
 {
 	struct anon_vma_name *anon_name;
 	size_t count;
···
 	return anon_name;
 }
 
-static void vma_anon_name_free(struct kref *kref)
+void anon_vma_name_free(struct kref *kref)
 {
 	struct anon_vma_name *anon_name =
 			container_of(kref, struct anon_vma_name, kref);
 	kfree(anon_name);
 }
 
-static inline bool has_vma_anon_name(struct vm_area_struct *vma)
+struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
 {
-	return !vma->vm_file && vma->anon_name;
-}
-
-const char *vma_anon_name(struct vm_area_struct *vma)
-{
-	if (!has_vma_anon_name(vma))
-		return NULL;
-
 	mmap_assert_locked(vma->vm_mm);
 
-	return vma->anon_name->name;
-}
+	if (vma->vm_file)
+		return NULL;
 
-void dup_vma_anon_name(struct vm_area_struct *orig_vma,
-		       struct vm_area_struct *new_vma)
-{
-	if (!has_vma_anon_name(orig_vma))
-		return;
-
-	kref_get(&orig_vma->anon_name->kref);
-	new_vma->anon_name = orig_vma->anon_name;
-}
-
-void free_vma_anon_name(struct vm_area_struct *vma)
-{
-	struct anon_vma_name *anon_name;
-
-	if (!has_vma_anon_name(vma))
-		return;
-
-	anon_name = vma->anon_name;
-	vma->anon_name = NULL;
-	kref_put(&anon_name->kref, vma_anon_name_free);
+	return vma->anon_name;
 }
 
 /* mmap_lock should be write-locked */
-static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
+static int replace_anon_vma_name(struct vm_area_struct *vma,
+				 struct anon_vma_name *anon_name)
 {
-	const char *anon_name;
+	struct anon_vma_name *orig_name = anon_vma_name(vma);
 
-	if (!name) {
-		free_vma_anon_name(vma);
+	if (!anon_name) {
+		vma->anon_name = NULL;
+		anon_vma_name_put(orig_name);
 		return 0;
 	}
 
-	anon_name = vma_anon_name(vma);
-	if (anon_name) {
-		/* Same name, nothing to do here */
-		if (!strcmp(name, anon_name))
-			return 0;
+	if (anon_vma_name_eq(orig_name, anon_name))
+		return 0;
 
-		free_vma_anon_name(vma);
-	}
-	vma->anon_name = anon_vma_name_alloc(name);
-	if (!vma->anon_name)
-		return -ENOMEM;
+	vma->anon_name = anon_vma_name_reuse(anon_name);
+	anon_vma_name_put(orig_name);
 
 	return 0;
 }
 #else /* CONFIG_ANON_VMA_NAME */
-static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
+static int replace_anon_vma_name(struct vm_area_struct *vma,
+				 struct anon_vma_name *anon_name)
 {
-	if (name)
+	if (anon_name)
 		return -EINVAL;
 
 	return 0;
···
 /*
  * Update the vm_flags on region of a vma, splitting it or merging it as
  * necessary.  Must be called with mmap_sem held for writing;
+ * Caller should ensure anon_name stability by raising its refcount even when
+ * anon_name belongs to a valid vma because this function might free that vma.
  */
 static int madvise_update_vma(struct vm_area_struct *vma,
 			      struct vm_area_struct **prev, unsigned long start,
 			      unsigned long end, unsigned long new_flags,
-			      const char *name)
+			      struct anon_vma_name *anon_name)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int error;
 	pgoff_t pgoff;
 
-	if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) {
+	if (new_flags == vma->vm_flags &&
+	    anon_vma_name_eq(anon_vma_name(vma), anon_name)) {
 		*prev = vma;
 		return 0;
 	}
···
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx, name);
+			  vma->vm_userfaultfd_ctx, anon_name);
 	if (*prev) {
 		vma = *prev;
 		goto success;
···
 	 */
 	vma->vm_flags = new_flags;
 	if (!vma->vm_file) {
-		error = replace_vma_anon_name(vma, name);
+		error = replace_anon_vma_name(vma, anon_name);
 		if (error)
 			return error;
 	}
···
 			  unsigned long behavior)
 {
 	int error;
+	struct anon_vma_name *anon_name;
 	unsigned long new_flags = vma->vm_flags;
 
 	switch (behavior) {
···
 		break;
 	}
 
+	anon_name = anon_vma_name(vma);
+	anon_vma_name_get(anon_name);
 	error = madvise_update_vma(vma, prev, start, end, new_flags,
-				   vma_anon_name(vma));
+				   anon_name);
+	anon_vma_name_put(anon_name);
 
 out:
 	/*
···
 static int madvise_vma_anon_name(struct vm_area_struct *vma,
 				 struct vm_area_struct **prev,
 				 unsigned long start, unsigned long end,
-				 unsigned long name)
+				 unsigned long anon_name)
 {
 	int error;
···
 		return -EBADF;
 
 	error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
-				   (const char *)name);
+				   (struct anon_vma_name *)anon_name);
 
 	/*
 	 * madvise() returns EAGAIN if kernel resources, such as
···
 }
 
 int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
-			  unsigned long len_in, const char *name)
+			  unsigned long len_in, struct anon_vma_name *anon_name)
 {
 	unsigned long end;
 	unsigned long len;
···
 	if (end == start)
 		return 0;
 
-	return madvise_walk_vmas(mm, start, end, (unsigned long)name,
+	return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name,
 				 madvise_vma_anon_name);
 }
 #endif /* CONFIG_ANON_VMA_NAME */
+29 -13
mm/memfd.c
 static void memfd_tag_pins(struct xa_state *xas)
 {
 	struct page *page;
-	unsigned int tagged = 0;
+	int latency = 0;
+	int cache_count;
 
 	lru_add_drain();
 
 	xas_lock_irq(xas);
 	xas_for_each(xas, page, ULONG_MAX) {
-		if (xa_is_value(page))
-			continue;
-		page = find_subpage(page, xas->xa_index);
-		if (page_count(page) - page_mapcount(page) > 1)
-			xas_set_mark(xas, MEMFD_TAG_PINNED);
+		cache_count = 1;
+		if (!xa_is_value(page) &&
+		    PageTransHuge(page) && !PageHuge(page))
+			cache_count = HPAGE_PMD_NR;
 
-		if (++tagged % XA_CHECK_SCHED)
+		if (!xa_is_value(page) &&
+		    page_count(page) - total_mapcount(page) != cache_count)
+			xas_set_mark(xas, MEMFD_TAG_PINNED);
+		if (cache_count != 1)
+			xas_set(xas, page->index + cache_count);
+
+		latency += cache_count;
+		if (latency < XA_CHECK_SCHED)
 			continue;
+		latency = 0;
 
 		xas_pause(xas);
 		xas_unlock_irq(xas);
···
 	error = 0;
 	for (scan = 0; scan <= LAST_SCAN; scan++) {
-		unsigned int tagged = 0;
+		int latency = 0;
+		int cache_count;
 
 		if (!xas_marked(&xas, MEMFD_TAG_PINNED))
 			break;
···
 		xas_lock_irq(&xas);
 		xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
 			bool clear = true;
-			if (xa_is_value(page))
-				continue;
-			page = find_subpage(page, xas.xa_index);
-			if (page_count(page) - page_mapcount(page) != 1) {
+
+			cache_count = 1;
+			if (!xa_is_value(page) &&
+			    PageTransHuge(page) && !PageHuge(page))
+				cache_count = HPAGE_PMD_NR;
+
+			if (!xa_is_value(page) && cache_count !=
+			    page_count(page) - total_mapcount(page)) {
 				/*
 				 * On the last scan, we clean up all those tags
 				 * we inserted; but make a note that we still
···
 			}
 			if (clear)
 				xas_clear_mark(&xas, MEMFD_TAG_PINNED);
-			if (++tagged % XA_CHECK_SCHED)
+
+			latency += cache_count;
+			if (latency < XA_CHECK_SCHED)
 				continue;
+			latency = 0;
 
 			xas_pause(&xas);
 			xas_unlock_irq(&xas);
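The memfd fix targets a scenario like the sketch below: when shmem backs the file with a transparent huge page, the old per-subpage accounting (page_count() against page_mapcount()) misjudged the compound page as pinned, so adding F_SEAL_WRITE could fail with EBUSY even after every mapping was gone. Illustrative outline only; whether a huge page is actually used depends on /sys/kernel/mm/transparent_hugepage/shmem_enabled, and the 2MB size assumes an x86-64 PMD:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            size_t len = 2UL << 20;         /* one PMD-sized huge page */
            int fd = memfd_create("seal-test", MFD_ALLOW_SEALING);
            void *p;

            if (fd < 0 || ftruncate(fd, len))
                    return 1;

            /* Fault the range in so shmem may allocate a huge page. */
            p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            if (p == MAP_FAILED)
                    return 1;
            memset(p, 0, len);
            munmap(p, len);

            /* No mappings remain, so sealing must succeed; the bug made
             * this step return EBUSY. */
            if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE))
                    perror("F_ADD_SEALS");
            return 0;
    }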
+1 -1
mm/mempolicy.c
 		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
 				 vma->anon_vma, vma->vm_file, pgoff,
 				 new_pol, vma->vm_userfaultfd_ctx,
-				 vma_anon_name(vma));
+				 anon_vma_name(vma));
 		if (prev) {
 			vma = prev;
 			next = vma->vm_next;
+1 -1
mm/mlock.c
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+			  vma->vm_userfaultfd_ctx, anon_vma_name(vma));
 	if (*prev) {
 		vma = *prev;
 		goto success;
+6 -6
mm/mmap.c
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 				   struct file *file, unsigned long vm_flags,
 				   struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-				   const char *anon_name)
+				   struct anon_vma_name *anon_name)
 {
 	/*
 	 * VM_SOFTDIRTY should not prevent from VMA merging, if we
···
 		return 0;
 	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
 		return 0;
-	if (!is_same_vma_anon_name(vma, anon_name))
+	if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
 		return 0;
 	return 1;
 }
···
 		    struct anon_vma *anon_vma, struct file *file,
 		    pgoff_t vm_pgoff,
 		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-		    const char *anon_name)
+		    struct anon_vma_name *anon_name)
 {
 	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
 	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
···
 		    struct anon_vma *anon_vma, struct file *file,
 		    pgoff_t vm_pgoff,
 		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-		    const char *anon_name)
+		    struct anon_vma_name *anon_name)
 {
 	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
 	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
···
 			struct anon_vma *anon_vma, struct file *file,
 			pgoff_t pgoff, struct mempolicy *policy,
 			struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-			const char *anon_name)
+			struct anon_vma_name *anon_name)
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
···
 		return NULL;	/* should never get here */
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
 			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			    vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+			    vma->vm_userfaultfd_ctx, anon_vma_name(vma));
 	if (new_vma) {
 		/*
 		 * Source vma may have been merged into new_vma
+1 -1
mm/mprotect.c
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*pprev = vma_merge(mm, *pprev, start, end, newflags,
 			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			   vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+			   vma->vm_userfaultfd_ctx, anon_vma_name(vma));
 	if (*pprev) {
 		vma = *pprev;
 		VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
+19 -7
tools/testing/selftests/vm/hugepage-mremap.c
 /*
  * hugepage-mremap:
  *
  * Example of remapping huge page memory in a user application using the
- * mremap system call.  Code assumes a hugetlbfs filesystem is mounted
- * at './huge'.  The amount of memory used by this test is decided by a command
- * line argument in MBs. If missing, the default amount is 10MB.
+ * mremap system call.  The path to a file in a hugetlbfs filesystem must
+ * be passed as the last argument to this test.  The amount of memory used
+ * by this test in MBs can optionally be passed as an argument.  If no memory
+ * amount is passed, the default amount is 10MB.
  *
  * To make sure the test triggers pmd sharing and goes through the 'unshare'
  * path in the mremap code use 1GB (1024) or more.
···
 #define DEFAULT_LENGTH_MB 10UL
 #define MB_TO_BYTES(x) (x * 1024 * 1024)
 
-#define FILE_NAME "huge/hugepagefile"
 #define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
 #define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
···
 int main(int argc, char *argv[])
 {
+	size_t length;
+
+	if (argc != 2 && argc != 3) {
+		printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]);
+		exit(1);
+	}
+
 	/* Read memory length as the first arg if valid, otherwise fallback to
-	 * the default length. Any additional args are ignored.
+	 * the default length.
 	 */
-	size_t length = argc > 1 ? (size_t)atoi(argv[1]) : 0UL;
+	if (argc == 3)
+		length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL;
 
 	length = length > 0 ? length : DEFAULT_LENGTH_MB;
 	length = MB_TO_BYTES(length);
 
 	int ret = 0;
 
-	int fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
+	/* last arg is the hugetlb file name */
+	int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755);
 
 	if (fd < 0) {
 		perror("Open failed");
···
 	ret = read_bytes(addr, length);
 
 	munmap(addr, length);
+
+	close(fd);
+	unlink(argv[argc-1]);
 
 	return ret;
 }
+2 -1
tools/testing/selftests/vm/run_vmtests.sh
 echo "-----------------------"
 echo "running hugepage-mremap"
 echo "-----------------------"
-./hugepage-mremap 256
+./hugepage-mremap $mnt/huge_mremap
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
 else
 	echo "[PASS]"
 fi
+rm -f $mnt/huge_mremap
 
 echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
+1
tools/testing/selftests/vm/userfaultfd.c
 #include <signal.h>
 #include <poll.h>
 #include <string.h>
+#include <linux/mman.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>