Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

userfaultfd: non-cooperative: add event for memory unmaps

When a non-cooperative userfaultfd monitor copies pages in the
background, it may encounter regions that were already unmapped.
Addition of UFFD_EVENT_UNMAP allows the uffd monitor to precisely track
changes in the virtual memory layout.

Since there might be different uffd contexts for the affected VMAs, we
should first create a temporary representation of the unmap event for
each uffd context and then deliver them one by one to the appropriate
userfault file descriptors.

The event notification occurs after the mmap_sem has been released.

[arnd@arndb.de: fix nommu build]
Link: http://lkml.kernel.org/r/20170203165141.3665284-1-arnd@arndb.de
[mhocko@suse.com: fix nommu build]
Link: http://lkml.kernel.org/r/20170202091503.GA22823@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/1485542673-24387-3-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Mike Rapoport and committed by
Linus Torvalds
897ab3e0 846b1a0f

+160 -45
+1 -1
arch/mips/kernel/vdso.c
··· 111 111 base = mmap_region(NULL, STACK_TOP, PAGE_SIZE, 112 112 VM_READ|VM_WRITE|VM_EXEC| 113 113 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 114 - 0); 114 + 0, NULL); 115 115 if (IS_ERR_VALUE(base)) { 116 116 ret = base; 117 117 goto out;
+1 -1
arch/tile/mm/elf.c
··· 143 143 unsigned long addr = MEM_USER_INTRPT; 144 144 addr = mmap_region(NULL, addr, INTRPT_SIZE, 145 145 VM_READ|VM_EXEC| 146 - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0); 146 + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0, NULL); 147 147 if (addr > (unsigned long) -PAGE_SIZE) 148 148 retval = (int) addr; 149 149 }
+1 -1
arch/x86/entry/vdso/vma.c
··· 186 186 187 187 if (IS_ERR(vma)) { 188 188 ret = PTR_ERR(vma); 189 - do_munmap(mm, text_start, image->size); 189 + do_munmap(mm, text_start, image->size, NULL); 190 190 } else { 191 191 current->mm->context.vdso = (void __user *)text_start; 192 192 current->mm->context.vdso_image = image;
+2 -2
arch/x86/mm/mpx.c
··· 51 51 52 52 down_write(&mm->mmap_sem); 53 53 addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE, 54 - MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate); 54 + MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL); 55 55 up_write(&mm->mmap_sem); 56 56 if (populate) 57 57 mm_populate(addr, populate); ··· 893 893 * avoid recursion, do_munmap() will check whether it comes 894 894 * from one bounds table through VM_MPX flag. 895 895 */ 896 - return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); 896 + return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL); 897 897 } 898 898 899 899 static int try_unmap_single_bt(struct mm_struct *mm,
+1 -1
fs/aio.c
··· 512 512 513 513 ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, 514 514 PROT_READ | PROT_WRITE, 515 - MAP_SHARED, 0, &unused); 515 + MAP_SHARED, 0, &unused, NULL); 516 516 up_write(&mm->mmap_sem); 517 517 if (IS_ERR((void *)ctx->mmap_base)) { 518 518 ctx->mmap_size = 0;
+2 -2
fs/proc/vmcore.c
··· 388 388 } 389 389 return 0; 390 390 fail: 391 - do_munmap(vma->vm_mm, from, len); 391 + do_munmap(vma->vm_mm, from, len, NULL); 392 392 return -EAGAIN; 393 393 } 394 394 ··· 481 481 482 482 return 0; 483 483 fail: 484 - do_munmap(vma->vm_mm, vma->vm_start, len); 484 + do_munmap(vma->vm_mm, vma->vm_start, len, NULL); 485 485 return -EAGAIN; 486 486 } 487 487 #else
+65
fs/userfaultfd.c
··· 71 71 struct list_head list; 72 72 }; 73 73 74 + struct userfaultfd_unmap_ctx { 75 + struct userfaultfd_ctx *ctx; 76 + unsigned long start; 77 + unsigned long end; 78 + struct list_head list; 79 + }; 80 + 74 81 struct userfaultfd_wait_queue { 75 82 struct uffd_msg msg; 76 83 wait_queue_t wq; ··· 714 707 userfaultfd_event_wait_completion(ctx, &ewq); 715 708 716 709 down_read(&mm->mmap_sem); 710 + } 711 + 712 + static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, 713 + unsigned long start, unsigned long end) 714 + { 715 + struct userfaultfd_unmap_ctx *unmap_ctx; 716 + 717 + list_for_each_entry(unmap_ctx, unmaps, list) 718 + if (unmap_ctx->ctx == ctx && unmap_ctx->start == start && 719 + unmap_ctx->end == end) 720 + return true; 721 + 722 + return false; 723 + } 724 + 725 + int userfaultfd_unmap_prep(struct vm_area_struct *vma, 726 + unsigned long start, unsigned long end, 727 + struct list_head *unmaps) 728 + { 729 + for ( ; vma && vma->vm_start < end; vma = vma->vm_next) { 730 + struct userfaultfd_unmap_ctx *unmap_ctx; 731 + struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; 732 + 733 + if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) || 734 + has_unmap_ctx(ctx, unmaps, start, end)) 735 + continue; 736 + 737 + unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL); 738 + if (!unmap_ctx) 739 + return -ENOMEM; 740 + 741 + userfaultfd_ctx_get(ctx); 742 + unmap_ctx->ctx = ctx; 743 + unmap_ctx->start = start; 744 + unmap_ctx->end = end; 745 + list_add_tail(&unmap_ctx->list, unmaps); 746 + } 747 + 748 + return 0; 749 + } 750 + 751 + void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) 752 + { 753 + struct userfaultfd_unmap_ctx *ctx, *n; 754 + struct userfaultfd_wait_queue ewq; 755 + 756 + list_for_each_entry_safe(ctx, n, uf, list) { 757 + msg_init(&ewq.msg); 758 + 759 + ewq.msg.event = UFFD_EVENT_UNMAP; 760 + ewq.msg.arg.remove.start = ctx->start; 761 + ewq.msg.arg.remove.end = ctx->end; 762 + 763 + 
userfaultfd_event_wait_completion(ctx->ctx, &ewq); 764 + 765 + list_del(&ctx->list); 766 + kfree(ctx); 767 + } 717 768 } 718 769 719 770 static int userfaultfd_release(struct inode *inode, struct file *file)
+9 -5
include/linux/mm.h
··· 2090 2090 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 2091 2091 2092 2092 extern unsigned long mmap_region(struct file *file, unsigned long addr, 2093 - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); 2093 + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, 2094 + struct list_head *uf); 2094 2095 extern unsigned long do_mmap(struct file *file, unsigned long addr, 2095 2096 unsigned long len, unsigned long prot, unsigned long flags, 2096 - vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate); 2097 - extern int do_munmap(struct mm_struct *, unsigned long, size_t); 2097 + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, 2098 + struct list_head *uf); 2099 + extern int do_munmap(struct mm_struct *, unsigned long, size_t, 2100 + struct list_head *uf); 2098 2101 2099 2102 static inline unsigned long 2100 2103 do_mmap_pgoff(struct file *file, unsigned long addr, 2101 2104 unsigned long len, unsigned long prot, unsigned long flags, 2102 - unsigned long pgoff, unsigned long *populate) 2105 + unsigned long pgoff, unsigned long *populate, 2106 + struct list_head *uf) 2103 2107 { 2104 - return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate); 2108 + return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf); 2105 2109 } 2106 2110 2107 2111 #ifdef CONFIG_MMU
+18
include/linux/userfaultfd_k.h
··· 66 66 unsigned long start, 67 67 unsigned long end); 68 68 69 + extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, 70 + unsigned long start, unsigned long end, 71 + struct list_head *uf); 72 + extern void userfaultfd_unmap_complete(struct mm_struct *mm, 73 + struct list_head *uf); 74 + 69 75 #else /* CONFIG_USERFAULTFD */ 70 76 71 77 /* mm helpers */ ··· 122 116 struct vm_area_struct **prev, 123 117 unsigned long start, 124 118 unsigned long end) 119 + { 120 + } 121 + 122 + static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, 123 + unsigned long start, unsigned long end, 124 + struct list_head *uf) 125 + { 126 + return 0; 127 + } 128 + 129 + static inline void userfaultfd_unmap_complete(struct mm_struct *mm, 130 + struct list_head *uf) 125 131 { 126 132 } 127 133 #endif /* CONFIG_USERFAULTFD */
+3
include/uapi/linux/userfaultfd.h
··· 21 21 #define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ 22 22 UFFD_FEATURE_EVENT_REMAP | \ 23 23 UFFD_FEATURE_EVENT_REMOVE | \ 24 + UFFD_FEATURE_EVENT_UNMAP | \ 24 25 UFFD_FEATURE_MISSING_HUGETLBFS | \ 25 26 UFFD_FEATURE_MISSING_SHMEM) 26 27 #define UFFD_API_IOCTLS \ ··· 111 110 #define UFFD_EVENT_FORK 0x13 112 111 #define UFFD_EVENT_REMAP 0x14 113 112 #define UFFD_EVENT_REMOVE 0x15 113 + #define UFFD_EVENT_UNMAP 0x16 114 114 115 115 /* flags for UFFD_EVENT_PAGEFAULT */ 116 116 #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ ··· 160 158 #define UFFD_FEATURE_EVENT_REMOVE (1<<3) 161 159 #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) 162 160 #define UFFD_FEATURE_MISSING_SHMEM (1<<5) 161 + #define UFFD_FEATURE_EVENT_UNMAP (1<<6) 163 162 __u64 features; 164 163 165 164 __u64 ioctls;
+4 -4
ipc/shm.c
··· 1222 1222 goto invalid; 1223 1223 } 1224 1224 1225 - addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); 1225 + addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL); 1226 1226 *raddr = addr; 1227 1227 err = 0; 1228 1228 if (IS_ERR_VALUE(addr)) ··· 1329 1329 */ 1330 1330 file = vma->vm_file; 1331 1331 size = i_size_read(file_inode(vma->vm_file)); 1332 - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1332 + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1333 1333 /* 1334 1334 * We discovered the size of the shm segment, so 1335 1335 * break out of here and fall through to the next ··· 1356 1356 if ((vma->vm_ops == &shm_vm_ops) && 1357 1357 ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) && 1358 1358 (vma->vm_file == file)) 1359 - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1359 + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1360 1360 vma = next; 1361 1361 } 1362 1362 ··· 1365 1365 * given 1366 1366 */ 1367 1367 if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { 1368 - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1368 + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1369 1369 retval = 0; 1370 1370 } 1371 1371
+31 -15
mm/mmap.c
··· 176 176 return next; 177 177 } 178 178 179 - static int do_brk(unsigned long addr, unsigned long len); 179 + static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf); 180 180 181 181 SYSCALL_DEFINE1(brk, unsigned long, brk) 182 182 { ··· 185 185 struct mm_struct *mm = current->mm; 186 186 unsigned long min_brk; 187 187 bool populate; 188 + LIST_HEAD(uf); 188 189 189 190 if (down_write_killable(&mm->mmap_sem)) 190 191 return -EINTR; ··· 223 222 224 223 /* Always allow shrinking brk. */ 225 224 if (brk <= mm->brk) { 226 - if (!do_munmap(mm, newbrk, oldbrk-newbrk)) 225 + if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf)) 227 226 goto set_brk; 228 227 goto out; 229 228 } ··· 233 232 goto out; 234 233 235 234 /* Ok, looks good - let it rip. */ 236 - if (do_brk(oldbrk, newbrk-oldbrk) < 0) 235 + if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0) 237 236 goto out; 238 237 239 238 set_brk: 240 239 mm->brk = brk; 241 240 populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0; 242 241 up_write(&mm->mmap_sem); 242 + userfaultfd_unmap_complete(mm, &uf); 243 243 if (populate) 244 244 mm_populate(oldbrk, newbrk - oldbrk); 245 245 return brk; ··· 1306 1304 unsigned long do_mmap(struct file *file, unsigned long addr, 1307 1305 unsigned long len, unsigned long prot, 1308 1306 unsigned long flags, vm_flags_t vm_flags, 1309 - unsigned long pgoff, unsigned long *populate) 1307 + unsigned long pgoff, unsigned long *populate, 1308 + struct list_head *uf) 1310 1309 { 1311 1310 struct mm_struct *mm = current->mm; 1312 1311 int pkey = 0; ··· 1450 1447 vm_flags |= VM_NORESERVE; 1451 1448 } 1452 1449 1453 - addr = mmap_region(file, addr, len, vm_flags, pgoff); 1450 + addr = mmap_region(file, addr, len, vm_flags, pgoff, uf); 1454 1451 if (!IS_ERR_VALUE(addr) && 1455 1452 ((vm_flags & VM_LOCKED) || 1456 1453 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) ··· 1586 1583 } 1587 1584 1588 1585 unsigned long mmap_region(struct file *file, unsigned long addr, 1589 - 
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff) 1586 + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, 1587 + struct list_head *uf) 1590 1588 { 1591 1589 struct mm_struct *mm = current->mm; 1592 1590 struct vm_area_struct *vma, *prev; ··· 1613 1609 /* Clear old maps */ 1614 1610 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link, 1615 1611 &rb_parent)) { 1616 - if (do_munmap(mm, addr, len)) 1612 + if (do_munmap(mm, addr, len, uf)) 1617 1613 return -ENOMEM; 1618 1614 } 1619 1615 ··· 2583 2579 * work. This now handles partial unmappings. 2584 2580 * Jeremy Fitzhardinge <jeremy@goop.org> 2585 2581 */ 2586 - int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) 2582 + int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, 2583 + struct list_head *uf) 2587 2584 { 2588 2585 unsigned long end; 2589 2586 struct vm_area_struct *vma, *prev, *last; ··· 2607 2602 end = start + len; 2608 2603 if (vma->vm_start >= end) 2609 2604 return 0; 2605 + 2606 + if (uf) { 2607 + int error = userfaultfd_unmap_prep(vma, start, end, uf); 2608 + 2609 + if (error) 2610 + return error; 2611 + } 2610 2612 2611 2613 /* 2612 2614 * If we need to split any vma, do it now to save pain later. ··· 2680 2668 { 2681 2669 int ret; 2682 2670 struct mm_struct *mm = current->mm; 2671 + LIST_HEAD(uf); 2683 2672 2684 2673 if (down_write_killable(&mm->mmap_sem)) 2685 2674 return -EINTR; 2686 2675 2687 - ret = do_munmap(mm, start, len); 2676 + ret = do_munmap(mm, start, len, &uf); 2688 2677 up_write(&mm->mmap_sem); 2678 + userfaultfd_unmap_complete(mm, &uf); 2689 2679 return ret; 2690 2680 } 2691 2681 EXPORT_SYMBOL(vm_munmap); ··· 2787 2773 2788 2774 file = get_file(vma->vm_file); 2789 2775 ret = do_mmap_pgoff(vma->vm_file, start, size, 2790 - prot, flags, pgoff, &populate); 2776 + prot, flags, pgoff, &populate, NULL); 2791 2777 fput(file); 2792 2778 out: 2793 2779 up_write(&mm->mmap_sem); ··· 2813 2799 * anonymous maps. 
eventually we may be able to do some 2814 2800 * brk-specific accounting here. 2815 2801 */ 2816 - static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) 2802 + static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf) 2817 2803 { 2818 2804 struct mm_struct *mm = current->mm; 2819 2805 struct vm_area_struct *vma, *prev; ··· 2852 2838 */ 2853 2839 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link, 2854 2840 &rb_parent)) { 2855 - if (do_munmap(mm, addr, len)) 2841 + if (do_munmap(mm, addr, len, uf)) 2856 2842 return -ENOMEM; 2857 2843 } 2858 2844 ··· 2899 2885 return 0; 2900 2886 } 2901 2887 2902 - static int do_brk(unsigned long addr, unsigned long len) 2888 + static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf) 2903 2889 { 2904 - return do_brk_flags(addr, len, 0); 2890 + return do_brk_flags(addr, len, 0, uf); 2905 2891 } 2906 2892 2907 2893 int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags) ··· 2909 2895 struct mm_struct *mm = current->mm; 2910 2896 int ret; 2911 2897 bool populate; 2898 + LIST_HEAD(uf); 2912 2899 2913 2900 if (down_write_killable(&mm->mmap_sem)) 2914 2901 return -EINTR; 2915 2902 2916 - ret = do_brk_flags(addr, len, flags); 2903 + ret = do_brk_flags(addr, len, flags, &uf); 2917 2904 populate = ((mm->def_flags & VM_LOCKED) != 0); 2918 2905 up_write(&mm->mmap_sem); 2906 + userfaultfd_unmap_complete(mm, &uf); 2919 2907 if (populate && !ret) 2920 2908 mm_populate(addr, len); 2921 2909 return ret;
+14 -9
mm/mremap.c
··· 252 252 static unsigned long move_vma(struct vm_area_struct *vma, 253 253 unsigned long old_addr, unsigned long old_len, 254 254 unsigned long new_len, unsigned long new_addr, 255 - bool *locked, struct vm_userfaultfd_ctx *uf) 255 + bool *locked, struct vm_userfaultfd_ctx *uf, 256 + struct list_head *uf_unmap) 256 257 { 257 258 struct mm_struct *mm = vma->vm_mm; 258 259 struct vm_area_struct *new_vma; ··· 342 341 if (unlikely(vma->vm_flags & VM_PFNMAP)) 343 342 untrack_pfn_moved(vma); 344 343 345 - if (do_munmap(mm, old_addr, old_len) < 0) { 344 + if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) { 346 345 /* OOM: unable to split vma, just get accounts right */ 347 346 vm_unacct_memory(excess >> PAGE_SHIFT); 348 347 excess = 0; ··· 418 417 419 418 static unsigned long mremap_to(unsigned long addr, unsigned long old_len, 420 419 unsigned long new_addr, unsigned long new_len, bool *locked, 421 - struct vm_userfaultfd_ctx *uf) 420 + struct vm_userfaultfd_ctx *uf, 421 + struct list_head *uf_unmap) 422 422 { 423 423 struct mm_struct *mm = current->mm; 424 424 struct vm_area_struct *vma; ··· 437 435 if (addr + old_len > new_addr && new_addr + new_len > addr) 438 436 goto out; 439 437 440 - ret = do_munmap(mm, new_addr, new_len); 438 + ret = do_munmap(mm, new_addr, new_len, NULL); 441 439 if (ret) 442 440 goto out; 443 441 444 442 if (old_len >= new_len) { 445 - ret = do_munmap(mm, addr+new_len, old_len - new_len); 443 + ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap); 446 444 if (ret && old_len != new_len) 447 445 goto out; 448 446 old_len = new_len; ··· 464 462 if (offset_in_page(ret)) 465 463 goto out1; 466 464 467 - ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf); 465 + ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf, 466 + uf_unmap); 468 467 if (!(offset_in_page(ret))) 469 468 goto out; 470 469 out1: ··· 505 502 unsigned long charged = 0; 506 503 bool locked = false; 507 504 struct vm_userfaultfd_ctx uf = 
NULL_VM_UFFD_CTX; 505 + LIST_HEAD(uf_unmap); 508 506 509 507 if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) 510 508 return ret; ··· 532 528 533 529 if (flags & MREMAP_FIXED) { 534 530 ret = mremap_to(addr, old_len, new_addr, new_len, 535 - &locked, &uf); 531 + &locked, &uf, &uf_unmap); 536 532 goto out; 537 533 } 538 534 ··· 542 538 * do_munmap does all the needed commit accounting 543 539 */ 544 540 if (old_len >= new_len) { 545 - ret = do_munmap(mm, addr+new_len, old_len - new_len); 541 + ret = do_munmap(mm, addr+new_len, old_len - new_len, &uf_unmap); 546 542 if (ret && old_len != new_len) 547 543 goto out; 548 544 ret = addr; ··· 602 598 } 603 599 604 600 ret = move_vma(vma, addr, old_len, new_len, new_addr, 605 - &locked, &uf); 601 + &locked, &uf, &uf_unmap); 606 602 } 607 603 out: 608 604 if (offset_in_page(ret)) { ··· 613 609 if (locked && new_len > old_len) 614 610 mm_populate(new_addr + old_len, new_len - old_len); 615 611 mremap_userfaultfd_complete(&uf, addr, new_addr, old_len); 612 + userfaultfd_unmap_complete(mm, &uf_unmap); 616 613 return ret; 617 614 }
+4 -3
mm/nommu.c
··· 1205 1205 unsigned long flags, 1206 1206 vm_flags_t vm_flags, 1207 1207 unsigned long pgoff, 1208 - unsigned long *populate) 1208 + unsigned long *populate, 1209 + struct list_head *uf) 1209 1210 { 1210 1211 struct vm_area_struct *vma; 1211 1212 struct vm_region *region; ··· 1578 1577 * - under NOMMU conditions the chunk to be unmapped must be backed by a single 1579 1578 * VMA, though it need not cover the whole VMA 1580 1579 */ 1581 - int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) 1580 + int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) 1582 1581 { 1583 1582 struct vm_area_struct *vma; 1584 1583 unsigned long end; ··· 1644 1643 int ret; 1645 1644 1646 1645 down_write(&mm->mmap_sem); 1647 - ret = do_munmap(mm, addr, len); 1646 + ret = do_munmap(mm, addr, len, NULL); 1648 1647 up_write(&mm->mmap_sem); 1649 1648 return ret; 1650 1649 }
+4 -1
mm/util.c
··· 11 11 #include <linux/mman.h> 12 12 #include <linux/hugetlb.h> 13 13 #include <linux/vmalloc.h> 14 + #include <linux/userfaultfd_k.h> 14 15 15 16 #include <asm/sections.h> 16 17 #include <linux/uaccess.h> ··· 298 297 unsigned long ret; 299 298 struct mm_struct *mm = current->mm; 300 299 unsigned long populate; 300 + LIST_HEAD(uf); 301 301 302 302 ret = security_mmap_file(file, prot, flag); 303 303 if (!ret) { 304 304 if (down_write_killable(&mm->mmap_sem)) 305 305 return -EINTR; 306 306 ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, 307 - &populate); 307 + &populate, &uf); 308 308 up_write(&mm->mmap_sem); 309 + userfaultfd_unmap_complete(mm, &uf); 309 310 if (populate) 310 311 mm_populate(ret, populate); 311 312 }