Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: mlock: add mlock flags to enable VM_LOCKONFAULT usage

The previous patch introduced a flag that specified pages in a VMA should
be placed on the unevictable LRU, but they should not be made present when
the area is created. This patch adds the ability to set this state via
the new mlock system calls.

We add MLOCK_ONFAULT for mlock2 and MCL_ONFAULT for mlockall.
MLOCK_ONFAULT will set the VM_LOCKONFAULT modifier for VM_LOCKED.
MCL_ONFAULT should be used as a modifier to the two other mlockall flags.
When used with MCL_CURRENT, all current mappings will be marked with
VM_LOCKED | VM_LOCKONFAULT. When used with MCL_FUTURE, the mm->def_flags
will be marked with VM_LOCKED | VM_LOCKONFAULT. When used with both
MCL_CURRENT and MCL_FUTURE, all current mappings and mm->def_flags will be
marked with VM_LOCKED | VM_LOCKONFAULT.

Prior to this patch, mlockall() would unconditionally clear the
mm->def_flags any time it was called without MCL_FUTURE. This behavior is
maintained after adding MCL_ONFAULT. If a call to mlockall(MCL_FUTURE) is
followed by mlockall(MCL_CURRENT), the mm->def_flags will be cleared and
new VMAs will be unlocked. This remains true with or without MCL_ONFAULT
in either mlockall() invocation.

munlock() will unconditionally clear both vma flags. munlockall()
unconditionally clears both VMA flags on all VMAs and in the mm->def_flags
field.

Signed-off-by: Eric B Munson <emunson@akamai.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Eric B Munson and committed by
Linus Torvalds
b0f205c2 de60f5f1

+65 -13
+3
arch/alpha/include/uapi/asm/mman.h
··· 37 37 38 38 #define MCL_CURRENT 8192 /* lock all currently mapped pages */ 39 39 #define MCL_FUTURE 16384 /* lock all additions to address space */ 40 + #define MCL_ONFAULT 32768 /* lock all pages that are faulted in */ 41 + 42 + #define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ 40 43 41 44 #define MADV_NORMAL 0 /* no further special treatment */ 42 45 #define MADV_RANDOM 1 /* expect random page references */
+6
arch/mips/include/uapi/asm/mman.h
··· 61 61 */ 62 62 #define MCL_CURRENT 1 /* lock all current mappings */ 63 63 #define MCL_FUTURE 2 /* lock all future mappings */ 64 + #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ 65 + 66 + /* 67 + * Flags for mlock 68 + */ 69 + #define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ 64 70 65 71 #define MADV_NORMAL 0 /* no further special treatment */ 66 72 #define MADV_RANDOM 1 /* expect random page references */
+3
arch/parisc/include/uapi/asm/mman.h
··· 31 31 32 32 #define MCL_CURRENT 1 /* lock all current mappings */ 33 33 #define MCL_FUTURE 2 /* lock all future mappings */ 34 + #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ 35 + 36 + #define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ 34 37 35 38 #define MADV_NORMAL 0 /* no further special treatment */ 36 39 #define MADV_RANDOM 1 /* expect random page references */
+1
arch/powerpc/include/uapi/asm/mman.h
··· 22 22 23 23 #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ 24 24 #define MCL_FUTURE 0x4000 /* lock all additions to address space */ 25 + #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ 25 26 26 27 #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ 27 28 #define MAP_NONBLOCK 0x10000 /* do not block on IO */
+1
arch/sparc/include/uapi/asm/mman.h
··· 17 17 18 18 #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ 19 19 #define MCL_FUTURE 0x4000 /* lock all additions to address space */ 20 + #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ 20 21 21 22 #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ 22 23 #define MAP_NONBLOCK 0x10000 /* do not block on IO */
+1
arch/tile/include/uapi/asm/mman.h
··· 36 36 */ 37 37 #define MCL_CURRENT 1 /* lock all current mappings */ 38 38 #define MCL_FUTURE 2 /* lock all future mappings */ 39 + #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ 39 40 40 41 41 42 #endif /* _ASM_TILE_MMAN_H */
+6
arch/xtensa/include/uapi/asm/mman.h
··· 74 74 */ 75 75 #define MCL_CURRENT 1 /* lock all current mappings */ 76 76 #define MCL_FUTURE 2 /* lock all future mappings */ 77 + #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ 78 + 79 + /* 80 + * Flags for mlock 81 + */ 82 + #define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ 77 83 78 84 #define MADV_NORMAL 0 /* no further special treatment */ 79 85 #define MADV_RANDOM 1 /* expect random page references */
+5
include/uapi/asm-generic/mman-common.h
··· 25 25 # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ 26 26 #endif 27 27 28 + /* 29 + * Flags for mlock 30 + */ 31 + #define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ 32 + 28 33 #define MS_ASYNC 1 /* sync memory asynchronously */ 29 34 #define MS_INVALIDATE 2 /* invalidate the caches */ 30 35 #define MS_SYNC 4 /* synchronous memory sync */
+1
include/uapi/asm-generic/mman.h
··· 17 17 18 18 #define MCL_CURRENT 1 /* lock all current mappings */ 19 19 #define MCL_FUTURE 2 /* lock all future mappings */ 20 + #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ 20 21 21 22 #endif /* __ASM_GENERIC_MMAN_H */
+38 -13
mm/mlock.c
··· 506 506 507 507 if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || 508 508 is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) 509 - goto out; /* don't set VM_LOCKED, don't count */ 509 + /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ 510 + goto out; 510 511 511 512 pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); 512 513 *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, ··· 578 577 prev = vma; 579 578 580 579 for (nstart = start ; ; ) { 581 - vm_flags_t newflags = vma->vm_flags & ~VM_LOCKED; 580 + vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; 582 581 583 582 newflags |= flags; 584 583 ··· 647 646 648 647 SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) 649 648 { 650 - if (flags) 649 + vm_flags_t vm_flags = VM_LOCKED; 650 + 651 + if (flags & ~MLOCK_ONFAULT) 651 652 return -EINVAL; 652 653 653 - return do_mlock(start, len, VM_LOCKED); 654 + if (flags & MLOCK_ONFAULT) 655 + vm_flags |= VM_LOCKONFAULT; 656 + 657 + return do_mlock(start, len, vm_flags); 654 658 } 655 659 656 660 SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) ··· 672 666 return ret; 673 667 } 674 668 669 + /* 670 + * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall) 671 + * and translate into the appropriate modifications to mm->def_flags and/or the 672 + * flags for all current VMAs. 673 + * 674 + * There are a couple of subtleties with this. If mlockall() is called multiple 675 + * times with different flags, the values do not necessarily stack. If mlockall 676 + * is called once including the MCL_FUTURE flag and then a second time without 677 + * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags. 
678 + */ 675 679 static int apply_mlockall_flags(int flags) 676 680 { 677 681 struct vm_area_struct * vma, * prev = NULL; 682 + vm_flags_t to_add = 0; 678 683 679 - if (flags & MCL_FUTURE) 684 + current->mm->def_flags &= VM_LOCKED_CLEAR_MASK; 685 + if (flags & MCL_FUTURE) { 680 686 current->mm->def_flags |= VM_LOCKED; 681 - else 682 - current->mm->def_flags &= ~VM_LOCKED; 683 687 684 - if (flags == MCL_FUTURE) 685 - goto out; 688 + if (flags & MCL_ONFAULT) 689 + current->mm->def_flags |= VM_LOCKONFAULT; 690 + 691 + if (!(flags & MCL_CURRENT)) 692 + goto out; 693 + } 694 + 695 + if (flags & MCL_CURRENT) { 696 + to_add |= VM_LOCKED; 697 + if (flags & MCL_ONFAULT) 698 + to_add |= VM_LOCKONFAULT; 699 + } 686 700 687 701 for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { 688 702 vm_flags_t newflags; 689 703 690 - newflags = vma->vm_flags & ~VM_LOCKED; 691 - if (flags & MCL_CURRENT) 692 - newflags |= VM_LOCKED; 704 + newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; 705 + newflags |= to_add; 693 706 694 707 /* Ignore errors */ 695 708 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); ··· 723 698 unsigned long lock_limit; 724 699 int ret; 725 700 726 - if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) 701 + if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT))) 727 702 return -EINVAL; 728 703 729 704 if (!can_do_mlock())