Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

mm, thp: fix collapsing of hugepages on madvise

If an anonymous mapping is not allowed to fault thp memory and
madvise(MADV_HUGEPAGE) is only applied after the memory has already been
faulted in, khugepaged will never collapse that memory into thp.
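
For illustration, a minimal userspace sequence that hits this case could
look like the sketch below (a hypothetical reproducer, not taken from the
original report; the mapping size and the final busy loop are arbitrary):

#define _GNU_SOURCE
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

#define LEN	(16UL << 20)	/* several PMD-sized regions */

int main(void)
{
	char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Forbid thp for the range, then fault it in with small pages. */
	madvise(p, LEN, MADV_NOHUGEPAGE);
	memset(p, 1, LEN);

	/*
	 * Re-allow thp only after the fault.  Before this fix, the mm was
	 * never registered with khugepaged here, so the range was never
	 * collapsed into hugepages.
	 */
	madvise(p, LEN, MADV_HUGEPAGE);

	for (;;)
		pause();	/* leave time for khugepaged to scan */
}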

This occurs because the madvise(2) handler for thp, hugepage_madvise(),
clears VM_NOHUGEPAGE in a copy of the vma flags held on the stack, and
that copy is not written back to vma->vm_flags until the final action of
madvise_behavior(). As a result, the khugepaged_enter_vma_merge() call
made from hugepage_madvise() is a no-op whenever the vma previously had
VM_NOHUGEPAGE set.
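
To make the ordering concrete, the following toy userspace model (purely
hypothetical; the struct and helpers only mimic the kernel names) shows
why a check against vma->vm_flags inside the madvise path sees the stale
value, while a check against the flags passed in by the caller does not:

#include <stdio.h>

#define VM_NOHUGEPAGE	0x1UL

struct vma {
	unsigned long vm_flags;
};

/* Old behaviour: decide based on the flags still stored in the vma. */
static void enter_vma_merge_old(struct vma *vma)
{
	if (vma->vm_flags & VM_NOHUGEPAGE)
		printf("old: no-op, stale VM_NOHUGEPAGE still visible\n");
	else
		printf("old: mm registered with khugepaged\n");
}

/* Fixed behaviour: the caller passes the flags it is about to install. */
static void enter_vma_merge_new(struct vma *vma, unsigned long vm_flags)
{
	(void)vma;
	if (vm_flags & VM_NOHUGEPAGE)
		printf("new: no-op\n");
	else
		printf("new: mm registered with khugepaged\n");
}

int main(void)
{
	struct vma vma = { .vm_flags = VM_NOHUGEPAGE };

	/* madvise_behavior() clears the bit only in a stack copy ... */
	unsigned long new_flags = vma.vm_flags & ~VM_NOHUGEPAGE;

	enter_vma_merge_old(&vma);		/* still sees VM_NOHUGEPAGE */
	enter_vma_merge_new(&vma, new_flags);	/* sees the updated flags */

	/* ... and only writes it back as its final action. */
	vma.vm_flags = new_flags;
	return 0;
}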

Fix this by passing the correct vma flags to the khugepaged mm slot
handler. There's no chance khugepaged can run on this vma until after
madvise_behavior() returns since we hold mm->mmap_sem.

It would be possible to clear VM_NOHUGEPAGE directly from vma->vm_flags
in hugepage_madvise(), but I didn't want to introduce special case
behavior into madvise_behavior(). I think it's best to just let it
always set vma->vm_flags itself.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Suleiman Souhlal <suleiman@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by David Rientjes, committed by Linus Torvalds
6d50e60c 47f29df7

3 files changed, +20 -16
include/linux/khugepaged.h (+10 -7)

···
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
-extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
+extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+				       unsigned long vm_flags);
 
 #define khugepaged_enabled() \
 	(transparent_hugepage_flags & \
···
 	__khugepaged_exit(mm);
 }
 
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
-		     (khugepaged_req_madv() &&
-		      vma->vm_flags & VM_HUGEPAGE)) &&
-		    !(vma->vm_flags & VM_NOHUGEPAGE))
+		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
+		    !(vm_flags & VM_NOHUGEPAGE))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;
···
 static inline void khugepaged_exit(struct mm_struct *mm)
 {
 }
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	return 0;
 }
-static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+					     unsigned long vm_flags)
 {
 	return 0;
 }
mm/huge_memory.c (+6 -5)

···
 		return VM_FAULT_FALLBACK;
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
-	if (unlikely(khugepaged_enter(vma)))
+	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 		return VM_FAULT_OOM;
 	if (!(flags & FAULT_FLAG_WRITE) &&
 			transparent_hugepage_use_zero_page()) {
···
 		 * register it here without waiting a page fault that
 		 * may not happen any time soon.
 		 */
-		if (unlikely(khugepaged_enter_vma_merge(vma)))
+		if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
 			return -ENOMEM;
 		break;
 	case MADV_NOHUGEPAGE:
···
 	return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+			       unsigned long vm_flags)
 {
 	unsigned long hstart, hend;
 	if (!vma->anon_vma)
···
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
+	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
-		return khugepaged_enter(vma);
+		return khugepaged_enter(vma, vm_flags);
 	return 0;
 }
 
mm/mmap.c (+4 -4)

···
 					end, prev->vm_pgoff, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(prev);
+		khugepaged_enter_vma_merge(prev, vm_flags);
 		return prev;
 	}
 
···
 					next->vm_pgoff - pglen, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(area);
+		khugepaged_enter_vma_merge(area, vm_flags);
 		return area;
 	}
 
···
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }
···
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }