Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm, thp: Do not make page table dirty unconditionally in touch_p[mu]d()

Currently, we unconditionally make the page table entry dirty in touch_pmd().
This may result in a false positive from can_follow_write_pmd().

We can avoid this situation by only making the page table entry
dirty when the caller asks for write access -- FOLL_WRITE.

The patch also changes touch_pud() in the same way.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Kirill A. Shutemov and committed by
Linus Torvalds
a8f97366 4fbd8d19

+13 -23
+13 -23
mm/huge_memory.c
··· 842 842 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ 843 843 844 844 static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, 845 - pmd_t *pmd) 845 + pmd_t *pmd, int flags) 846 846 { 847 847 pmd_t _pmd; 848 848 849 - /* 850 - * We should set the dirty bit only for FOLL_WRITE but for now 851 - * the dirty bit in the pmd is meaningless. And if the dirty 852 - * bit will become meaningful and we'll only set it with 853 - * FOLL_WRITE, an atomic set_bit will be required on the pmd to 854 - * set the young bit, instead of the current set_pmd_at. 855 - */ 856 - _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); 849 + _pmd = pmd_mkyoung(*pmd); 850 + if (flags & FOLL_WRITE) 851 + _pmd = pmd_mkdirty(_pmd); 857 852 if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, 858 - pmd, _pmd, 1)) 853 + pmd, _pmd, flags & FOLL_WRITE)) 859 854 update_mmu_cache_pmd(vma, addr, pmd); 860 855 } 861 856 ··· 879 884 return NULL; 880 885 881 886 if (flags & FOLL_TOUCH) 882 - touch_pmd(vma, addr, pmd); 887 + touch_pmd(vma, addr, pmd, flags); 883 888 884 889 /* 885 890 * device mapped pages can only be returned if the ··· 990 995 991 996 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 992 997 static void touch_pud(struct vm_area_struct *vma, unsigned long addr, 993 - pud_t *pud) 998 + pud_t *pud, int flags) 994 999 { 995 1000 pud_t _pud; 996 1001 997 - /* 998 - * We should set the dirty bit only for FOLL_WRITE but for now 999 - * the dirty bit in the pud is meaningless. And if the dirty 1000 - * bit will become meaningful and we'll only set it with 1001 - * FOLL_WRITE, an atomic set_bit will be required on the pud to 1002 - * set the young bit, instead of the current set_pud_at. 
1003 - */ 1004 - _pud = pud_mkyoung(pud_mkdirty(*pud)); 1002 + _pud = pud_mkyoung(*pud); 1003 + if (flags & FOLL_WRITE) 1004 + _pud = pud_mkdirty(_pud); 1005 1005 if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, 1006 - pud, _pud, 1)) 1006 + pud, _pud, flags & FOLL_WRITE)) 1007 1007 update_mmu_cache_pud(vma, addr, pud); 1008 1008 } 1009 1009 ··· 1021 1031 return NULL; 1022 1032 1023 1033 if (flags & FOLL_TOUCH) 1024 - touch_pud(vma, addr, pud); 1034 + touch_pud(vma, addr, pud, flags); 1025 1035 1026 1036 /* 1027 1037 * device mapped pages can only be returned if the ··· 1414 1424 page = pmd_page(*pmd); 1415 1425 VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); 1416 1426 if (flags & FOLL_TOUCH) 1417 - touch_pmd(vma, addr, pmd); 1427 + touch_pmd(vma, addr, pmd, flags); 1418 1428 if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { 1419 1429 /* 1420 1430 * We don't mlock() pte-mapped THPs. This way we can avoid