Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dax: remove VM_MIXEDMAP for fsdax and device dax

This patch is reworked from an earlier patch that Dan has posted:
https://patchwork.kernel.org/patch/10131727/

VM_MIXEDMAP is used by dax to indicate to mm paths like vm_normal_page() that
the memory page it is dealing with is not typical memory from the linear
map. The get_user_pages_fast() path, since it does not resolve the vma,
is already using {pte,pmd}_devmap() as a stand-in for VM_MIXEDMAP, so we
use that as a VM_MIXEDMAP replacement in some locations. In the cases
where there is no pte to consult we fall back to using vma_is_dax() to
detect the VM_MIXEDMAP special case.

Now that we have explicit driver pfn_t-flag opt-in/opt-out for
get_user_pages() support for DAX, we can stop setting VM_MIXEDMAP. This
also means we no longer need to worry about safely manipulating vm_flags
in a future where we support dynamically changing the dax mode of a
file.

DAX should also now be supported with madvise_behavior(), vma_merge(),
and copy_page_range().

This patch has been tested against the ndctl unit tests. It has also been
tested against xfstests at commit 625515d, using fake pmem created via the
memmap kernel parameter; no additional issues have been observed.

Link: http://lkml.kernel.org/r/152847720311.55924.16999195879201817653.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Dave Jiang and committed by
Linus Torvalds
e1fb4a08 e36488c8

+27 -14
+1 -1
drivers/dax/device.c
··· 474 474 return rc; 475 475 476 476 vma->vm_ops = &dax_vm_ops; 477 - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 477 + vma->vm_flags |= VM_HUGEPAGE; 478 478 return 0; 479 479 } 480 480
-1
fs/ext2/file.c
··· 126 126 127 127 file_accessed(file); 128 128 vma->vm_ops = &ext2_dax_vm_ops; 129 - vma->vm_flags |= VM_MIXEDMAP; 130 129 return 0; 131 130 } 132 131 #else
+1 -1
fs/ext4/file.c
··· 374 374 file_accessed(file); 375 375 if (IS_DAX(file_inode(file))) { 376 376 vma->vm_ops = &ext4_dax_vm_ops; 377 - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 377 + vma->vm_flags |= VM_HUGEPAGE; 378 378 } else { 379 379 vma->vm_ops = &ext4_file_vm_ops; 380 380 }
+1 -1
fs/xfs/xfs_file.c
··· 1169 1169 file_accessed(filp); 1170 1170 vma->vm_ops = &xfs_file_vm_ops; 1171 1171 if (IS_DAX(file_inode(filp))) 1172 - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 1172 + vma->vm_flags |= VM_HUGEPAGE; 1173 1173 return 0; 1174 1174 } 1175 1175
+4 -2
mm/hmm.c
··· 676 676 return -EINVAL; 677 677 678 678 /* FIXME support hugetlb fs */ 679 - if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) { 679 + if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || 680 + vma_is_dax(vma)) { 680 681 hmm_pfns_special(range); 681 682 return -EINVAL; 682 683 } ··· 850 849 return -EINVAL; 851 850 852 851 /* FIXME support hugetlb fs */ 853 - if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) { 852 + if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || 853 + vma_is_dax(vma)) { 854 854 hmm_pfns_special(range); 855 855 return -EINVAL; 856 856 }
+2 -2
mm/huge_memory.c
··· 762 762 * but we need to be consistent with PTEs and architectures that 763 763 * can't support a 'special' bit. 764 764 */ 765 - BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); 765 + BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && 766 + !pfn_t_devmap(pfn)); 766 767 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == 767 768 (VM_PFNMAP|VM_MIXEDMAP)); 768 769 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); 769 - BUG_ON(!pfn_t_devmap(pfn)); 770 770 771 771 if (addr < vma->vm_start || addr >= vma->vm_end) 772 772 return VM_FAULT_SIGBUS;
+3
mm/ksm.c
··· 2430 2430 VM_HUGETLB | VM_MIXEDMAP)) 2431 2431 return 0; /* just ignore the advice */ 2432 2432 2433 + if (vma_is_dax(vma)) 2434 + return 0; 2435 + 2433 2436 #ifdef VM_SAO 2434 2437 if (*vm_flags & VM_SAO) 2435 2438 return 0;
+6
mm/memory.c
··· 859 859 return NULL; 860 860 } 861 861 } 862 + 863 + if (pte_devmap(pte)) 864 + return NULL; 865 + 862 866 print_bad_pte(vma, addr, pte, NULL); 863 867 return NULL; 864 868 } ··· 927 923 } 928 924 } 929 925 926 + if (pmd_devmap(pmd)) 927 + return NULL; 930 928 if (is_zero_pfn(pfn)) 931 929 return NULL; 932 930 if (unlikely(pfn > highest_memmap_pfn))
+2 -1
mm/migrate.c
··· 2951 2951 /* Sanity check the arguments */ 2952 2952 start &= PAGE_MASK; 2953 2953 end &= PAGE_MASK; 2954 - if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) 2954 + if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || 2955 + vma_is_dax(vma)) 2955 2956 return -EINVAL; 2956 2957 if (start < vma->vm_start || start >= vma->vm_end) 2957 2958 return -EINVAL;
+2 -1
mm/mlock.c
··· 527 527 vm_flags_t old_flags = vma->vm_flags; 528 528 529 529 if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || 530 - is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) 530 + is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || 531 + vma_is_dax(vma)) 531 532 /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ 532 533 goto out; 533 534
+5 -4
mm/mmap.c
··· 1796 1796 1797 1797 vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); 1798 1798 if (vm_flags & VM_LOCKED) { 1799 - if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || 1800 - vma == get_gate_vma(current->mm))) 1801 - mm->locked_vm += (len >> PAGE_SHIFT); 1802 - else 1799 + if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || 1800 + is_vm_hugetlb_page(vma) || 1801 + vma == get_gate_vma(current->mm)) 1803 1802 vma->vm_flags &= VM_LOCKED_CLEAR_MASK; 1803 + else 1804 + mm->locked_vm += (len >> PAGE_SHIFT); 1804 1805 } 1805 1806 1806 1807 if (file)