Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

hugetlb: pass vma into huge_pte_alloc() and huge_pmd_share()

Patch series "hugetlb: Disable huge pmd unshare for uffd-wp", v4.

This series tries to disable huge pmd unshare of hugetlbfs backed memory
for uffd-wp. Although uffd-wp of hugetlbfs is still at the RFC stage,
the idea of this series may be needed for multiple tasks (Axel's uffd
minor fault series, and Mike's soft dirty series), so I picked it out
from the larger series.

This patch (of 4):

This is preparatory work to allow the per-architecture huge_pte_alloc() to
behave differently according to different VMA attributes.

Pass it deeper into huge_pmd_share() so that we can avoid the find_vma() call.

[peterx@redhat.com: build fix]
Link: https://lkml.kernel.org/r/20210304164653.GB397383@xz-x1
Link: https://lkml.kernel.org/r/20210218230633.15028-1-peterx@redhat.com

Link: https://lkml.kernel.org/r/20210218230633.15028-2-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Suggested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Adam Ruprecht <ruprecht@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Cannon Matthews <cannonmatthews@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Michal Koutn" <mkoutny@suse.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oliver Upton <oupton@google.com>
Cc: Shaohua Li <shli@fb.com>
Cc: Shawn Anastasio <shawn@anastas.io>
Cc: Steven Price <steven.price@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Peter Xu and committed by Linus Torvalds
aec44e0f 786b3112

+24 -20
+2 -2
arch/arm64/mm/hugetlbpage.c
··· 252 252 set_pte(ptep, pte); 253 253 } 254 254 255 - pte_t *huge_pte_alloc(struct mm_struct *mm, 255 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 256 256 unsigned long addr, unsigned long sz) 257 257 { 258 258 pgd_t *pgdp; ··· 286 286 } else if (sz == PMD_SIZE) { 287 287 if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && 288 288 pud_none(READ_ONCE(*pudp))) 289 - ptep = huge_pmd_share(mm, addr, pudp); 289 + ptep = huge_pmd_share(mm, vma, addr, pudp); 290 290 else 291 291 ptep = (pte_t *)pmd_alloc(mm, pudp, addr); 292 292 } else if (sz == (CONT_PMD_SIZE)) {
+2 -1
arch/ia64/mm/hugetlbpage.c
··· 25 25 EXPORT_SYMBOL(hpage_shift); 26 26 27 27 pte_t * 28 - huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) 28 + huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 29 + unsigned long addr, unsigned long sz) 29 30 { 30 31 unsigned long taddr = htlbpage_to_page(addr); 31 32 pgd_t *pgd;
+2 -2
arch/mips/mm/hugetlbpage.c
··· 21 21 #include <asm/tlb.h> 22 22 #include <asm/tlbflush.h> 23 23 24 - pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, 25 - unsigned long sz) 24 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 25 + unsigned long addr, unsigned long sz) 26 26 { 27 27 pgd_t *pgd; 28 28 p4d_t *p4d;
+1 -1
arch/parisc/mm/hugetlbpage.c
··· 44 44 } 45 45 46 46 47 - pte_t *huge_pte_alloc(struct mm_struct *mm, 47 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 48 48 unsigned long addr, unsigned long sz) 49 49 { 50 50 pgd_t *pgd;
+2 -1
arch/powerpc/mm/hugetlbpage.c
··· 106 106 * At this point we do the placement change only for BOOK3S 64. This would 107 107 * possibly work on other subarchs. 108 108 */ 109 - pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) 109 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 110 + unsigned long addr, unsigned long sz) 110 111 { 111 112 pgd_t *pg; 112 113 p4d_t *p4;
+1 -1
arch/s390/mm/hugetlbpage.c
··· 189 189 return pte; 190 190 } 191 191 192 - pte_t *huge_pte_alloc(struct mm_struct *mm, 192 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 193 193 unsigned long addr, unsigned long sz) 194 194 { 195 195 pgd_t *pgdp;
+1 -1
arch/sh/mm/hugetlbpage.c
··· 21 21 #include <asm/tlbflush.h> 22 22 #include <asm/cacheflush.h> 23 23 24 - pte_t *huge_pte_alloc(struct mm_struct *mm, 24 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 25 25 unsigned long addr, unsigned long sz) 26 26 { 27 27 pgd_t *pgd;
+1 -1
arch/sparc/mm/hugetlbpage.c
··· 279 279 unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << tte_to_shift(*(pte_t *)&pmd); } 280 280 unsigned long pte_leaf_size(pte_t pte) { return 1UL << tte_to_shift(pte); } 281 281 282 - pte_t *huge_pte_alloc(struct mm_struct *mm, 282 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 283 283 unsigned long addr, unsigned long sz) 284 284 { 285 285 pgd_t *pgd;
+3 -2
include/linux/hugetlb.h
··· 152 152 extern struct mutex *hugetlb_fault_mutex_table; 153 153 u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); 154 154 155 - pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); 155 + pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, 156 + unsigned long addr, pud_t *pud); 156 157 157 158 struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); 158 159 ··· 162 161 163 162 /* arch callbacks */ 164 163 165 - pte_t *huge_pte_alloc(struct mm_struct *mm, 164 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 166 165 unsigned long addr, unsigned long sz); 167 166 pte_t *huge_pte_offset(struct mm_struct *mm, 168 167 unsigned long addr, unsigned long sz);
+8 -7
mm/hugetlb.c
··· 3795 3795 src_pte = huge_pte_offset(src, addr, sz); 3796 3796 if (!src_pte) 3797 3797 continue; 3798 - dst_pte = huge_pte_alloc(dst, addr, sz); 3798 + dst_pte = huge_pte_alloc(dst, vma, addr, sz); 3799 3799 if (!dst_pte) { 3800 3800 ret = -ENOMEM; 3801 3801 break; ··· 4563 4563 */ 4564 4564 mapping = vma->vm_file->f_mapping; 4565 4565 i_mmap_lock_read(mapping); 4566 - ptep = huge_pte_alloc(mm, haddr, huge_page_size(h)); 4566 + ptep = huge_pte_alloc(mm, vma, haddr, huge_page_size(h)); 4567 4567 if (!ptep) { 4568 4568 i_mmap_unlock_read(mapping); 4569 4569 return VM_FAULT_OOM; ··· 5370 5370 * if !vma_shareable check at the beginning of the routine. i_mmap_rwsem is 5371 5371 * only required for subsequent processing. 5372 5372 */ 5373 - pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) 5373 + pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, 5374 + unsigned long addr, pud_t *pud) 5374 5375 { 5375 - struct vm_area_struct *vma = find_vma(mm, addr); 5376 5376 struct address_space *mapping = vma->vm_file->f_mapping; 5377 5377 pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + 5378 5378 vma->vm_pgoff; ··· 5450 5450 } 5451 5451 #define want_pmd_share() (1) 5452 5452 #else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ 5453 - pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) 5453 + pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, 5454 + unsigned long addr, pud_t *pud) 5454 5455 { 5455 5456 return NULL; 5456 5457 } ··· 5470 5469 #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ 5471 5470 5472 5471 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB 5473 - pte_t *huge_pte_alloc(struct mm_struct *mm, 5472 + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 5474 5473 unsigned long addr, unsigned long sz) 5475 5474 { 5476 5475 pgd_t *pgd; ··· 5489 5488 } else { 5490 5489 BUG_ON(sz != PMD_SIZE); 5491 5490 if (want_pmd_share() && pud_none(*pud)) 5492 - pte = huge_pmd_share(mm, addr, pud); 5491 + pte = huge_pmd_share(mm, vma, addr, pud); 5493 5492 else 5494 5493 pte = (pte_t *)pmd_alloc(mm, pud, addr); 5495 5494 }
+1 -1
mm/userfaultfd.c
··· 290 290 mutex_lock(&hugetlb_fault_mutex_table[hash]); 291 291 292 292 err = -ENOMEM; 293 - dst_pte = huge_pte_alloc(dst_mm, dst_addr, vma_hpagesize); 293 + dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize); 294 294 if (!dst_pte) { 295 295 mutex_unlock(&hugetlb_fault_mutex_table[hash]); 296 296 i_mmap_unlock_read(mapping);