Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: 7858/1: mm: make UACCESS_WITH_MEMCPY huge page aware

The memory pinning code in uaccess_with_memcpy.c does not check
for HugeTLB or THP pmds, and will enter an infinite loop should
a __copy_to_user or __clear_user occur against a huge page.

This patch adds detection code for huge pages to pin_page_for_write.
As this code can be executed in a fast path it refers to the actual
pmds rather than the vma. If a HugeTLB or THP is found (they have
the same pmd representation on ARM), the page table spinlock is
taken to prevent modification whilst the page is pinned.

On ARM, huge pages are only represented as pmds, thus no huge pud
checks are performed. (For huge puds one would lock the page table
in a similar manner as in the pmd case).

Two helper functions are introduced: pmd_thp_or_huge will check
whether or not a page is huge or transparent huge (which have the
same pmd layout on ARM), and pmd_hugewillfault will detect whether
or not a page fault will occur on write to the page.

Running the following test (with the chunking from read_zero
removed):
$ dd if=/dev/zero of=/dev/null bs=10M count=1024
Gave: 2.3 GB/s backed by normal pages,
2.9 GB/s backed by huge pages,
5.1 GB/s backed by huge pages, with page mask=HPAGE_MASK.

After some discussion, it was decided not to adopt the HPAGE_MASK,
as this would have a significant detrimental effect on the overall
system latency due to page_table_lock being held for too long.
This could be revisited if split huge page locks are adopted.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

authored by

Steven Capper and committed by
Russell King
a3a9ea65 92871b94

+48 -3
+7
arch/arm/include/asm/pgtable-2level.h
@@ -181,6 +181,13 @@
 
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
 
+/*
+ * We don't have huge page support for short descriptors, for the moment
+ * define empty stubs for use by pin_page_for_write.
+ */
+#define pmd_hugewillfault(pmd)	(0)
+#define pmd_thp_or_huge(pmd)	(0)
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_PGTABLE_2LEVEL_H */
+3
arch/arm/include/asm/pgtable-3level.h
@@ -206,6 +206,9 @@
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
 
+#define pmd_hugewillfault(pmd)	(!pmd_young(pmd) || !pmd_write(pmd))
+#define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
 #define pmd_trans_splitting(pmd)	(pmd_val(pmd) & PMD_SECT_SPLITTING)
+38 -3
arch/arm/lib/uaccess_with_memcpy.c
··· 18 18 #include <linux/hardirq.h> /* for in_atomic() */ 19 19 #include <linux/gfp.h> 20 20 #include <linux/highmem.h> 21 + #include <linux/hugetlb.h> 21 22 #include <asm/current.h> 22 23 #include <asm/page.h> 23 24 ··· 41 40 return 0; 42 41 43 42 pmd = pmd_offset(pud, addr); 44 - if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd))) 43 + if (unlikely(pmd_none(*pmd))) 44 + return 0; 45 + 46 + /* 47 + * A pmd can be bad if it refers to a HugeTLB or THP page. 48 + * 49 + * Both THP and HugeTLB pages have the same pmd layout 50 + * and should not be manipulated by the pte functions. 51 + * 52 + * Lock the page table for the destination and check 53 + * to see that it's still huge and whether or not we will 54 + * need to fault on write, or if we have a splitting THP. 55 + */ 56 + if (unlikely(pmd_thp_or_huge(*pmd))) { 57 + ptl = &current->mm->page_table_lock; 58 + spin_lock(ptl); 59 + if (unlikely(!pmd_thp_or_huge(*pmd) 60 + || pmd_hugewillfault(*pmd) 61 + || pmd_trans_splitting(*pmd))) { 62 + spin_unlock(ptl); 63 + return 0; 64 + } 65 + 66 + *ptep = NULL; 67 + *ptlp = ptl; 68 + return 1; 69 + } 70 + 71 + if (unlikely(pmd_bad(*pmd))) 45 72 return 0; 46 73 47 74 pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); ··· 123 94 from += tocopy; 124 95 n -= tocopy; 125 96 126 - pte_unmap_unlock(pte, ptl); 97 + if (pte) 98 + pte_unmap_unlock(pte, ptl); 99 + else 100 + spin_unlock(ptl); 127 101 } 128 102 if (!atomic) 129 103 up_read(&current->mm->mmap_sem); ··· 179 147 addr += tocopy; 180 148 n -= tocopy; 181 149 182 - pte_unmap_unlock(pte, ptl); 150 + if (pte) 151 + pte_unmap_unlock(pte, ptl); 152 + else 153 + spin_unlock(ptl); 183 154 } 184 155 up_read(&current->mm->mmap_sem); 185 156