Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

MIPS: use the generic get_user_pages_fast code

The mips code is mostly equivalent to the generic one, minus various
bugfixes and an arch override for gup_fast_permitted.

Note that this defines ARCH_HAS_PTE_SPECIAL for mips as mips has
pte_special and pte_mkspecial implemented and used in the existing gup
code. They are no-op stubs, though, which makes me a little unsure whether
this is really the right thing to do.

Note that this also adds back a missing cpu_has_dc_aliases check for
__get_user_pages_fast, which the old code was only doing for
get_user_pages_fast. This clearly looks like an oversight, as any
condition that makes get_user_pages_fast unsafe also applies to
__get_user_pages_fast.

[hch@lst.de: MIPS: don't select ARCH_HAS_PTE_SPECIAL]
Link: http://lkml.kernel.org/r/20190701151818.32227-3-hch@lst.de
Link: http://lkml.kernel.org/r/20190625143715.1689-5-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: James Hogan <jhogan@kernel.org>
Cc: Khalid Aziz <khalid.aziz@oracle.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rich Felker <dalias@libc.org>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Christoph Hellwig and committed by
Linus Torvalds
446f062b 39656e83

+5 -304
+2
arch/mips/Kconfig
··· 34 34 select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC 35 35 select GENERIC_SMP_IDLE_THREAD 36 36 select GENERIC_TIME_VSYSCALL 37 + select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT 37 38 select HANDLE_DOMAIN_IRQ 38 39 select HAVE_ARCH_COMPILER_H 39 40 select HAVE_ARCH_JUMP_LABEL ··· 56 55 select HAVE_FTRACE_MCOUNT_RECORD 57 56 select HAVE_FUNCTION_GRAPH_TRACER 58 57 select HAVE_FUNCTION_TRACER 58 + select HAVE_GENERIC_GUP 59 59 select HAVE_IDE 60 60 select HAVE_IOREMAP_PROT 61 61 select HAVE_IRQ_EXIT_ON_IRQ_STACK
+3
arch/mips/include/asm/pgtable.h
··· 20 20 #include <asm/cmpxchg.h> 21 21 #include <asm/io.h> 22 22 #include <asm/pgtable-bits.h> 23 + #include <asm/cpu-features.h> 23 24 24 25 struct mm_struct; 25 26 struct vm_area_struct; ··· 626 625 } 627 626 628 627 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 628 + 629 + #define gup_fast_permitted(start, end) (!cpu_has_dc_aliases) 629 630 630 631 #include <asm-generic/pgtable.h> 631 632
-1
arch/mips/mm/Makefile
··· 7 7 obj-y += context.o 8 8 obj-y += extable.o 9 9 obj-y += fault.o 10 - obj-y += gup.o 11 10 obj-y += init.o 12 11 obj-y += mmap.o 13 12 obj-y += page.o
-303
arch/mips/mm/gup.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * Lockless get_user_pages_fast for MIPS 4 - * 5 - * Copyright (C) 2008 Nick Piggin 6 - * Copyright (C) 2008 Novell Inc. 7 - * Copyright (C) 2011 Ralf Baechle 8 - */ 9 - #include <linux/sched.h> 10 - #include <linux/mm.h> 11 - #include <linux/vmstat.h> 12 - #include <linux/highmem.h> 13 - #include <linux/swap.h> 14 - #include <linux/hugetlb.h> 15 - 16 - #include <asm/cpu-features.h> 17 - #include <asm/pgtable.h> 18 - 19 - static inline pte_t gup_get_pte(pte_t *ptep) 20 - { 21 - #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) 22 - pte_t pte; 23 - 24 - retry: 25 - pte.pte_low = ptep->pte_low; 26 - smp_rmb(); 27 - pte.pte_high = ptep->pte_high; 28 - smp_rmb(); 29 - if (unlikely(pte.pte_low != ptep->pte_low)) 30 - goto retry; 31 - 32 - return pte; 33 - #else 34 - return READ_ONCE(*ptep); 35 - #endif 36 - } 37 - 38 - static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, 39 - int write, struct page **pages, int *nr) 40 - { 41 - pte_t *ptep = pte_offset_map(&pmd, addr); 42 - do { 43 - pte_t pte = gup_get_pte(ptep); 44 - struct page *page; 45 - 46 - if (!pte_present(pte) || 47 - pte_special(pte) || (write && !pte_write(pte))) { 48 - pte_unmap(ptep); 49 - return 0; 50 - } 51 - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 52 - page = pte_page(pte); 53 - get_page(page); 54 - SetPageReferenced(page); 55 - pages[*nr] = page; 56 - (*nr)++; 57 - 58 - } while (ptep++, addr += PAGE_SIZE, addr != end); 59 - 60 - pte_unmap(ptep - 1); 61 - return 1; 62 - } 63 - 64 - static inline void get_head_page_multiple(struct page *page, int nr) 65 - { 66 - VM_BUG_ON(page != compound_head(page)); 67 - VM_BUG_ON(page_count(page) == 0); 68 - page_ref_add(page, nr); 69 - SetPageReferenced(page); 70 - } 71 - 72 - static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, 73 - int write, struct page **pages, int *nr) 74 - { 75 - pte_t pte = *(pte_t *)&pmd; 76 - struct page *head, *page; 77 - int refs; 
78 - 79 - if (write && !pte_write(pte)) 80 - return 0; 81 - /* hugepages are never "special" */ 82 - VM_BUG_ON(pte_special(pte)); 83 - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 84 - 85 - refs = 0; 86 - head = pte_page(pte); 87 - page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); 88 - do { 89 - VM_BUG_ON(compound_head(page) != head); 90 - pages[*nr] = page; 91 - (*nr)++; 92 - page++; 93 - refs++; 94 - } while (addr += PAGE_SIZE, addr != end); 95 - 96 - get_head_page_multiple(head, refs); 97 - return 1; 98 - } 99 - 100 - static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, 101 - int write, struct page **pages, int *nr) 102 - { 103 - unsigned long next; 104 - pmd_t *pmdp; 105 - 106 - pmdp = pmd_offset(&pud, addr); 107 - do { 108 - pmd_t pmd = *pmdp; 109 - 110 - next = pmd_addr_end(addr, end); 111 - if (pmd_none(pmd)) 112 - return 0; 113 - if (unlikely(pmd_huge(pmd))) { 114 - if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) 115 - return 0; 116 - } else { 117 - if (!gup_pte_range(pmd, addr, next, write, pages,nr)) 118 - return 0; 119 - } 120 - } while (pmdp++, addr = next, addr != end); 121 - 122 - return 1; 123 - } 124 - 125 - static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, 126 - int write, struct page **pages, int *nr) 127 - { 128 - pte_t pte = *(pte_t *)&pud; 129 - struct page *head, *page; 130 - int refs; 131 - 132 - if (write && !pte_write(pte)) 133 - return 0; 134 - /* hugepages are never "special" */ 135 - VM_BUG_ON(pte_special(pte)); 136 - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 137 - 138 - refs = 0; 139 - head = pte_page(pte); 140 - page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); 141 - do { 142 - VM_BUG_ON(compound_head(page) != head); 143 - pages[*nr] = page; 144 - (*nr)++; 145 - page++; 146 - refs++; 147 - } while (addr += PAGE_SIZE, addr != end); 148 - 149 - get_head_page_multiple(head, refs); 150 - return 1; 151 - } 152 - 153 - static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, 154 - int 
write, struct page **pages, int *nr) 155 - { 156 - unsigned long next; 157 - pud_t *pudp; 158 - 159 - pudp = pud_offset(&pgd, addr); 160 - do { 161 - pud_t pud = *pudp; 162 - 163 - next = pud_addr_end(addr, end); 164 - if (pud_none(pud)) 165 - return 0; 166 - if (unlikely(pud_huge(pud))) { 167 - if (!gup_huge_pud(pud, addr, next, write, pages,nr)) 168 - return 0; 169 - } else { 170 - if (!gup_pmd_range(pud, addr, next, write, pages,nr)) 171 - return 0; 172 - } 173 - } while (pudp++, addr = next, addr != end); 174 - 175 - return 1; 176 - } 177 - 178 - /* 179 - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall 180 - * back to the regular GUP. 181 - * Note a difference with get_user_pages_fast: this always returns the 182 - * number of pages pinned, 0 if no pages were pinned. 183 - */ 184 - int __get_user_pages_fast(unsigned long start, int nr_pages, int write, 185 - struct page **pages) 186 - { 187 - struct mm_struct *mm = current->mm; 188 - unsigned long addr, len, end; 189 - unsigned long next; 190 - unsigned long flags; 191 - pgd_t *pgdp; 192 - int nr = 0; 193 - 194 - start &= PAGE_MASK; 195 - addr = start; 196 - len = (unsigned long) nr_pages << PAGE_SHIFT; 197 - end = start + len; 198 - if (unlikely(!access_ok((void __user *)start, len))) 199 - return 0; 200 - 201 - /* 202 - * XXX: batch / limit 'nr', to avoid large irq off latency 203 - * needs some instrumenting to determine the common sizes used by 204 - * important workloads (eg. DB2), and whether limiting the batch 205 - * size will decrease performance. 206 - * 207 - * It seems like we're in the clear for the moment. Direct-IO is 208 - * the main guy that batches up lots of get_user_pages, and even 209 - * they are limited to 64-at-a-time which is not so many. 210 - */ 211 - /* 212 - * This doesn't prevent pagetable teardown, but does prevent 213 - * the pagetables and pages from being freed. 
214 - * 215 - * So long as we atomically load page table pointers versus teardown, 216 - * we can follow the address down to the page and take a ref on it. 217 - */ 218 - local_irq_save(flags); 219 - pgdp = pgd_offset(mm, addr); 220 - do { 221 - pgd_t pgd = *pgdp; 222 - 223 - next = pgd_addr_end(addr, end); 224 - if (pgd_none(pgd)) 225 - break; 226 - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) 227 - break; 228 - } while (pgdp++, addr = next, addr != end); 229 - local_irq_restore(flags); 230 - 231 - return nr; 232 - } 233 - 234 - /** 235 - * get_user_pages_fast() - pin user pages in memory 236 - * @start: starting user address 237 - * @nr_pages: number of pages from start to pin 238 - * @gup_flags: flags modifying pin behaviour 239 - * @pages: array that receives pointers to the pages pinned. 240 - * Should be at least nr_pages long. 241 - * 242 - * Attempt to pin user pages in memory without taking mm->mmap_sem. 243 - * If not successful, it will fall back to taking the lock and 244 - * calling get_user_pages(). 245 - * 246 - * Returns number of pages pinned. This may be fewer than the number 247 - * requested. If nr_pages is 0 or negative, returns 0. If no pages 248 - * were pinned, returns -errno. 
249 - */ 250 - int get_user_pages_fast(unsigned long start, int nr_pages, 251 - unsigned int gup_flags, struct page **pages) 252 - { 253 - struct mm_struct *mm = current->mm; 254 - unsigned long addr, len, end; 255 - unsigned long next; 256 - pgd_t *pgdp; 257 - int ret, nr = 0; 258 - 259 - start &= PAGE_MASK; 260 - addr = start; 261 - len = (unsigned long) nr_pages << PAGE_SHIFT; 262 - 263 - end = start + len; 264 - if (end < start || cpu_has_dc_aliases) 265 - goto slow_irqon; 266 - 267 - /* XXX: batch / limit 'nr' */ 268 - local_irq_disable(); 269 - pgdp = pgd_offset(mm, addr); 270 - do { 271 - pgd_t pgd = *pgdp; 272 - 273 - next = pgd_addr_end(addr, end); 274 - if (pgd_none(pgd)) 275 - goto slow; 276 - if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, 277 - pages, &nr)) 278 - goto slow; 279 - } while (pgdp++, addr = next, addr != end); 280 - local_irq_enable(); 281 - 282 - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); 283 - return nr; 284 - slow: 285 - local_irq_enable(); 286 - 287 - slow_irqon: 288 - /* Try to get the remaining pages with get_user_pages */ 289 - start += nr << PAGE_SHIFT; 290 - pages += nr; 291 - 292 - ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, 293 - pages, gup_flags); 294 - 295 - /* Have to be a bit careful with return values */ 296 - if (nr > 0) { 297 - if (ret < 0) 298 - ret = nr; 299 - else 300 - ret += nr; 301 - } 302 - return ret; 303 - }