Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
arch/mips/mm/gup.c at v4.15

// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless get_user_pages_fast for MIPS
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 * Copyright (C) 2011 Ralf Baechle
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>

#include <asm/cpu-features.h>
#include <asm/pgtable.h>

static inline pte_t gup_get_pte(pte_t *ptep)
{
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
	/*
	 * 64-bit PTEs on 32-bit MIPS are read as two 32-bit halves;
	 * retry if the low half changed while the high half was read.
	 */
	pte_t pte;

retry:
	pte.pte_low = ptep->pte_low;
	smp_rmb();
	pte.pte_high = ptep->pte_high;
	smp_rmb();
	if (unlikely(pte.pte_low != ptep->pte_low))
		goto retry;

	return pte;
#else
	return READ_ONCE(*ptep);
#endif
}

static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t *ptep = pte_offset_map(&pmd, addr);
	do {
		pte_t pte = gup_get_pte(ptep);
		struct page *page;

		if (!pte_present(pte) ||
		    pte_special(pte) || (write && !pte_write(pte))) {
			pte_unmap(ptep);
			return 0;
		}
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		get_page(page);
		SetPageReferenced(page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	pte_unmap(ptep - 1);
	return 1;
}

static inline void get_head_page_multiple(struct page *page, int nr)
{
	VM_BUG_ON(page != compound_head(page));
	VM_BUG_ON(page_count(page) == 0);
	page_ref_add(page, nr);
	SetPageReferenced(page);
}

static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t pte = *(pte_t *)&pmd;
	struct page *head, *page;
	int refs;

	if (write && !pte_write(pte))
		return 0;
	/* hugepages are never "special" */
	VM_BUG_ON(pte_special(pte));
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);
	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	get_head_page_multiple(head, refs);
	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = *pmdp;

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd))
			return 0;
		if (unlikely(pmd_huge(pmd))) {
			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
				return 0;
		} else {
			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
				return 0;
		}
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t pte = *(pte_t *)&pud;
	struct page *head, *page;
	int refs;

	if (write && !pte_write(pte))
		return 0;
	/* hugepages are never "special" */
	VM_BUG_ON(pte_special(pte));
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);
	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	get_head_page_multiple(head, refs);
	return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&pgd, addr);
	do {
		pud_t pud = *pudp;

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (unlikely(pud_huge(pud))) {
			if (!gup_huge_pud(pud, addr, next, write, pages, nr))
				return 0;
		} else {
			if (!gup_pmd_range(pud, addr, next, write, pages, nr))
				return 0;
		}
	} while (pudp++, addr = next, addr != end);

	return 1;
}

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch
	 * size will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables and pages from being freed.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int ret, nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;

	end = start + len;
	if (end < start || cpu_has_dc_aliases)
		goto slow_irqon;

	/* XXX: batch / limit 'nr' */
	local_irq_disable();
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);
	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;
slow:
	local_irq_enable();

slow_irqon:
	/* Try to get the remaining pages with get_user_pages */
	start += nr << PAGE_SHIFT;
	pages += nr;

	ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
				      pages, write ? FOLL_WRITE : 0);

	/* Have to be a bit careful with return values */
	if (nr > 0) {
		if (ret < 0)
			ret = nr;
		else
			ret += nr;
	}
	return ret;
}
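
For context, here is a minimal sketch of how a caller might use get_user_pages_fast() as documented above. This is not part of the file: demo_pin_user_buffer and its surrounding logic are hypothetical, and the sketch assumes the v4.15-era signature shown here (later kernels replaced the write flag with a gup_flags argument).

#include <linux/mm.h>
#include <linux/pagemap.h>

/* Hypothetical caller: pin a user buffer for writing, then release it. */
static int demo_pin_user_buffer(unsigned long uaddr, int nr_pages,
				struct page **pages)
{
	int i, pinned;

	/* May pin fewer pages than requested, or return -errno. */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
	if (pinned < 0)
		return pinned;

	/* ... access pages[0..pinned-1], e.g. via kmap()/kunmap() ... */

	for (i = 0; i < pinned; i++) {
		set_page_dirty_lock(pages[i]);	/* the pages were written to */
		put_page(pages[i]);		/* drop the reference GUP took */
	}
	return pinned;
}

Note that get_user_pages_fast() itself falls back to the slow, mmap_sem-taking path when the lockless walk fails (or, on MIPS, whenever cpu_has_dc_aliases is set), so the caller only has to handle a short count or an error, not retry the fast path.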