Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

at v4.7-rc2, 300 lines, 7.1 kB
/*
 * Lockless get_user_pages_fast for MIPS
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 * Copyright (C) 2011 Ralf Baechle
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>

#include <asm/cpu-features.h>
#include <asm/pgtable.h>

/*
 * Read a pte without holding any lock. With 64-bit physical addresses on
 * a 32-bit CPU a pte is two words and cannot be loaded atomically, so the
 * halves are read with a seqlock-style retry: if pte_low changed while
 * pte_high was being read, start over rather than return a torn entry.
 */
static inline pte_t gup_get_pte(pte_t *ptep)
{
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
	pte_t pte;

retry:
	pte.pte_low = ptep->pte_low;
	smp_rmb();
	pte.pte_high = ptep->pte_high;
	smp_rmb();
	if (unlikely(pte.pte_low != ptep->pte_low))
		goto retry;

	return pte;
#else
	return READ_ONCE(*ptep);
#endif
}
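
/*
 * Pte-level walk for [addr, end): each entry is sampled once with
 * gup_get_pte(); anything that is not present, is special, or lacks
 * write permission when a writable mapping was requested aborts the
 * fast path. Otherwise the page is pinned with get_page() and handed
 * back through @pages.
 */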
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t *ptep = pte_offset_map(&pmd, addr);
	do {
		pte_t pte = gup_get_pte(ptep);
		struct page *page;

		if (!pte_present(pte) ||
		    pte_special(pte) || (write && !pte_write(pte))) {
			pte_unmap(ptep);
			return 0;
		}
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		get_page(page);
		SetPageReferenced(page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	pte_unmap(ptep - 1);
	return 1;
}

static inline void get_head_page_multiple(struct page *page, int nr)
{
	VM_BUG_ON(page != compound_head(page));
	VM_BUG_ON(page_count(page) == 0);
	page_ref_add(page, nr);
	SetPageReferenced(page);
}

static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t pte = *(pte_t *)&pmd;
	struct page *head, *page;
	int refs;

	if (write && !pte_write(pte))
		return 0;
	/* hugepages are never "special" */
	VM_BUG_ON(pte_special(pte));
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);
	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	get_head_page_multiple(head, refs);
	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = *pmdp;

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd))
			return 0;
		if (unlikely(pmd_huge(pmd))) {
			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
				return 0;
		} else {
			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
				return 0;
		}
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t pte = *(pte_t *)&pud;
	struct page *head, *page;
	int refs;

	if (write && !pte_write(pte))
		return 0;
	/* hugepages are never "special" */
	VM_BUG_ON(pte_special(pte));
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);
	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	get_head_page_multiple(head, refs);
	return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&pgd, addr);
	do {
		pud_t pud = *pudp;

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (unlikely(pud_huge(pud))) {
			if (!gup_huge_pud(pud, addr, next, write, pages, nr))
				return 0;
		} else {
			if (!gup_pmd_range(pud, addr, next, write, pages, nr))
				return 0;
		}
	} while (pudp++, addr = next, addr != end);

	return 1;
}

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch
	 * size will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables and pages from being freed.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}
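
/*
 * The entry point below is the one normal callers use. Unlike
 * __get_user_pages_fast() it falls back to the sleeping get_user_pages()
 * path when the fast walk fails, and it skips the fast walk entirely
 * when the D-cache has aliases (cpu_has_dc_aliases).
 */
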
/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int ret, nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;

	end = start + len;
	if (end < start || cpu_has_dc_aliases)
		goto slow_irqon;

	/* XXX: batch / limit 'nr' */
	local_irq_disable();
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);
	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;
slow:
	local_irq_enable();

slow_irqon:
	/* Try to get the remaining pages with get_user_pages */
	start += nr << PAGE_SHIFT;
	pages += nr;

	ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
				      write, 0, pages);

	/* Have to be a bit careful with return values */
	if (nr > 0) {
		if (ret < 0)
			ret = nr;
		else
			ret += nr;
	}
	return ret;
}
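
Usage note: below is a minimal sketch of a caller, following the kerneldoc
above. The helper name my_pin_and_touch and its parameters are hypothetical
(not part of this file or the kernel); it assumes <linux/slab.h> and
<linux/highmem.h> are available and keeps error handling to the essentials.

static int my_pin_and_touch(unsigned long uaddr, int nr_pages)
{
	struct page **pages;
	int i, pinned;

	pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* May pin fewer than nr_pages; a negative value means none. */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1, pages);
	if (pinned < 0) {
		kfree(pages);
		return pinned;
	}

	for (i = 0; i < pinned; i++) {
		void *kaddr = kmap(pages[i]);

		*(char *)kaddr = 0;		/* write through the pinned page */
		kunmap(pages[i]);
		set_page_dirty_lock(pages[i]);	/* page contents were changed */
		put_page(pages[i]);		/* drop the reference taken by gup */
	}

	kfree(pages);
	return pinned;
}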