Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
arch/mips/mm/gup.c at v3.4-rc4 (315 lines, 7.6 kB)
/*
 * Lockless get_user_pages_fast for MIPS
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 * Copyright (C) 2011 Ralf Baechle
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>

#include <asm/pgtable.h>

static inline pte_t gup_get_pte(pte_t *ptep)
{
#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
	pte_t pte;

	/*
	 * With 64-bit physical addresses on a 32-bit CPU, the pte spans
	 * two words and cannot be loaded atomically. Read both halves
	 * with read barriers in between and retry if the low word
	 * changed underneath us.
	 */
retry:
	pte.pte_low = ptep->pte_low;
	smp_rmb();
	pte.pte_high = ptep->pte_high;
	smp_rmb();
	if (unlikely(pte.pte_low != ptep->pte_low))
		goto retry;

	return pte;
#else
	return ACCESS_ONCE(*ptep);
#endif
}

static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t *ptep = pte_offset_map(&pmd, addr);
	do {
		pte_t pte = gup_get_pte(ptep);
		struct page *page;

		if (!pte_present(pte) ||
		    pte_special(pte) || (write && !pte_write(pte))) {
			pte_unmap(ptep);
			return 0;
		}
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		get_page(page);
		SetPageReferenced(page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	pte_unmap(ptep - 1);
	return 1;
}

static inline void get_head_page_multiple(struct page *page, int nr)
{
	VM_BUG_ON(page != compound_head(page));
	VM_BUG_ON(page_count(page) == 0);
	atomic_add(nr, &page->_count);
	SetPageReferenced(page);
}

static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t pte = *(pte_t *)&pmd;
	struct page *head, *page;
	int refs;

	if (write && !pte_write(pte))
		return 0;
	/* hugepages are never "special" */
	VM_BUG_ON(pte_special(pte));
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);
	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		if (PageTail(page))
			get_huge_page_tail(page);
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	get_head_page_multiple(head, refs);
	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = *pmdp;

		next = pmd_addr_end(addr, end);
		/*
		 * The pmd_trans_splitting() check below explains why
		 * pmdp_splitting_flush has to flush the tlb, to stop
		 * this gup-fast code from running while we set the
		 * splitting bit in the pmd. Returning zero will take
		 * the slow path that will call wait_split_huge_page()
		 * if the pmd is still in splitting state. gup-fast
		 * can't because it has irqs disabled and
		 * wait_split_huge_page() would never return as the
		 * tlb flush IPI wouldn't run.
		 */
		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
			return 0;
		if (unlikely(pmd_huge(pmd))) {
			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
				return 0;
		} else {
			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
				return 0;
		}
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	pte_t pte = *(pte_t *)&pud;
	struct page *head, *page;
	int refs;

	if (write && !pte_write(pte))
		return 0;
	/* hugepages are never "special" */
	VM_BUG_ON(pte_special(pte));
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);
	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	get_head_page_multiple(head, refs);
	return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
			int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&pgd, addr);
	do {
		pud_t pud = *pudp;

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (unlikely(pud_huge(pud))) {
			if (!gup_huge_pud(pud, addr, next, write, pages, nr))
				return 0;
		} else {
			if (!gup_pmd_range(pud, addr, next, write, pages, nr))
				return 0;
		}
	} while (pudp++, addr = next, addr != end);

	return 1;
}

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch
	 * size will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables and pages from being freed.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int ret, nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;

	end = start + len;
	if (end < start)
		goto slow_irqon;

	/* XXX: batch / limit 'nr' */
	local_irq_disable();
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);
	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

slow:
	local_irq_enable();

slow_irqon:
	/* Try to get the remaining pages with get_user_pages */
	start += nr << PAGE_SHIFT;
	pages += nr;

	down_read(&mm->mmap_sem);
	ret = get_user_pages(current, mm, start,
				(end - start) >> PAGE_SHIFT,
				write, 0, pages, NULL);
	up_read(&mm->mmap_sem);

	/* Have to be a bit careful with return values */
	if (nr > 0) {
		if (ret < 0)
			ret = nr;
		else
			ret += nr;
	}
	return ret;
}
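
A minimal caller sketch, not part of the file above: pin_user_buffer() is a hypothetical helper showing how get_user_pages_fast() is typically used against a kernel of this vintage, pinning a user buffer and later dropping each reference with put_page(). Only the gup call itself comes from this file; the helper, its name, and its error handling are illustrative assumptions.

#include <linux/mm.h>
#include <linux/slab.h>

/*
 * Hypothetical example against the v3.4-era API: pin a user buffer
 * for write access, do some work on the pages, then release them.
 */
static int pin_user_buffer(unsigned long uaddr, size_t len)
{
	/* Round up so the last partially covered page is included. */
	int nr_pages = (offset_in_page(uaddr) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct page **pages;
	int i, pinned;

	pages = kmalloc(nr_pages * sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* May pin fewer pages than requested, or return -errno. */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
	if (pinned < 0) {
		kfree(pages);
		return pinned;
	}

	/* ... access the pinned pages here, e.g. via kmap() ... */

	/* Each pinned page holds a reference taken by gup; give them back. */
	for (i = 0; i < pinned; i++)
		put_page(pages[i]);
	kfree(pages);

	return pinned == nr_pages ? 0 : -EFAULT;
}

A caller like this may sleep, since get_user_pages_fast() can fall back to the mmap_sem-taking slow path; code running in atomic context would use __get_user_pages_fast() instead and cope with it pinning nothing.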