Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

arch/powerpc/mm/gup.c at v2.6.32 (295 lines, 6.9 kB)
/*
 * Lockless get_user_pages_fast for powerpc
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 */
#undef DEBUG

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>

#ifdef __HAVE_ARCH_PTE_SPECIAL

/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long mask, result;
        pte_t *ptep;

        result = _PAGE_PRESENT|_PAGE_USER;
        if (write)
                result |= _PAGE_RW;
        mask = result | _PAGE_SPECIAL;

        ptep = pte_offset_kernel(&pmd, addr);
        do {
                pte_t pte = *ptep;
                struct page *page;

                if ((pte_val(pte) & mask) != result)
                        return 0;
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                page = pte_page(pte);
                if (!page_cache_get_speculative(page))
                        return 0;
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                        put_page(page);
                        return 0;
                }
                pages[*nr] = page;
                (*nr)++;

        } while (ptep++, addr += PAGE_SIZE, addr != end);

        return 1;
}

#ifdef CONFIG_HUGETLB_PAGE
static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate,
                                 unsigned long *addr, unsigned long end,
                                 int write, struct page **pages, int *nr)
{
        unsigned long mask;
        unsigned long pte_end;
        struct page *head, *page;
        pte_t pte;
        int refs;

        pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate);
        if (pte_end < end)
                end = pte_end;

        pte = *ptep;
        mask = _PAGE_PRESENT|_PAGE_USER;
        if (write)
                mask |= _PAGE_RW;
        if ((pte_val(pte) & mask) != mask)
                return 0;
        /* hugepages are never "special" */
        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

        refs = 0;
        head = pte_page(pte);
        page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT);
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
                (*nr)++;
                page++;
                refs++;
        } while (*addr += PAGE_SIZE, *addr != end);

        if (!page_cache_add_speculative(head, refs)) {
                *nr -= refs;
                return 0;
        }
        if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                /* The PTE changed under us: the speculative references
                 * were taken on the head page, so drop them there and
                 * undo the accounting before failing.
                 */
                *nr -= refs;
                while (refs--)
                        put_page(head);
                return 0;
        }

        return 1;
}
#endif /* CONFIG_HUGETLB_PAGE */

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                int write, struct page **pages, int *nr)
{
        unsigned long next;
        pmd_t *pmdp;

        pmdp = pmd_offset(&pud, addr);
        do {
                pmd_t pmd = *pmdp;

                next = pmd_addr_end(addr, end);
                if (pmd_none(pmd))
                        return 0;
                if (!gup_pte_range(pmd, addr, next, write, pages, nr))
                        return 0;
        } while (pmdp++, addr = next, addr != end);

        return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
                int write, struct page **pages, int *nr)
{
        unsigned long next;
        pud_t *pudp;

        pudp = pud_offset(&pgd, addr);
        do {
                pud_t pud = *pudp;

                next = pud_addr_end(addr, end);
                if (pud_none(pud))
                        return 0;
                if (!gup_pmd_range(pud, addr, next, write, pages, nr))
                        return 0;
        } while (pudp++, addr = next, addr != end);

        return 1;
}

int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
        unsigned long next;
        pgd_t *pgdp;
        int nr = 0;
#ifdef CONFIG_PPC64
        unsigned int shift;
        int psize;
#endif

        pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages,
                 write ? "write" : "read");

        start &= PAGE_MASK;
        addr = start;
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;

        if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
                                        start, len)))
                goto slow_irqon;

        pr_devel("  aligned: %lx .. %lx\n", start, end);

#ifdef CONFIG_HUGETLB_PAGE
        /* We bail out on slice boundary crossing when hugetlb is
         * enabled in order to not have to deal with two different
         * page table formats
         */
        if (addr < SLICE_LOW_TOP) {
                if (end > SLICE_LOW_TOP)
                        goto slow_irqon;

                if (unlikely(GET_LOW_SLICE_INDEX(addr) !=
                             GET_LOW_SLICE_INDEX(end - 1)))
                        goto slow_irqon;
        } else {
                if (unlikely(GET_HIGH_SLICE_INDEX(addr) !=
                             GET_HIGH_SLICE_INDEX(end - 1)))
                        goto slow_irqon;
        }
#endif /* CONFIG_HUGETLB_PAGE */

        /*
         * XXX: batch / limit 'nr', to avoid large irq off latency
         * needs some instrumenting to determine the common sizes used by
         * important workloads (eg. DB2), and whether limiting the batch size
         * will decrease performance.
         *
         * It seems like we're in the clear for the moment. Direct-IO is
         * the main guy that batches up lots of get_user_pages, and even
         * they are limited to 64-at-a-time which is not so many.
         */
        /*
         * This doesn't prevent pagetable teardown, but does prevent
         * the pagetables from being freed on powerpc.
         *
         * So long as we atomically load page table pointers versus teardown,
         * we can follow the address down to the page and take a ref on it.
         */
        local_irq_disable();

#ifdef CONFIG_PPC64
        /* Those bits are related to hugetlbfs implementation and only exist
         * on 64-bit for now
         */
        psize = get_slice_psize(mm, addr);
        shift = mmu_psize_defs[psize].shift;
#endif /* CONFIG_PPC64 */

#ifdef CONFIG_HUGETLB_PAGE
        if (unlikely(mmu_huge_psizes[psize])) {
                pte_t *ptep;
                unsigned long a = addr;
                unsigned long sz = ((1UL) << shift);
                struct hstate *hstate = size_to_hstate(sz);

                BUG_ON(!hstate);
                /*
                 * XXX: could be optimized to avoid hstate
                 * lookup entirely (just use shift)
                 */

                do {
                        VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift);
                        ptep = huge_pte_offset(mm, a);
                        pr_devel(" %016lx: huge ptep %p\n", a, ptep);
                        if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages,
                                                   &nr))
                                goto slow;
                } while (a != end);
        } else
#endif /* CONFIG_HUGETLB_PAGE */
        {
                pgdp = pgd_offset(mm, addr);
                do {
                        pgd_t pgd = *pgdp;

#ifdef CONFIG_PPC64
                        VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift);
#endif
                        pr_devel(" %016lx: normal pgd %p\n", addr,
                                 (void *)pgd_val(pgd));
                        next = pgd_addr_end(addr, end);
                        if (pgd_none(pgd))
                                goto slow;
                        if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
                                goto slow;
                } while (pgdp++, addr = next, addr != end);
        }
        local_irq_enable();

        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
        return nr;

        {
                int ret;

slow:
                local_irq_enable();
slow_irqon:
                pr_devel("  slow path ! nr = %d\n", nr);

                /* Try to get the remaining pages with get_user_pages */
                start += nr << PAGE_SHIFT;
                pages += nr;

                down_read(&mm->mmap_sem);
                ret = get_user_pages(current, mm, start,
                        (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
                up_read(&mm->mmap_sem);

                /* Have to be a bit careful with return values */
                if (nr > 0) {
                        if (ret < 0)
                                ret = nr;
                        else
                                ret += nr;
                }

                return ret;
        }
}

#endif /* __HAVE_ARCH_PTE_SPECIAL */
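
For context, here is a minimal sketch of how a caller of this era might use the get_user_pages_fast() entry point defined above. The helper pin_user_buffer() and its error handling are illustrative assumptions, not part of the kernel tree; only get_user_pages_fast(), put_page(), PAGE_SHIFT and the errno constants are real kernel symbols, used with the 2.6.32-era signature shown in the file (an int write flag, and a return value that counts pages actually pinned).

#include <linux/mm.h>
#include <linux/errno.h>

/*
 * Illustrative sketch (assumed helper, not from the kernel tree): pin the
 * user buffer [uaddr, uaddr + len) for I/O without taking mmap_sem on the
 * fast path.  Assumes len > 0 and that pages[] has room for every page
 * the buffer spans.
 */
static int pin_user_buffer(unsigned long uaddr, size_t len, int write,
                           struct page **pages)
{
        unsigned long first = uaddr >> PAGE_SHIFT;
        unsigned long last = (uaddr + len - 1) >> PAGE_SHIFT;
        int nr_pages = last - first + 1;
        int i, ret;

        ret = get_user_pages_fast(uaddr, nr_pages, write, pages);
        if (ret == nr_pages)
                return 0;               /* every page pinned */

        /*
         * Partial result: the return value is the number of pages that
         * were pinned (or a negative errno if none were).  Drop the
         * references we did take before reporting failure.
         */
        if (ret > 0)
                for (i = 0; i < ret; i++)
                        put_page(pages[i]);
        return ret < 0 ? ret : -EFAULT;
}

Every page pinned this way must eventually be released with put_page(). And because the fast path in this file runs with interrupts disabled (see the XXX comment about irq-off latency above), callers of the period kept nr_pages modest; direct I/O, the heaviest user, batched at most 64 pages per call.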