Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.17 246 lines 6.3 kB view raw
1/* 2 * Lockless get_user_pages_fast for s390 3 * 4 * Copyright IBM Corp. 2010 5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 6 */ 7#include <linux/sched.h> 8#include <linux/mm.h> 9#include <linux/hugetlb.h> 10#include <linux/vmstat.h> 11#include <linux/pagemap.h> 12#include <linux/rwsem.h> 13#include <asm/pgtable.h> 14 15/* 16 * The performance critical leaf functions are made noinline otherwise gcc 17 * inlines everything into a single function which results in too much 18 * register pressure. 19 */ 20static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, 21 unsigned long end, int write, struct page **pages, int *nr) 22{ 23 unsigned long mask; 24 pte_t *ptep, pte; 25 struct page *page; 26 27 mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; 28 29 ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); 30 do { 31 pte = *ptep; 32 barrier(); 33 if ((pte_val(pte) & mask) != 0) 34 return 0; 35 VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 36 page = pte_page(pte); 37 if (!page_cache_get_speculative(page)) 38 return 0; 39 if (unlikely(pte_val(pte) != pte_val(*ptep))) { 40 put_page(page); 41 return 0; 42 } 43 pages[*nr] = page; 44 (*nr)++; 45 46 } while (ptep++, addr += PAGE_SIZE, addr != end); 47 48 return 1; 49} 50 51static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, 52 unsigned long end, int write, struct page **pages, int *nr) 53{ 54 unsigned long mask, result; 55 struct page *head, *page, *tail; 56 int refs; 57 58 result = write ? 0 : _SEGMENT_ENTRY_PROTECT; 59 mask = result | _SEGMENT_ENTRY_INVALID; 60 if ((pmd_val(pmd) & mask) != result) 61 return 0; 62 VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); 63 64 refs = 0; 65 head = pmd_page(pmd); 66 page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); 67 tail = page; 68 do { 69 VM_BUG_ON(compound_head(page) != head); 70 pages[*nr] = page; 71 (*nr)++; 72 page++; 73 refs++; 74 } while (addr += PAGE_SIZE, addr != end); 75 76 if (!page_cache_add_speculative(head, refs)) { 77 *nr -= refs; 78 return 0; 79 } 80 81 if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { 82 *nr -= refs; 83 while (refs--) 84 put_page(head); 85 return 0; 86 } 87 88 /* 89 * Any tail page need their mapcount reference taken before we 90 * return. 91 */ 92 while (refs--) { 93 if (PageTail(tail)) 94 get_huge_page_tail(tail); 95 tail++; 96 } 97 98 return 1; 99} 100 101 102static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, 103 unsigned long end, int write, struct page **pages, int *nr) 104{ 105 unsigned long next; 106 pmd_t *pmdp, pmd; 107 108 pmdp = (pmd_t *) pudp; 109#ifdef CONFIG_64BIT 110 if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 111 pmdp = (pmd_t *) pud_deref(pud); 112 pmdp += pmd_index(addr); 113#endif 114 do { 115 pmd = *pmdp; 116 barrier(); 117 next = pmd_addr_end(addr, end); 118 /* 119 * The pmd_trans_splitting() check below explains why 120 * pmdp_splitting_flush() has to serialize with 121 * smp_call_function() against our disabled IRQs, to stop 122 * this gup-fast code from running while we set the 123 * splitting bit in the pmd. Returning zero will take 124 * the slow path that will call wait_split_huge_page() 125 * if the pmd is still in splitting state. 126 */ 127 if (pmd_none(pmd) || pmd_trans_splitting(pmd)) 128 return 0; 129 if (unlikely(pmd_large(pmd))) { 130 if (!gup_huge_pmd(pmdp, pmd, addr, next, 131 write, pages, nr)) 132 return 0; 133 } else if (!gup_pte_range(pmdp, pmd, addr, next, 134 write, pages, nr)) 135 return 0; 136 } while (pmdp++, addr = next, addr != end); 137 138 return 1; 139} 140 141static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, 142 unsigned long end, int write, struct page **pages, int *nr) 143{ 144 unsigned long next; 145 pud_t *pudp, pud; 146 147 pudp = (pud_t *) pgdp; 148#ifdef CONFIG_64BIT 149 if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) 150 pudp = (pud_t *) pgd_deref(pgd); 151 pudp += pud_index(addr); 152#endif 153 do { 154 pud = *pudp; 155 barrier(); 156 next = pud_addr_end(addr, end); 157 if (pud_none(pud)) 158 return 0; 159 if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) 160 return 0; 161 } while (pudp++, addr = next, addr != end); 162 163 return 1; 164} 165 166/* 167 * Like get_user_pages_fast() except its IRQ-safe in that it won't fall 168 * back to the regular GUP. 169 */ 170int __get_user_pages_fast(unsigned long start, int nr_pages, int write, 171 struct page **pages) 172{ 173 struct mm_struct *mm = current->mm; 174 unsigned long addr, len, end; 175 unsigned long next, flags; 176 pgd_t *pgdp, pgd; 177 int nr = 0; 178 179 start &= PAGE_MASK; 180 addr = start; 181 len = (unsigned long) nr_pages << PAGE_SHIFT; 182 end = start + len; 183 if ((end <= start) || (end > TASK_SIZE)) 184 return 0; 185 /* 186 * local_irq_save() doesn't prevent pagetable teardown, but does 187 * prevent the pagetables from being freed on s390. 188 * 189 * So long as we atomically load page table pointers versus teardown, 190 * we can follow the address down to the the page and take a ref on it. 191 */ 192 local_irq_save(flags); 193 pgdp = pgd_offset(mm, addr); 194 do { 195 pgd = *pgdp; 196 barrier(); 197 next = pgd_addr_end(addr, end); 198 if (pgd_none(pgd)) 199 break; 200 if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) 201 break; 202 } while (pgdp++, addr = next, addr != end); 203 local_irq_restore(flags); 204 205 return nr; 206} 207 208/** 209 * get_user_pages_fast() - pin user pages in memory 210 * @start: starting user address 211 * @nr_pages: number of pages from start to pin 212 * @write: whether pages will be written to 213 * @pages: array that receives pointers to the pages pinned. 214 * Should be at least nr_pages long. 215 * 216 * Attempt to pin user pages in memory without taking mm->mmap_sem. 217 * If not successful, it will fall back to taking the lock and 218 * calling get_user_pages(). 219 * 220 * Returns number of pages pinned. This may be fewer than the number 221 * requested. If nr_pages is 0 or negative, returns 0. If no pages 222 * were pinned, returns -errno. 223 */ 224int get_user_pages_fast(unsigned long start, int nr_pages, int write, 225 struct page **pages) 226{ 227 struct mm_struct *mm = current->mm; 228 int nr, ret; 229 230 start &= PAGE_MASK; 231 nr = __get_user_pages_fast(start, nr_pages, write, pages); 232 if (nr == nr_pages) 233 return nr; 234 235 /* Try to get the remaining pages with get_user_pages */ 236 start += nr << PAGE_SHIFT; 237 pages += nr; 238 down_read(&mm->mmap_sem); 239 ret = get_user_pages(current, mm, start, 240 nr_pages - nr, write, 0, pages, NULL); 241 up_read(&mm->mmap_sem); 242 /* Have to be a bit careful with return values */ 243 if (nr > 0) 244 ret = (ret < 0) ? nr : ret + nr; 245 return ret; 246}