Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[S390] use generic RCU page-table freeing code

Replace the s390 specific rcu page-table freeing code with the
generic variant. This requires duplicating the definition of
struct mmu_table_batch, as s390 does not use the generic tlb flush
code.

While we are at it, remove the restriction that page table fragments
cannot be reused after a single fragment has been freed with rcu,
and split out allocation and freeing of page tables with pgstes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

+172 -235
+1
arch/s390/Kconfig
··· 89 89 select HAVE_GET_USER_PAGES_FAST 90 90 select HAVE_ARCH_MUTEX_CPU_RELAX 91 91 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 92 + select HAVE_RCU_TABLE_FREE if SMP 92 93 select ARCH_INLINE_SPIN_TRYLOCK 93 94 select ARCH_INLINE_SPIN_TRYLOCK_BH 94 95 select ARCH_INLINE_SPIN_LOCK
+4 -4
arch/s390/include/asm/pgalloc.h
··· 17 17 #include <linux/gfp.h> 18 18 #include <linux/mm.h> 19 19 20 - #define check_pgt_cache() do {} while (0) 21 - 22 20 unsigned long *crst_table_alloc(struct mm_struct *); 23 21 void crst_table_free(struct mm_struct *, unsigned long *); 24 - void crst_table_free_rcu(struct mm_struct *, unsigned long *); 25 22 26 23 unsigned long *page_table_alloc(struct mm_struct *); 27 24 void page_table_free(struct mm_struct *, unsigned long *); 28 - void page_table_free_rcu(struct mm_struct *, unsigned long *); 25 + #ifdef CONFIG_HAVE_RCU_TABLE_FREE 26 + void page_table_free_rcu(struct mmu_gather *, unsigned long *); 27 + void __tlb_remove_table(void *_table); 28 + #endif 29 29 30 30 static inline void clear_table(unsigned long *s, unsigned long val, size_t n) 31 31 {
+42 -52
arch/s390/include/asm/tlb.h
··· 26 26 #include <linux/swap.h> 27 27 #include <asm/processor.h> 28 28 #include <asm/pgalloc.h> 29 - #include <asm/smp.h> 30 29 #include <asm/tlbflush.h> 31 30 32 31 struct mmu_gather { 33 32 struct mm_struct *mm; 33 + #ifdef CONFIG_HAVE_RCU_TABLE_FREE 34 + struct mmu_table_batch *batch; 35 + #endif 34 36 unsigned int fullmm; 35 - unsigned int nr_ptes; 36 - unsigned int nr_pxds; 37 - unsigned int max; 38 - void **array; 39 - void *local[8]; 37 + unsigned int need_flush; 40 38 }; 41 39 42 - static inline void __tlb_alloc_page(struct mmu_gather *tlb) 43 - { 44 - unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); 40 + #ifdef CONFIG_HAVE_RCU_TABLE_FREE 41 + struct mmu_table_batch { 42 + struct rcu_head rcu; 43 + unsigned int nr; 44 + void *tables[0]; 45 + }; 45 46 46 - if (addr) { 47 - tlb->array = (void *) addr; 48 - tlb->max = PAGE_SIZE / sizeof(void *); 49 - } 50 - } 47 + #define MAX_TABLE_BATCH \ 48 + ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) 49 + 50 + extern void tlb_table_flush(struct mmu_gather *tlb); 51 + extern void tlb_remove_table(struct mmu_gather *tlb, void *table); 52 + #endif 51 53 52 54 static inline void tlb_gather_mmu(struct mmu_gather *tlb, 53 55 struct mm_struct *mm, 54 56 unsigned int full_mm_flush) 55 57 { 56 58 tlb->mm = mm; 57 - tlb->max = ARRAY_SIZE(tlb->local); 58 - tlb->array = tlb->local; 59 59 tlb->fullmm = full_mm_flush; 60 + tlb->need_flush = 0; 61 + #ifdef CONFIG_HAVE_RCU_TABLE_FREE 62 + tlb->batch = NULL; 63 + #endif 60 64 if (tlb->fullmm) 61 65 __tlb_flush_mm(mm); 62 - else 63 - __tlb_alloc_page(tlb); 64 - tlb->nr_ptes = 0; 65 - tlb->nr_pxds = tlb->max; 66 66 } 67 67 68 68 static inline void tlb_flush_mmu(struct mmu_gather *tlb) 69 69 { 70 - if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max)) 71 - __tlb_flush_mm(tlb->mm); 72 - while (tlb->nr_ptes > 0) 73 - page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); 74 - while (tlb->nr_pxds < tlb->max) 75 - 
crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); 70 + if (!tlb->need_flush) 71 + return; 72 + tlb->need_flush = 0; 73 + __tlb_flush_mm(tlb->mm); 74 + #ifdef CONFIG_HAVE_RCU_TABLE_FREE 75 + tlb_table_flush(tlb); 76 + #endif 76 77 } 77 78 78 79 static inline void tlb_finish_mmu(struct mmu_gather *tlb, 79 80 unsigned long start, unsigned long end) 80 81 { 81 82 tlb_flush_mmu(tlb); 82 - 83 - rcu_table_freelist_finish(); 84 - 85 - /* keep the page table cache within bounds */ 86 - check_pgt_cache(); 87 - 88 - if (tlb->array != tlb->local) 89 - free_pages((unsigned long) tlb->array, 0); 90 83 } 91 84 92 85 /* ··· 105 112 static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, 106 113 unsigned long address) 107 114 { 108 - if (!tlb->fullmm) { 109 - tlb->array[tlb->nr_ptes++] = pte; 110 - if (tlb->nr_ptes >= tlb->nr_pxds) 111 - tlb_flush_mmu(tlb); 112 - } else 113 - page_table_free(tlb->mm, (unsigned long *) pte); 115 + #ifdef CONFIG_HAVE_RCU_TABLE_FREE 116 + if (!tlb->fullmm) 117 + return page_table_free_rcu(tlb, (unsigned long *) pte); 118 + #endif 119 + page_table_free(tlb->mm, (unsigned long *) pte); 114 120 } 115 121 116 122 /* ··· 125 133 #ifdef __s390x__ 126 134 if (tlb->mm->context.asce_limit <= (1UL << 31)) 127 135 return; 128 - if (!tlb->fullmm) { 129 - tlb->array[--tlb->nr_pxds] = pmd; 130 - if (tlb->nr_ptes >= tlb->nr_pxds) 131 - tlb_flush_mmu(tlb); 132 - } else 133 - crst_table_free(tlb->mm, (unsigned long *) pmd); 136 + #ifdef CONFIG_HAVE_RCU_TABLE_FREE 137 + if (!tlb->fullmm) 138 + return tlb_remove_table(tlb, pmd); 139 + #endif 140 + crst_table_free(tlb->mm, (unsigned long *) pmd); 134 141 #endif 135 142 } 136 143 ··· 146 155 #ifdef __s390x__ 147 156 if (tlb->mm->context.asce_limit <= (1UL << 42)) 148 157 return; 149 - if (!tlb->fullmm) { 150 - tlb->array[--tlb->nr_pxds] = pud; 151 - if (tlb->nr_ptes >= tlb->nr_pxds) 152 - tlb_flush_mmu(tlb); 153 - } else 154 - crst_table_free(tlb->mm, (unsigned long *) pud); 158 + #ifdef 
CONFIG_HAVE_RCU_TABLE_FREE 159 + if (!tlb->fullmm) 160 + return tlb_remove_table(tlb, pud); 161 + #endif 162 + crst_table_free(tlb->mm, (unsigned long *) pud); 155 163 #endif 156 164 } 157 165
+125 -179
arch/s390/mm/pgtable.c
··· 24 24 #include <asm/tlbflush.h> 25 25 #include <asm/mmu_context.h> 26 26 27 - struct rcu_table_freelist { 28 - struct rcu_head rcu; 29 - struct mm_struct *mm; 30 - unsigned int pgt_index; 31 - unsigned int crst_index; 32 - unsigned long *table[0]; 33 - }; 34 - 35 - #define RCU_FREELIST_SIZE \ 36 - ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ 37 - / sizeof(unsigned long)) 38 - 39 - static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); 40 - 41 - static void __page_table_free(struct mm_struct *mm, unsigned long *table); 42 - 43 - static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) 44 - { 45 - struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); 46 - struct rcu_table_freelist *batch = *batchp; 47 - 48 - if (batch) 49 - return batch; 50 - batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); 51 - if (batch) { 52 - batch->mm = mm; 53 - batch->pgt_index = 0; 54 - batch->crst_index = RCU_FREELIST_SIZE; 55 - *batchp = batch; 56 - } 57 - return batch; 58 - } 59 - 60 - static void rcu_table_freelist_callback(struct rcu_head *head) 61 - { 62 - struct rcu_table_freelist *batch = 63 - container_of(head, struct rcu_table_freelist, rcu); 64 - 65 - while (batch->pgt_index > 0) 66 - __page_table_free(batch->mm, batch->table[--batch->pgt_index]); 67 - while (batch->crst_index < RCU_FREELIST_SIZE) 68 - crst_table_free(batch->mm, batch->table[batch->crst_index++]); 69 - free_page((unsigned long) batch); 70 - } 71 - 72 - void rcu_table_freelist_finish(void) 73 - { 74 - struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist); 75 - struct rcu_table_freelist *batch = *batchp; 76 - 77 - if (!batch) 78 - goto out; 79 - call_rcu(&batch->rcu, rcu_table_freelist_callback); 80 - *batchp = NULL; 81 - out: 82 - put_cpu_var(rcu_table_freelist); 83 - } 84 - 85 - static void smp_sync(void *arg) 86 - { 87 - } 88 - 89 27 #ifndef CONFIG_64BIT 90 28 #define ALLOC_ORDER 1 91 - #define TABLES_PER_PAGE 4 92 
- #define FRAG_MASK 15UL 93 - #define SECOND_HALVES 10UL 94 - 95 - void clear_table_pgstes(unsigned long *table) 96 - { 97 - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); 98 - memset(table + 256, 0, PAGE_SIZE/4); 99 - clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); 100 - memset(table + 768, 0, PAGE_SIZE/4); 101 - } 102 - 29 + #define FRAG_MASK 0x0f 103 30 #else 104 31 #define ALLOC_ORDER 2 105 - #define TABLES_PER_PAGE 2 106 - #define FRAG_MASK 3UL 107 - #define SECOND_HALVES 2UL 108 - 109 - void clear_table_pgstes(unsigned long *table) 110 - { 111 - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); 112 - memset(table + 256, 0, PAGE_SIZE/2); 113 - } 114 - 32 + #define FRAG_MASK 0x03 115 33 #endif 116 34 117 35 unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; ··· 56 138 void crst_table_free(struct mm_struct *mm, unsigned long *table) 57 139 { 58 140 free_pages((unsigned long) table, ALLOC_ORDER); 59 - } 60 - 61 - void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) 62 - { 63 - struct rcu_table_freelist *batch; 64 - 65 - preempt_disable(); 66 - if (atomic_read(&mm->mm_users) < 2 && 67 - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { 68 - crst_table_free(mm, table); 69 - goto out; 70 - } 71 - batch = rcu_table_freelist_get(mm); 72 - if (!batch) { 73 - smp_call_function(smp_sync, NULL, 1); 74 - crst_table_free(mm, table); 75 - goto out; 76 - } 77 - batch->table[--batch->crst_index] = table; 78 - if (batch->pgt_index >= batch->crst_index) 79 - rcu_table_freelist_finish(); 80 - out: 81 - preempt_enable(); 82 141 } 83 142 84 143 #ifdef CONFIG_64BIT ··· 133 238 } 134 239 #endif 135 240 241 + static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) 242 + { 243 + unsigned int old, new; 244 + 245 + do { 246 + old = atomic_read(v); 247 + new = old ^ bits; 248 + } while (atomic_cmpxchg(v, old, new) != old); 249 + return new; 250 + } 251 + 136 252 /* 137 253 * page table entry allocation/free routines. 
138 254 */ 255 + #ifdef CONFIG_PGSTE 256 + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) 257 + { 258 + struct page *page; 259 + unsigned long *table; 260 + 261 + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 262 + if (!page) 263 + return NULL; 264 + pgtable_page_ctor(page); 265 + atomic_set(&page->_mapcount, 3); 266 + table = (unsigned long *) page_to_phys(page); 267 + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); 268 + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); 269 + return table; 270 + } 271 + 272 + static inline void page_table_free_pgste(unsigned long *table) 273 + { 274 + struct page *page; 275 + 276 + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 277 + pgtable_page_ctor(page); 278 + atomic_set(&page->_mapcount, -1); 279 + __free_page(page); 280 + } 281 + #endif 282 + 139 283 unsigned long *page_table_alloc(struct mm_struct *mm) 140 284 { 141 285 struct page *page; 142 286 unsigned long *table; 143 - unsigned long bits; 287 + unsigned int mask, bit; 144 288 145 - bits = (mm->context.has_pgste) ? 
3UL : 1UL; 289 + #ifdef CONFIG_PGSTE 290 + if (mm_has_pgste(mm)) 291 + return page_table_alloc_pgste(mm); 292 + #endif 293 + /* Allocate fragments of a 4K page as 1K/2K page table */ 146 294 spin_lock_bh(&mm->context.list_lock); 147 - page = NULL; 295 + mask = FRAG_MASK; 148 296 if (!list_empty(&mm->context.pgtable_list)) { 149 297 page = list_first_entry(&mm->context.pgtable_list, 150 298 struct page, lru); 151 - if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) 152 - page = NULL; 299 + table = (unsigned long *) page_to_phys(page); 300 + mask = atomic_read(&page->_mapcount); 301 + mask = mask | (mask >> 4); 153 302 } 154 - if (!page) { 303 + if ((mask & FRAG_MASK) == FRAG_MASK) { 155 304 spin_unlock_bh(&mm->context.list_lock); 156 305 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 157 306 if (!page) 158 307 return NULL; 159 308 pgtable_page_ctor(page); 160 - page->flags &= ~FRAG_MASK; 309 + atomic_set(&page->_mapcount, 1); 161 310 table = (unsigned long *) page_to_phys(page); 162 - if (mm->context.has_pgste) 163 - clear_table_pgstes(table); 164 - else 165 - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); 311 + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); 166 312 spin_lock_bh(&mm->context.list_lock); 167 313 list_add(&page->lru, &mm->context.pgtable_list); 314 + } else { 315 + for (bit = 1; mask & bit; bit <<= 1) 316 + table += PTRS_PER_PTE; 317 + mask = atomic_xor_bits(&page->_mapcount, bit); 318 + if ((mask & FRAG_MASK) == FRAG_MASK) 319 + list_del(&page->lru); 168 320 } 169 - table = (unsigned long *) page_to_phys(page); 170 - while (page->flags & bits) { 171 - table += 256; 172 - bits <<= 1; 173 - } 174 - page->flags |= bits; 175 - if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) 176 - list_move_tail(&page->lru, &mm->context.pgtable_list); 177 321 spin_unlock_bh(&mm->context.list_lock); 178 322 return table; 179 - } 180 - 181 - static void __page_table_free(struct mm_struct *mm, unsigned long *table) 182 - { 183 - struct page 
*page; 184 - unsigned long bits; 185 - 186 - bits = ((unsigned long) table) & 15; 187 - table = (unsigned long *)(((unsigned long) table) ^ bits); 188 - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 189 - page->flags ^= bits; 190 - if (!(page->flags & FRAG_MASK)) { 191 - pgtable_page_dtor(page); 192 - __free_page(page); 193 - } 194 323 } 195 324 196 325 void page_table_free(struct mm_struct *mm, unsigned long *table) 197 326 { 198 327 struct page *page; 199 - unsigned long bits; 328 + unsigned int bit, mask; 200 329 201 - bits = (mm->context.has_pgste) ? 3UL : 1UL; 202 - bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 330 + #ifdef CONFIG_PGSTE 331 + if (mm_has_pgste(mm)) 332 + return page_table_free_pgste(table); 333 + #endif 334 + /* Free 1K/2K page table fragment of a 4K page */ 203 335 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 336 + bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); 204 337 spin_lock_bh(&mm->context.list_lock); 205 - page->flags ^= bits; 206 - if (page->flags & FRAG_MASK) { 207 - /* Page now has some free pgtable fragments. */ 208 - if (!list_empty(&page->lru)) 209 - list_move(&page->lru, &mm->context.pgtable_list); 210 - page = NULL; 211 - } else 212 - /* All fragments of the 4K page have been freed. 
*/ 338 + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) 213 339 list_del(&page->lru); 340 + mask = atomic_xor_bits(&page->_mapcount, bit); 341 + if (mask & FRAG_MASK) 342 + list_add(&page->lru, &mm->context.pgtable_list); 214 343 spin_unlock_bh(&mm->context.list_lock); 215 - if (page) { 344 + if (mask == 0) { 216 345 pgtable_page_dtor(page); 346 + atomic_set(&page->_mapcount, -1); 217 347 __free_page(page); 218 348 } 219 349 } 220 350 221 - void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) 222 - { 223 - struct rcu_table_freelist *batch; 224 - struct page *page; 225 - unsigned long bits; 351 + #ifdef CONFIG_HAVE_RCU_TABLE_FREE 226 352 227 - preempt_disable(); 228 - if (atomic_read(&mm->mm_users) < 2 && 229 - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { 230 - page_table_free(mm, table); 231 - goto out; 353 + static void __page_table_free_rcu(void *table, unsigned bit) 354 + { 355 + struct page *page; 356 + 357 + #ifdef CONFIG_PGSTE 358 + if (bit == FRAG_MASK) 359 + return page_table_free_pgste(table); 360 + #endif 361 + /* Free 1K/2K page table fragment of a 4K page */ 362 + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 363 + if (atomic_xor_bits(&page->_mapcount, bit) == 0) { 364 + pgtable_page_dtor(page); 365 + atomic_set(&page->_mapcount, -1); 366 + __free_page(page); 232 367 } 233 - batch = rcu_table_freelist_get(mm); 234 - if (!batch) { 235 - smp_call_function(smp_sync, NULL, 1); 236 - page_table_free(mm, table); 237 - goto out; 368 + } 369 + 370 + void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) 371 + { 372 + struct mm_struct *mm; 373 + struct page *page; 374 + unsigned int bit, mask; 375 + 376 + mm = tlb->mm; 377 + #ifdef CONFIG_PGSTE 378 + if (mm_has_pgste(mm)) { 379 + table = (unsigned long *) (__pa(table) | FRAG_MASK); 380 + tlb_remove_table(tlb, table); 381 + return; 238 382 } 239 - bits = (mm->context.has_pgste) ? 
3UL : 1UL; 240 - bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 383 + #endif 384 + bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); 241 385 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 242 386 spin_lock_bh(&mm->context.list_lock); 243 - /* Delayed freeing with rcu prevents reuse of pgtable fragments */ 244 - list_del_init(&page->lru); 387 + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) 388 + list_del(&page->lru); 389 + mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); 390 + if (mask & FRAG_MASK) 391 + list_add_tail(&page->lru, &mm->context.pgtable_list); 245 392 spin_unlock_bh(&mm->context.list_lock); 246 - table = (unsigned long *)(((unsigned long) table) | bits); 247 - batch->table[batch->pgt_index++] = table; 248 - if (batch->pgt_index >= batch->crst_index) 249 - rcu_table_freelist_finish(); 250 - out: 251 - preempt_enable(); 393 + table = (unsigned long *) (__pa(table) | (bit << 4)); 394 + tlb_remove_table(tlb, table); 252 395 } 396 + 397 + void __tlb_remove_table(void *_table) 398 + { 399 + void *table = (void *)((unsigned long) _table & PAGE_MASK); 400 + unsigned type = (unsigned long) _table & ~PAGE_MASK; 401 + 402 + if (type) 403 + __page_table_free_rcu(table, type); 404 + else 405 + free_pages((unsigned long) table, ALLOC_ORDER); 406 + } 407 + 408 + #endif 253 409 254 410 /* 255 411 * switch on pgstes for its userspace process (for kvm) ··· 315 369 return -EINVAL; 316 370 317 371 /* Do we have pgstes? if yes, we are done */ 318 - if (tsk->mm->context.has_pgste) 372 + if (mm_has_pgste(tsk->mm)) 319 373 return 0; 320 374 321 375 /* lets check if we are allowed to replace the mm */