Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

[S390] kvm guest address space mapping

Add code that allows KVM to control the virtual memory layout that
is seen by a guest. The guest address space uses a second page table
that shares the last-level pte tables with the process page table.
If a page is unmapped from the process page table, it is automatically
unmapped from the guest page table as well.

The guest address space mapping starts out empty; KVM can map individual
1MB segments from the process virtual memory to any 1MB-aligned location
in the guest virtual memory. If a target segment in the process virtual
memory does not exist, or is unmapped while a guest mapping exists, the
desired target address is stored as an invalid segment table entry in the
guest page table.
The population of the guest page table is fault-driven.
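
To make the intended call sequence concrete, here is a minimal usage sketch.
Only the gmap_* functions come from this patch; the surrounding helper, its
parameters and the error handling are made up for illustration, and both the
source address and the length must be 1MB aligned as gmap_map_segment()
requires.

/*
 * Hypothetical usage sketch (not part of the patch): create a guest
 * address space, map one block of process memory into it and run the
 * guest.  "from" and "size" must be 1MB (PMD_SIZE) aligned.
 */
static int run_guest(struct mm_struct *mm, unsigned long from,
                     unsigned long size)
{
        struct gmap *gmap;
        int rc;

        gmap = gmap_alloc(mm);                  /* starts out empty */
        if (!gmap)
                return -ENOMEM;
        /* Map process memory [from, from + size) at guest address 0. */
        rc = gmap_map_segment(gmap, from, 0UL, size);
        if (rc)
                goto out_free;
        gmap_enable(gmap);      /* primary space now points to the gmap */
        /*
         * ... enter the guest; host faults on guest addresses are
         * resolved on demand through gmap_fault() ...
         */
        gmap_disable(gmap);     /* back to the regular process page table */
out_free:
        gmap_free(gmap);
        return rc;
}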

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

+489 -52
+1 -1
arch/s390/include/asm/lowcore.h
··· 268 268 __u64 vdso_per_cpu_data; /* 0x0358 */ 269 269 __u64 machine_flags; /* 0x0360 */ 270 270 __u64 ftrace_func; /* 0x0368 */ 271 - __u64 sie_hook; /* 0x0370 */ 271 + __u64 gmap; /* 0x0370 */ 272 272 __u64 cmf_hpp; /* 0x0378 */ 273 273 274 274 /* Interrupt response block. */
+3 -1
arch/s390/include/asm/mmu.h
··· 6 6 unsigned int flush_mm; 7 7 spinlock_t list_lock; 8 8 struct list_head pgtable_list; 9 + struct list_head gmap_list; 9 10 unsigned long asce_bits; 10 11 unsigned long asce_limit; 11 12 unsigned long vdso_base; ··· 18 17 19 18 #define INIT_MM_CONTEXT(name) \ 20 19 .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ 21 - .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), 20 + .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ 21 + .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), 22 22 23 23 #endif
+4 -3
arch/s390/include/asm/pgalloc.h
··· 20 20 unsigned long *crst_table_alloc(struct mm_struct *); 21 21 void crst_table_free(struct mm_struct *, unsigned long *); 22 22 23 - unsigned long *page_table_alloc(struct mm_struct *); 23 + unsigned long *page_table_alloc(struct mm_struct *, unsigned long); 24 24 void page_table_free(struct mm_struct *, unsigned long *); 25 25 #ifdef CONFIG_HAVE_RCU_TABLE_FREE 26 26 void page_table_free_rcu(struct mmu_gather *, unsigned long *); ··· 115 115 { 116 116 spin_lock_init(&mm->context.list_lock); 117 117 INIT_LIST_HEAD(&mm->context.pgtable_list); 118 + INIT_LIST_HEAD(&mm->context.gmap_list); 118 119 return (pgd_t *) crst_table_alloc(mm); 119 120 } 120 121 #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) ··· 134 133 /* 135 134 * page table entry allocation/free routines. 136 135 */ 137 - #define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) 138 - #define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) 136 + #define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) 137 + #define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) 139 138 140 139 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) 141 140 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
+42
arch/s390/include/asm/pgtable.h
··· 654 654 #endif 655 655 } 656 656 657 + /** 658 + * struct gmap_struct - guest address space 659 + * @mm: pointer to the parent mm_struct 660 + * @table: pointer to the page directory 661 + * @crst_list: list of all crst tables used in the guest address space 662 + */ 663 + struct gmap { 664 + struct list_head list; 665 + struct mm_struct *mm; 666 + unsigned long *table; 667 + struct list_head crst_list; 668 + }; 669 + 670 + /** 671 + * struct gmap_rmap - reverse mapping for segment table entries 672 + * @next: pointer to the next gmap_rmap structure in the list 673 + * @entry: pointer to a segment table entry 674 + */ 675 + struct gmap_rmap { 676 + struct list_head list; 677 + unsigned long *entry; 678 + }; 679 + 680 + /** 681 + * struct gmap_pgtable - gmap information attached to a page table 682 + * @vmaddr: address of the 1MB segment in the process virtual memory 683 + * @mapper: list of segment table entries maping a page table 684 + */ 685 + struct gmap_pgtable { 686 + unsigned long vmaddr; 687 + struct list_head mapper; 688 + }; 689 + 690 + struct gmap *gmap_alloc(struct mm_struct *mm); 691 + void gmap_free(struct gmap *gmap); 692 + void gmap_enable(struct gmap *gmap); 693 + void gmap_disable(struct gmap *gmap); 694 + int gmap_map_segment(struct gmap *gmap, unsigned long from, 695 + unsigned long to, unsigned long length); 696 + int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); 697 + unsigned long gmap_fault(unsigned long address, struct gmap *); 698 + 657 699 /* 658 700 * Certain architectures need to do special things when PTEs 659 701 * within a page table are directly modified. Thus, the following
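
As a reading aid for the structures and prototypes added above, the sketch
below (hypothetical, not part of the patch) decodes a gmap segment table
entry the way gmap_fault() interprets it: a valid entry holds the origin of
the pte table shared with the process, while an entry written by
gmap_map_segment() is marked invalid and carries the 1MB-aligned process
address that a later fault still has to resolve.

/*
 * Illustration only: meaning of a gmap segment table entry.
 * Valid entry       -> origin of the pte table shared with the process.
 * Invalid + RO bits -> process address stored by gmap_map_segment(),
 *                      still to be resolved by gmap_fault().
 * Invalid only      -> nothing mapped at this guest segment.
 */
static unsigned long gmap_segment_info(unsigned long entry)
{
        if (!(entry & _SEGMENT_ENTRY_INV))
                return entry & PAGE_MASK;               /* pte-table origin */
        if (entry & _SEGMENT_ENTRY_RO)
                return entry & _SEGMENT_ENTRY_ORIGIN;   /* pending vmaddr */
        return 0;                                       /* unmapped */
}
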
+1
arch/s390/include/asm/processor.h
··· 80 80 mm_segment_t mm_segment; 81 81 unsigned long prot_addr; /* address of protection-excep. */ 82 82 unsigned int trap_no; 83 + unsigned long gmap_addr; /* address of last gmap fault. */ 83 84 struct per_regs per_user; /* User specified PER registers */ 84 85 struct per_event per_event; /* Cause of the last PER trap */ 85 86 /* pfault_wait is used to block the process on a pfault event */
+1 -1
arch/s390/include/asm/tlbflush.h
··· 80 80 * on all cpus instead of doing a local flush if the mm 81 81 * only ran on the local cpu. 82 82 */ 83 - if (MACHINE_HAS_IDTE) 83 + if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) 84 84 __tlb_flush_idte((unsigned long) mm->pgd | 85 85 mm->context.asce_bits); 86 86 else
+1 -1
arch/s390/kernel/asm-offsets.c
··· 151 151 DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area)); 152 152 DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); 153 153 DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); 154 - DEFINE(__LC_SIE_HOOK, offsetof(struct _lowcore, sie_hook)); 154 + DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); 155 155 DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp)); 156 156 #endif /* CONFIG_32BIT */ 157 157 return 0;
+17 -1
arch/s390/mm/fault.c
··· 303 303 flags = FAULT_FLAG_ALLOW_RETRY; 304 304 if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) 305 305 flags |= FAULT_FLAG_WRITE; 306 - retry: 307 306 down_read(&mm->mmap_sem); 308 307 308 + #ifdef CONFIG_PGSTE 309 + if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { 310 + address = gmap_fault(address, 311 + (struct gmap *) S390_lowcore.gmap); 312 + if (address == -EFAULT) { 313 + fault = VM_FAULT_BADMAP; 314 + goto out_up; 315 + } 316 + if (address == -ENOMEM) { 317 + fault = VM_FAULT_OOM; 318 + goto out_up; 319 + } 320 + } 321 + #endif 322 + 323 + retry: 309 324 fault = VM_FAULT_BADMAP; 310 325 vma = find_vma(mm, address); 311 326 if (!vma) ··· 371 356 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 372 357 * of starvation. */ 373 358 flags &= ~FAULT_FLAG_ALLOW_RETRY; 359 + down_read(&mm->mmap_sem); 374 360 goto retry; 375 361 } 376 362 }
+1 -1
arch/s390/mm/hugetlbpage.c
··· 35 35 if (MACHINE_HAS_HPAGE) 36 36 return 0; 37 37 38 - ptep = (pte_t *) pte_alloc_one(&init_mm, address); 38 + ptep = (pte_t *) pte_alloc_one(&init_mm, addr); 39 39 if (!ptep) 40 40 return -ENOMEM; 41 41
+414 -39
arch/s390/mm/pgtable.c
··· 16 16 #include <linux/module.h> 17 17 #include <linux/quicklist.h> 18 18 #include <linux/rcupdate.h> 19 + #include <linux/slab.h> 19 20 20 21 #include <asm/system.h> 21 22 #include <asm/pgtable.h> ··· 134 133 } 135 134 #endif 136 135 136 + #ifdef CONFIG_PGSTE 137 + 138 + /** 139 + * gmap_alloc - allocate a guest address space 140 + * @mm: pointer to the parent mm_struct 141 + * 142 + * Returns a guest address space structure. 143 + */ 144 + struct gmap *gmap_alloc(struct mm_struct *mm) 145 + { 146 + struct gmap *gmap; 147 + struct page *page; 148 + unsigned long *table; 149 + 150 + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); 151 + if (!gmap) 152 + goto out; 153 + INIT_LIST_HEAD(&gmap->crst_list); 154 + gmap->mm = mm; 155 + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 156 + if (!page) 157 + goto out_free; 158 + list_add(&page->lru, &gmap->crst_list); 159 + table = (unsigned long *) page_to_phys(page); 160 + crst_table_init(table, _REGION1_ENTRY_EMPTY); 161 + gmap->table = table; 162 + list_add(&gmap->list, &mm->context.gmap_list); 163 + return gmap; 164 + 165 + out_free: 166 + kfree(gmap); 167 + out: 168 + return NULL; 169 + } 170 + EXPORT_SYMBOL_GPL(gmap_alloc); 171 + 172 + static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) 173 + { 174 + struct gmap_pgtable *mp; 175 + struct gmap_rmap *rmap; 176 + struct page *page; 177 + 178 + if (*table & _SEGMENT_ENTRY_INV) 179 + return 0; 180 + page = pfn_to_page(*table >> PAGE_SHIFT); 181 + mp = (struct gmap_pgtable *) page->index; 182 + list_for_each_entry(rmap, &mp->mapper, list) { 183 + if (rmap->entry != table) 184 + continue; 185 + list_del(&rmap->list); 186 + kfree(rmap); 187 + break; 188 + } 189 + *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; 190 + return 1; 191 + } 192 + 193 + static void gmap_flush_tlb(struct gmap *gmap) 194 + { 195 + if (MACHINE_HAS_IDTE) 196 + __tlb_flush_idte((unsigned long) gmap->table | 197 + _ASCE_TYPE_REGION1); 198 + else 199 + __tlb_flush_global(); 200 + } 201 + 202 + /** 203 + * gmap_free - free a guest address space 204 + * @gmap: pointer to the guest address space structure 205 + */ 206 + void gmap_free(struct gmap *gmap) 207 + { 208 + struct page *page, *next; 209 + unsigned long *table; 210 + int i; 211 + 212 + 213 + /* Flush tlb. */ 214 + if (MACHINE_HAS_IDTE) 215 + __tlb_flush_idte((unsigned long) gmap->table | 216 + _ASCE_TYPE_REGION1); 217 + else 218 + __tlb_flush_global(); 219 + 220 + /* Free all segment & region tables. */ 221 + down_read(&gmap->mm->mmap_sem); 222 + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { 223 + table = (unsigned long *) page_to_phys(page); 224 + if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) 225 + /* Remove gmap rmap structures for segment table. */ 226 + for (i = 0; i < PTRS_PER_PMD; i++, table++) 227 + gmap_unlink_segment(gmap, table); 228 + __free_pages(page, ALLOC_ORDER); 229 + } 230 + up_read(&gmap->mm->mmap_sem); 231 + list_del(&gmap->list); 232 + kfree(gmap); 233 + } 234 + EXPORT_SYMBOL_GPL(gmap_free); 235 + 236 + /** 237 + * gmap_enable - switch primary space to the guest address space 238 + * @gmap: pointer to the guest address space structure 239 + */ 240 + void gmap_enable(struct gmap *gmap) 241 + { 242 + /* Load primary space page table origin. 
*/ 243 + S390_lowcore.user_asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | 244 + _ASCE_USER_BITS | __pa(gmap->table); 245 + asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); 246 + S390_lowcore.gmap = (unsigned long) gmap; 247 + } 248 + EXPORT_SYMBOL_GPL(gmap_enable); 249 + 250 + /** 251 + * gmap_disable - switch back to the standard primary address space 252 + * @gmap: pointer to the guest address space structure 253 + */ 254 + void gmap_disable(struct gmap *gmap) 255 + { 256 + /* Load primary space page table origin. */ 257 + S390_lowcore.user_asce = 258 + gmap->mm->context.asce_bits | __pa(gmap->mm->pgd); 259 + asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); 260 + S390_lowcore.gmap = 0UL; 261 + } 262 + EXPORT_SYMBOL_GPL(gmap_disable); 263 + 264 + static int gmap_alloc_table(struct gmap *gmap, 265 + unsigned long *table, unsigned long init) 266 + { 267 + struct page *page; 268 + unsigned long *new; 269 + 270 + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); 271 + if (!page) 272 + return -ENOMEM; 273 + new = (unsigned long *) page_to_phys(page); 274 + crst_table_init(new, init); 275 + down_read(&gmap->mm->mmap_sem); 276 + if (*table & _REGION_ENTRY_INV) { 277 + list_add(&page->lru, &gmap->crst_list); 278 + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | 279 + (*table & _REGION_ENTRY_TYPE_MASK); 280 + } else 281 + __free_pages(page, ALLOC_ORDER); 282 + up_read(&gmap->mm->mmap_sem); 283 + return 0; 284 + } 285 + 286 + /** 287 + * gmap_unmap_segment - unmap segment from the guest address space 288 + * @gmap: pointer to the guest address space structure 289 + * @addr: address in the guest address space 290 + * @len: length of the memory area to unmap 291 + * 292 + * Returns 0 if the unmap succeded, -EINVAL if not. 293 + */ 294 + int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) 295 + { 296 + unsigned long *table; 297 + unsigned long off; 298 + int flush; 299 + 300 + if ((to | len) & (PMD_SIZE - 1)) 301 + return -EINVAL; 302 + if (len == 0 || to + len < to) 303 + return -EINVAL; 304 + 305 + flush = 0; 306 + down_read(&gmap->mm->mmap_sem); 307 + for (off = 0; off < len; off += PMD_SIZE) { 308 + /* Walk the guest addr space page table */ 309 + table = gmap->table + (((to + off) >> 53) & 0x7ff); 310 + if (*table & _REGION_ENTRY_INV) 311 + return 0; 312 + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 313 + table = table + (((to + off) >> 42) & 0x7ff); 314 + if (*table & _REGION_ENTRY_INV) 315 + return 0; 316 + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 317 + table = table + (((to + off) >> 31) & 0x7ff); 318 + if (*table & _REGION_ENTRY_INV) 319 + return 0; 320 + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 321 + table = table + (((to + off) >> 20) & 0x7ff); 322 + 323 + /* Clear segment table entry in guest address space. */ 324 + flush |= gmap_unlink_segment(gmap, table); 325 + *table = _SEGMENT_ENTRY_INV; 326 + } 327 + up_read(&gmap->mm->mmap_sem); 328 + if (flush) 329 + gmap_flush_tlb(gmap); 330 + return 0; 331 + } 332 + EXPORT_SYMBOL_GPL(gmap_unmap_segment); 333 + 334 + /** 335 + * gmap_mmap_segment - map a segment to the guest address space 336 + * @gmap: pointer to the guest address space structure 337 + * @from: source address in the parent address space 338 + * @to: target address in the guest address space 339 + * 340 + * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not. 
341 + */ 342 + int gmap_map_segment(struct gmap *gmap, unsigned long from, 343 + unsigned long to, unsigned long len) 344 + { 345 + unsigned long *table; 346 + unsigned long off; 347 + int flush; 348 + 349 + if ((from | to | len) & (PMD_SIZE - 1)) 350 + return -EINVAL; 351 + if (len == 0 || from + len > PGDIR_SIZE || 352 + from + len < from || to + len < to) 353 + return -EINVAL; 354 + 355 + flush = 0; 356 + down_read(&gmap->mm->mmap_sem); 357 + for (off = 0; off < len; off += PMD_SIZE) { 358 + /* Walk the gmap address space page table */ 359 + table = gmap->table + (((to + off) >> 53) & 0x7ff); 360 + if ((*table & _REGION_ENTRY_INV) && 361 + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) 362 + goto out_unmap; 363 + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 364 + table = table + (((to + off) >> 42) & 0x7ff); 365 + if ((*table & _REGION_ENTRY_INV) && 366 + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) 367 + goto out_unmap; 368 + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 369 + table = table + (((to + off) >> 31) & 0x7ff); 370 + if ((*table & _REGION_ENTRY_INV) && 371 + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) 372 + goto out_unmap; 373 + table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); 374 + table = table + (((to + off) >> 20) & 0x7ff); 375 + 376 + /* Store 'from' address in an invalid segment table entry. */ 377 + flush |= gmap_unlink_segment(gmap, table); 378 + *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); 379 + } 380 + up_read(&gmap->mm->mmap_sem); 381 + if (flush) 382 + gmap_flush_tlb(gmap); 383 + return 0; 384 + 385 + out_unmap: 386 + up_read(&gmap->mm->mmap_sem); 387 + gmap_unmap_segment(gmap, to, len); 388 + return -ENOMEM; 389 + } 390 + EXPORT_SYMBOL_GPL(gmap_map_segment); 391 + 392 + unsigned long gmap_fault(unsigned long address, struct gmap *gmap) 393 + { 394 + unsigned long *table, vmaddr, segment; 395 + struct mm_struct *mm; 396 + struct gmap_pgtable *mp; 397 + struct gmap_rmap *rmap; 398 + struct vm_area_struct *vma; 399 + struct page *page; 400 + pgd_t *pgd; 401 + pud_t *pud; 402 + pmd_t *pmd; 403 + 404 + current->thread.gmap_addr = address; 405 + mm = gmap->mm; 406 + /* Walk the gmap address space page table */ 407 + table = gmap->table + ((address >> 53) & 0x7ff); 408 + if (unlikely(*table & _REGION_ENTRY_INV)) 409 + return -EFAULT; 410 + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 411 + table = table + ((address >> 42) & 0x7ff); 412 + if (unlikely(*table & _REGION_ENTRY_INV)) 413 + return -EFAULT; 414 + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 415 + table = table + ((address >> 31) & 0x7ff); 416 + if (unlikely(*table & _REGION_ENTRY_INV)) 417 + return -EFAULT; 418 + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 419 + table = table + ((address >> 20) & 0x7ff); 420 + 421 + /* Convert the gmap address to an mm address. 
*/ 422 + segment = *table; 423 + if (likely(!(segment & _SEGMENT_ENTRY_INV))) { 424 + page = pfn_to_page(segment >> PAGE_SHIFT); 425 + mp = (struct gmap_pgtable *) page->index; 426 + return mp->vmaddr | (address & ~PMD_MASK); 427 + } else if (segment & _SEGMENT_ENTRY_RO) { 428 + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; 429 + vma = find_vma(mm, vmaddr); 430 + if (!vma || vma->vm_start > vmaddr) 431 + return -EFAULT; 432 + 433 + /* Walk the parent mm page table */ 434 + pgd = pgd_offset(mm, vmaddr); 435 + pud = pud_alloc(mm, pgd, vmaddr); 436 + if (!pud) 437 + return -ENOMEM; 438 + pmd = pmd_alloc(mm, pud, vmaddr); 439 + if (!pmd) 440 + return -ENOMEM; 441 + if (!pmd_present(*pmd) && 442 + __pte_alloc(mm, vma, pmd, vmaddr)) 443 + return -ENOMEM; 444 + /* pmd now points to a valid segment table entry. */ 445 + rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); 446 + if (!rmap) 447 + return -ENOMEM; 448 + /* Link gmap segment table entry location to page table. */ 449 + page = pmd_page(*pmd); 450 + mp = (struct gmap_pgtable *) page->index; 451 + rmap->entry = table; 452 + list_add(&rmap->list, &mp->mapper); 453 + /* Set gmap segment table entry to page table. */ 454 + *table = pmd_val(*pmd) & PAGE_MASK; 455 + return vmaddr | (address & ~PMD_MASK); 456 + } 457 + return -EFAULT; 458 + 459 + } 460 + EXPORT_SYMBOL_GPL(gmap_fault); 461 + 462 + void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) 463 + { 464 + struct gmap_rmap *rmap, *next; 465 + struct gmap_pgtable *mp; 466 + struct page *page; 467 + int flush; 468 + 469 + flush = 0; 470 + spin_lock(&mm->page_table_lock); 471 + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 472 + mp = (struct gmap_pgtable *) page->index; 473 + list_for_each_entry_safe(rmap, next, &mp->mapper, list) { 474 + *rmap->entry = 475 + _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; 476 + list_del(&rmap->list); 477 + kfree(rmap); 478 + flush = 1; 479 + } 480 + spin_unlock(&mm->page_table_lock); 481 + if (flush) 482 + __tlb_flush_global(); 483 + } 484 + 485 + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 486 + unsigned long vmaddr) 487 + { 488 + struct page *page; 489 + unsigned long *table; 490 + struct gmap_pgtable *mp; 491 + 492 + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 493 + if (!page) 494 + return NULL; 495 + mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); 496 + if (!mp) { 497 + __free_page(page); 498 + return NULL; 499 + } 500 + pgtable_page_ctor(page); 501 + mp->vmaddr = vmaddr & PMD_MASK; 502 + INIT_LIST_HEAD(&mp->mapper); 503 + page->index = (unsigned long) mp; 504 + atomic_set(&page->_mapcount, 3); 505 + table = (unsigned long *) page_to_phys(page); 506 + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); 507 + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); 508 + return table; 509 + } 510 + 511 + static inline void page_table_free_pgste(unsigned long *table) 512 + { 513 + struct page *page; 514 + struct gmap_pgtable *mp; 515 + 516 + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 517 + mp = (struct gmap_pgtable *) page->index; 518 + BUG_ON(!list_empty(&mp->mapper)); 519 + pgtable_page_ctor(page); 520 + atomic_set(&page->_mapcount, -1); 521 + kfree(mp); 522 + __free_page(page); 523 + } 524 + 525 + #else /* CONFIG_PGSTE */ 526 + 527 + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 528 + unsigned long vmaddr) 529 + { 530 + } 531 + 532 + static inline void page_table_free_pgste(unsigned long *table) 533 + { 534 + } 535 + 536 + static inline void gmap_unmap_notifier(struct 
mm_struct *mm, 537 + unsigned long *table) 538 + { 539 + } 540 + 541 + #endif /* CONFIG_PGSTE */ 542 + 137 543 static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) 138 544 { 139 545 unsigned int old, new; ··· 555 147 /* 556 148 * page table entry allocation/free routines. 557 149 */ 558 - #ifdef CONFIG_PGSTE 559 - static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) 560 - { 561 - struct page *page; 562 - unsigned long *table; 563 - 564 - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 565 - if (!page) 566 - return NULL; 567 - pgtable_page_ctor(page); 568 - atomic_set(&page->_mapcount, 3); 569 - table = (unsigned long *) page_to_phys(page); 570 - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); 571 - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); 572 - return table; 573 - } 574 - 575 - static inline void page_table_free_pgste(unsigned long *table) 576 - { 577 - struct page *page; 578 - 579 - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 580 - pgtable_page_ctor(page); 581 - atomic_set(&page->_mapcount, -1); 582 - __free_page(page); 583 - } 584 - #endif 585 - 586 - unsigned long *page_table_alloc(struct mm_struct *mm) 150 + unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) 587 151 { 588 152 struct page *page; 589 153 unsigned long *table; 590 154 unsigned int mask, bit; 591 155 592 - #ifdef CONFIG_PGSTE 593 156 if (mm_has_pgste(mm)) 594 - return page_table_alloc_pgste(mm); 595 - #endif 157 + return page_table_alloc_pgste(mm, vmaddr); 596 158 /* Allocate fragments of a 4K page as 1K/2K page table */ 597 159 spin_lock_bh(&mm->context.list_lock); 598 160 mask = FRAG_MASK; ··· 600 222 struct page *page; 601 223 unsigned int bit, mask; 602 224 603 - #ifdef CONFIG_PGSTE 604 - if (mm_has_pgste(mm)) 225 + if (mm_has_pgste(mm)) { 226 + gmap_unmap_notifier(mm, table); 605 227 return page_table_free_pgste(table); 606 - #endif 228 + } 607 229 /* Free 1K/2K page table fragment of a 4K page */ 608 230 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 609 231 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); ··· 627 249 { 628 250 struct page *page; 629 251 630 - #ifdef CONFIG_PGSTE 631 252 if (bit == FRAG_MASK) 632 253 return page_table_free_pgste(table); 633 - #endif 634 254 /* Free 1K/2K page table fragment of a 4K page */ 635 255 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 636 256 if (atomic_xor_bits(&page->_mapcount, bit) == 0) { ··· 645 269 unsigned int bit, mask; 646 270 647 271 mm = tlb->mm; 648 - #ifdef CONFIG_PGSTE 649 272 if (mm_has_pgste(mm)) { 273 + gmap_unmap_notifier(mm, table); 650 274 table = (unsigned long *) (__pa(table) | FRAG_MASK); 651 275 tlb_remove_table(tlb, table); 652 276 return; 653 277 } 654 - #endif 655 278 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); 656 279 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 657 280 spin_lock_bh(&mm->context.list_lock);
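
A note on the page table walk above: the hard-coded shifts in
gmap_map_segment(), gmap_unmap_segment() and gmap_fault() implement a fixed
four-level walk. A gmap always starts with a region-first table, so a guest
address splits into four 11-bit table indices (2048 entries per crst table)
followed by the offset within the 1MB segment. A hypothetical helper showing
the same computation:

/*
 * Illustration only, equivalent to the open-coded index computation in
 * the patch: level 0..3 selects the region-first, region-second,
 * region-third and segment table index for a guest address.
 */
static unsigned long gmap_table_index(unsigned long gaddr, int level)
{
        static const unsigned int shift[4] = { 53, 42, 31, 20 };

        return (gaddr >> shift[level]) & 0x7ffUL;       /* 2048 entries per table */
}
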
+4 -4
arch/s390/mm/vmem.c
··· 61 61 return pmd; 62 62 } 63 63 64 - static pte_t __ref *vmem_pte_alloc(void) 64 + static pte_t __ref *vmem_pte_alloc(unsigned long address) 65 65 { 66 66 pte_t *pte; 67 67 68 68 if (slab_is_available()) 69 - pte = (pte_t *) page_table_alloc(&init_mm); 69 + pte = (pte_t *) page_table_alloc(&init_mm, address); 70 70 else 71 71 pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); 72 72 if (!pte) ··· 120 120 } 121 121 #endif 122 122 if (pmd_none(*pm_dir)) { 123 - pt_dir = vmem_pte_alloc(); 123 + pt_dir = vmem_pte_alloc(address); 124 124 if (!pt_dir) 125 125 goto out; 126 126 pmd_populate(&init_mm, pm_dir, pt_dir); ··· 205 205 206 206 pm_dir = pmd_offset(pu_dir, address); 207 207 if (pmd_none(*pm_dir)) { 208 - pt_dir = vmem_pte_alloc(); 208 + pt_dir = vmem_pte_alloc(address); 209 209 if (!pt_dir) 210 210 goto out; 211 211 pmd_populate(&init_mm, pm_dir, pt_dir);