Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: e500: perform hugepage check after looking up the PFN

e500 KVM tries to bypass __kvm_faultin_pfn() in order to map VM_PFNMAP
VMAs as huge pages. This is a Bad Idea because VM_PFNMAP VMAs could
become noncontiguous as a result of calls to remap_pfn_range().

Instead, use the already existing host PTE lookup to retrieve a
valid host-side mapping level after __kvm_faultin_pfn() has
returned. Then find the largest size that will satisfy the
guest's request while staying within a single host PTE.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

+69 -109
arch/powerpc/kvm/e500_mmu_host.c
··· 326 326 struct tlbe_ref *ref) 327 327 { 328 328 struct kvm_memory_slot *slot; 329 - unsigned long pfn = 0; /* silence GCC warning */ 329 + unsigned int psize; 330 + unsigned long pfn; 330 331 struct page *page = NULL; 331 332 unsigned long hva; 332 - int pfnmap = 0; 333 333 int tsize = BOOK3E_PAGESZ_4K; 334 334 int ret = 0; 335 335 unsigned long mmu_seq; 336 336 struct kvm *kvm = vcpu_e500->vcpu.kvm; 337 - unsigned long tsize_pages = 0; 338 337 pte_t *ptep; 339 338 unsigned int wimg = 0; 340 339 pgd_t *pgdir; ··· 355 356 slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); 356 357 hva = gfn_to_hva_memslot(slot, gfn); 357 358 358 - if (tlbsel == 1) { 359 - struct vm_area_struct *vma; 360 - mmap_read_lock(kvm->mm); 361 - 362 - vma = find_vma(kvm->mm, hva); 363 - if (vma && hva >= vma->vm_start && 364 - (vma->vm_flags & VM_PFNMAP)) { 365 - /* 366 - * This VMA is a physically contiguous region (e.g. 367 - * /dev/mem) that bypasses normal Linux page 368 - * management. Find the overlap between the 369 - * vma and the memslot. 370 - */ 371 - 372 - unsigned long start, end; 373 - unsigned long slot_start, slot_end; 374 - 375 - pfnmap = 1; 376 - writable = vma->vm_flags & VM_WRITE; 377 - 378 - start = vma->vm_pgoff; 379 - end = start + 380 - vma_pages(vma); 381 - 382 - pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); 383 - 384 - slot_start = pfn - (gfn - slot->base_gfn); 385 - slot_end = slot_start + slot->npages; 386 - 387 - if (start < slot_start) 388 - start = slot_start; 389 - if (end > slot_end) 390 - end = slot_end; 391 - 392 - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> 393 - MAS1_TSIZE_SHIFT; 394 - 395 - /* 396 - * e500 doesn't implement the lowest tsize bit, 397 - * or 1K pages. 398 - */ 399 - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); 400 - 401 - /* 402 - * Now find the largest tsize (up to what the guest 403 - * requested) that will cover gfn, stay within the 404 - * range, and for which gfn and pfn are mutually 405 - * aligned. 
406 - */ 407 - 408 - for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { 409 - unsigned long gfn_start, gfn_end; 410 - tsize_pages = 1UL << (tsize - 2); 411 - 412 - gfn_start = gfn & ~(tsize_pages - 1); 413 - gfn_end = gfn_start + tsize_pages; 414 - 415 - if (gfn_start + pfn - gfn < start) 416 - continue; 417 - if (gfn_end + pfn - gfn > end) 418 - continue; 419 - if ((gfn & (tsize_pages - 1)) != 420 - (pfn & (tsize_pages - 1))) 421 - continue; 422 - 423 - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); 424 - pfn &= ~(tsize_pages - 1); 425 - break; 426 - } 427 - } else if (vma && hva >= vma->vm_start && 428 - is_vm_hugetlb_page(vma)) { 429 - unsigned long psize = vma_kernel_pagesize(vma); 430 - 431 - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> 432 - MAS1_TSIZE_SHIFT; 433 - 434 - /* 435 - * Take the largest page size that satisfies both host 436 - * and guest mapping 437 - */ 438 - tsize = min(__ilog2(psize) - 10, tsize); 439 - 440 - /* 441 - * e500 doesn't implement the lowest tsize bit, 442 - * or 1K pages. 443 - */ 444 - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); 445 - } 446 - 447 - mmap_read_unlock(kvm->mm); 448 - } 449 - 450 - if (likely(!pfnmap)) { 451 - tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT); 452 - pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page); 453 - if (is_error_noslot_pfn(pfn)) { 454 - if (printk_ratelimit()) 455 - pr_err("%s: real page not found for gfn %lx\n", 456 - __func__, (long)gfn); 457 - return -EINVAL; 458 - } 459 - 460 - /* Align guest and physical address to page map boundaries */ 461 - pfn &= ~(tsize_pages - 1); 462 - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); 359 + pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page); 360 + if (is_error_noslot_pfn(pfn)) { 361 + if (printk_ratelimit()) 362 + pr_err("%s: real page not found for gfn %lx\n", 363 + __func__, (long)gfn); 364 + return -EINVAL; 463 365 } 464 366 465 367 spin_lock(&kvm->mmu_lock); ··· 378 478 * can't run hence pfn won't change. 
379 479 */ 380 480 local_irq_save(flags); 381 - ptep = find_linux_pte(pgdir, hva, NULL, NULL); 481 + ptep = find_linux_pte(pgdir, hva, NULL, &psize); 382 482 if (ptep) { 383 483 pte_t pte = READ_ONCE(*ptep); 384 484 ··· 394 494 } 395 495 } 396 496 local_irq_restore(flags); 497 + 498 + if (psize && tlbsel == 1) { 499 + unsigned long psize_pages, tsize_pages; 500 + unsigned long start, end; 501 + unsigned long slot_start, slot_end; 502 + 503 + psize_pages = 1UL << (psize - PAGE_SHIFT); 504 + start = pfn & ~(psize_pages - 1); 505 + end = start + psize_pages; 506 + 507 + slot_start = pfn - (gfn - slot->base_gfn); 508 + slot_end = slot_start + slot->npages; 509 + 510 + if (start < slot_start) 511 + start = slot_start; 512 + if (end > slot_end) 513 + end = slot_end; 514 + 515 + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> 516 + MAS1_TSIZE_SHIFT; 517 + 518 + /* 519 + * Any page size that doesn't satisfy the host mapping 520 + * will fail the start and end tests. 521 + */ 522 + tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize); 523 + 524 + /* 525 + * e500 doesn't implement the lowest tsize bit, 526 + * or 1K pages. 527 + */ 528 + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); 529 + 530 + /* 531 + * Now find the largest tsize (up to what the guest 532 + * requested) that will cover gfn, stay within the 533 + * range, and for which gfn and pfn are mutually 534 + * aligned. 
535 + */ 536 + 537 + for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { 538 + unsigned long gfn_start, gfn_end; 539 + tsize_pages = 1UL << (tsize - 2); 540 + 541 + gfn_start = gfn & ~(tsize_pages - 1); 542 + gfn_end = gfn_start + tsize_pages; 543 + 544 + if (gfn_start + pfn - gfn < start) 545 + continue; 546 + if (gfn_end + pfn - gfn > end) 547 + continue; 548 + if ((gfn & (tsize_pages - 1)) != 549 + (pfn & (tsize_pages - 1))) 550 + continue; 551 + 552 + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); 553 + pfn &= ~(tsize_pages - 1); 554 + break; 555 + } 556 + } 397 557 398 558 kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable); 399 559 kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,