KVM: MMU: limit rmap chain length

Otherwise the host can spend too long traversing an rmap chain, which
happens under a spinlock.

Cc: stable@kernel.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

authored by Marcelo Tosatti and committed by Avi Kivity 53a27b39 e9cbde8c

arch/x86/kvm/mmu.c  +28 -5
···
  *
  * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
  * containing more mappings.
+ *
+ * Returns the number of rmap entries before the spte was added or zero if
+ * the spte was not added.
+ *
  */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_rmap_desc *desc;
 	unsigned long *rmapp;
-	int i;
+	int i, count = 0;

 	if (!is_rmap_pte(*spte))
-		return;
+		return count;
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
 	sp->gfns[spte - sp->spt] = gfn;
···
 	} else {
 		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
 		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
-		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
 			desc = desc->more;
+			count += RMAP_EXT;
+		}
 		if (desc->shadow_ptes[RMAP_EXT-1]) {
 			desc->more = mmu_alloc_rmap_desc(vcpu);
 			desc = desc->more;
···
 			;
 		desc->shadow_ptes[i] = spte;
 	}
+	return count;
 }

 static void rmap_desc_remove_entry(unsigned long *rmapp,
···
 		spte = rmap_next(kvm, rmapp, spte);
 	}
 	return young;
+}
+
+#define RMAP_RECYCLE_THRESHOLD 1000
+
+static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+{
+	unsigned long *rmapp;
+
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+
+	kvm_unmap_rmapp(vcpu->kvm, rmapp);
+	kvm_flush_remote_tlbs(vcpu->kvm);
 }

 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
···
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
+	int rmap_count;

 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
···

 	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
 	if (!was_rmapped) {
-		rmap_add(vcpu, shadow_pte, gfn, largepage);
+		rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
 		if (!is_rmap_pte(*shadow_pte))
 			kvm_release_pfn_clean(pfn);
+		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+			rmap_recycle(vcpu, gfn, largepage);
 	} else {
 		if (was_writeble)
 			kvm_release_pfn_dirty(pfn);
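
To make the mechanics easier to see outside the kernel, here is a minimal userspace sketch of the idea, not the kernel implementation: the demo_* names, the descriptor size of 4, and the plain calloc() allocation are invented for illustration, and the real code manages the rmap through gfn_to_rmap()/mmu_alloc_rmap_desc() under the MMU spinlock. What it demonstrates is the point the patch relies on: the chain-length estimate costs nothing extra because it is gathered during the walk rmap_add() already performs, and crossing the threshold triggers a zap instead of an ever-longer traversal.

/*
 * Standalone userspace sketch of the rmap chaining scheme this patch bounds.
 * NOT the kernel code: the demo_* names, the descriptor size of 4, and the
 * calloc() allocation are stand-ins chosen for illustration.
 */
#include <stdio.h>
#include <stdlib.h>

#define DEMO_RMAP_EXT 4                /* stand-in for the kernel's RMAP_EXT */
#define DEMO_RECYCLE_THRESHOLD 1000    /* mirrors RMAP_RECYCLE_THRESHOLD */

struct demo_rmap_desc {
	unsigned long *shadow_ptes[DEMO_RMAP_EXT]; /* up to RMAP_EXT sptes per block */
	struct demo_rmap_desc *more;               /* next block in the chain */
};

/*
 * Append an spte to the chain and, like the patched rmap_add(), return a
 * coarse count of the entries already present: the walk over full
 * descriptors is the expensive part, so counting in RMAP_EXT-sized steps
 * while walking is essentially free.
 */
static int demo_rmap_add(struct demo_rmap_desc *head, unsigned long *spte)
{
	struct demo_rmap_desc *desc = head;
	int i, count = 0;

	while (desc->shadow_ptes[DEMO_RMAP_EXT - 1] && desc->more) {
		desc = desc->more;
		count += DEMO_RMAP_EXT;
	}
	if (desc->shadow_ptes[DEMO_RMAP_EXT - 1]) {
		/* Last descriptor is full: chain a fresh one. */
		desc->more = calloc(1, sizeof(*desc->more));
		if (!desc->more)
			abort();
		desc = desc->more;
	}
	for (i = 0; desc->shadow_ptes[i]; ++i)
		;
	desc->shadow_ptes[i] = spte;
	return count;
}

int main(void)
{
	static struct demo_rmap_desc head;
	static unsigned long fake_sptes[1200];
	int i;

	for (i = 0; i < 1200; i++) {
		int count = demo_rmap_add(&head, &fake_sptes[i]);

		/* What mmu_set_spte() now does: once the chain estimate passes
		 * the threshold, zap the rmap rather than let the walk grow. */
		if (count > DEMO_RECYCLE_THRESHOLD) {
			printf("recycle: chain estimate %d after %d inserts\n",
			       count, i + 1);
			break;
		}
	}
	return 0;
}

Counting only whole descriptors makes the estimate coarse (it can lag by up to RMAP_EXT entries), but against a threshold of 1000 that imprecision does not matter; what matters is that the check bounds how long any later rmap walk can hold the spinlock.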