KVM: MMU: limit rmap chain length

Otherwise the host can spend too long traversing an rmap chain, which
happens under a spinlock.

Cc: stable@kernel.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

Authored by Marcelo Tosatti, committed by Avi Kivity (53a27b39 e9cbde8c)

+28 -5
arch/x86/kvm/mmu.c
···
   *
   * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
   * containing more mappings.
+  *
+  * Returns the number of rmap entries before the spte was added or zero if
+  * the spte was not added.
+  *
   */
- static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
+ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
  {
  	struct kvm_mmu_page *sp;
  	struct kvm_rmap_desc *desc;
  	unsigned long *rmapp;
- 	int i;
+ 	int i, count = 0;

  	if (!is_rmap_pte(*spte))
- 		return;
+ 		return count;
  	gfn = unalias_gfn(vcpu->kvm, gfn);
  	sp = page_header(__pa(spte));
  	sp->gfns[spte - sp->spt] = gfn;
···
  	} else {
  		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
  		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
- 		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+ 		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
  			desc = desc->more;
+ 			count += RMAP_EXT;
+ 		}
  		if (desc->shadow_ptes[RMAP_EXT-1]) {
  			desc->more = mmu_alloc_rmap_desc(vcpu);
  			desc = desc->more;
···
  			;
  		desc->shadow_ptes[i] = spte;
  	}
+ 	return count;
  }

  static void rmap_desc_remove_entry(unsigned long *rmapp,
···
  		spte = rmap_next(kvm, rmapp, spte);
  	}
  	return young;
+ }
+
+ #define RMAP_RECYCLE_THRESHOLD 1000
+
+ static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+ {
+ 	unsigned long *rmapp;
+
+ 	gfn = unalias_gfn(vcpu->kvm, gfn);
+ 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+
+ 	kvm_unmap_rmapp(vcpu->kvm, rmapp);
+ 	kvm_flush_remote_tlbs(vcpu->kvm);
  }

  int kvm_age_hva(struct kvm *kvm, unsigned long hva)
···
  {
  	int was_rmapped = 0;
  	int was_writeble = is_writeble_pte(*shadow_pte);
+ 	int rmap_count;

  	pgprintk("%s: spte %llx access %x write_fault %d"
  		 " user_fault %d gfn %lx\n",
···

  	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
  	if (!was_rmapped) {
- 		rmap_add(vcpu, shadow_pte, gfn, largepage);
+ 		rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
  		if (!is_rmap_pte(*shadow_pte))
  			kvm_release_pfn_clean(pfn);
+ 		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+ 			rmap_recycle(vcpu, gfn, largepage);
  	} else {
  		if (was_writeble)
  			kvm_release_pfn_dirty(pfn);
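
In short, rmap_add() now returns (approximately) how many entries are already on the reverse-mapping chain for the gfn, and mmu_set_spte() calls the new rmap_recycle() to unmap that chain and flush remote TLBs once the count passes RMAP_RECYCLE_THRESHOLD (1000), so a chain walked under the mmu spinlock cannot grow without bound. Recycling only zaps the sptes for that gfn; they are re-established lazily on the next fault, which is why dropping the whole chain is safe. The standalone C sketch below merely illustrates the count-on-insert / recycle-past-a-threshold pattern with an ordinary linked list; the demo_* names and DEMO_* constants are invented for the example and are not part of the kernel code.

/*
 * Standalone sketch, not kernel code: models the pattern of this patch
 * with a plain linked list of descriptors.
 */
#include <stdio.h>
#include <stdlib.h>

#define DEMO_EXT 4                  /* slots per descriptor, like RMAP_EXT */
#define DEMO_RECYCLE_THRESHOLD 1000 /* same limit the patch introduces     */

struct demo_desc {
	unsigned long sptes[DEMO_EXT];  /* 0 means "slot free" in this demo */
	struct demo_desc *more;
};

/*
 * Add an spte to the chain and, as in the patched rmap_add(), return a
 * rough count of how many entries were already chained (counted in
 * DEMO_EXT-sized steps while walking past full descriptors).
 */
static int demo_rmap_add(struct demo_desc **head, unsigned long spte)
{
	struct demo_desc *desc;
	int i, count = 0;

	if (!*head) {
		*head = calloc(1, sizeof(**head));
		(*head)->sptes[0] = spte;
		return count;
	}
	desc = *head;
	while (desc->sptes[DEMO_EXT - 1] && desc->more) {
		desc = desc->more;
		count += DEMO_EXT;
	}
	if (desc->sptes[DEMO_EXT - 1]) {
		desc->more = calloc(1, sizeof(*desc));
		desc = desc->more;
	}
	for (i = 0; desc->sptes[i]; ++i)
		;
	desc->sptes[i] = spte;
	return count;
}

/* Drop the whole chain; stands in for rmap_recycle()'s unmap + TLB flush. */
static void demo_rmap_recycle(struct demo_desc **head)
{
	struct demo_desc *desc = *head, *next;

	while (desc) {
		next = desc->more;
		free(desc);
		desc = next;
	}
	*head = NULL;
}

int main(void)
{
	struct demo_desc *head = NULL;
	unsigned long spte;

	for (spte = 1; spte <= 5000; spte++) {
		int count = demo_rmap_add(&head, spte);

		if (count > DEMO_RECYCLE_THRESHOLD) {
			printf("chain ~%d entries long, recycling\n", count);
			demo_rmap_recycle(&head);
		}
	}
	demo_rmap_recycle(&head);
	return 0;
}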