KVM: Fix memory slot management functions for guest smp

The memory slot management functions operated only on vcpu 0, where
they should be kvm-wide. This caused hangs when starting X on an smp guest.

Fix by making the functions (and the resulting call chain in the mmu)
non-vcpu-specific. Unfortunately this reduces the efficiency of the mmu
object cache a bit; we may have to revisit this later.

Signed-off-by: Avi Kivity <avi@qumranet.com>

+52 -123
+2 -2
drivers/kvm/kvm.h
··· 535 int kvm_mmu_setup(struct kvm_vcpu *vcpu); 536 537 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 538 - void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot); 539 - void kvm_mmu_zap_all(struct kvm_vcpu *vcpu); 540 541 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); 542 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
··· 535 int kvm_mmu_setup(struct kvm_vcpu *vcpu); 536 537 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 538 + void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 539 + void kvm_mmu_zap_all(struct kvm *kvm); 540 541 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); 542 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
+9 -59
drivers/kvm/kvm_main.c
··· 238 kvm_arch_ops->vcpu_load(vcpu); 239 } 240 241 - /* 242 - * Switches to specified vcpu, until a matching vcpu_put(). Will return NULL 243 - * if the slot is not populated. 244 - */ 245 - static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot) 246 - { 247 - struct kvm_vcpu *vcpu = &kvm->vcpus[slot]; 248 - 249 - mutex_lock(&vcpu->mutex); 250 - if (!vcpu->vmcs) { 251 - mutex_unlock(&vcpu->mutex); 252 - return NULL; 253 - } 254 - kvm_arch_ops->vcpu_load(vcpu); 255 - return vcpu; 256 - } 257 - 258 static void vcpu_put(struct kvm_vcpu *vcpu) 259 { 260 kvm_arch_ops->vcpu_put(vcpu); ··· 646 } 647 EXPORT_SYMBOL_GPL(fx_init); 648 649 - static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot) 650 - { 651 - spin_lock(&vcpu->kvm->lock); 652 - kvm_mmu_slot_remove_write_access(vcpu, slot); 653 - spin_unlock(&vcpu->kvm->lock); 654 - } 655 - 656 /* 657 * Allocate some memory and give it an address in the guest physical address 658 * space. ··· 768 *memslot = new; 769 ++kvm->memory_config_version; 770 771 spin_unlock(&kvm->lock); 772 - 773 - for (i = 0; i < KVM_MAX_VCPUS; ++i) { 774 - struct kvm_vcpu *vcpu; 775 - 776 - vcpu = vcpu_load_slot(kvm, i); 777 - if (!vcpu) 778 - continue; 779 - if (new.flags & KVM_MEM_LOG_DIRTY_PAGES) 780 - do_remove_write_access(vcpu, mem->slot); 781 - kvm_mmu_reset_context(vcpu); 782 - vcpu_put(vcpu); 783 - } 784 785 kvm_free_physmem_slot(&old, &new); 786 return 0; ··· 793 struct kvm_memory_slot *memslot; 794 int r, i; 795 int n; 796 - int cleared; 797 unsigned long any = 0; 798 799 spin_lock(&kvm->lock); ··· 821 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 822 goto out; 823 824 - if (any) { 825 - cleared = 0; 826 - for (i = 0; i < KVM_MAX_VCPUS; ++i) { 827 - struct kvm_vcpu *vcpu; 828 - 829 - vcpu = vcpu_load_slot(kvm, i); 830 - if (!vcpu) 831 - continue; 832 - if (!cleared) { 833 - do_remove_write_access(vcpu, log->slot); 834 - memset(memslot->dirty_bitmap, 0, n); 835 - cleared = 1; 836 - } 837 - 
kvm_arch_ops->tlb_flush(vcpu); 838 - vcpu_put(vcpu); 839 - } 840 - } 841 842 r = 0; 843 ··· 874 break; 875 kvm->naliases = n; 876 877 - spin_unlock(&kvm->lock); 878 879 - vcpu_load(&kvm->vcpus[0]); 880 - spin_lock(&kvm->lock); 881 - kvm_mmu_zap_all(&kvm->vcpus[0]); 882 spin_unlock(&kvm->lock); 883 - vcpu_put(&kvm->vcpus[0]); 884 885 return 0; 886
··· 238 kvm_arch_ops->vcpu_load(vcpu); 239 } 240 241 static void vcpu_put(struct kvm_vcpu *vcpu) 242 { 243 kvm_arch_ops->vcpu_put(vcpu); ··· 663 } 664 EXPORT_SYMBOL_GPL(fx_init); 665 666 /* 667 * Allocate some memory and give it an address in the guest physical address 668 * space. ··· 792 *memslot = new; 793 ++kvm->memory_config_version; 794 795 + kvm_mmu_slot_remove_write_access(kvm, mem->slot); 796 + kvm_flush_remote_tlbs(kvm); 797 + 798 spin_unlock(&kvm->lock); 799 800 kvm_free_physmem_slot(&old, &new); 801 return 0; ··· 826 struct kvm_memory_slot *memslot; 827 int r, i; 828 int n; 829 unsigned long any = 0; 830 831 spin_lock(&kvm->lock); ··· 855 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 856 goto out; 857 858 + spin_lock(&kvm->lock); 859 + kvm_mmu_slot_remove_write_access(kvm, log->slot); 860 + kvm_flush_remote_tlbs(kvm); 861 + memset(memslot->dirty_bitmap, 0, n); 862 + spin_unlock(&kvm->lock); 863 864 r = 0; 865 ··· 920 break; 921 kvm->naliases = n; 922 923 + kvm_mmu_zap_all(kvm); 924 925 spin_unlock(&kvm->lock); 926 927 return 0; 928
+41 -62
drivers/kvm/mmu.c
··· 281 return p; 282 } 283 284 - static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj) 285 - { 286 - if (mc->nobjs < KVM_NR_MEM_OBJS) 287 - mc->objects[mc->nobjs++] = obj; 288 - else 289 - kfree(obj); 290 - } 291 - 292 static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) 293 { 294 return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache, 295 sizeof(struct kvm_pte_chain)); 296 } 297 298 - static void mmu_free_pte_chain(struct kvm_vcpu *vcpu, 299 - struct kvm_pte_chain *pc) 300 { 301 - mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc); 302 } 303 304 static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) ··· 298 sizeof(struct kvm_rmap_desc)); 299 } 300 301 - static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu, 302 - struct kvm_rmap_desc *rd) 303 { 304 - mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd); 305 } 306 307 /* ··· 345 } 346 } 347 348 - static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu, 349 - struct page *page, 350 struct kvm_rmap_desc *desc, 351 int i, 352 struct kvm_rmap_desc *prev_desc) ··· 365 prev_desc->more = desc->more; 366 else 367 set_page_private(page,(unsigned long)desc->more | 1); 368 - mmu_free_rmap_desc(vcpu, desc); 369 } 370 371 - static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte) 372 { 373 struct page *page; 374 struct kvm_rmap_desc *desc; ··· 396 while (desc) { 397 for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) 398 if (desc->shadow_ptes[i] == spte) { 399 - rmap_desc_remove_entry(vcpu, page, 400 desc, i, 401 prev_desc); 402 return; ··· 431 BUG_ON(!(*spte & PT_PRESENT_MASK)); 432 BUG_ON(!(*spte & PT_WRITABLE_MASK)); 433 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 434 - rmap_remove(vcpu, spte); 435 set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); 436 kvm_flush_remote_tlbs(vcpu->kvm); 437 } ··· 453 } 454 #endif 455 456 - static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, 457 struct kvm_mmu_page *page_head) 458 { 459 
ASSERT(is_empty_shadow_page(page_head->spt)); 460 list_del(&page_head->link); 461 - mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt); 462 - mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head); 463 - ++vcpu->kvm->n_free_mmu_pages; 464 } 465 466 static unsigned kvm_page_table_hashfn(gfn_t gfn) ··· 526 pte_chain->parent_ptes[0] = parent_pte; 527 } 528 529 - static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu, 530 - struct kvm_mmu_page *page, 531 u64 *parent_pte) 532 { 533 struct kvm_pte_chain *pte_chain; ··· 553 pte_chain->parent_ptes[i] = NULL; 554 if (i == 0) { 555 hlist_del(&pte_chain->link); 556 - mmu_free_pte_chain(vcpu, pte_chain); 557 if (hlist_empty(&page->parent_ptes)) { 558 page->multimapped = 0; 559 page->parent_pte = NULL; ··· 631 return page; 632 } 633 634 - static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, 635 struct kvm_mmu_page *page) 636 { 637 unsigned i; ··· 643 if (page->role.level == PT_PAGE_TABLE_LEVEL) { 644 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { 645 if (pt[i] & PT_PRESENT_MASK) 646 - rmap_remove(vcpu, &pt[i]); 647 pt[i] = 0; 648 } 649 - kvm_flush_remote_tlbs(vcpu->kvm); 650 return; 651 } 652 ··· 657 if (!(ent & PT_PRESENT_MASK)) 658 continue; 659 ent &= PT64_BASE_ADDR_MASK; 660 - mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]); 661 } 662 - kvm_flush_remote_tlbs(vcpu->kvm); 663 } 664 665 - static void kvm_mmu_put_page(struct kvm_vcpu *vcpu, 666 - struct kvm_mmu_page *page, 667 u64 *parent_pte) 668 { 669 - mmu_page_remove_parent_pte(vcpu, page, parent_pte); 670 } 671 672 - static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, 673 struct kvm_mmu_page *page) 674 { 675 u64 *parent_pte; ··· 684 parent_pte = chain->parent_ptes[0]; 685 } 686 BUG_ON(!parent_pte); 687 - kvm_mmu_put_page(vcpu, page, parent_pte); 688 set_shadow_pte(parent_pte, 0); 689 } 690 - kvm_mmu_page_unlink_children(vcpu, page); 691 if (!page->root_count) { 692 hlist_del(&page->hash_link); 693 - kvm_mmu_free_page(vcpu, page); 
694 } else 695 - list_move(&page->link, &vcpu->kvm->active_mmu_pages); 696 } 697 698 static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) ··· 711 if (page->gfn == gfn && !page->role.metaphysical) { 712 pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, 713 page->role.word); 714 - kvm_mmu_zap_page(vcpu, page); 715 r = 1; 716 } 717 return r; ··· 724 while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { 725 pgprintk("%s: zap %lx %x\n", 726 __FUNCTION__, gfn, page->role.word); 727 - kvm_mmu_zap_page(vcpu, page); 728 } 729 } 730 ··· 1076 pte = *spte; 1077 if (is_present_pte(pte)) { 1078 if (page->role.level == PT_PAGE_TABLE_LEVEL) 1079 - rmap_remove(vcpu, spte); 1080 else { 1081 child = page_header(pte & PT64_BASE_ADDR_MASK); 1082 - mmu_page_remove_parent_pte(vcpu, child, spte); 1083 } 1084 } 1085 *spte = 0; ··· 1148 */ 1149 pgprintk("misaligned: gpa %llx bytes %d role %x\n", 1150 gpa, bytes, page->role.word); 1151 - kvm_mmu_zap_page(vcpu, page); 1152 continue; 1153 } 1154 page_offset = offset; ··· 1194 1195 page = container_of(vcpu->kvm->active_mmu_pages.prev, 1196 struct kvm_mmu_page, link); 1197 - kvm_mmu_zap_page(vcpu, page); 1198 } 1199 } 1200 EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages); ··· 1206 while (!list_empty(&vcpu->kvm->active_mmu_pages)) { 1207 page = container_of(vcpu->kvm->active_mmu_pages.next, 1208 struct kvm_mmu_page, link); 1209 - kvm_mmu_zap_page(vcpu, page); 1210 } 1211 free_page((unsigned long)vcpu->mmu.pae_root); 1212 } ··· 1264 mmu_free_memory_caches(vcpu); 1265 } 1266 1267 - void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) 1268 { 1269 - struct kvm *kvm = vcpu->kvm; 1270 struct kvm_mmu_page *page; 1271 1272 list_for_each_entry(page, &kvm->active_mmu_pages, link) { ··· 1279 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 1280 /* avoid RMW */ 1281 if (pt[i] & PT_WRITABLE_MASK) { 1282 - rmap_remove(vcpu, &pt[i]); 1283 pt[i] &= ~PT_WRITABLE_MASK; 1284 } 1285 } 1286 } 1287 1288 - void kvm_mmu_zap_all(struct kvm_vcpu 
*vcpu) 1289 { 1290 - destroy_kvm_mmu(vcpu); 1291 1292 - while (!list_empty(&vcpu->kvm->active_mmu_pages)) { 1293 - struct kvm_mmu_page *page; 1294 1295 - page = container_of(vcpu->kvm->active_mmu_pages.next, 1296 - struct kvm_mmu_page, link); 1297 - kvm_mmu_zap_page(vcpu, page); 1298 - } 1299 - 1300 - mmu_free_memory_caches(vcpu); 1301 - kvm_flush_remote_tlbs(vcpu->kvm); 1302 - init_kvm_mmu(vcpu); 1303 } 1304 1305 void kvm_mmu_module_exit(void)
··· 281 return p; 282 } 283 284 static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) 285 { 286 return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache, 287 sizeof(struct kvm_pte_chain)); 288 } 289 290 + static void mmu_free_pte_chain(struct kvm_pte_chain *pc) 291 { 292 + kfree(pc); 293 } 294 295 static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) ··· 307 sizeof(struct kvm_rmap_desc)); 308 } 309 310 + static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) 311 { 312 + kfree(rd); 313 } 314 315 /* ··· 355 } 356 } 357 358 + static void rmap_desc_remove_entry(struct page *page, 359 struct kvm_rmap_desc *desc, 360 int i, 361 struct kvm_rmap_desc *prev_desc) ··· 376 prev_desc->more = desc->more; 377 else 378 set_page_private(page,(unsigned long)desc->more | 1); 379 + mmu_free_rmap_desc(desc); 380 } 381 382 + static void rmap_remove(u64 *spte) 383 { 384 struct page *page; 385 struct kvm_rmap_desc *desc; ··· 407 while (desc) { 408 for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) 409 if (desc->shadow_ptes[i] == spte) { 410 + rmap_desc_remove_entry(page, 411 desc, i, 412 prev_desc); 413 return; ··· 442 BUG_ON(!(*spte & PT_PRESENT_MASK)); 443 BUG_ON(!(*spte & PT_WRITABLE_MASK)); 444 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 445 + rmap_remove(spte); 446 set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); 447 kvm_flush_remote_tlbs(vcpu->kvm); 448 } ··· 464 } 465 #endif 466 467 + static void kvm_mmu_free_page(struct kvm *kvm, 468 struct kvm_mmu_page *page_head) 469 { 470 ASSERT(is_empty_shadow_page(page_head->spt)); 471 list_del(&page_head->link); 472 + kfree(page_head->spt); 473 + kfree(page_head); 474 + ++kvm->n_free_mmu_pages; 475 } 476 477 static unsigned kvm_page_table_hashfn(gfn_t gfn) ··· 537 pte_chain->parent_ptes[0] = parent_pte; 538 } 539 540 + static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page, 541 u64 *parent_pte) 542 { 543 struct kvm_pte_chain *pte_chain; ··· 565 pte_chain->parent_ptes[i] 
= NULL; 566 if (i == 0) { 567 hlist_del(&pte_chain->link); 568 + mmu_free_pte_chain(pte_chain); 569 if (hlist_empty(&page->parent_ptes)) { 570 page->multimapped = 0; 571 page->parent_pte = NULL; ··· 643 return page; 644 } 645 646 + static void kvm_mmu_page_unlink_children(struct kvm *kvm, 647 struct kvm_mmu_page *page) 648 { 649 unsigned i; ··· 655 if (page->role.level == PT_PAGE_TABLE_LEVEL) { 656 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { 657 if (pt[i] & PT_PRESENT_MASK) 658 + rmap_remove(&pt[i]); 659 pt[i] = 0; 660 } 661 + kvm_flush_remote_tlbs(kvm); 662 return; 663 } 664 ··· 669 if (!(ent & PT_PRESENT_MASK)) 670 continue; 671 ent &= PT64_BASE_ADDR_MASK; 672 + mmu_page_remove_parent_pte(page_header(ent), &pt[i]); 673 } 674 + kvm_flush_remote_tlbs(kvm); 675 } 676 677 + static void kvm_mmu_put_page(struct kvm_mmu_page *page, 678 u64 *parent_pte) 679 { 680 + mmu_page_remove_parent_pte(page, parent_pte); 681 } 682 683 + static void kvm_mmu_zap_page(struct kvm *kvm, 684 struct kvm_mmu_page *page) 685 { 686 u64 *parent_pte; ··· 697 parent_pte = chain->parent_ptes[0]; 698 } 699 BUG_ON(!parent_pte); 700 + kvm_mmu_put_page(page, parent_pte); 701 set_shadow_pte(parent_pte, 0); 702 } 703 + kvm_mmu_page_unlink_children(kvm, page); 704 if (!page->root_count) { 705 hlist_del(&page->hash_link); 706 + kvm_mmu_free_page(kvm, page); 707 } else 708 + list_move(&page->link, &kvm->active_mmu_pages); 709 } 710 711 static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) ··· 724 if (page->gfn == gfn && !page->role.metaphysical) { 725 pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, 726 page->role.word); 727 + kvm_mmu_zap_page(vcpu->kvm, page); 728 r = 1; 729 } 730 return r; ··· 737 while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { 738 pgprintk("%s: zap %lx %x\n", 739 __FUNCTION__, gfn, page->role.word); 740 + kvm_mmu_zap_page(vcpu->kvm, page); 741 } 742 } 743 ··· 1089 pte = *spte; 1090 if (is_present_pte(pte)) { 1091 if (page->role.level == PT_PAGE_TABLE_LEVEL) 
1092 + rmap_remove(spte); 1093 else { 1094 child = page_header(pte & PT64_BASE_ADDR_MASK); 1095 + mmu_page_remove_parent_pte(child, spte); 1096 } 1097 } 1098 *spte = 0; ··· 1161 */ 1162 pgprintk("misaligned: gpa %llx bytes %d role %x\n", 1163 gpa, bytes, page->role.word); 1164 + kvm_mmu_zap_page(vcpu->kvm, page); 1165 continue; 1166 } 1167 page_offset = offset; ··· 1207 1208 page = container_of(vcpu->kvm->active_mmu_pages.prev, 1209 struct kvm_mmu_page, link); 1210 + kvm_mmu_zap_page(vcpu->kvm, page); 1211 } 1212 } 1213 EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages); ··· 1219 while (!list_empty(&vcpu->kvm->active_mmu_pages)) { 1220 page = container_of(vcpu->kvm->active_mmu_pages.next, 1221 struct kvm_mmu_page, link); 1222 + kvm_mmu_zap_page(vcpu->kvm, page); 1223 } 1224 free_page((unsigned long)vcpu->mmu.pae_root); 1225 } ··· 1277 mmu_free_memory_caches(vcpu); 1278 } 1279 1280 + void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) 1281 { 1282 struct kvm_mmu_page *page; 1283 1284 list_for_each_entry(page, &kvm->active_mmu_pages, link) { ··· 1293 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 1294 /* avoid RMW */ 1295 if (pt[i] & PT_WRITABLE_MASK) { 1296 + rmap_remove(&pt[i]); 1297 pt[i] &= ~PT_WRITABLE_MASK; 1298 } 1299 } 1300 } 1301 1302 + void kvm_mmu_zap_all(struct kvm *kvm) 1303 { 1304 + struct kvm_mmu_page *page, *node; 1305 1306 + list_for_each_entry_safe(page, node, &kvm->active_mmu_pages, link) 1307 + kvm_mmu_zap_page(kvm, page); 1308 1309 + kvm_flush_remote_tlbs(kvm); 1310 } 1311 1312 void kvm_mmu_module_exit(void)