KVM: Fix memory slot management functions for guest smp

The memory slot management functions were oriented against vcpu 0, whereas
they should be kvm-wide. This causes hangs when starting X in an SMP guest.

Fix by making the functions (and the resulting tail of calls into the mmu)
non-vcpu-specific. Unfortunately, this reduces the efficiency of the mmu
object cache a bit; we may have to revisit this later.

Signed-off-by: Avi Kivity <avi@qumranet.com>
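
For readers skimming the patch, the shape of the new interface is easiest to
see in the dirty-log path. The sketch below condenses it; the wrapper name and
argument list are invented for illustration, while the calls inside the
critical section are taken verbatim from the kvm_main.c hunk further down.

/*
 * Condensed sketch of the new kvm-wide call pattern (hypothetical wrapper;
 * the body mirrors the reworked kvm_vm_ioctl_get_dirty_log() below).
 * Shadow-MMU maintenance now runs once per VM under kvm->lock instead of
 * being replayed on every vcpu via the old vcpu_load_slot() loop.
 */
static void example_clear_dirty_log(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot,
                                    int slot, int n)
{
        spin_lock(&kvm->lock);
        /* write-protect every shadow pte mapping the slot ... */
        kvm_mmu_slot_remove_write_access(kvm, slot);
        /* ... force all vcpus to drop stale translations ... */
        kvm_flush_remote_tlbs(kvm);
        /* ... and only then clear the dirty bitmap (n = bitmap size in bytes) */
        memset(memslot->dirty_bitmap, 0, n);
        spin_unlock(&kvm->lock);
}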

3 files changed: +52 -123

drivers/kvm/kvm.h: +2 -2

···
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
-void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
-void kvm_mmu_zap_all(struct kvm_vcpu *vcpu);
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_zap_all(struct kvm *kvm);
 
 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
drivers/kvm/kvm_main.c: +9 -59

···
         kvm_arch_ops->vcpu_load(vcpu);
 }
 
-/*
- * Switches to specified vcpu, until a matching vcpu_put(). Will return NULL
- * if the slot is not populated.
- */
-static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot)
-{
-        struct kvm_vcpu *vcpu = &kvm->vcpus[slot];
-
-        mutex_lock(&vcpu->mutex);
-        if (!vcpu->vmcs) {
-                mutex_unlock(&vcpu->mutex);
-                return NULL;
-        }
-        kvm_arch_ops->vcpu_load(vcpu);
-        return vcpu;
-}
-
 static void vcpu_put(struct kvm_vcpu *vcpu)
 {
         kvm_arch_ops->vcpu_put(vcpu);
···
 }
 EXPORT_SYMBOL_GPL(fx_init);
 
-static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot)
-{
-        spin_lock(&vcpu->kvm->lock);
-        kvm_mmu_slot_remove_write_access(vcpu, slot);
-        spin_unlock(&vcpu->kvm->lock);
-}
-
 /*
  * Allocate some memory and give it an address in the guest physical address
  * space.
···
         *memslot = new;
         ++kvm->memory_config_version;
 
+        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+        kvm_flush_remote_tlbs(kvm);
+
         spin_unlock(&kvm->lock);
-
-        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-                struct kvm_vcpu *vcpu;
-
-                vcpu = vcpu_load_slot(kvm, i);
-                if (!vcpu)
-                        continue;
-                if (new.flags & KVM_MEM_LOG_DIRTY_PAGES)
-                        do_remove_write_access(vcpu, mem->slot);
-                kvm_mmu_reset_context(vcpu);
-                vcpu_put(vcpu);
-        }
 
         kvm_free_physmem_slot(&old, &new);
         return 0;
···
         struct kvm_memory_slot *memslot;
         int r, i;
         int n;
-        int cleared;
         unsigned long any = 0;
 
         spin_lock(&kvm->lock);
···
         if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
                 goto out;
 
-        if (any) {
-                cleared = 0;
-                for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-                        struct kvm_vcpu *vcpu;
-
-                        vcpu = vcpu_load_slot(kvm, i);
-                        if (!vcpu)
-                                continue;
-                        if (!cleared) {
-                                do_remove_write_access(vcpu, log->slot);
-                                memset(memslot->dirty_bitmap, 0, n);
-                                cleared = 1;
-                        }
-                        kvm_arch_ops->tlb_flush(vcpu);
-                        vcpu_put(vcpu);
-                }
-        }
+        spin_lock(&kvm->lock);
+        kvm_mmu_slot_remove_write_access(kvm, log->slot);
+        kvm_flush_remote_tlbs(kvm);
+        memset(memslot->dirty_bitmap, 0, n);
+        spin_unlock(&kvm->lock);
 
         r = 0;
···
                         break;
         kvm->naliases = n;
 
-        spin_unlock(&kvm->lock);
+        kvm_mmu_zap_all(kvm);
 
-        vcpu_load(&kvm->vcpus[0]);
-        spin_lock(&kvm->lock);
-        kvm_mmu_zap_all(&kvm->vcpus[0]);
         spin_unlock(&kvm->lock);
-        vcpu_put(&kvm->vcpus[0]);
 
         return 0;
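The function reworked above, kvm_vm_ioctl_get_dirty_log(), services the
KVM_GET_DIRTY_LOG ioctl. For context, here is a minimal userspace sketch of
that ioctl; the helper name, the vm_fd variable and the 64-bit-long bitmap
sizing are assumptions for illustration, not part of this patch.

/* Hypothetical userspace sketch of the KVM_GET_DIRTY_LOG ioctl.  Assumes
 * vm_fd is a KVM VM file descriptor and the memory slot was registered
 * with KVM_MEM_LOG_DIRTY_PAGES.  Error reporting is kept minimal. */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>

static void *fetch_dirty_bitmap(int vm_fd, int slot, size_t npages)
{
        struct kvm_dirty_log log = { .slot = slot };
        size_t bytes = ((npages + 63) / 64) * 8;   /* one bit per guest page */

        log.dirty_bitmap = calloc(1, bytes);
        if (!log.dirty_bitmap)
                return NULL;
        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
                free(log.dirty_bitmap);
                return NULL;
        }
        return log.dirty_bitmap;   /* caller scans and frees the bitmap */
}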
drivers/kvm/mmu.c: +41 -62

···
         return p;
 }
 
-static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj)
-{
-        if (mc->nobjs < KVM_NR_MEM_OBJS)
-                mc->objects[mc->nobjs++] = obj;
-        else
-                kfree(obj);
-}
-
 static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
 {
         return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
                                       sizeof(struct kvm_pte_chain));
 }
 
-static void mmu_free_pte_chain(struct kvm_vcpu *vcpu,
-                               struct kvm_pte_chain *pc)
+static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
 {
-        mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc);
+        kfree(pc);
 }
 
 static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
···
                                       sizeof(struct kvm_rmap_desc));
 }
 
-static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu,
-                               struct kvm_rmap_desc *rd)
+static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 {
-        mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd);
+        kfree(rd);
 }
 
 /*
···
         }
 }
 
-static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
-                                   struct page *page,
+static void rmap_desc_remove_entry(struct page *page,
                                    struct kvm_rmap_desc *desc,
                                    int i,
                                    struct kvm_rmap_desc *prev_desc)
···
                 prev_desc->more = desc->more;
         else
                 set_page_private(page,(unsigned long)desc->more | 1);
-        mmu_free_rmap_desc(vcpu, desc);
+        mmu_free_rmap_desc(desc);
 }
 
-static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
+static void rmap_remove(u64 *spte)
 {
         struct page *page;
         struct kvm_rmap_desc *desc;
···
                 while (desc) {
                         for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
                                 if (desc->shadow_ptes[i] == spte) {
-                                        rmap_desc_remove_entry(vcpu, page,
+                                        rmap_desc_remove_entry(page,
                                                                desc, i,
                                                                prev_desc);
                                         return;
···
                 BUG_ON(!(*spte & PT_PRESENT_MASK));
                 BUG_ON(!(*spte & PT_WRITABLE_MASK));
                 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-                rmap_remove(vcpu, spte);
+                rmap_remove(spte);
                 set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
                 kvm_flush_remote_tlbs(vcpu->kvm);
         }
···
 }
 #endif
 
-static void kvm_mmu_free_page(struct kvm_vcpu *vcpu,
+static void kvm_mmu_free_page(struct kvm *kvm,
                               struct kvm_mmu_page *page_head)
 {
         ASSERT(is_empty_shadow_page(page_head->spt));
         list_del(&page_head->link);
-        mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt);
-        mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head);
-        ++vcpu->kvm->n_free_mmu_pages;
+        kfree(page_head->spt);
+        kfree(page_head);
+        ++kvm->n_free_mmu_pages;
 }
 
 static unsigned kvm_page_table_hashfn(gfn_t gfn)
···
         pte_chain->parent_ptes[0] = parent_pte;
 }
 
-static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu,
-                                       struct kvm_mmu_page *page,
+static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page,
                                        u64 *parent_pte)
 {
         struct kvm_pte_chain *pte_chain;
···
                         pte_chain->parent_ptes[i] = NULL;
                         if (i == 0) {
                                 hlist_del(&pte_chain->link);
-                                mmu_free_pte_chain(vcpu, pte_chain);
+                                mmu_free_pte_chain(pte_chain);
                                 if (hlist_empty(&page->parent_ptes)) {
                                         page->multimapped = 0;
                                         page->parent_pte = NULL;
···
         return page;
 }
 
-static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
+static void kvm_mmu_page_unlink_children(struct kvm *kvm,
                                          struct kvm_mmu_page *page)
 {
         unsigned i;
···
         if (page->role.level == PT_PAGE_TABLE_LEVEL) {
                 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
                         if (pt[i] & PT_PRESENT_MASK)
-                                rmap_remove(vcpu, &pt[i]);
+                                rmap_remove(&pt[i]);
                         pt[i] = 0;
                 }
-                kvm_flush_remote_tlbs(vcpu->kvm);
+                kvm_flush_remote_tlbs(kvm);
                 return;
         }
···
                 if (!(ent & PT_PRESENT_MASK))
                         continue;
                 ent &= PT64_BASE_ADDR_MASK;
-                mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
+                mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
         }
-        kvm_flush_remote_tlbs(vcpu->kvm);
+        kvm_flush_remote_tlbs(kvm);
 }
 
-static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
-                             struct kvm_mmu_page *page,
+static void kvm_mmu_put_page(struct kvm_mmu_page *page,
                              u64 *parent_pte)
 {
-        mmu_page_remove_parent_pte(vcpu, page, parent_pte);
+        mmu_page_remove_parent_pte(page, parent_pte);
 }
 
-static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
+static void kvm_mmu_zap_page(struct kvm *kvm,
                              struct kvm_mmu_page *page)
 {
         u64 *parent_pte;
···
                         parent_pte = chain->parent_ptes[0];
                 }
                 BUG_ON(!parent_pte);
-                kvm_mmu_put_page(vcpu, page, parent_pte);
+                kvm_mmu_put_page(page, parent_pte);
                 set_shadow_pte(parent_pte, 0);
         }
-        kvm_mmu_page_unlink_children(vcpu, page);
+        kvm_mmu_page_unlink_children(kvm, page);
         if (!page->root_count) {
                 hlist_del(&page->hash_link);
-                kvm_mmu_free_page(vcpu, page);
+                kvm_mmu_free_page(kvm, page);
         } else
-                list_move(&page->link, &vcpu->kvm->active_mmu_pages);
+                list_move(&page->link, &kvm->active_mmu_pages);
 }
 
 static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
···
                 if (page->gfn == gfn && !page->role.metaphysical) {
                         pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
                                  page->role.word);
-                        kvm_mmu_zap_page(vcpu, page);
+                        kvm_mmu_zap_page(vcpu->kvm, page);
                         r = 1;
                 }
         return r;
···
         while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
                 pgprintk("%s: zap %lx %x\n",
                          __FUNCTION__, gfn, page->role.word);
-                kvm_mmu_zap_page(vcpu, page);
+                kvm_mmu_zap_page(vcpu->kvm, page);
         }
 }
···
         pte = *spte;
         if (is_present_pte(pte)) {
                 if (page->role.level == PT_PAGE_TABLE_LEVEL)
-                        rmap_remove(vcpu, spte);
+                        rmap_remove(spte);
                 else {
                         child = page_header(pte & PT64_BASE_ADDR_MASK);
-                        mmu_page_remove_parent_pte(vcpu, child, spte);
+                        mmu_page_remove_parent_pte(child, spte);
                 }
         }
         *spte = 0;
···
                          */
                         pgprintk("misaligned: gpa %llx bytes %d role %x\n",
                                  gpa, bytes, page->role.word);
-                        kvm_mmu_zap_page(vcpu, page);
+                        kvm_mmu_zap_page(vcpu->kvm, page);
                         continue;
                 }
                 page_offset = offset;
···
 
                 page = container_of(vcpu->kvm->active_mmu_pages.prev,
                                     struct kvm_mmu_page, link);
-                kvm_mmu_zap_page(vcpu, page);
+                kvm_mmu_zap_page(vcpu->kvm, page);
         }
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages);
···
         while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
                 page = container_of(vcpu->kvm->active_mmu_pages.next,
                                     struct kvm_mmu_page, link);
-                kvm_mmu_zap_page(vcpu, page);
+                kvm_mmu_zap_page(vcpu->kvm, page);
         }
         free_page((unsigned long)vcpu->mmu.pae_root);
···
         mmu_free_memory_caches(vcpu);
 }
 
-void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 {
-        struct kvm *kvm = vcpu->kvm;
         struct kvm_mmu_page *page;
 
         list_for_each_entry(page, &kvm->active_mmu_pages, link) {
···
                 for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                         /* avoid RMW */
                         if (pt[i] & PT_WRITABLE_MASK) {
-                                rmap_remove(vcpu, &pt[i]);
+                                rmap_remove(&pt[i]);
                                 pt[i] &= ~PT_WRITABLE_MASK;
                         }
         }
 }
 
-void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
+void kvm_mmu_zap_all(struct kvm *kvm)
 {
-        destroy_kvm_mmu(vcpu);
+        struct kvm_mmu_page *page, *node;
 
-        while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
-                struct kvm_mmu_page *page;
+        list_for_each_entry_safe(page, node, &kvm->active_mmu_pages, link)
+                kvm_mmu_zap_page(kvm, page);
 
-                page = container_of(vcpu->kvm->active_mmu_pages.next,
-                                    struct kvm_mmu_page, link);
-                kvm_mmu_zap_page(vcpu, page);
-        }
-
-        mmu_free_memory_caches(vcpu);
-        kvm_flush_remote_tlbs(vcpu->kvm);
-        init_kvm_mmu(vcpu);
+        kvm_flush_remote_tlbs(kvm);
 }
 
 void kvm_mmu_module_exit(void)
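
A note on the new kvm_mmu_zap_all() above: it uses list_for_each_entry_safe()
because kvm_mmu_zap_page() unlinks the current entry from active_mmu_pages
(either freeing it via kvm_mmu_free_page() or re-queueing it with list_move()),
so a plain list_for_each_entry() would step through a stale pointer. A minimal,
self-contained illustration of the pattern (the struct item type is made up,
not KVM code):

#include <linux/list.h>
#include <linux/slab.h>

struct item {
        struct list_head link;
        int payload;
};

/* Drain a list whose entries are freed inside the loop body.  The _safe
 * iterator caches the next node before the body runs, so deleting and
 * freeing the current entry cannot derail the traversal. */
static void drain_items(struct list_head *head)
{
        struct item *it, *next;

        list_for_each_entry_safe(it, next, head, link) {
                list_del(&it->link);
                kfree(it);
        }
}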