Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-ppc-uvmem-5.5-2' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

KVM: Add support for secure guests under the Protected Execution
Framework (PEF) Ultravisor on POWER.

This enables secure memory to be represented as device memory,
which provides a way for the host to keep track of which pages of a
secure guest have been moved into secure memory managed by the
ultravisor and are no longer accessible by the host, and manage
movement of pages between secure and normal memory.

+1137
+18
Documentation/virt/kvm/api.txt
··· 4149 4149 #define KVM_PMU_EVENT_ALLOW 0 4150 4150 #define KVM_PMU_EVENT_DENY 1 4151 4151 4152 + 4.121 KVM_PPC_SVM_OFF 4153 + 4154 + Capability: basic 4155 + Architectures: powerpc 4156 + Type: vm ioctl 4157 + Parameters: none 4158 + Returns: 0 on successful completion, 4159 + Errors: 4160 + EINVAL: if ultravisor failed to terminate the secure guest 4161 + ENOMEM: if hypervisor failed to allocate new radix page tables for guest 4162 + 4163 + This ioctl is used to turn off the secure mode of the guest or transition 4164 + the guest from secure mode to normal mode. This is invoked when the guest 4165 + is reset. This has no effect if called for a normal guest. 4166 + 4167 + This ioctl issues an ultravisor call to terminate the secure guest, 4168 + unpins the VPA pages and releases all the device pages that are used to 4169 + track the secure pages by hypervisor. 4152 4170 4153 4171 5. The kvm_run structure 4154 4172 ------------------------
+17
arch/powerpc/Kconfig
··· 451 451 help 452 452 Support user-mode Transactional Memory on POWERPC. 453 453 454 + config PPC_UV 455 + bool "Ultravisor support" 456 + depends on KVM_BOOK3S_HV_POSSIBLE 457 + select ZONE_DEVICE 458 + select DEV_PAGEMAP_OPS 459 + select DEVICE_PRIVATE 460 + select MEMORY_HOTPLUG 461 + select MEMORY_HOTREMOVE 462 + default n 463 + help 464 + This option paravirtualizes the kernel to run in POWER platforms that 465 + support the Protected Execution Facility (PEF). On such platforms, 466 + the ultravisor firmware runs at a privilege level above the 467 + hypervisor. 468 + 469 + If unsure, say "N". 470 + 454 471 config LD_HEAD_STUB_CATCH 455 472 bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT 456 473 depends on PPC64
+9
arch/powerpc/include/asm/hvcall.h
··· 342 342 #define H_TLB_INVALIDATE 0xF808 343 343 #define H_COPY_TOFROM_GUEST 0xF80C 344 344 345 + /* Flags for H_SVM_PAGE_IN */ 346 + #define H_PAGE_IN_SHARED 0x1 347 + 348 + /* Platform-specific hcalls used by the Ultravisor */ 349 + #define H_SVM_PAGE_IN 0xEF00 350 + #define H_SVM_PAGE_OUT 0xEF04 351 + #define H_SVM_INIT_START 0xEF08 352 + #define H_SVM_INIT_DONE 0xEF0C 353 + 345 354 /* Values for 2nd argument to H_SET_MODE */ 346 355 #define H_SET_MODE_RESOURCE_SET_CIABR 1 347 356 #define H_SET_MODE_RESOURCE_SET_DAWR 2
+74
arch/powerpc/include/asm/kvm_book3s_uvmem.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __ASM_KVM_BOOK3S_UVMEM_H__ 3 + #define __ASM_KVM_BOOK3S_UVMEM_H__ 4 + 5 + #ifdef CONFIG_PPC_UV 6 + int kvmppc_uvmem_init(void); 7 + void kvmppc_uvmem_free(void); 8 + int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot); 9 + void kvmppc_uvmem_slot_free(struct kvm *kvm, 10 + const struct kvm_memory_slot *slot); 11 + unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, 12 + unsigned long gra, 13 + unsigned long flags, 14 + unsigned long page_shift); 15 + unsigned long kvmppc_h_svm_page_out(struct kvm *kvm, 16 + unsigned long gra, 17 + unsigned long flags, 18 + unsigned long page_shift); 19 + unsigned long kvmppc_h_svm_init_start(struct kvm *kvm); 20 + unsigned long kvmppc_h_svm_init_done(struct kvm *kvm); 21 + int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn); 22 + void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, 23 + struct kvm *kvm); 24 + #else 25 + static inline int kvmppc_uvmem_init(void) 26 + { 27 + return 0; 28 + } 29 + 30 + static inline void kvmppc_uvmem_free(void) { } 31 + 32 + static inline int 33 + kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot) 34 + { 35 + return 0; 36 + } 37 + 38 + static inline void 39 + kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) { } 40 + 41 + static inline unsigned long 42 + kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra, 43 + unsigned long flags, unsigned long page_shift) 44 + { 45 + return H_UNSUPPORTED; 46 + } 47 + 48 + static inline unsigned long 49 + kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra, 50 + unsigned long flags, unsigned long page_shift) 51 + { 52 + return H_UNSUPPORTED; 53 + } 54 + 55 + static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) 56 + { 57 + return H_UNSUPPORTED; 58 + } 59 + 60 + static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) 61 + { 62 + return H_UNSUPPORTED; 63 + } 64 + 65 + static 
inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) 66 + { 67 + return -EFAULT; 68 + } 69 + 70 + static inline void 71 + kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, 72 + struct kvm *kvm) { } 73 + #endif /* CONFIG_PPC_UV */ 74 + #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
+6
arch/powerpc/include/asm/kvm_host.h
··· 275 275 276 276 struct kvm_resize_hpt; 277 277 278 + /* Flag values for kvm_arch.secure_guest */ 279 + #define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */ 280 + #define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */ 281 + 278 282 struct kvm_arch { 279 283 unsigned int lpid; 280 284 unsigned int smt_mode; /* # vcpus per virtual core */ ··· 334 330 #endif 335 331 struct kvmppc_ops *kvm_ops; 336 332 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 333 + struct mutex uvmem_lock; 334 + struct list_head uvmem_pfns; 337 335 struct mutex mmu_setup_lock; /* nests inside vcpu mutexes */ 338 336 u64 l1_ptcr; 339 337 int max_nested_lpid;
+1
arch/powerpc/include/asm/kvm_ppc.h
··· 322 322 int size); 323 323 int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, 324 324 int size); 325 + int (*svm_off)(struct kvm *kvm); 325 326 }; 326 327 327 328 extern struct kvmppc_ops *kvmppc_hv_ops;
+6
arch/powerpc/include/asm/ultravisor-api.h
··· 26 26 #define UV_WRITE_PATE 0xF104 27 27 #define UV_RETURN 0xF11C 28 28 #define UV_ESM 0xF110 29 + #define UV_REGISTER_MEM_SLOT 0xF120 30 + #define UV_UNREGISTER_MEM_SLOT 0xF124 31 + #define UV_PAGE_IN 0xF128 32 + #define UV_PAGE_OUT 0xF12C 29 33 #define UV_SHARE_PAGE 0xF130 30 34 #define UV_UNSHARE_PAGE 0xF134 31 35 #define UV_UNSHARE_ALL_PAGES 0xF140 36 + #define UV_PAGE_INVAL 0xF138 37 + #define UV_SVM_TERMINATE 0xF13C 32 38 33 39 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
+36
arch/powerpc/include/asm/ultravisor.h
··· 46 46 return ucall_norets(UV_UNSHARE_ALL_PAGES); 47 47 } 48 48 49 + static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags, 50 + u64 page_shift) 51 + { 52 + return ucall_norets(UV_PAGE_IN, lpid, src_ra, dst_gpa, flags, 53 + page_shift); 54 + } 55 + 56 + static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags, 57 + u64 page_shift) 58 + { 59 + return ucall_norets(UV_PAGE_OUT, lpid, dst_ra, src_gpa, flags, 60 + page_shift); 61 + } 62 + 63 + static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size, 64 + u64 flags, u64 slotid) 65 + { 66 + return ucall_norets(UV_REGISTER_MEM_SLOT, lpid, start_gpa, 67 + size, flags, slotid); 68 + } 69 + 70 + static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid) 71 + { 72 + return ucall_norets(UV_UNREGISTER_MEM_SLOT, lpid, slotid); 73 + } 74 + 75 + static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift) 76 + { 77 + return ucall_norets(UV_PAGE_INVAL, lpid, gpa, page_shift); 78 + } 79 + 80 + static inline int uv_svm_terminate(u64 lpid) 81 + { 82 + return ucall_norets(UV_SVM_TERMINATE, lpid); 83 + } 84 + 49 85 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
+3
arch/powerpc/kvm/Makefile
··· 71 71 book3s_64_mmu_radix.o \ 72 72 book3s_hv_nested.o 73 73 74 + kvm-hv-$(CONFIG_PPC_UV) += \ 75 + book3s_hv_uvmem.o 76 + 74 77 kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ 75 78 book3s_hv_tm.o 76 79
+25
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 19 19 #include <asm/pgtable.h> 20 20 #include <asm/pgalloc.h> 21 21 #include <asm/pte-walk.h> 22 + #include <asm/ultravisor.h> 23 + #include <asm/kvm_book3s_uvmem.h> 22 24 23 25 /* 24 26 * Supported radix tree geometry. ··· 917 915 if (!(dsisr & DSISR_PRTABLE_FAULT)) 918 916 gpa |= ea & 0xfff; 919 917 918 + if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) 919 + return kvmppc_send_page_to_uv(kvm, gfn); 920 + 920 921 /* Get the corresponding memslot */ 921 922 memslot = gfn_to_memslot(kvm, gfn); 922 923 ··· 977 972 unsigned long gpa = gfn << PAGE_SHIFT; 978 973 unsigned int shift; 979 974 975 + if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) { 976 + uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT); 977 + return 0; 978 + } 979 + 980 980 ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); 981 981 if (ptep && pte_present(*ptep)) 982 982 kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, ··· 998 988 unsigned int shift; 999 989 int ref = 0; 1000 990 unsigned long old, *rmapp; 991 + 992 + if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) 993 + return ref; 1001 994 1002 995 ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); 1003 996 if (ptep && pte_present(*ptep) && pte_young(*ptep)) { ··· 1026 1013 unsigned int shift; 1027 1014 int ref = 0; 1028 1015 1016 + if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) 1017 + return ref; 1018 + 1029 1019 ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); 1030 1020 if (ptep && pte_present(*ptep) && pte_young(*ptep)) 1031 1021 ref = 1; ··· 1045 1029 unsigned int shift; 1046 1030 int ret = 0; 1047 1031 unsigned long old, *rmapp; 1032 + 1033 + if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) 1034 + return ret; 1048 1035 1049 1036 ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); 1050 1037 if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { ··· 1100 1081 pte_t *ptep; 1101 1082 unsigned long gpa; 1102 1083 unsigned int shift; 1084 + 1085 + if (kvm->arch.secure_guest & 
KVMPPC_SECURE_INIT_START) 1086 + kvmppc_uvmem_drop_pages(memslot, kvm); 1087 + 1088 + if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) 1089 + return; 1103 1090 1104 1091 gpa = memslot->base_gfn << PAGE_SHIFT; 1105 1092 spin_lock(&kvm->mmu_lock);
+143
arch/powerpc/kvm/book3s_hv.c
··· 72 72 #include <asm/xics.h> 73 73 #include <asm/xive.h> 74 74 #include <asm/hw_breakpoint.h> 75 + #include <asm/kvm_host.h> 76 + #include <asm/kvm_book3s_uvmem.h> 77 + #include <asm/ultravisor.h> 75 78 76 79 #include "book3s.h" 77 80 ··· 1073 1070 kvmppc_get_gpr(vcpu, 5), 1074 1071 kvmppc_get_gpr(vcpu, 6)); 1075 1072 break; 1073 + case H_SVM_PAGE_IN: 1074 + ret = kvmppc_h_svm_page_in(vcpu->kvm, 1075 + kvmppc_get_gpr(vcpu, 4), 1076 + kvmppc_get_gpr(vcpu, 5), 1077 + kvmppc_get_gpr(vcpu, 6)); 1078 + break; 1079 + case H_SVM_PAGE_OUT: 1080 + ret = kvmppc_h_svm_page_out(vcpu->kvm, 1081 + kvmppc_get_gpr(vcpu, 4), 1082 + kvmppc_get_gpr(vcpu, 5), 1083 + kvmppc_get_gpr(vcpu, 6)); 1084 + break; 1085 + case H_SVM_INIT_START: 1086 + ret = kvmppc_h_svm_init_start(vcpu->kvm); 1087 + break; 1088 + case H_SVM_INIT_DONE: 1089 + ret = kvmppc_h_svm_init_done(vcpu->kvm); 1090 + break; 1091 + 1076 1092 default: 1077 1093 return RESUME_HOST; 1078 1094 } ··· 4516 4494 if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) && 4517 4495 ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES)) 4518 4496 kvmppc_radix_flush_memslot(kvm, old); 4497 + /* 4498 + * If UV hasn't yet called H_SVM_INIT_START, don't register memslots. 
4499 + */ 4500 + if (!kvm->arch.secure_guest) 4501 + return; 4502 + 4503 + switch (change) { 4504 + case KVM_MR_CREATE: 4505 + if (kvmppc_uvmem_slot_init(kvm, new)) 4506 + return; 4507 + uv_register_mem_slot(kvm->arch.lpid, 4508 + new->base_gfn << PAGE_SHIFT, 4509 + new->npages * PAGE_SIZE, 4510 + 0, new->id); 4511 + break; 4512 + case KVM_MR_DELETE: 4513 + uv_unregister_mem_slot(kvm->arch.lpid, old->id); 4514 + kvmppc_uvmem_slot_free(kvm, old); 4515 + break; 4516 + default: 4517 + /* TODO: Handle KVM_MR_MOVE */ 4518 + break; 4519 + } 4519 4520 } 4520 4521 4521 4522 /* ··· 4812 4767 char buf[32]; 4813 4768 int ret; 4814 4769 4770 + mutex_init(&kvm->arch.uvmem_lock); 4771 + INIT_LIST_HEAD(&kvm->arch.uvmem_pfns); 4815 4772 mutex_init(&kvm->arch.mmu_setup_lock); 4816 4773 4817 4774 /* Allocate the guest's logical partition ID */ ··· 4983 4936 if (nesting_enabled(kvm)) 4984 4937 kvmhv_release_all_nested(kvm); 4985 4938 kvm->arch.process_table = 0; 4939 + uv_svm_terminate(kvm->arch.lpid); 4986 4940 kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); 4987 4941 } 4942 + 4988 4943 kvmppc_free_lpid(kvm->arch.lpid); 4989 4944 4990 4945 kvmppc_free_pimap(kvm); ··· 5426 5377 return rc; 5427 5378 } 5428 5379 5380 + static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa) 5381 + { 5382 + unpin_vpa(kvm, vpa); 5383 + vpa->gpa = 0; 5384 + vpa->pinned_addr = NULL; 5385 + vpa->dirty = false; 5386 + vpa->update_pending = 0; 5387 + } 5388 + 5389 + /* 5390 + * IOCTL handler to turn off secure mode of guest 5391 + * 5392 + * - Release all device pages 5393 + * - Issue ucall to terminate the guest on the UV side 5394 + * - Unpin the VPA pages. 
5395 + * - Reinit the partition scoped page tables 5396 + */ 5397 + static int kvmhv_svm_off(struct kvm *kvm) 5398 + { 5399 + struct kvm_vcpu *vcpu; 5400 + int mmu_was_ready; 5401 + int srcu_idx; 5402 + int ret = 0; 5403 + int i; 5404 + 5405 + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 5406 + return ret; 5407 + 5408 + mutex_lock(&kvm->arch.mmu_setup_lock); 5409 + mmu_was_ready = kvm->arch.mmu_ready; 5410 + if (kvm->arch.mmu_ready) { 5411 + kvm->arch.mmu_ready = 0; 5412 + /* order mmu_ready vs. vcpus_running */ 5413 + smp_mb(); 5414 + if (atomic_read(&kvm->arch.vcpus_running)) { 5415 + kvm->arch.mmu_ready = 1; 5416 + ret = -EBUSY; 5417 + goto out; 5418 + } 5419 + } 5420 + 5421 + srcu_idx = srcu_read_lock(&kvm->srcu); 5422 + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 5423 + struct kvm_memory_slot *memslot; 5424 + struct kvm_memslots *slots = __kvm_memslots(kvm, i); 5425 + 5426 + if (!slots) 5427 + continue; 5428 + 5429 + kvm_for_each_memslot(memslot, slots) { 5430 + kvmppc_uvmem_drop_pages(memslot, kvm); 5431 + uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); 5432 + } 5433 + } 5434 + srcu_read_unlock(&kvm->srcu, srcu_idx); 5435 + 5436 + ret = uv_svm_terminate(kvm->arch.lpid); 5437 + if (ret != U_SUCCESS) { 5438 + ret = -EINVAL; 5439 + goto out; 5440 + } 5441 + 5442 + /* 5443 + * When secure guest is reset, all the guest pages are sent 5444 + * to UV via UV_PAGE_IN before the non-boot vcpus get a 5445 + * chance to run and unpin their VPA pages. Unpinning of all 5446 + * VPA pages is done here explicitly so that VPA pages 5447 + * can be migrated to the secure side. 5448 + * 5449 + * This is required for the secure SMP guest to reboot 5450 + * correctly. 
5451 + */ 5452 + kvm_for_each_vcpu(i, vcpu, kvm) { 5453 + spin_lock(&vcpu->arch.vpa_update_lock); 5454 + unpin_vpa_reset(kvm, &vcpu->arch.dtl); 5455 + unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow); 5456 + unpin_vpa_reset(kvm, &vcpu->arch.vpa); 5457 + spin_unlock(&vcpu->arch.vpa_update_lock); 5458 + } 5459 + 5460 + kvmppc_setup_partition_table(kvm); 5461 + kvm->arch.secure_guest = 0; 5462 + kvm->arch.mmu_ready = mmu_was_ready; 5463 + out: 5464 + mutex_unlock(&kvm->arch.mmu_setup_lock); 5465 + return ret; 5466 + } 5467 + 5429 5468 static struct kvmppc_ops kvm_ops_hv = { 5430 5469 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, 5431 5470 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, ··· 5557 5420 .enable_nested = kvmhv_enable_nested, 5558 5421 .load_from_eaddr = kvmhv_load_from_eaddr, 5559 5422 .store_to_eaddr = kvmhv_store_to_eaddr, 5423 + .svm_off = kvmhv_svm_off, 5560 5424 }; 5561 5425 5562 5426 static int kvm_init_subcore_bitmap(void) ··· 5666 5528 no_mixing_hpt_and_radix = true; 5667 5529 } 5668 5530 5531 + r = kvmppc_uvmem_init(); 5532 + if (r < 0) 5533 + pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r); 5534 + 5669 5535 return r; 5670 5536 } 5671 5537 5672 5538 static void kvmppc_book3s_exit_hv(void) 5673 5539 { 5540 + kvmppc_uvmem_free(); 5674 5541 kvmppc_free_host_rm_ops(); 5675 5542 if (kvmppc_radix_possible()) 5676 5543 kvmppc_radix_exit();
+785
arch/powerpc/kvm/book3s_hv_uvmem.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Secure pages management: Migration of pages between normal and secure 4 + * memory of KVM guests. 5 + * 6 + * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com> 7 + */ 8 + 9 + /* 10 + * A pseries guest can be run as secure guest on Ultravisor-enabled 11 + * POWER platforms. On such platforms, this driver will be used to manage 12 + * the movement of guest pages between the normal memory managed by 13 + * hypervisor (HV) and secure memory managed by Ultravisor (UV). 14 + * 15 + * The page-in or page-out requests from UV will come to HV as hcalls and 16 + * HV will call back into UV via ultracalls to satisfy these page requests. 17 + * 18 + * Private ZONE_DEVICE memory equal to the amount of secure memory 19 + * available in the platform for running secure guests is hotplugged. 20 + * Whenever a page belonging to the guest becomes secure, a page from this 21 + * private device memory is used to represent and track that secure page 22 + * on the HV side. Some pages (like virtio buffers, VPA pages etc) are 23 + * shared between UV and HV. However such pages aren't represented by 24 + * device private memory and mappings to shared memory exist in both 25 + * UV and HV page tables. 26 + */ 27 + 28 + /* 29 + * Notes on locking 30 + * 31 + * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent 32 + * page-in and page-out requests for the same GPA. Concurrent accesses 33 + * can either come via UV (guest vCPUs requesting for same page) 34 + * or when HV and guest simultaneously access the same page. 35 + * This mutex serializes the migration of page from HV(normal) to 36 + * UV(secure) and vice versa. So the serialization points are around 37 + * migrate_vma routines and page-in/out routines. 38 + * 39 + * Per-guest mutex comes with a cost though. Mainly it serializes the 40 + * fault path as page-out can occur when HV faults on accessing secure 41 + * guest pages. 
Currently UV issues page-in requests for all the guest 42 + * PFNs one at a time during early boot (UV_ESM uvcall), so this is 43 + * not a cause for concern. Also currently the number of page-outs caused 44 + * by HV touching secure pages is very very low. If and when UV supports 45 + * overcommitting, then we might see concurrent guest driven page-outs. 46 + * 47 + * Locking order 48 + * 49 + * 1. kvm->srcu - Protects KVM memslots 50 + * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise 51 + * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting 52 + * as sync-points for page-in/out
80 + * Using 64K size is correct here too as UV would have split the 2MB page 81 + * into 64k mappings and would have done page-outs earlier. 82 + * 83 + * In summary, the current secure pages handling code in HV assumes 84 + * 64K page size and in fact fails any page-in/page-out requests of 85 + * non-64K size upfront. If and when UV starts supporting multiple 86 + * page-sizes, we need to break this assumption. 87 + */ 88 + 89 + #include <linux/pagemap.h> 90 + #include <linux/migrate.h> 91 + #include <linux/kvm_host.h> 92 + #include <linux/ksm.h> 93 + #include <asm/ultravisor.h> 94 + #include <asm/mman.h> 95 + #include <asm/kvm_ppc.h> 96 + 97 + static struct dev_pagemap kvmppc_uvmem_pgmap; 98 + static unsigned long *kvmppc_uvmem_bitmap; 99 + static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock); 100 + 101 + #define KVMPPC_UVMEM_PFN (1UL << 63) 102 + 103 + struct kvmppc_uvmem_slot { 104 + struct list_head list; 105 + unsigned long nr_pfns; 106 + unsigned long base_pfn; 107 + unsigned long *pfns; 108 + }; 109 + 110 + struct kvmppc_uvmem_page_pvt { 111 + struct kvm *kvm; 112 + unsigned long gpa; 113 + bool skip_page_out; 114 + }; 115 + 116 + int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot) 117 + { 118 + struct kvmppc_uvmem_slot *p; 119 + 120 + p = kzalloc(sizeof(*p), GFP_KERNEL); 121 + if (!p) 122 + return -ENOMEM; 123 + p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns))); 124 + if (!p->pfns) { 125 + kfree(p); 126 + return -ENOMEM; 127 + } 128 + p->nr_pfns = slot->npages; 129 + p->base_pfn = slot->base_gfn; 130 + 131 + mutex_lock(&kvm->arch.uvmem_lock); 132 + list_add(&p->list, &kvm->arch.uvmem_pfns); 133 + mutex_unlock(&kvm->arch.uvmem_lock); 134 + 135 + return 0; 136 + } 137 + 138 + /* 139 + * All device PFNs are already released by the time we come here. 
140 + */ 141 + void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) 142 + { 143 + struct kvmppc_uvmem_slot *p, *next; 144 + 145 + mutex_lock(&kvm->arch.uvmem_lock); 146 + list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) { 147 + if (p->base_pfn == slot->base_gfn) { 148 + vfree(p->pfns); 149 + list_del(&p->list); 150 + kfree(p); 151 + break; 152 + } 153 + } 154 + mutex_unlock(&kvm->arch.uvmem_lock); 155 + } 156 + 157 + static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, 158 + struct kvm *kvm) 159 + { 160 + struct kvmppc_uvmem_slot *p; 161 + 162 + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { 163 + if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { 164 + unsigned long index = gfn - p->base_pfn; 165 + 166 + p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN; 167 + return; 168 + } 169 + } 170 + } 171 + 172 + static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm) 173 + { 174 + struct kvmppc_uvmem_slot *p; 175 + 176 + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { 177 + if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { 178 + p->pfns[gfn - p->base_pfn] = 0; 179 + return; 180 + } 181 + } 182 + } 183 + 184 + static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, 185 + unsigned long *uvmem_pfn) 186 + { 187 + struct kvmppc_uvmem_slot *p; 188 + 189 + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { 190 + if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { 191 + unsigned long index = gfn - p->base_pfn; 192 + 193 + if (p->pfns[index] & KVMPPC_UVMEM_PFN) { 194 + if (uvmem_pfn) 195 + *uvmem_pfn = p->pfns[index] & 196 + ~KVMPPC_UVMEM_PFN; 197 + return true; 198 + } else 199 + return false; 200 + } 201 + } 202 + return false; 203 + } 204 + 205 + unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) 206 + { 207 + struct kvm_memslots *slots; 208 + struct kvm_memory_slot *memslot; 209 + int ret = H_SUCCESS; 210 + int srcu_idx; 211 + 212 + if 
(!kvmppc_uvmem_bitmap) 213 + return H_UNSUPPORTED; 214 + 215 + /* Only radix guests can be secure guests */ 216 + if (!kvm_is_radix(kvm)) 217 + return H_UNSUPPORTED; 218 + 219 + srcu_idx = srcu_read_lock(&kvm->srcu); 220 + slots = kvm_memslots(kvm); 221 + kvm_for_each_memslot(memslot, slots) { 222 + if (kvmppc_uvmem_slot_init(kvm, memslot)) { 223 + ret = H_PARAMETER; 224 + goto out; 225 + } 226 + ret = uv_register_mem_slot(kvm->arch.lpid, 227 + memslot->base_gfn << PAGE_SHIFT, 228 + memslot->npages * PAGE_SIZE, 229 + 0, memslot->id); 230 + if (ret < 0) { 231 + kvmppc_uvmem_slot_free(kvm, memslot); 232 + ret = H_PARAMETER; 233 + goto out; 234 + } 235 + } 236 + kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START; 237 + out: 238 + srcu_read_unlock(&kvm->srcu, srcu_idx); 239 + return ret; 240 + } 241 + 242 + unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) 243 + { 244 + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 245 + return H_UNSUPPORTED; 246 + 247 + kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; 248 + pr_info("LPID %d went secure\n", kvm->arch.lpid); 249 + return H_SUCCESS; 250 + } 251 + 252 + /* 253 + * Drop device pages that we maintain for the secure guest 254 + * 255 + * We first mark the pages to be skipped from UV_PAGE_OUT when there 256 + * is HV side fault on these pages. Next we *get* these pages, forcing 257 + * fault on them, do fault time migration to replace the device PTEs in 258 + * QEMU page table with normal PTEs from newly allocated pages. 
259 + */ 260 + void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, 261 + struct kvm *kvm) 262 + { 263 + int i; 264 + struct kvmppc_uvmem_page_pvt *pvt; 265 + unsigned long pfn, uvmem_pfn; 266 + unsigned long gfn = free->base_gfn; 267 + 268 + for (i = free->npages; i; --i, ++gfn) { 269 + struct page *uvmem_page; 270 + 271 + mutex_lock(&kvm->arch.uvmem_lock); 272 + if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { 273 + mutex_unlock(&kvm->arch.uvmem_lock); 274 + continue; 275 + } 276 + 277 + uvmem_page = pfn_to_page(uvmem_pfn); 278 + pvt = uvmem_page->zone_device_data; 279 + pvt->skip_page_out = true; 280 + mutex_unlock(&kvm->arch.uvmem_lock); 281 + 282 + pfn = gfn_to_pfn(kvm, gfn); 283 + if (is_error_noslot_pfn(pfn)) 284 + continue; 285 + kvm_release_pfn_clean(pfn); 286 + } 287 + } 288 + 289 + /* 290 + * Get a free device PFN from the pool 291 + * 292 + * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device 293 + * PFN will be used to keep track of the secure page on HV side. 
294 + * 295 + * Called with kvm->arch.uvmem_lock held 296 + */ 297 + static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) 298 + { 299 + struct page *dpage = NULL; 300 + unsigned long bit, uvmem_pfn; 301 + struct kvmppc_uvmem_page_pvt *pvt; 302 + unsigned long pfn_last, pfn_first; 303 + 304 + pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT; 305 + pfn_last = pfn_first + 306 + (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT); 307 + 308 + spin_lock(&kvmppc_uvmem_bitmap_lock); 309 + bit = find_first_zero_bit(kvmppc_uvmem_bitmap, 310 + pfn_last - pfn_first); 311 + if (bit >= (pfn_last - pfn_first)) 312 + goto out; 313 + bitmap_set(kvmppc_uvmem_bitmap, bit, 1); 314 + spin_unlock(&kvmppc_uvmem_bitmap_lock); 315 + 316 + pvt = kzalloc(sizeof(*pvt), GFP_KERNEL); 317 + if (!pvt) 318 + goto out_clear; 319 + 320 + uvmem_pfn = bit + pfn_first; 321 + kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); 322 + 323 + pvt->gpa = gpa; 324 + pvt->kvm = kvm; 325 + 326 + dpage = pfn_to_page(uvmem_pfn); 327 + dpage->zone_device_data = pvt; 328 + get_page(dpage); 329 + lock_page(dpage); 330 + return dpage; 331 + out_clear: 332 + spin_lock(&kvmppc_uvmem_bitmap_lock); 333 + bitmap_clear(kvmppc_uvmem_bitmap, bit, 1); 334 + out: 335 + spin_unlock(&kvmppc_uvmem_bitmap_lock); 336 + return NULL; 337 + } 338 + 339 + /* 340 + * Alloc a PFN from private device memory pool and copy page from normal 341 + * memory to secure memory using UV_PAGE_IN uvcall. 
342 + */ 343 + static int 344 + kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, 345 + unsigned long end, unsigned long gpa, struct kvm *kvm, 346 + unsigned long page_shift, bool *downgrade) 347 + { 348 + unsigned long src_pfn, dst_pfn = 0; 349 + struct migrate_vma mig; 350 + struct page *spage; 351 + unsigned long pfn; 352 + struct page *dpage; 353 + int ret = 0; 354 + 355 + memset(&mig, 0, sizeof(mig)); 356 + mig.vma = vma; 357 + mig.start = start; 358 + mig.end = end; 359 + mig.src = &src_pfn; 360 + mig.dst = &dst_pfn; 361 + 362 + /* 363 + * We come here with mmap_sem write lock held just for 364 + * ksm_madvise(), otherwise we only need read mmap_sem. 365 + * Hence downgrade to read lock once ksm_madvise() is done. 366 + */ 367 + ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, 368 + MADV_UNMERGEABLE, &vma->vm_flags); 369 + downgrade_write(&kvm->mm->mmap_sem); 370 + *downgrade = true; 371 + if (ret) 372 + return ret; 373 + 374 + ret = migrate_vma_setup(&mig); 375 + if (ret) 376 + return ret; 377 + 378 + if (!(*mig.src & MIGRATE_PFN_MIGRATE)) { 379 + ret = -1; 380 + goto out_finalize; 381 + } 382 + 383 + dpage = kvmppc_uvmem_get_page(gpa, kvm); 384 + if (!dpage) { 385 + ret = -1; 386 + goto out_finalize; 387 + } 388 + 389 + pfn = *mig.src >> MIGRATE_PFN_SHIFT; 390 + spage = migrate_pfn_to_page(*mig.src); 391 + if (spage) 392 + uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, 393 + page_shift); 394 + 395 + *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; 396 + migrate_vma_pages(&mig); 397 + out_finalize: 398 + migrate_vma_finalize(&mig); 399 + return ret; 400 + } 401 + 402 + /* 403 + * Shares the page with HV, thus making it a normal page. 404 + * 405 + * - If the page is already secure, then provision a new page and share 406 + * - If the page is a normal page, share the existing page 407 + * 408 + * In the former case, uses dev_pagemap_ops.migrate_to_ram handler 409 + * to unmap the device page from QEMU's page tables. 
410 + */ 411 + static unsigned long 412 + kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) 413 + { 414 + 415 + int ret = H_PARAMETER; 416 + struct page *uvmem_page; 417 + struct kvmppc_uvmem_page_pvt *pvt; 418 + unsigned long pfn; 419 + unsigned long gfn = gpa >> page_shift; 420 + int srcu_idx; 421 + unsigned long uvmem_pfn; 422 + 423 + srcu_idx = srcu_read_lock(&kvm->srcu); 424 + mutex_lock(&kvm->arch.uvmem_lock); 425 + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { 426 + uvmem_page = pfn_to_page(uvmem_pfn); 427 + pvt = uvmem_page->zone_device_data; 428 + pvt->skip_page_out = true; 429 + } 430 + 431 + retry: 432 + mutex_unlock(&kvm->arch.uvmem_lock); 433 + pfn = gfn_to_pfn(kvm, gfn); 434 + if (is_error_noslot_pfn(pfn)) 435 + goto out; 436 + 437 + mutex_lock(&kvm->arch.uvmem_lock); 438 + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { 439 + uvmem_page = pfn_to_page(uvmem_pfn); 440 + pvt = uvmem_page->zone_device_data; 441 + pvt->skip_page_out = true; 442 + kvm_release_pfn_clean(pfn); 443 + goto retry; 444 + } 445 + 446 + if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift)) 447 + ret = H_SUCCESS; 448 + kvm_release_pfn_clean(pfn); 449 + mutex_unlock(&kvm->arch.uvmem_lock); 450 + out: 451 + srcu_read_unlock(&kvm->srcu, srcu_idx); 452 + return ret; 453 + } 454 + 455 + /* 456 + * H_SVM_PAGE_IN: Move page from normal memory to secure memory. 457 + * 458 + * H_PAGE_IN_SHARED flag makes the page shared which means that the same 459 + * memory in is visible from both UV and HV. 
460 + */ 461 + unsigned long 462 + kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, 463 + unsigned long flags, unsigned long page_shift) 464 + { 465 + bool downgrade = false; 466 + unsigned long start, end; 467 + struct vm_area_struct *vma; 468 + int srcu_idx; 469 + unsigned long gfn = gpa >> page_shift; 470 + int ret; 471 + 472 + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 473 + return H_UNSUPPORTED; 474 + 475 + if (page_shift != PAGE_SHIFT) 476 + return H_P3; 477 + 478 + if (flags & ~H_PAGE_IN_SHARED) 479 + return H_P2; 480 + 481 + if (flags & H_PAGE_IN_SHARED) 482 + return kvmppc_share_page(kvm, gpa, page_shift); 483 + 484 + ret = H_PARAMETER; 485 + srcu_idx = srcu_read_lock(&kvm->srcu); 486 + down_write(&kvm->mm->mmap_sem); 487 + 488 + start = gfn_to_hva(kvm, gfn); 489 + if (kvm_is_error_hva(start)) 490 + goto out; 491 + 492 + mutex_lock(&kvm->arch.uvmem_lock); 493 + /* Fail the page-in request of an already paged-in page */ 494 + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL)) 495 + goto out_unlock; 496 + 497 + end = start + (1UL << page_shift); 498 + vma = find_vma_intersection(kvm->mm, start, end); 499 + if (!vma || vma->vm_start > start || vma->vm_end < end) 500 + goto out_unlock; 501 + 502 + if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, 503 + &downgrade)) 504 + ret = H_SUCCESS; 505 + out_unlock: 506 + mutex_unlock(&kvm->arch.uvmem_lock); 507 + out: 508 + if (downgrade) 509 + up_read(&kvm->mm->mmap_sem); 510 + else 511 + up_write(&kvm->mm->mmap_sem); 512 + srcu_read_unlock(&kvm->srcu, srcu_idx); 513 + return ret; 514 + } 515 + 516 + /* 517 + * Provision a new page on HV side and copy over the contents 518 + * from secure memory using UV_PAGE_OUT uvcall. 
519 + */ 520 + static int 521 + kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, 522 + unsigned long end, unsigned long page_shift, 523 + struct kvm *kvm, unsigned long gpa) 524 + { 525 + unsigned long src_pfn, dst_pfn = 0; 526 + struct migrate_vma mig; 527 + struct page *dpage, *spage; 528 + struct kvmppc_uvmem_page_pvt *pvt; 529 + unsigned long pfn; 530 + int ret = U_SUCCESS; 531 + 532 + memset(&mig, 0, sizeof(mig)); 533 + mig.vma = vma; 534 + mig.start = start; 535 + mig.end = end; 536 + mig.src = &src_pfn; 537 + mig.dst = &dst_pfn; 538 + 539 + mutex_lock(&kvm->arch.uvmem_lock); 540 + /* The requested page is already paged-out, nothing to do */ 541 + if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) 542 + goto out; 543 + 544 + ret = migrate_vma_setup(&mig); 545 + if (ret) 546 + return ret; 547 + 548 + spage = migrate_pfn_to_page(*mig.src); 549 + if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) 550 + goto out_finalize; 551 + 552 + if (!is_zone_device_page(spage)) 553 + goto out_finalize; 554 + 555 + dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); 556 + if (!dpage) { 557 + ret = -1; 558 + goto out_finalize; 559 + } 560 + 561 + lock_page(dpage); 562 + pvt = spage->zone_device_data; 563 + pfn = page_to_pfn(dpage); 564 + 565 + /* 566 + * This function is used in two cases: 567 + * - When HV touches a secure page, for which we do UV_PAGE_OUT 568 + * - When a secure page is converted to shared page, we *get* 569 + * the page to essentially unmap the device page. In this 570 + * case we skip page-out. 
571 + */ 572 + if (!pvt->skip_page_out) 573 + ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, 574 + gpa, 0, page_shift); 575 + 576 + if (ret == U_SUCCESS) 577 + *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; 578 + else { 579 + unlock_page(dpage); 580 + __free_page(dpage); 581 + goto out_finalize; 582 + } 583 + 584 + migrate_vma_pages(&mig); 585 + out_finalize: 586 + migrate_vma_finalize(&mig); 587 + out: 588 + mutex_unlock(&kvm->arch.uvmem_lock); 589 + return ret; 590 + } 591 + 592 + /* 593 + * Fault handler callback that gets called when HV touches any page that 594 + * has been moved to secure memory, we ask UV to give back the page by 595 + * issuing UV_PAGE_OUT uvcall. 596 + * 597 + * This eventually results in dropping of device PFN and the newly 598 + * provisioned page/PFN gets populated in QEMU page tables. 599 + */ 600 + static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) 601 + { 602 + struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data; 603 + 604 + if (kvmppc_svm_page_out(vmf->vma, vmf->address, 605 + vmf->address + PAGE_SIZE, PAGE_SHIFT, 606 + pvt->kvm, pvt->gpa)) 607 + return VM_FAULT_SIGBUS; 608 + else 609 + return 0; 610 + } 611 + 612 + /* 613 + * Release the device PFN back to the pool 614 + * 615 + * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT. 616 + * Gets called with kvm->arch.uvmem_lock held. 
617 + */ 618 + static void kvmppc_uvmem_page_free(struct page *page) 619 + { 620 + unsigned long pfn = page_to_pfn(page) - 621 + (kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT); 622 + struct kvmppc_uvmem_page_pvt *pvt; 623 + 624 + spin_lock(&kvmppc_uvmem_bitmap_lock); 625 + bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1); 626 + spin_unlock(&kvmppc_uvmem_bitmap_lock); 627 + 628 + pvt = page->zone_device_data; 629 + page->zone_device_data = NULL; 630 + kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); 631 + kfree(pvt); 632 + } 633 + 634 + static const struct dev_pagemap_ops kvmppc_uvmem_ops = { 635 + .page_free = kvmppc_uvmem_page_free, 636 + .migrate_to_ram = kvmppc_uvmem_migrate_to_ram, 637 + }; 638 + 639 + /* 640 + * H_SVM_PAGE_OUT: Move page from secure memory to normal memory. 641 + */ 642 + unsigned long 643 + kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa, 644 + unsigned long flags, unsigned long page_shift) 645 + { 646 + unsigned long gfn = gpa >> page_shift; 647 + unsigned long start, end; 648 + struct vm_area_struct *vma; 649 + int srcu_idx; 650 + int ret; 651 + 652 + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 653 + return H_UNSUPPORTED; 654 + 655 + if (page_shift != PAGE_SHIFT) 656 + return H_P3; 657 + 658 + if (flags) 659 + return H_P2; 660 + 661 + ret = H_PARAMETER; 662 + srcu_idx = srcu_read_lock(&kvm->srcu); 663 + down_read(&kvm->mm->mmap_sem); 664 + start = gfn_to_hva(kvm, gfn); 665 + if (kvm_is_error_hva(start)) 666 + goto out; 667 + 668 + end = start + (1UL << page_shift); 669 + vma = find_vma_intersection(kvm->mm, start, end); 670 + if (!vma || vma->vm_start > start || vma->vm_end < end) 671 + goto out; 672 + 673 + if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa)) 674 + ret = H_SUCCESS; 675 + out: 676 + up_read(&kvm->mm->mmap_sem); 677 + srcu_read_unlock(&kvm->srcu, srcu_idx); 678 + return ret; 679 + } 680 + 681 + int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) 682 + { 683 + unsigned long pfn; 
684 + int ret = U_SUCCESS; 685 + 686 + pfn = gfn_to_pfn(kvm, gfn); 687 + if (is_error_noslot_pfn(pfn)) 688 + return -EFAULT; 689 + 690 + mutex_lock(&kvm->arch.uvmem_lock); 691 + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL)) 692 + goto out; 693 + 694 + ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT, 695 + 0, PAGE_SHIFT); 696 + out: 697 + kvm_release_pfn_clean(pfn); 698 + mutex_unlock(&kvm->arch.uvmem_lock); 699 + return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT; 700 + } 701 + 702 + static u64 kvmppc_get_secmem_size(void) 703 + { 704 + struct device_node *np; 705 + int i, len; 706 + const __be32 *prop; 707 + u64 size = 0; 708 + 709 + np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware"); 710 + if (!np) 711 + goto out; 712 + 713 + prop = of_get_property(np, "secure-memory-ranges", &len); 714 + if (!prop) 715 + goto out_put; 716 + 717 + for (i = 0; i < len / (sizeof(*prop) * 4); i++) 718 + size += of_read_number(prop + (i * 4) + 2, 2); 719 + 720 + out_put: 721 + of_node_put(np); 722 + out: 723 + return size; 724 + } 725 + 726 + int kvmppc_uvmem_init(void) 727 + { 728 + int ret = 0; 729 + unsigned long size; 730 + struct resource *res; 731 + void *addr; 732 + unsigned long pfn_last, pfn_first; 733 + 734 + size = kvmppc_get_secmem_size(); 735 + if (!size) { 736 + /* 737 + * Don't fail the initialization of kvm-hv module if 738 + * the platform doesn't export ibm,uv-firmware node. 739 + * Let normal guests run on such PEF-disabled platform. 
740 + */ 741 + pr_info("KVMPPC-UVMEM: No support for secure guests\n"); 742 + goto out; 743 + } 744 + 745 + res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem"); 746 + if (IS_ERR(res)) { 747 + ret = PTR_ERR(res); 748 + goto out; 749 + } 750 + 751 + kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; 752 + kvmppc_uvmem_pgmap.res = *res; 753 + kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; 754 + addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE); 755 + if (IS_ERR(addr)) { 756 + ret = PTR_ERR(addr); 757 + goto out_free_region; 758 + } 759 + 760 + pfn_first = res->start >> PAGE_SHIFT; 761 + pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT); 762 + kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first), 763 + sizeof(unsigned long), GFP_KERNEL); 764 + if (!kvmppc_uvmem_bitmap) { 765 + ret = -ENOMEM; 766 + goto out_unmap; 767 + } 768 + 769 + pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size); 770 + return ret; 771 + out_unmap: 772 + memunmap_pages(&kvmppc_uvmem_pgmap); 773 + out_free_region: 774 + release_mem_region(res->start, size); 775 + out: 776 + return ret; 777 + } 778 + 779 + void kvmppc_uvmem_free(void) 780 + { 781 + memunmap_pages(&kvmppc_uvmem_pgmap); 782 + release_mem_region(kvmppc_uvmem_pgmap.res.start, 783 + resource_size(&kvmppc_uvmem_pgmap.res)); 784 + kfree(kvmppc_uvmem_bitmap); 785 + }
+12
arch/powerpc/kvm/powerpc.c
··· 31 31 #include <asm/hvcall.h> 32 32 #include <asm/plpar_wrappers.h> 33 33 #endif 34 + #include <asm/ultravisor.h> 35 + #include <asm/kvm_host.h> 34 36 35 37 #include "timing.h" 36 38 #include "irq.h" ··· 2413 2411 r = kvmppc_get_cpu_char(&cpuchar); 2414 2412 if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar))) 2415 2413 r = -EFAULT; 2414 + break; 2415 + } 2416 + case KVM_PPC_SVM_OFF: { 2417 + struct kvm *kvm = filp->private_data; 2418 + 2419 + r = 0; 2420 + if (!kvm->arch.kvm_ops->svm_off) 2421 + goto out; 2422 + 2423 + r = kvm->arch.kvm_ops->svm_off(kvm); 2416 2424 break; 2417 2425 } 2418 2426 default: {
+1
include/uapi/linux/kvm.h
··· 1348 1348 #define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) 1349 1349 /* Available with KVM_CAP_PMU_EVENT_FILTER */ 1350 1350 #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) 1351 + #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) 1351 1352 1352 1353 /* ioctl for vm fd */ 1353 1354 #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
+1
mm/ksm.c
··· 2478 2478 2479 2479 return 0; 2480 2480 } 2481 + EXPORT_SYMBOL_GPL(ksm_madvise); 2481 2482 2482 2483 int __ksm_enter(struct mm_struct *mm) 2483 2484 {