
KVM: PPC: Book3S HV: Support for running secure guests

A pseries guest can be run as a secure guest on Ultravisor-enabled
POWER platforms. On such platforms, this driver is used to manage
the movement of guest pages between the normal memory managed by the
hypervisor (HV) and the secure memory managed by the Ultravisor (UV).

HV is informed about the guest's transition to secure mode via hcalls:

H_SVM_INIT_START: Initiate securing a VM
H_SVM_INIT_DONE: Conclude securing a VM

As part of H_SVM_INIT_START, HV registers all existing memslots with
the UV. The H_SVM_INIT_DONE call informs HV that the guest's
transition to secure mode is complete.
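
For reference, the registration loop, condensed from kvmppc_h_svm_init_start()
in the new book3s_hv_uvmem.c below (error handling trimmed):

	srcu_idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		/* allocate HV-side PFN tracking for this memslot */
		kvmppc_uvmem_slot_init(kvm, memslot);
		/* tell the Ultravisor about the memslot's GPA range */
		uv_register_mem_slot(kvm->arch.lpid,
				     memslot->base_gfn << PAGE_SHIFT,
				     memslot->npages * PAGE_SIZE,
				     0, memslot->id);
	}
	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
	srcu_read_unlock(&kvm->srcu, srcu_idx);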

These two states (transition to secure mode STARTED and transition
to secure mode COMPLETED) are recorded in kvm->arch.secure_guest.
Setting these states will cause the assembly code that enters the
guest to call the UV_RETURN ucall instead of trying to enter the
guest directly.
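
Both flag values are defined in kvm_host.h by this patch and set in the
respective hcall handlers in book3s_hv_uvmem.c:

	/* Flag values for kvm_arch.secure_guest */
	#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
	#define KVMPPC_SECURE_INIT_DONE  0x2 /* H_SVM_INIT_DONE completed */

	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START; /* kvmppc_h_svm_init_start() */
	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;  /* kvmppc_h_svm_init_done() */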

Migration of pages between normal and secure memory of a secure
guest is implemented by the H_SVM_PAGE_IN and H_SVM_PAGE_OUT hcalls.

H_SVM_PAGE_IN: Move the content of a normal page to secure page
H_SVM_PAGE_OUT: Move the content of a secure page to normal page
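
All four hcalls are dispatched from kvmppc_pseries_do_hcall() in book3s_hv.c
(full hunk below); the page-in case, with the register meanings taken from
the handler's signature:

	case H_SVM_PAGE_IN:
		ret = kvmppc_h_svm_page_in(vcpu->kvm,
					   kvmppc_get_gpr(vcpu, 4),  /* gpa */
					   kvmppc_get_gpr(vcpu, 5),  /* flags */
					   kvmppc_get_gpr(vcpu, 6)); /* page_shift */
		break;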

Private ZONE_DEVICE memory equal to the amount of secure memory
available in the platform for running secure guests is created.
Whenever a page belonging to the guest becomes secure, a page from
this private device memory is used to represent and track that secure
page on the HV side. The movement of pages between normal and secure
memory is done via migrate_vma_pages() using UV_PAGE_IN and
UV_PAGE_OUT ucalls.
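
Condensed from kvmppc_svm_page_in() below, the page-in path wires a
migrate_vma request to the UV_PAGE_IN ucall (error handling trimmed):

	memset(&mig, 0, sizeof(mig));
	mig.vma = vma;
	mig.start = start;
	mig.end = end;
	mig.src = &src_pfn;
	mig.dst = &dst_pfn;

	migrate_vma_setup(&mig);                 /* isolate the normal page */
	dpage = kvmppc_uvmem_get_page(gpa, kvm); /* device PFN tracking the secure page */
	uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift);
	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
	migrate_vma_pages(&mig);                 /* map in the device page */
	migrate_vma_finalize(&mig);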

In order to prevent the device private pages (that correspond to pages
of the secure guest) from participating in KSM merging, H_SVM_PAGE_IN
calls ksm_madvise(), which needs to run with mmap_sem held for writing.
Hence kvmppc_svm_page_in() is called with mmap_sem held for writing,
and it downgrades to a read lock after ksm_madvise() is done.
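
The resulting lock sequence, condensed from kvmppc_h_svm_page_in() and
kvmppc_svm_page_in() below:

	down_write(&kvm->mm->mmap_sem);
	...
	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
			  MADV_UNMERGEABLE, &vma->vm_flags);
	downgrade_write(&kvm->mm->mmap_sem); /* remainder only needs the read lock */
	*downgrade = true;
	...
	if (downgrade)
		up_read(&kvm->mm->mmap_sem);
	else
		up_write(&kvm->mm->mmap_sem);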

[paulus@ozlabs.org - roll in patch "KVM: PPC: Book3S HV: Take write
mmap_sem when calling ksm_madvise"]

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

Authored by Bharata B Rao and committed by Paul Mackerras
ca9f4942 33cf1707

8 files changed, 769 insertions(+)

arch/powerpc/include/asm/hvcall.h (+6)

 #define H_TLB_INVALIDATE	0xF808
 #define H_COPY_TOFROM_GUEST	0xF80C

+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN		0xEF00
+#define H_SVM_PAGE_OUT		0xEF04
+#define H_SVM_INIT_START	0xEF08
+#define H_SVM_INIT_DONE		0xEF0C
+
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR	1
 #define H_SET_MODE_RESOURCE_SET_DAWR	2
arch/powerpc/include/asm/kvm_book3s_uvmem.h (new file, +62)

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_KVM_BOOK3S_UVMEM_H__
#define __ASM_KVM_BOOK3S_UVMEM_H__

#ifdef CONFIG_PPC_UV
int kvmppc_uvmem_init(void);
void kvmppc_uvmem_free(void);
int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot);
void kvmppc_uvmem_slot_free(struct kvm *kvm,
			    const struct kvm_memory_slot *slot);
unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
				   unsigned long gra,
				   unsigned long flags,
				   unsigned long page_shift);
unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
				    unsigned long gra,
				    unsigned long flags,
				    unsigned long page_shift);
unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
#else
static inline int kvmppc_uvmem_init(void)
{
	return 0;
}

static inline void kvmppc_uvmem_free(void) { }

static inline int
kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
	return 0;
}

static inline void
kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) { }

static inline unsigned long
kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
		     unsigned long flags, unsigned long page_shift)
{
	return H_UNSUPPORTED;
}

static inline unsigned long
kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
		      unsigned long flags, unsigned long page_shift)
{
	return H_UNSUPPORTED;
}

static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
{
	return H_UNSUPPORTED;
}

static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
{
	return H_UNSUPPORTED;
}
#endif /* CONFIG_PPC_UV */
#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
arch/powerpc/include/asm/kvm_host.h (+6)

 struct kvm_resize_hpt;

+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
+#define KVMPPC_SECURE_INIT_DONE  0x2 /* H_SVM_INIT_DONE completed */
+
 struct kvm_arch {
 	unsigned int lpid;
 	unsigned int smt_mode;		/* # vcpus per virtual core */
···
 #endif
 	struct kvmppc_ops *kvm_ops;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	struct mutex uvmem_lock;
+	struct list_head uvmem_pfns;
 	struct mutex mmu_setup_lock;	/* nests inside vcpu mutexes */
 	u64 l1_ptcr;
 	int max_nested_lpid;
arch/powerpc/include/asm/ultravisor-api.h (+3)

 #define UV_WRITE_PATE			0xF104
 #define UV_RETURN			0xF11C
 #define UV_ESM				0xF110
+#define UV_REGISTER_MEM_SLOT		0xF120
+#define UV_PAGE_IN			0xF128
+#define UV_PAGE_OUT			0xF12C
 #define UV_SHARE_PAGE			0xF130
 #define UV_UNSHARE_PAGE			0xF134
 #define UV_UNSHARE_ALL_PAGES		0xF140
arch/powerpc/include/asm/ultravisor.h (+21)

 	return ucall_norets(UV_UNSHARE_ALL_PAGES);
 }

+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+			     u64 page_shift)
+{
+	return ucall_norets(UV_PAGE_IN, lpid, src_ra, dst_gpa, flags,
+			    page_shift);
+}
+
+static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags,
+			      u64 page_shift)
+{
+	return ucall_norets(UV_PAGE_OUT, lpid, dst_ra, src_gpa, flags,
+			    page_shift);
+}
+
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+				       u64 flags, u64 slotid)
+{
+	return ucall_norets(UV_REGISTER_MEM_SLOT, lpid, start_gpa,
+			    size, flags, slotid);
+}
+
 #endif	/* _ASM_POWERPC_ULTRAVISOR_H */
arch/powerpc/kvm/Makefile (+3)

 	book3s_64_mmu_radix.o \
 	book3s_hv_nested.o

+kvm-hv-$(CONFIG_PPC_UV) += \
+	book3s_hv_uvmem.o
+
 kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 	book3s_hv_tm.o
arch/powerpc/kvm/book3s_hv.c (+29)

 #include <asm/xics.h>
 #include <asm/xive.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_book3s_uvmem.h>

 #include "book3s.h"
···
 					 kvmppc_get_gpr(vcpu, 5),
 					 kvmppc_get_gpr(vcpu, 6));
 		break;
+	case H_SVM_PAGE_IN:
+		ret = kvmppc_h_svm_page_in(vcpu->kvm,
+					   kvmppc_get_gpr(vcpu, 4),
+					   kvmppc_get_gpr(vcpu, 5),
+					   kvmppc_get_gpr(vcpu, 6));
+		break;
+	case H_SVM_PAGE_OUT:
+		ret = kvmppc_h_svm_page_out(vcpu->kvm,
+					    kvmppc_get_gpr(vcpu, 4),
+					    kvmppc_get_gpr(vcpu, 5),
+					    kvmppc_get_gpr(vcpu, 6));
+		break;
+	case H_SVM_INIT_START:
+		ret = kvmppc_h_svm_init_start(vcpu->kvm);
+		break;
+	case H_SVM_INIT_DONE:
+		ret = kvmppc_h_svm_init_done(vcpu->kvm);
+		break;
+
 	default:
 		return RESUME_HOST;
 	}
···
 	char buf[32];
 	int ret;

+	mutex_init(&kvm->arch.uvmem_lock);
+	INIT_LIST_HEAD(&kvm->arch.uvmem_pfns);
 	mutex_init(&kvm->arch.mmu_setup_lock);

 	/* Allocate the guest's logical partition ID */
···
 		kvm->arch.process_table = 0;
 		kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
 	}
+
 	kvmppc_free_lpid(kvm->arch.lpid);

 	kvmppc_free_pimap(kvm);
···
 		no_mixing_hpt_and_radix = true;
 	}

+	r = kvmppc_uvmem_init();
+	if (r < 0)
+		pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
+
 	return r;
 }

 static void kvmppc_book3s_exit_hv(void)
 {
+	kvmppc_uvmem_free();
 	kvmppc_free_host_rm_ops();
 	if (kvmppc_radix_possible())
 		kvmppc_radix_exit();
arch/powerpc/kvm/book3s_hv_uvmem.c (new file, +639)

// SPDX-License-Identifier: GPL-2.0
/*
 * Secure pages management: Migration of pages between normal and secure
 * memory of KVM guests.
 *
 * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
 */

/*
 * A pseries guest can be run as a secure guest on Ultravisor-enabled
 * POWER platforms. On such platforms, this driver will be used to manage
 * the movement of guest pages between the normal memory managed by the
 * hypervisor (HV) and secure memory managed by the Ultravisor (UV).
 *
 * The page-in or page-out requests from UV will come to HV as hcalls and
 * HV will call back into UV via ultracalls to satisfy these page requests.
 *
 * Private ZONE_DEVICE memory equal to the amount of secure memory
 * available in the platform for running secure guests is hotplugged.
 * Whenever a page belonging to the guest becomes secure, a page from this
 * private device memory is used to represent and track that secure page
 * on the HV side.
 */

/*
 * Notes on locking
 *
 * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
 * page-in and page-out requests for the same GPA. Concurrent accesses
 * can either come via UV (guest vCPUs requesting the same page)
 * or when HV and guest simultaneously access the same page.
 * This mutex serializes the migration of a page from HV (normal) to
 * UV (secure) and vice versa. So the serialization points are around
 * migrate_vma routines and page-in/out routines.
 *
 * Per-guest mutex comes with a cost though. Mainly it serializes the
 * fault path as page-out can occur when HV faults on accessing secure
 * guest pages. Currently UV issues page-in requests for all the guest
 * PFNs one at a time during early boot (UV_ESM uvcall), so this is
 * not a cause for concern. Also currently the number of page-outs caused
 * by HV touching secure pages is very low. If and when UV supports
 * overcommitting, then we might see concurrent guest-driven page-outs.
 *
 * Locking order
 *
 * 1. kvm->srcu - Protects KVM memslots
 * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise
 * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
 *    as sync-points for page-in/out
 */

/*
 * Notes on page size
 *
 * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
 * and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks
 * secure GPAs at 64K page size and maintains one device PFN for each
 * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
 * for 64K pages, one at a time.
 *
 * HV faulting on secure pages: When HV touches any secure page, it
 * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
 * UV splits and remaps the 2MB page if necessary and copies out the
 * required 64K page contents.
 *
 * In summary, the current secure pages handling code in HV assumes
 * 64K page size and in fact fails any page-in/page-out requests of
 * non-64K size upfront. If and when UV starts supporting multiple
 * page-sizes, we need to break this assumption.
 */

#include <linux/pagemap.h>
#include <linux/migrate.h>
#include <linux/kvm_host.h>
#include <linux/ksm.h>
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>

static struct dev_pagemap kvmppc_uvmem_pgmap;
static unsigned long *kvmppc_uvmem_bitmap;
static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);

#define KVMPPC_UVMEM_PFN	(1UL << 63)

struct kvmppc_uvmem_slot {
	struct list_head list;
	unsigned long nr_pfns;
	unsigned long base_pfn;
	unsigned long *pfns;
};

struct kvmppc_uvmem_page_pvt {
	struct kvm *kvm;
	unsigned long gpa;
};

int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
	struct kvmppc_uvmem_slot *p;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
	if (!p->pfns) {
		kfree(p);
		return -ENOMEM;
	}
	p->nr_pfns = slot->npages;
	p->base_pfn = slot->base_gfn;

	mutex_lock(&kvm->arch.uvmem_lock);
	list_add(&p->list, &kvm->arch.uvmem_pfns);
	mutex_unlock(&kvm->arch.uvmem_lock);

	return 0;
}

/*
 * All device PFNs are already released by the time we come here.
 */
void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
	struct kvmppc_uvmem_slot *p, *next;

	mutex_lock(&kvm->arch.uvmem_lock);
	list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) {
		if (p->base_pfn == slot->base_gfn) {
			vfree(p->pfns);
			list_del(&p->list);
			kfree(p);
			break;
		}
	}
	mutex_unlock(&kvm->arch.uvmem_lock);
}

static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
				    struct kvm *kvm)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			unsigned long index = gfn - p->base_pfn;

			p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
			return;
		}
	}
}

static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			p->pfns[gfn - p->base_pfn] = 0;
			return;
		}
	}
}

static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
				    unsigned long *uvmem_pfn)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			unsigned long index = gfn - p->base_pfn;

			if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
				if (uvmem_pfn)
					*uvmem_pfn = p->pfns[index] &
						     ~KVMPPC_UVMEM_PFN;
				return true;
			} else
				return false;
		}
	}
	return false;
}

unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = H_SUCCESS;
	int srcu_idx;

	if (!kvmppc_uvmem_bitmap)
		return H_UNSUPPORTED;

	/* Only radix guests can be secure guests */
	if (!kvm_is_radix(kvm))
		return H_UNSUPPORTED;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		if (kvmppc_uvmem_slot_init(kvm, memslot)) {
			ret = H_PARAMETER;
			goto out;
		}
		ret = uv_register_mem_slot(kvm->arch.lpid,
					   memslot->base_gfn << PAGE_SHIFT,
					   memslot->npages * PAGE_SIZE,
					   0, memslot->id);
		if (ret < 0) {
			kvmppc_uvmem_slot_free(kvm, memslot);
			ret = H_PARAMETER;
			goto out;
		}
	}
	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
{
	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
	pr_info("LPID %d went secure\n", kvm->arch.lpid);
	return H_SUCCESS;
}

/*
 * Get a free device PFN from the pool
 *
 * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
 * PFN will be used to keep track of the secure page on HV side.
 *
 * Called with kvm->arch.uvmem_lock held
 */
static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
{
	struct page *dpage = NULL;
	unsigned long bit, uvmem_pfn;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn_last, pfn_first;

	pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
	pfn_last = pfn_first +
		   (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);

	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
				  pfn_last - pfn_first);
	if (bit >= (pfn_last - pfn_first))
		goto out;
	bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
	spin_unlock(&kvmppc_uvmem_bitmap_lock);

	pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
	if (!pvt)
		goto out_clear;

	uvmem_pfn = bit + pfn_first;
	kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);

	pvt->gpa = gpa;
	pvt->kvm = kvm;

	dpage = pfn_to_page(uvmem_pfn);
	dpage->zone_device_data = pvt;
	get_page(dpage);
	lock_page(dpage);
	return dpage;
out_clear:
	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
out:
	spin_unlock(&kvmppc_uvmem_bitmap_lock);
	return NULL;
}

/*
 * Alloc a PFN from private device memory pool and copy page from normal
 * memory to secure memory using UV_PAGE_IN uvcall.
 */
static int
kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
		   unsigned long end, unsigned long gpa, struct kvm *kvm,
		   unsigned long page_shift, bool *downgrade)
{
	unsigned long src_pfn, dst_pfn = 0;
	struct migrate_vma mig;
	struct page *spage;
	unsigned long pfn;
	struct page *dpage;
	int ret = 0;

	memset(&mig, 0, sizeof(mig));
	mig.vma = vma;
	mig.start = start;
	mig.end = end;
	mig.src = &src_pfn;
	mig.dst = &dst_pfn;

	/*
	 * We come here with mmap_sem write lock held just for
	 * ksm_madvise(), otherwise we only need read mmap_sem.
	 * Hence downgrade to read lock once ksm_madvise() is done.
	 */
	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
			  MADV_UNMERGEABLE, &vma->vm_flags);
	downgrade_write(&kvm->mm->mmap_sem);
	*downgrade = true;
	if (ret)
		return ret;

	ret = migrate_vma_setup(&mig);
	if (ret)
		return ret;

	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
		ret = -1;
		goto out_finalize;
	}

	dpage = kvmppc_uvmem_get_page(gpa, kvm);
	if (!dpage) {
		ret = -1;
		goto out_finalize;
	}

	pfn = *mig.src >> MIGRATE_PFN_SHIFT;
	spage = migrate_pfn_to_page(*mig.src);
	if (spage)
		uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
			   page_shift);

	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
	migrate_vma_pages(&mig);
out_finalize:
	migrate_vma_finalize(&mig);
	return ret;
}

/*
 * H_SVM_PAGE_IN: Move page from normal memory to secure memory.
 */
unsigned long
kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
		     unsigned long flags, unsigned long page_shift)
{
	bool downgrade = false;
	unsigned long start, end;
	struct vm_area_struct *vma;
	int srcu_idx;
	unsigned long gfn = gpa >> page_shift;
	int ret;

	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	if (page_shift != PAGE_SHIFT)
		return H_P3;

	if (flags)
		return H_P2;

	ret = H_PARAMETER;
	srcu_idx = srcu_read_lock(&kvm->srcu);
	down_write(&kvm->mm->mmap_sem);

	start = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(start))
		goto out;

	mutex_lock(&kvm->arch.uvmem_lock);
	/* Fail the page-in request of an already paged-in page */
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
		goto out_unlock;

	end = start + (1UL << page_shift);
	vma = find_vma_intersection(kvm->mm, start, end);
	if (!vma || vma->vm_start > start || vma->vm_end < end)
		goto out_unlock;

	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
				&downgrade))
		ret = H_SUCCESS;
out_unlock:
	mutex_unlock(&kvm->arch.uvmem_lock);
out:
	if (downgrade)
		up_read(&kvm->mm->mmap_sem);
	else
		up_write(&kvm->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

/*
 * Provision a new page on HV side and copy over the contents
 * from secure memory using UV_PAGE_OUT uvcall.
 */
static int
kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
		    unsigned long end, unsigned long page_shift,
		    struct kvm *kvm, unsigned long gpa)
{
	unsigned long src_pfn, dst_pfn = 0;
	struct migrate_vma mig;
	struct page *dpage, *spage;
	unsigned long pfn;
	int ret = U_SUCCESS;

	memset(&mig, 0, sizeof(mig));
	mig.vma = vma;
	mig.start = start;
	mig.end = end;
	mig.src = &src_pfn;
	mig.dst = &dst_pfn;

	mutex_lock(&kvm->arch.uvmem_lock);
	/* The requested page is already paged-out, nothing to do */
	if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
		goto out;

	ret = migrate_vma_setup(&mig);
	if (ret)
		goto out;	/* drop uvmem_lock on the failure path */

	spage = migrate_pfn_to_page(*mig.src);
	if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
		goto out_finalize;

	if (!is_zone_device_page(spage))
		goto out_finalize;

	dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
	if (!dpage) {
		ret = -1;
		goto out_finalize;
	}

	lock_page(dpage);
	pfn = page_to_pfn(dpage);

	ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
			  gpa, 0, page_shift);
	if (ret == U_SUCCESS)
		*mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
	else {
		unlock_page(dpage);
		__free_page(dpage);
		goto out_finalize;
	}

	migrate_vma_pages(&mig);
out_finalize:
	migrate_vma_finalize(&mig);
out:
	mutex_unlock(&kvm->arch.uvmem_lock);
	return ret;
}

/*
 * Fault handler callback that gets called when HV touches any page that
 * has been moved to secure memory, we ask UV to give back the page by
 * issuing UV_PAGE_OUT uvcall.
 *
 * This eventually results in dropping of device PFN and the newly
 * provisioned page/PFN gets populated in QEMU page tables.
 */
static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
{
	struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data;

	if (kvmppc_svm_page_out(vmf->vma, vmf->address,
				vmf->address + PAGE_SIZE, PAGE_SHIFT,
				pvt->kvm, pvt->gpa))
		return VM_FAULT_SIGBUS;
	else
		return 0;
}

/*
 * Release the device PFN back to the pool
 *
 * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT.
 * Gets called with kvm->arch.uvmem_lock held.
 */
static void kvmppc_uvmem_page_free(struct page *page)
{
	unsigned long pfn = page_to_pfn(page) -
			(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
	struct kvmppc_uvmem_page_pvt *pvt;

	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1);
	spin_unlock(&kvmppc_uvmem_bitmap_lock);

	pvt = page->zone_device_data;
	page->zone_device_data = NULL;
	kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
	kfree(pvt);
}

static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
	.page_free = kvmppc_uvmem_page_free,
	.migrate_to_ram = kvmppc_uvmem_migrate_to_ram,
};

/*
 * H_SVM_PAGE_OUT: Move page from secure memory to normal memory.
 */
unsigned long
kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
		      unsigned long flags, unsigned long page_shift)
{
	unsigned long gfn = gpa >> page_shift;
	unsigned long start, end;
	struct vm_area_struct *vma;
	int srcu_idx;
	int ret;

	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	if (page_shift != PAGE_SHIFT)
		return H_P3;

	if (flags)
		return H_P2;

	ret = H_PARAMETER;
	srcu_idx = srcu_read_lock(&kvm->srcu);
	down_read(&kvm->mm->mmap_sem);
	start = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(start))
		goto out;

	end = start + (1UL << page_shift);
	vma = find_vma_intersection(kvm->mm, start, end);
	if (!vma || vma->vm_start > start || vma->vm_end < end)
		goto out;

	if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
		ret = H_SUCCESS;
out:
	up_read(&kvm->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

static u64 kvmppc_get_secmem_size(void)
{
	struct device_node *np;
	int i, len;
	const __be32 *prop;
	u64 size = 0;

	np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
	if (!np)
		goto out;

	prop = of_get_property(np, "secure-memory-ranges", &len);
	if (!prop)
		goto out_put;

	for (i = 0; i < len / (sizeof(*prop) * 4); i++)
		size += of_read_number(prop + (i * 4) + 2, 2);

out_put:
	of_node_put(np);
out:
	return size;
}

int kvmppc_uvmem_init(void)
{
	int ret = 0;
	unsigned long size;
	struct resource *res;
	void *addr;
	unsigned long pfn_last, pfn_first;

	size = kvmppc_get_secmem_size();
	if (!size) {
		/*
		 * Don't fail the initialization of kvm-hv module if
		 * the platform doesn't export ibm,uv-firmware node.
		 * Let normal guests run on such PEF-disabled platform.
		 */
		pr_info("KVMPPC-UVMEM: No support for secure guests\n");
		goto out;
	}

	res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto out;
	}

	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
	kvmppc_uvmem_pgmap.res = *res;
	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
	addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
	if (IS_ERR(addr)) {
		ret = PTR_ERR(addr);
		goto out_free_region;
	}

	pfn_first = res->start >> PAGE_SHIFT;
	pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
	kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
				      sizeof(unsigned long), GFP_KERNEL);
	if (!kvmppc_uvmem_bitmap) {
		ret = -ENOMEM;
		goto out_unmap;
	}

	pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
	return ret;
out_unmap:
	memunmap_pages(&kvmppc_uvmem_pgmap);
out_free_region:
	release_mem_region(res->start, size);
out:
	return ret;
}

void kvmppc_uvmem_free(void)
{
	memunmap_pages(&kvmppc_uvmem_pgmap);
	release_mem_region(kvmppc_uvmem_pgmap.res.start,
			   resource_size(&kvmppc_uvmem_pgmap.res));
	kfree(kvmppc_uvmem_bitmap);
}