Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Add support for multiple-TCE hcalls

This adds real and virtual mode handlers for the H_PUT_TCE_INDIRECT and
H_STUFF_TCE hypercalls for user space emulated devices such as IBMVIO
devices or emulated PCI devices. These calls allow adding multiple
entries (up to 512) to the TCE table in one call, which saves time on
transitions between kernel and user space.

The current implementation of kvmppc_h_stuff_tce() can be executed in
both real and virtual modes, so it needs only one helper.
H_PUT_TCE_INDIRECT, however, has to translate the guest address of the
TCE list to a host address, and since that translation differs between
the modes, there are 2 helpers: kvmppc_rm_h_put_tce_indirect() for real
mode and kvmppc_h_put_tce_indirect() for virtual mode.

This implements the KVM_CAP_PPC_MULTITCE capability (checked in the
code as KVM_CAP_SPAPR_MULTITCE). When present, the kernel will try to
handle H_PUT_TCE_INDIRECT and H_STUFF_TCE if these are enabled by
user space via KVM_CAP_PPC_ENABLE_HCALL.
If they cannot be handled by the kernel, they are passed on to
user space, so user space still has to have an implementation
for these.

Both HV and PR-style KVM are supported.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>

authored by

Alexey Kardashevskiy and committed by
Paul Mackerras
d3695aa4 5ee7af18

+306 -9
+25
Documentation/virtual/kvm/api.txt
··· 3035 3035 3036 3036 Queues an SMI on the thread's vcpu. 3037 3037 3038 + 4.97 KVM_CAP_PPC_MULTITCE 3039 + 3040 + Capability: KVM_CAP_PPC_MULTITCE 3041 + Architectures: ppc 3042 + Type: vm 3043 + 3044 + This capability means the kernel is capable of handling hypercalls 3045 + H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user 3046 + space. This significantly accelerates DMA operations for PPC KVM guests. 3047 + User space should expect that its handlers for these hypercalls 3048 + are not going to be called if user space previously registered LIOBN 3049 + in KVM (via KVM_CREATE_SPAPR_TCE or similar calls). 3050 + 3051 + In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest, 3052 + user space might have to advertise it for the guest. For example, 3053 + IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is 3054 + present in the "ibm,hypertas-functions" device-tree property. 3055 + 3056 + The hypercalls mentioned above may or may not be processed successfully 3057 + in the kernel based fast path. If they can not be handled by the kernel, 3058 + they will get passed on to user space. So user space still has to have 3059 + an implementation for these despite the in kernel acceleration. 3060 + 3061 + This capability is always enabled. 3062 + 3038 3063 5. The kvm_run structure 3039 3064 ------------------------ 3040 3065
+12
arch/powerpc/include/asm/kvm_ppc.h
··· 166 166 167 167 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 168 168 struct kvm_create_spapr_tce *args); 169 + extern struct kvmppc_spapr_tce_table *kvmppc_find_table( 170 + struct kvm_vcpu *vcpu, unsigned long liobn); 169 171 extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, 170 172 unsigned long ioba, unsigned long npages); 171 173 extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, 172 174 unsigned long tce); 175 + extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, 176 + unsigned long *ua, unsigned long **prmap); 177 + extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt, 178 + unsigned long idx, unsigned long tce); 173 179 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 174 180 unsigned long ioba, unsigned long tce); 181 + extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, 182 + unsigned long liobn, unsigned long ioba, 183 + unsigned long tce_list, unsigned long npages); 184 + extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, 185 + unsigned long liobn, unsigned long ioba, 186 + unsigned long tce_value, unsigned long npages); 175 187 extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 176 188 unsigned long ioba); 177 189 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
+58 -2
arch/powerpc/kvm/book3s_64_vio.c
··· 14 14 * 15 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 17 + * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> 17 18 */ 18 19 19 20 #include <linux/types.h> ··· 38 37 #include <asm/kvm_host.h> 39 38 #include <asm/udbg.h> 40 39 #include <asm/iommu.h> 41 - 42 - #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) 40 + #include <asm/tce.h> 43 41 44 42 static unsigned long kvmppc_tce_pages(unsigned long window_size) 45 43 { ··· 204 204 } 205 205 return ret; 206 206 } 207 + 208 + long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, 209 + unsigned long liobn, unsigned long ioba, 210 + unsigned long tce_list, unsigned long npages) 211 + { 212 + struct kvmppc_spapr_tce_table *stt; 213 + long i, ret = H_SUCCESS, idx; 214 + unsigned long entry, ua = 0; 215 + u64 __user *tces, tce; 216 + 217 + stt = kvmppc_find_table(vcpu, liobn); 218 + if (!stt) 219 + return H_TOO_HARD; 220 + 221 + entry = ioba >> IOMMU_PAGE_SHIFT_4K; 222 + /* 223 + * SPAPR spec says that the maximum size of the list is 512 TCEs 224 + * so the whole table fits in 4K page 225 + */ 226 + if (npages > 512) 227 + return H_PARAMETER; 228 + 229 + if (tce_list & (SZ_4K - 1)) 230 + return H_PARAMETER; 231 + 232 + ret = kvmppc_ioba_validate(stt, ioba, npages); 233 + if (ret != H_SUCCESS) 234 + return ret; 235 + 236 + idx = srcu_read_lock(&vcpu->kvm->srcu); 237 + if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { 238 + ret = H_TOO_HARD; 239 + goto unlock_exit; 240 + } 241 + tces = (u64 __user *) ua; 242 + 243 + for (i = 0; i < npages; ++i) { 244 + if (get_user(tce, tces + i)) { 245 + ret = H_TOO_HARD; 246 + goto unlock_exit; 247 + } 248 + tce = be64_to_cpu(tce); 249 + 250 + ret = kvmppc_tce_validate(stt, tce); 251 + if (ret != H_SUCCESS) 252 + goto unlock_exit; 253 + 254 + kvmppc_tce_put(stt, entry + i, tce); 255 + } 256 + 257 + unlock_exit: 258 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 259 + 260 + 
return ret; 261 + } 262 + EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
+146 -4
arch/powerpc/kvm/book3s_64_vio_hv.c
··· 14 14 * 15 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 17 + * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> 17 18 */ 18 19 19 20 #include <linux/types.h> ··· 31 30 #include <asm/kvm_ppc.h> 32 31 #include <asm/kvm_book3s.h> 33 32 #include <asm/mmu-hash64.h> 33 + #include <asm/mmu_context.h> 34 34 #include <asm/hvcall.h> 35 35 #include <asm/synch.h> 36 36 #include <asm/ppc-opcode.h> ··· 39 37 #include <asm/udbg.h> 40 38 #include <asm/iommu.h> 41 39 #include <asm/tce.h> 40 + #include <asm/iommu.h> 42 41 43 42 #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) 44 43 ··· 49 46 * WARNING: This will be called in real or virtual mode on HV KVM and virtual 50 47 * mode on PR KVM 51 48 */ 52 - static struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, 49 + struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, 53 50 unsigned long liobn) 54 51 { 55 52 struct kvm *kvm = vcpu->kvm; ··· 61 58 62 59 return NULL; 63 60 } 61 + EXPORT_SYMBOL_GPL(kvmppc_find_table); 64 62 65 63 /* 66 64 * Validates IO address. 
··· 155 151 } 156 152 EXPORT_SYMBOL_GPL(kvmppc_tce_put); 157 153 158 - /* WARNING: This will be called in real-mode on HV KVM and virtual 159 - * mode on PR KVM 160 - */ 154 + long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, 155 + unsigned long *ua, unsigned long **prmap) 156 + { 157 + unsigned long gfn = gpa >> PAGE_SHIFT; 158 + struct kvm_memory_slot *memslot; 159 + 160 + memslot = search_memslots(kvm_memslots(kvm), gfn); 161 + if (!memslot) 162 + return -EINVAL; 163 + 164 + *ua = __gfn_to_hva_memslot(memslot, gfn) | 165 + (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); 166 + 167 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 168 + if (prmap) 169 + *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 170 + #endif 171 + 172 + return 0; 173 + } 174 + EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); 175 + 176 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 161 177 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 162 178 unsigned long ioba, unsigned long tce) 163 179 { ··· 204 180 } 205 181 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); 206 182 183 + static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, 184 + unsigned long ua, unsigned long *phpa) 185 + { 186 + pte_t *ptep, pte; 187 + unsigned shift = 0; 188 + 189 + ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift); 190 + if (!ptep || !pte_present(*ptep)) 191 + return -ENXIO; 192 + pte = *ptep; 193 + 194 + if (!shift) 195 + shift = PAGE_SHIFT; 196 + 197 + /* Avoid handling anything potentially complicated in realmode */ 198 + if (shift > PAGE_SHIFT) 199 + return -EAGAIN; 200 + 201 + if (!pte_young(pte)) 202 + return -EAGAIN; 203 + 204 + *phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) | 205 + (ua & ~PAGE_MASK); 206 + 207 + return 0; 208 + } 209 + 210 + long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, 211 + unsigned long liobn, unsigned long ioba, 212 + unsigned long tce_list, unsigned long npages) 213 + { 214 + struct kvmppc_spapr_tce_table *stt; 215 + long i, ret = 
H_SUCCESS; 216 + unsigned long tces, entry, ua = 0; 217 + unsigned long *rmap = NULL; 218 + 219 + stt = kvmppc_find_table(vcpu, liobn); 220 + if (!stt) 221 + return H_TOO_HARD; 222 + 223 + entry = ioba >> IOMMU_PAGE_SHIFT_4K; 224 + /* 225 + * The spec says that the maximum size of the list is 512 TCEs 226 + * so the whole table addressed resides in 4K page 227 + */ 228 + if (npages > 512) 229 + return H_PARAMETER; 230 + 231 + if (tce_list & (SZ_4K - 1)) 232 + return H_PARAMETER; 233 + 234 + ret = kvmppc_ioba_validate(stt, ioba, npages); 235 + if (ret != H_SUCCESS) 236 + return ret; 237 + 238 + if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) 239 + return H_TOO_HARD; 240 + 241 + rmap = (void *) vmalloc_to_phys(rmap); 242 + 243 + /* 244 + * Synchronize with the MMU notifier callbacks in 245 + * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). 246 + * While we have the rmap lock, code running on other CPUs 247 + * cannot finish unmapping the host real page that backs 248 + * this guest real page, so we are OK to access the host 249 + * real page. 
250 + */ 251 + lock_rmap(rmap); 252 + if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { 253 + ret = H_TOO_HARD; 254 + goto unlock_exit; 255 + } 256 + 257 + for (i = 0; i < npages; ++i) { 258 + unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); 259 + 260 + ret = kvmppc_tce_validate(stt, tce); 261 + if (ret != H_SUCCESS) 262 + goto unlock_exit; 263 + 264 + kvmppc_tce_put(stt, entry + i, tce); 265 + } 266 + 267 + unlock_exit: 268 + unlock_rmap(rmap); 269 + 270 + return ret; 271 + } 272 + 273 + long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, 274 + unsigned long liobn, unsigned long ioba, 275 + unsigned long tce_value, unsigned long npages) 276 + { 277 + struct kvmppc_spapr_tce_table *stt; 278 + long i, ret; 279 + 280 + stt = kvmppc_find_table(vcpu, liobn); 281 + if (!stt) 282 + return H_TOO_HARD; 283 + 284 + ret = kvmppc_ioba_validate(stt, ioba, npages); 285 + if (ret != H_SUCCESS) 286 + return ret; 287 + 288 + /* Check permission bits only to allow userspace poison TCE for debug */ 289 + if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) 290 + return H_PARAMETER; 291 + 292 + for (i = 0; i < npages; ++i, ioba += IOMMU_PAGE_SIZE_4K) 293 + kvmppc_tce_put(stt, ioba >> IOMMU_PAGE_SHIFT_4K, tce_value); 294 + 295 + return H_SUCCESS; 296 + } 297 + EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); 298 + 207 299 long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 208 300 unsigned long ioba) 209 301 { ··· 345 205 return H_SUCCESS; 346 206 } 347 207 EXPORT_SYMBOL_GPL(kvmppc_h_get_tce); 208 + 209 + #endif /* KVM_BOOK3S_HV_POSSIBLE */
+25 -1
arch/powerpc/kvm/book3s_hv.c
··· 768 768 if (kvmppc_xics_enabled(vcpu)) { 769 769 ret = kvmppc_xics_hcall(vcpu, req); 770 770 break; 771 - } /* fallthrough */ 771 + } 772 + return RESUME_HOST; 773 + case H_PUT_TCE: 774 + ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4), 775 + kvmppc_get_gpr(vcpu, 5), 776 + kvmppc_get_gpr(vcpu, 6)); 777 + if (ret == H_TOO_HARD) 778 + return RESUME_HOST; 779 + break; 780 + case H_PUT_TCE_INDIRECT: 781 + ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4), 782 + kvmppc_get_gpr(vcpu, 5), 783 + kvmppc_get_gpr(vcpu, 6), 784 + kvmppc_get_gpr(vcpu, 7)); 785 + if (ret == H_TOO_HARD) 786 + return RESUME_HOST; 787 + break; 788 + case H_STUFF_TCE: 789 + ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4), 790 + kvmppc_get_gpr(vcpu, 5), 791 + kvmppc_get_gpr(vcpu, 6), 792 + kvmppc_get_gpr(vcpu, 7)); 793 + if (ret == H_TOO_HARD) 794 + return RESUME_HOST; 795 + break; 772 796 default: 773 797 return RESUME_HOST; 774 798 }
+2 -2
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 2006 2006 .long 0 /* 0x12c */ 2007 2007 .long 0 /* 0x130 */ 2008 2008 .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table 2009 - .long 0 /* 0x138 */ 2010 - .long 0 /* 0x13c */ 2009 + .long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table 2010 + .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table 2011 2011 .long 0 /* 0x140 */ 2012 2012 .long 0 /* 0x144 */ 2013 2013 .long 0 /* 0x148 */
+35
arch/powerpc/kvm/book3s_pr_papr.c
··· 280 280 return EMULATE_DONE; 281 281 } 282 282 283 + static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu) 284 + { 285 + unsigned long liobn = kvmppc_get_gpr(vcpu, 4); 286 + unsigned long ioba = kvmppc_get_gpr(vcpu, 5); 287 + unsigned long tce = kvmppc_get_gpr(vcpu, 6); 288 + unsigned long npages = kvmppc_get_gpr(vcpu, 7); 289 + long rc; 290 + 291 + rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba, 292 + tce, npages); 293 + if (rc == H_TOO_HARD) 294 + return EMULATE_FAIL; 295 + kvmppc_set_gpr(vcpu, 3, rc); 296 + return EMULATE_DONE; 297 + } 298 + 299 + static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu) 300 + { 301 + unsigned long liobn = kvmppc_get_gpr(vcpu, 4); 302 + unsigned long ioba = kvmppc_get_gpr(vcpu, 5); 303 + unsigned long tce_value = kvmppc_get_gpr(vcpu, 6); 304 + unsigned long npages = kvmppc_get_gpr(vcpu, 7); 305 + long rc; 306 + 307 + rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages); 308 + if (rc == H_TOO_HARD) 309 + return EMULATE_FAIL; 310 + kvmppc_set_gpr(vcpu, 3, rc); 311 + return EMULATE_DONE; 312 + } 313 + 283 314 static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) 284 315 { 285 316 long rc = kvmppc_xics_hcall(vcpu, cmd); ··· 337 306 return kvmppc_h_pr_bulk_remove(vcpu); 338 307 case H_PUT_TCE: 339 308 return kvmppc_h_pr_put_tce(vcpu); 309 + case H_PUT_TCE_INDIRECT: 310 + return kvmppc_h_pr_put_tce_indirect(vcpu); 311 + case H_STUFF_TCE: 312 + return kvmppc_h_pr_stuff_tce(vcpu); 340 313 case H_CEDE: 341 314 kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE); 342 315 kvm_vcpu_block(vcpu);
+3
arch/powerpc/kvm/powerpc.c
··· 569 569 case KVM_CAP_PPC_GET_SMMU_INFO: 570 570 r = 1; 571 571 break; 572 + case KVM_CAP_SPAPR_MULTITCE: 573 + r = 1; 574 + break; 572 575 #endif 573 576 default: 574 577 r = 0;