Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Book3S: Add API for in-kernel XICS emulation

This adds the API for userspace to instantiate an XICS device in a VM
and connect VCPUs to it. The API consists of a new device type for
the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which
functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl,
which is used to assert and deassert interrupt inputs of the XICS.

The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES.
Each attribute within this group corresponds to the state of one
interrupt source. The attribute number is the same as the interrupt
source number.

This does not support irq routing or irqfd yet.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>

Authored by Paul Mackerras and committed by Alexander Graf
5975a2e0 d133b40f

+289 -27
+8
Documentation/virtual/kvm/api.txt
··· 2772 2772 args[1] is the MPIC CPU number for this vcpu 2773 2773 2774 2774 This capability connects the vcpu to an in-kernel MPIC device. 2775 + 2776 + 6.7 KVM_CAP_IRQ_XICS 2777 + 2778 + Architectures: ppc 2779 + Parameters: args[0] is the XICS device fd 2780 + args[1] is the XICS CPU number (server ID) for this vcpu 2781 + 2782 + This capability connects the vcpu to an in-kernel XICS device.
+66
Documentation/virtual/kvm/devices/xics.txt
··· 1 + XICS interrupt controller 2 + 3 + Device type supported: KVM_DEV_TYPE_XICS 4 + 5 + Groups: 6 + KVM_DEV_XICS_SOURCES 7 + Attributes: One per interrupt source, indexed by the source number. 8 + 9 + This device emulates the XICS (eXternal Interrupt Controller 10 + Specification) defined in PAPR. The XICS has a set of interrupt 11 + sources, each identified by a 20-bit source number, and a set of 12 + Interrupt Control Presentation (ICP) entities, also called "servers", 13 + each associated with a virtual CPU. 14 + 15 + The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH 16 + capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and 17 + the interrupt server number (i.e. the vcpu number from the XICS's 18 + point of view) in args[1] of the kvm_enable_cap struct. Each ICP has 19 + 64 bits of state which can be read and written using the 20 + KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. The 64 bit 21 + state word has the following bitfields, starting at the 22 + least-significant end of the word: 23 + 24 + * Unused, 16 bits 25 + 26 + * Pending interrupt priority, 8 bits 27 + Zero is the highest priority, 255 means no interrupt is pending. 28 + 29 + * Pending IPI (inter-processor interrupt) priority, 8 bits 30 + Zero is the highest priority, 255 means no IPI is pending. 31 + 32 + * Pending interrupt source number, 24 bits 33 + Zero means no interrupt pending, 2 means an IPI is pending 34 + 35 + * Current processor priority, 8 bits 36 + Zero is the highest priority, meaning no interrupts can be 37 + delivered, and 255 is the lowest priority. 38 + 39 + Each source has 64 bits of state that can be read and written using 40 + the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the 41 + KVM_DEV_XICS_SOURCES attribute group, with the attribute number being 42 + the interrupt source number. 
The 64 bit state word has the following 43 + bitfields, starting from the least-significant end of the word: 44 + 45 + * Destination (server number), 32 bits 46 + This specifies where the interrupt should be sent, and is the 47 + interrupt server number specified for the destination vcpu. 48 + 49 + * Priority, 8 bits 50 + This is the priority specified for this interrupt source, where 0 is 51 + the highest priority and 255 is the lowest. An interrupt with a 52 + priority of 255 will never be delivered. 53 + 54 + * Level sensitive flag, 1 bit 55 + This bit is 1 for a level-sensitive interrupt source, or 0 for 56 + edge-sensitive (or MSI). 57 + 58 + * Masked flag, 1 bit 59 + This bit is set to 1 if the interrupt is masked (cannot be delivered 60 + regardless of its priority), for example by the ibm,int-off RTAS 61 + call, or 0 if it is not masked. 62 + 63 + * Pending flag, 1 bit 64 + This bit is 1 if the source has a pending interrupt, otherwise 0. 65 + 66 + Only one XICS instance may be created per VM.
+2
arch/powerpc/include/asm/kvm_ppc.h
··· 315 315 extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); 316 316 extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); 317 317 extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); 318 + extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, 319 + struct kvm_vcpu *vcpu, u32 cpu); 318 320 #else 319 321 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) 320 322 { return 0; }
+12
arch/powerpc/include/uapi/asm/kvm.h
··· 499 499 #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) 500 500 #define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) 501 501 502 + /* PPC64 eXternal Interrupt Controller Specification */ 503 + #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ 504 + 505 + /* Layout of 64-bit source attribute values */ 506 + #define KVM_XICS_DESTINATION_SHIFT 0 507 + #define KVM_XICS_DESTINATION_MASK 0xffffffffULL 508 + #define KVM_XICS_PRIORITY_SHIFT 32 509 + #define KVM_XICS_PRIORITY_MASK 0xff 510 + #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) 511 + #define KVM_XICS_MASKED (1ULL << 41) 512 + #define KVM_XICS_PENDING (1ULL << 42) 513 + 502 514 #endif /* __LINUX_KVM_POWERPC_H */
+167 -27
arch/powerpc/kvm/book3s_xics.c
··· 11 11 #include <linux/kvm_host.h> 12 12 #include <linux/err.h> 13 13 #include <linux/gfp.h> 14 + #include <linux/anon_inodes.h> 14 15 15 16 #include <asm/uaccess.h> 16 17 #include <asm/kvm_book3s.h> ··· 56 55 * 57 56 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed 58 57 * locks array to improve scalability 59 - * 60 - * - ioctl's to save/restore the entire state for snapshot & migration 61 58 */ 62 59 63 60 /* -- ICS routines -- */ ··· 63 64 static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 64 65 u32 new_irq); 65 66 66 - static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) 67 + static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, 68 + bool report_status) 67 69 { 68 70 struct ics_irq_state *state; 69 71 struct kvmppc_ics *ics; ··· 81 81 if (!state->exists) 82 82 return -EINVAL; 83 83 84 + if (report_status) 85 + return state->asserted; 86 + 84 87 /* 85 88 * We set state->asserted locklessly. 
This should be fine as 86 89 * we are the only setter, thus concurrent access is undefined ··· 99 96 /* Attempt delivery */ 100 97 icp_deliver_irq(xics, NULL, irq); 101 98 102 - return 0; 99 + return state->asserted; 103 100 } 104 101 105 102 static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, ··· 894 891 kfree(name); 895 892 } 896 893 897 - struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, 898 - struct kvmppc_xics *xics, int irq) 894 + static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, 895 + struct kvmppc_xics *xics, int irq) 899 896 { 900 897 struct kvmppc_ics *ics; 901 898 int i, icsid; ··· 1047 1044 return 0; 1048 1045 } 1049 1046 1050 - /* -- ioctls -- */ 1051 - 1052 - int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args) 1047 + static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) 1053 1048 { 1054 - struct kvmppc_xics *xics; 1055 - int r; 1049 + int ret; 1050 + struct kvmppc_ics *ics; 1051 + struct ics_irq_state *irqp; 1052 + u64 __user *ubufp = (u64 __user *) addr; 1053 + u16 idx; 1054 + u64 val, prio; 1056 1055 1057 - /* locking against multiple callers? 
*/ 1056 + ics = kvmppc_xics_find_ics(xics, irq, &idx); 1057 + if (!ics) 1058 + return -ENOENT; 1058 1059 1059 - xics = kvm->arch.xics; 1060 - if (!xics) 1061 - return -ENODEV; 1062 - 1063 - switch (args->level) { 1064 - case KVM_INTERRUPT_SET: 1065 - case KVM_INTERRUPT_SET_LEVEL: 1066 - case KVM_INTERRUPT_UNSET: 1067 - r = ics_deliver_irq(xics, args->irq, args->level); 1068 - break; 1069 - default: 1070 - r = -EINVAL; 1060 + irqp = &ics->irq_state[idx]; 1061 + mutex_lock(&ics->lock); 1062 + ret = -ENOENT; 1063 + if (irqp->exists) { 1064 + val = irqp->server; 1065 + prio = irqp->priority; 1066 + if (prio == MASKED) { 1067 + val |= KVM_XICS_MASKED; 1068 + prio = irqp->saved_priority; 1069 + } 1070 + val |= prio << KVM_XICS_PRIORITY_SHIFT; 1071 + if (irqp->asserted) 1072 + val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING; 1073 + else if (irqp->masked_pending || irqp->resend) 1074 + val |= KVM_XICS_PENDING; 1075 + ret = 0; 1071 1076 } 1077 + mutex_unlock(&ics->lock); 1072 1078 1073 - return r; 1079 + if (!ret && put_user(val, ubufp)) 1080 + ret = -EFAULT; 1081 + 1082 + return ret; 1074 1083 } 1075 1084 1076 - void kvmppc_xics_free(struct kvmppc_xics *xics) 1085 + static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) 1077 1086 { 1087 + struct kvmppc_ics *ics; 1088 + struct ics_irq_state *irqp; 1089 + u64 __user *ubufp = (u64 __user *) addr; 1090 + u16 idx; 1091 + u64 val; 1092 + u8 prio; 1093 + u32 server; 1094 + 1095 + if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS) 1096 + return -ENOENT; 1097 + 1098 + ics = kvmppc_xics_find_ics(xics, irq, &idx); 1099 + if (!ics) { 1100 + ics = kvmppc_xics_create_ics(xics->kvm, xics, irq); 1101 + if (!ics) 1102 + return -ENOMEM; 1103 + } 1104 + irqp = &ics->irq_state[idx]; 1105 + if (get_user(val, ubufp)) 1106 + return -EFAULT; 1107 + 1108 + server = val & KVM_XICS_DESTINATION_MASK; 1109 + prio = val >> KVM_XICS_PRIORITY_SHIFT; 1110 + if (prio != MASKED && 1111 + kvmppc_xics_find_server(xics->kvm, 
server) == NULL) 1112 + return -EINVAL; 1113 + 1114 + mutex_lock(&ics->lock); 1115 + irqp->server = server; 1116 + irqp->saved_priority = prio; 1117 + if (val & KVM_XICS_MASKED) 1118 + prio = MASKED; 1119 + irqp->priority = prio; 1120 + irqp->resend = 0; 1121 + irqp->masked_pending = 0; 1122 + irqp->asserted = 0; 1123 + if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE)) 1124 + irqp->asserted = 1; 1125 + irqp->exists = 1; 1126 + mutex_unlock(&ics->lock); 1127 + 1128 + if (val & KVM_XICS_PENDING) 1129 + icp_deliver_irq(xics, NULL, irqp->number); 1130 + 1131 + return 0; 1132 + } 1133 + 1134 + int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, 1135 + bool line_status) 1136 + { 1137 + struct kvmppc_xics *xics = kvm->arch.xics; 1138 + 1139 + return ics_deliver_irq(xics, irq, level, line_status); 1140 + } 1141 + 1142 + static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1143 + { 1144 + struct kvmppc_xics *xics = dev->private; 1145 + 1146 + switch (attr->group) { 1147 + case KVM_DEV_XICS_GRP_SOURCES: 1148 + return xics_set_source(xics, attr->attr, attr->addr); 1149 + } 1150 + return -ENXIO; 1151 + } 1152 + 1153 + static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1154 + { 1155 + struct kvmppc_xics *xics = dev->private; 1156 + 1157 + switch (attr->group) { 1158 + case KVM_DEV_XICS_GRP_SOURCES: 1159 + return xics_get_source(xics, attr->attr, attr->addr); 1160 + } 1161 + return -ENXIO; 1162 + } 1163 + 1164 + static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1165 + { 1166 + switch (attr->group) { 1167 + case KVM_DEV_XICS_GRP_SOURCES: 1168 + if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && 1169 + attr->attr < KVMPPC_XICS_NR_IRQS) 1170 + return 0; 1171 + break; 1172 + } 1173 + return -ENXIO; 1174 + } 1175 + 1176 + static void kvmppc_xics_free(struct kvm_device *dev) 1177 + { 1178 + struct kvmppc_xics *xics = dev->private; 1078 1179 int i; 1079 1180 struct kvm *kvm = 
xics->kvm; 1080 1181 ··· 1190 1083 for (i = 0; i <= xics->max_icsid; i++) 1191 1084 kfree(xics->ics[i]); 1192 1085 kfree(xics); 1086 + kfree(dev); 1193 1087 } 1194 1088 1195 - int kvm_xics_create(struct kvm *kvm, u32 type) 1089 + static int kvmppc_xics_create(struct kvm_device *dev, u32 type) 1196 1090 { 1197 1091 struct kvmppc_xics *xics; 1092 + struct kvm *kvm = dev->kvm; 1198 1093 int ret = 0; 1199 1094 1200 1095 xics = kzalloc(sizeof(*xics), GFP_KERNEL); 1201 1096 if (!xics) 1202 1097 return -ENOMEM; 1203 1098 1099 + dev->private = xics; 1100 + xics->dev = dev; 1204 1101 xics->kvm = kvm; 1205 1102 1206 1103 /* Already there ? */ ··· 1229 1118 #endif /* CONFIG_KVM_BOOK3S_64_HV */ 1230 1119 1231 1120 return 0; 1121 + } 1122 + 1123 + struct kvm_device_ops kvm_xics_ops = { 1124 + .name = "kvm-xics", 1125 + .create = kvmppc_xics_create, 1126 + .destroy = kvmppc_xics_free, 1127 + .set_attr = xics_set_attr, 1128 + .get_attr = xics_get_attr, 1129 + .has_attr = xics_has_attr, 1130 + }; 1131 + 1132 + int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, 1133 + u32 xcpu) 1134 + { 1135 + struct kvmppc_xics *xics = dev->private; 1136 + int r = -EBUSY; 1137 + 1138 + if (dev->ops != &kvm_xics_ops) 1139 + return -EPERM; 1140 + if (xics->kvm != vcpu->kvm) 1141 + return -EPERM; 1142 + if (vcpu->arch.irq_type) 1143 + return -EBUSY; 1144 + 1145 + r = kvmppc_xics_create_icp(vcpu, xcpu); 1146 + if (!r) 1147 + vcpu->arch.irq_type = KVMPPC_IRQ_XICS; 1148 + 1149 + return r; 1232 1150 } 1233 1151 1234 1152 void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+1
arch/powerpc/kvm/book3s_xics.h
··· 88 88 89 89 struct kvmppc_xics { 90 90 struct kvm *kvm; 91 + struct kvm_device *dev; 91 92 struct dentry *dentry; 92 93 u32 max_icsid; 93 94 bool real_mode;
+3
arch/powerpc/kvm/irq.h
··· 10 10 #ifdef CONFIG_KVM_MPIC 11 11 ret = ret || (kvm->arch.mpic != NULL); 12 12 #endif 13 + #ifdef CONFIG_KVM_XICS 14 + ret = ret || (kvm->arch.xics != NULL); 15 + #endif 13 16 smp_rmb(); 14 17 return ret; 15 18 }
+22
arch/powerpc/kvm/powerpc.c
··· 342 342 case KVM_CAP_SPAPR_TCE: 343 343 case KVM_CAP_PPC_ALLOC_HTAB: 344 344 case KVM_CAP_PPC_RTAS: 345 + #ifdef CONFIG_KVM_XICS 346 + case KVM_CAP_IRQ_XICS: 347 + #endif 345 348 r = 1; 346 349 break; 347 350 #endif /* CONFIG_PPC_BOOK3S_64 */ ··· 840 837 break; 841 838 } 842 839 #endif 840 + #ifdef CONFIG_KVM_XICS 841 + case KVM_CAP_IRQ_XICS: { 842 + struct file *filp; 843 + struct kvm_device *dev; 844 + 845 + r = -EBADF; 846 + filp = fget(cap->args[0]); 847 + if (!filp) 848 + break; 849 + 850 + r = -EPERM; 851 + dev = kvm_device_from_filp(filp); 852 + if (dev) 853 + r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); 854 + 855 + fput(filp); 856 + break; 857 + } 858 + #endif /* CONFIG_KVM_XICS */ 843 859 default: 844 860 r = -EINVAL; 845 861 break;
+1
include/linux/kvm_host.h
··· 1086 1086 struct kvm_device *kvm_device_from_filp(struct file *filp); 1087 1087 1088 1088 extern struct kvm_device_ops kvm_mpic_ops; 1089 + extern struct kvm_device_ops kvm_xics_ops; 1089 1090 1090 1091 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1091 1092
+2
include/uapi/linux/kvm.h
··· 665 665 #define KVM_CAP_DEVICE_CTRL 89 666 666 #define KVM_CAP_IRQ_MPIC 90 667 667 #define KVM_CAP_PPC_RTAS 91 668 + #define KVM_CAP_IRQ_XICS 92 668 669 669 670 #ifdef KVM_CAP_IRQ_ROUTING 670 671 ··· 838 837 839 838 #define KVM_DEV_TYPE_FSL_MPIC_20 1 840 839 #define KVM_DEV_TYPE_FSL_MPIC_42 2 840 + #define KVM_DEV_TYPE_XICS 3 841 841 842 842 /* 843 843 * ioctls for VM fds
+5
virt/kvm/kvm_main.c
··· 2247 2247 ops = &kvm_mpic_ops; 2248 2248 break; 2249 2249 #endif 2250 + #ifdef CONFIG_KVM_XICS 2251 + case KVM_DEV_TYPE_XICS: 2252 + ops = &kvm_xics_ops; 2253 + break; 2254 + #endif 2250 2255 default: 2251 2256 return -ENODEV; 2252 2257 }