Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kvm: add device control API

Currently, devices that are emulated inside KVM are configured in a
hardcoded manner based on an assumption that any given architecture
only has one way to do it. If there's any need to access device state,
it is done through inflexible, single-purpose IOCTLs (e.g.
KVM_GET/SET_LAPIC). Defining a new IOCTL for every little thing is
cumbersome and depletes the limited IOCTL number space.

This API provides a mechanism to instantiate a device of a certain
type, returning an ID that can be used to set/get attributes of the
device. Attributes may include configuration parameters (e.g.
register base address), device state, operational commands, etc. It
is similar to the ONE_REG API, except that it acts on devices rather
than vcpus.

Support for both device types and individual attributes can be probed
without actually creating the device or getting/setting the attribute,
so there is no need to separately manage enumerated capabilities.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>

Authored by Scott Wood and committed by Alexander Graf.
852b6d57 7df35f54

+262
+70
Documentation/virtual/kvm/api.txt
··· 2189 2189 written, then `n_invalid' invalid entries, invalidating any previously 2190 2190 valid entries found. 2191 2191 2192 + 4.79 KVM_CREATE_DEVICE 2193 + 2194 + Capability: KVM_CAP_DEVICE_CTRL 2195 + Type: vm ioctl 2196 + Parameters: struct kvm_create_device (in/out) 2197 + Returns: 0 on success, -1 on error 2198 + Errors: 2199 + ENODEV: The device type is unknown or unsupported 2200 + EEXIST: Device already created, and this type of device may not 2201 + be instantiated multiple times 2202 + 2203 + Other error conditions may be defined by individual device types or 2204 + have their standard meanings. 2205 + 2206 + Creates an emulated device in the kernel. The file descriptor returned 2207 + in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR. 2208 + 2209 + If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the 2210 + device type is supported (not necessarily whether it can be created 2211 + in the current vm). 2212 + 2213 + Individual devices should not define flags. Attributes should be used 2214 + for specifying any behavior that is not implied by the device type 2215 + number. 2216 + 2217 + struct kvm_create_device { 2218 + __u32 type; /* in: KVM_DEV_TYPE_xxx */ 2219 + __u32 fd; /* out: device handle */ 2220 + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ 2221 + }; 2222 + 2223 + 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR 2224 + 2225 + Capability: KVM_CAP_DEVICE_CTRL 2226 + Type: device ioctl 2227 + Parameters: struct kvm_device_attr 2228 + Returns: 0 on success, -1 on error 2229 + Errors: 2230 + ENXIO: The group or attribute is unknown/unsupported for this device 2231 + EPERM: The attribute cannot (currently) be accessed this way 2232 + (e.g. read-only attribute, or attribute that only makes 2233 + sense when the device is in a different state) 2234 + 2235 + Other error conditions may be defined by individual device types. 2236 + 2237 + Gets/sets a specified piece of device configuration and/or state. 
The 2238 + semantics are device-specific. See individual device documentation in 2239 + the "devices" directory. As with ONE_REG, the size of the data 2240 + transferred is defined by the particular attribute. 2241 + 2242 + struct kvm_device_attr { 2243 + __u32 flags; /* no flags currently defined */ 2244 + __u32 group; /* device-defined */ 2245 + __u64 attr; /* group-defined */ 2246 + __u64 addr; /* userspace address of attr data */ 2247 + }; 2248 + 2249 + 4.81 KVM_HAS_DEVICE_ATTR 2250 + 2251 + Capability: KVM_CAP_DEVICE_CTRL 2252 + Type: device ioctl 2253 + Parameters: struct kvm_device_attr 2254 + Returns: 0 on success, -1 on error 2255 + Errors: 2256 + ENXIO: The group or attribute is unknown/unsupported for this device 2257 + 2258 + Tests whether a device supports a particular attribute. A successful 2259 + return indicates the attribute is implemented. It does not necessarily 2260 + indicate that the attribute can be read or written in the device's 2261 + current state. "addr" is ignored. 2192 2262 2193 2263 4.77 KVM_ARM_VCPU_INIT 2194 2264
+1
Documentation/virtual/kvm/devices/README
··· 1 + This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
+35
include/linux/kvm_host.h
··· 1064 1064 1065 1065 extern bool kvm_rebooting; 1066 1066 1067 + struct kvm_device_ops; 1068 + 1069 + struct kvm_device { 1070 + struct kvm_device_ops *ops; 1071 + struct kvm *kvm; 1072 + atomic_t users; 1073 + void *private; 1074 + }; 1075 + 1076 + /* create, destroy, and name are mandatory */ 1077 + struct kvm_device_ops { 1078 + const char *name; 1079 + int (*create)(struct kvm_device *dev, u32 type); 1080 + 1081 + /* 1082 + * Destroy is responsible for freeing dev. 1083 + * 1084 + * Destroy may be called before or after destructors are called 1085 + * on emulated I/O regions, depending on whether a reference is 1086 + * held by a vcpu or other kvm component that gets destroyed 1087 + * after the emulated I/O. 1088 + */ 1089 + void (*destroy)(struct kvm_device *dev); 1090 + 1091 + int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); 1092 + int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); 1093 + int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); 1094 + long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, 1095 + unsigned long arg); 1096 + }; 1097 + 1098 + void kvm_device_get(struct kvm_device *dev); 1099 + void kvm_device_put(struct kvm_device *dev); 1100 + struct kvm_device *kvm_device_from_filp(struct file *filp); 1101 + 1067 1102 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1068 1103 1069 1104 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
+27
include/uapi/linux/kvm.h
··· 666 666 #define KVM_CAP_PPC_EPR 86 667 667 #define KVM_CAP_ARM_PSCI 87 668 668 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88 669 + #define KVM_CAP_DEVICE_CTRL 89 669 670 670 671 #ifdef KVM_CAP_IRQ_ROUTING 671 672 ··· 820 819 }; 821 820 822 821 /* 822 + * Device control API, available with KVM_CAP_DEVICE_CTRL 823 + */ 824 + #define KVM_CREATE_DEVICE_TEST 1 825 + 826 + struct kvm_create_device { 827 + __u32 type; /* in: KVM_DEV_TYPE_xxx */ 828 + __u32 fd; /* out: device handle */ 829 + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ 830 + }; 831 + 832 + struct kvm_device_attr { 833 + __u32 flags; /* no flags currently defined */ 834 + __u32 group; /* device-defined */ 835 + __u64 attr; /* group-defined */ 836 + __u64 addr; /* userspace address of attr data */ 837 + }; 838 + 839 + /* 823 840 * ioctls for VM fds 824 841 */ 825 842 #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) ··· 924 905 #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) 925 906 /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ 926 907 #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) 908 + 909 + /* ioctl for vm fd */ 910 + #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) 911 + 912 + /* ioctls for fds returned by KVM_CREATE_DEVICE */ 913 + #define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr) 914 + #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) 915 + #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) 927 916 928 917 /* 929 918 * ioctls for vcpu fds
+129
virt/kvm/kvm_main.c
··· 2159 2159 } 2160 2160 #endif 2161 2161 2162 + static int kvm_device_ioctl_attr(struct kvm_device *dev, 2163 + int (*accessor)(struct kvm_device *dev, 2164 + struct kvm_device_attr *attr), 2165 + unsigned long arg) 2166 + { 2167 + struct kvm_device_attr attr; 2168 + 2169 + if (!accessor) 2170 + return -EPERM; 2171 + 2172 + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2173 + return -EFAULT; 2174 + 2175 + return accessor(dev, &attr); 2176 + } 2177 + 2178 + static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, 2179 + unsigned long arg) 2180 + { 2181 + struct kvm_device *dev = filp->private_data; 2182 + 2183 + switch (ioctl) { 2184 + case KVM_SET_DEVICE_ATTR: 2185 + return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); 2186 + case KVM_GET_DEVICE_ATTR: 2187 + return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); 2188 + case KVM_HAS_DEVICE_ATTR: 2189 + return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); 2190 + default: 2191 + if (dev->ops->ioctl) 2192 + return dev->ops->ioctl(dev, ioctl, arg); 2193 + 2194 + return -ENOTTY; 2195 + } 2196 + } 2197 + 2198 + void kvm_device_get(struct kvm_device *dev) 2199 + { 2200 + atomic_inc(&dev->users); 2201 + } 2202 + 2203 + void kvm_device_put(struct kvm_device *dev) 2204 + { 2205 + if (atomic_dec_and_test(&dev->users)) 2206 + dev->ops->destroy(dev); 2207 + } 2208 + 2209 + static int kvm_device_release(struct inode *inode, struct file *filp) 2210 + { 2211 + struct kvm_device *dev = filp->private_data; 2212 + struct kvm *kvm = dev->kvm; 2213 + 2214 + kvm_device_put(dev); 2215 + kvm_put_kvm(kvm); 2216 + return 0; 2217 + } 2218 + 2219 + static const struct file_operations kvm_device_fops = { 2220 + .unlocked_ioctl = kvm_device_ioctl, 2221 + .release = kvm_device_release, 2222 + }; 2223 + 2224 + struct kvm_device *kvm_device_from_filp(struct file *filp) 2225 + { 2226 + if (filp->f_op != &kvm_device_fops) 2227 + return NULL; 2228 + 2229 + return filp->private_data; 2230 + } 2231 + 2232 + 
static int kvm_ioctl_create_device(struct kvm *kvm, 2233 + struct kvm_create_device *cd) 2234 + { 2235 + struct kvm_device_ops *ops = NULL; 2236 + struct kvm_device *dev; 2237 + bool test = cd->flags & KVM_CREATE_DEVICE_TEST; 2238 + int ret; 2239 + 2240 + switch (cd->type) { 2241 + default: 2242 + return -ENODEV; 2243 + } 2244 + 2245 + if (test) 2246 + return 0; 2247 + 2248 + dev = kzalloc(sizeof(*dev), GFP_KERNEL); 2249 + if (!dev) 2250 + return -ENOMEM; 2251 + 2252 + dev->ops = ops; 2253 + dev->kvm = kvm; 2254 + atomic_set(&dev->users, 1); 2255 + 2256 + ret = ops->create(dev, cd->type); 2257 + if (ret < 0) { 2258 + kfree(dev); 2259 + return ret; 2260 + } 2261 + 2262 + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); 2263 + if (ret < 0) { 2264 + ops->destroy(dev); 2265 + return ret; 2266 + } 2267 + 2268 + kvm_get_kvm(kvm); 2269 + cd->fd = ret; 2270 + return 0; 2271 + } 2272 + 2162 2273 static long kvm_vm_ioctl(struct file *filp, 2163 2274 unsigned int ioctl, unsigned long arg) 2164 2275 { ··· 2415 2304 break; 2416 2305 } 2417 2306 #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ 2307 + case KVM_CREATE_DEVICE: { 2308 + struct kvm_create_device cd; 2309 + 2310 + r = -EFAULT; 2311 + if (copy_from_user(&cd, argp, sizeof(cd))) 2312 + goto out; 2313 + 2314 + r = kvm_ioctl_create_device(kvm, &cd); 2315 + if (r) 2316 + goto out; 2317 + 2318 + r = -EFAULT; 2319 + if (copy_to_user(argp, &cd, sizeof(cd))) 2320 + goto out; 2321 + 2322 + r = 0; 2323 + break; 2324 + } 2418 2325 default: 2419 2326 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 2420 2327 if (r == -ENOTTY)