Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kvm: Add VFIO device

So far we've succeeded at making KVM and VFIO mostly unaware of each
other, but areas are cropping up where a connection beyond eventfds
and irqfds needs to be made. This patch introduces a KVM-VFIO device
that is meant to be a gateway for such interaction. The user creates
the device and can add VFIO groups to it, or remove them from it, via
file descriptors. When a group is added, KVM verifies the group is valid
and gets a reference to it via the VFIO external user interface.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by

Alex Williamson and committed by
Paolo Bonzini
ec53500f 84cffe49

+257 -1
+22
Documentation/virtual/kvm/devices/vfio.txt
··· 1 + VFIO virtual device 2 + =================== 3 + 4 + Device types supported: 5 + KVM_DEV_TYPE_VFIO 6 + 7 + Only one VFIO instance may be created per VM. The created device 8 + tracks VFIO groups in use by the VM and features of those groups 9 + important to the correctness and acceleration of the VM. As groups 10 + are enabled and disabled for use by the VM, KVM should be updated 11 + about their presence. When registered with KVM, a reference to the 12 + VFIO-group is held by KVM. 13 + 14 + Groups: 15 + KVM_DEV_VFIO_GROUP 16 + 17 + KVM_DEV_VFIO_GROUP attributes: 18 + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking 19 + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking 20 + 21 + For each, kvm_device_attr.addr points to an int32_t file descriptor 22 + for the VFIO group.
+1
arch/x86/kvm/Kconfig
··· 38 38 select PERF_EVENTS 39 39 select HAVE_KVM_MSI 40 40 select HAVE_KVM_CPU_RELAX_INTERCEPT 41 + select KVM_VFIO 41 42 ---help--- 42 43 Support hosting fully virtualized guest machines using hardware 43 44 virtualization extensions. You will need a fairly recent
+1 -1
arch/x86/kvm/Makefile
··· 9 9 10 10 kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ 11 11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ 12 - $(KVM)/eventfd.o $(KVM)/irqchip.o 12 + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o 13 13 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o 14 14 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o 15 15
+1
include/linux/kvm_host.h
··· 1058 1058 1059 1059 extern struct kvm_device_ops kvm_mpic_ops; 1060 1060 extern struct kvm_device_ops kvm_xics_ops; 1061 + extern struct kvm_device_ops kvm_vfio_ops; 1061 1062 1062 1063 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1063 1064
+4
include/uapi/linux/kvm.h
··· 845 845 #define KVM_DEV_TYPE_FSL_MPIC_20 1 846 846 #define KVM_DEV_TYPE_FSL_MPIC_42 2 847 847 #define KVM_DEV_TYPE_XICS 3 848 + #define KVM_DEV_TYPE_VFIO 4 849 + #define KVM_DEV_VFIO_GROUP 1 850 + #define KVM_DEV_VFIO_GROUP_ADD 1 851 + #define KVM_DEV_VFIO_GROUP_DEL 2 848 852 849 853 /* 850 854 * ioctls for VM fds
+3
virt/kvm/Kconfig
··· 27 27 28 28 config HAVE_KVM_CPU_RELAX_INTERCEPT 29 29 bool 30 + 31 + config KVM_VFIO 32 + bool
+5
virt/kvm/kvm_main.c
··· 2271 2271 ops = &kvm_xics_ops; 2272 2272 break; 2273 2273 #endif 2274 + #ifdef CONFIG_KVM_VFIO 2275 + case KVM_DEV_TYPE_VFIO: 2276 + ops = &kvm_vfio_ops; 2277 + break; 2278 + #endif 2274 2279 default: 2275 2280 return -ENODEV; 2276 2281 }
+220
virt/kvm/vfio.c
··· 1 + /* 2 + * VFIO-KVM bridge pseudo device 3 + * 4 + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. 5 + * Author: Alex Williamson <alex.williamson@redhat.com> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #include <linux/errno.h> 13 + #include <linux/file.h> 14 + #include <linux/kvm_host.h> 15 + #include <linux/list.h> 16 + #include <linux/module.h> 17 + #include <linux/mutex.h> 18 + #include <linux/slab.h> 19 + #include <linux/uaccess.h> 20 + #include <linux/vfio.h> 21 + 22 + struct kvm_vfio_group { 23 + struct list_head node; 24 + struct vfio_group *vfio_group; 25 + }; 26 + 27 + struct kvm_vfio { 28 + struct list_head group_list; 29 + struct mutex lock; 30 + }; 31 + 32 + static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) 33 + { 34 + struct vfio_group *vfio_group; 35 + struct vfio_group *(*fn)(struct file *); 36 + 37 + fn = symbol_get(vfio_group_get_external_user); 38 + if (!fn) 39 + return ERR_PTR(-EINVAL); 40 + 41 + vfio_group = fn(filep); 42 + 43 + symbol_put(vfio_group_get_external_user); 44 + 45 + return vfio_group; 46 + } 47 + 48 + static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) 49 + { 50 + void (*fn)(struct vfio_group *); 51 + 52 + fn = symbol_get(vfio_group_put_external_user); 53 + if (!fn) 54 + return; 55 + 56 + fn(vfio_group); 57 + 58 + symbol_put(vfio_group_put_external_user); 59 + } 60 + 61 + static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) 62 + { 63 + struct kvm_vfio *kv = dev->private; 64 + struct vfio_group *vfio_group; 65 + struct kvm_vfio_group *kvg; 66 + void __user *argp = (void __user *)arg; 67 + struct fd f; 68 + int32_t fd; 69 + int ret; 70 + 71 + switch (attr) { 72 + case KVM_DEV_VFIO_GROUP_ADD: 73 + if (get_user(fd, (int32_t __user *)argp)) 74 + return -EFAULT; 75 + 76 + f = 
fdget(fd); 77 + if (!f.file) 78 + return -EBADF; 79 + 80 + vfio_group = kvm_vfio_group_get_external_user(f.file); 81 + fdput(f); 82 + 83 + if (IS_ERR(vfio_group)) 84 + return PTR_ERR(vfio_group); 85 + 86 + mutex_lock(&kv->lock); 87 + 88 + list_for_each_entry(kvg, &kv->group_list, node) { 89 + if (kvg->vfio_group == vfio_group) { 90 + mutex_unlock(&kv->lock); 91 + kvm_vfio_group_put_external_user(vfio_group); 92 + return -EEXIST; 93 + } 94 + } 95 + 96 + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); 97 + if (!kvg) { 98 + mutex_unlock(&kv->lock); 99 + kvm_vfio_group_put_external_user(vfio_group); 100 + return -ENOMEM; 101 + } 102 + 103 + list_add_tail(&kvg->node, &kv->group_list); 104 + kvg->vfio_group = vfio_group; 105 + 106 + mutex_unlock(&kv->lock); 107 + 108 + return 0; 109 + 110 + case KVM_DEV_VFIO_GROUP_DEL: 111 + if (get_user(fd, (int32_t __user *)argp)) 112 + return -EFAULT; 113 + 114 + f = fdget(fd); 115 + if (!f.file) 116 + return -EBADF; 117 + 118 + vfio_group = kvm_vfio_group_get_external_user(f.file); 119 + fdput(f); 120 + 121 + if (IS_ERR(vfio_group)) 122 + return PTR_ERR(vfio_group); 123 + 124 + ret = -ENOENT; 125 + 126 + mutex_lock(&kv->lock); 127 + 128 + list_for_each_entry(kvg, &kv->group_list, node) { 129 + if (kvg->vfio_group != vfio_group) 130 + continue; 131 + 132 + list_del(&kvg->node); 133 + kvm_vfio_group_put_external_user(kvg->vfio_group); 134 + kfree(kvg); 135 + ret = 0; 136 + break; 137 + } 138 + 139 + mutex_unlock(&kv->lock); 140 + 141 + kvm_vfio_group_put_external_user(vfio_group); 142 + 143 + return ret; 144 + } 145 + 146 + return -ENXIO; 147 + } 148 + 149 + static int kvm_vfio_set_attr(struct kvm_device *dev, 150 + struct kvm_device_attr *attr) 151 + { 152 + switch (attr->group) { 153 + case KVM_DEV_VFIO_GROUP: 154 + return kvm_vfio_set_group(dev, attr->attr, attr->addr); 155 + } 156 + 157 + return -ENXIO; 158 + } 159 + 160 + static int kvm_vfio_has_attr(struct kvm_device *dev, 161 + struct kvm_device_attr *attr) 162 + { 163 + switch 
(attr->group) { 164 + case KVM_DEV_VFIO_GROUP: 165 + switch (attr->attr) { 166 + case KVM_DEV_VFIO_GROUP_ADD: 167 + case KVM_DEV_VFIO_GROUP_DEL: 168 + return 0; 169 + } 170 + 171 + break; 172 + } 173 + 174 + return -ENXIO; 175 + } 176 + 177 + static void kvm_vfio_destroy(struct kvm_device *dev) 178 + { 179 + struct kvm_vfio *kv = dev->private; 180 + struct kvm_vfio_group *kvg, *tmp; 181 + 182 + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { 183 + kvm_vfio_group_put_external_user(kvg->vfio_group); 184 + list_del(&kvg->node); 185 + kfree(kvg); 186 + } 187 + 188 + kfree(kv); 189 + kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ 190 + } 191 + 192 + static int kvm_vfio_create(struct kvm_device *dev, u32 type) 193 + { 194 + struct kvm_device *tmp; 195 + struct kvm_vfio *kv; 196 + 197 + /* Only one VFIO "device" per VM */ 198 + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) 199 + if (tmp->ops == &kvm_vfio_ops) 200 + return -EBUSY; 201 + 202 + kv = kzalloc(sizeof(*kv), GFP_KERNEL); 203 + if (!kv) 204 + return -ENOMEM; 205 + 206 + INIT_LIST_HEAD(&kv->group_list); 207 + mutex_init(&kv->lock); 208 + 209 + dev->private = kv; 210 + 211 + return 0; 212 + } 213 + 214 + struct kvm_device_ops kvm_vfio_ops = { 215 + .name = "kvm-vfio", 216 + .create = kvm_vfio_create, 217 + .destroy = kvm_vfio_destroy, 218 + .set_attr = kvm_vfio_set_attr, 219 + .has_attr = kvm_vfio_has_attr, 220 + };