KVM: Create an inode per virtual machine

This avoids having filp->f_op differ from the corresponding inode->i_fop,
which is a little unorthodox.

The ioctl list is split into two: global kvm ioctls and per-vm ioctls. A new
ioctl, KVM_CREATE_VM, is used to create VMs and return the VM fd.

Signed-off-by: Avi Kivity <avi@qumranet.com>

+180 -42
+171 -41
drivers/kvm/kvm_main.c
··· 36 36 #include <asm/desc.h> 37 37 #include <linux/sysdev.h> 38 38 #include <linux/cpu.h> 39 + #include <linux/file.h> 39 40 #include <linux/fs.h> 40 41 #include <linux/mount.h> 41 42 ··· 95 94 }; 96 95 97 96 #endif 97 + 98 + static struct inode *kvmfs_inode(struct file_operations *fops) 99 + { 100 + int error = -ENOMEM; 101 + struct inode *inode = new_inode(kvmfs_mnt->mnt_sb); 102 + 103 + if (!inode) 104 + goto eexit_1; 105 + 106 + inode->i_fop = fops; 107 + 108 + /* 109 + * Mark the inode dirty from the very beginning, 110 + * that way it will never be moved to the dirty 111 + * list because mark_inode_dirty() will think 112 + * that it already _is_ on the dirty list. 113 + */ 114 + inode->i_state = I_DIRTY; 115 + inode->i_mode = S_IRUSR | S_IWUSR; 116 + inode->i_uid = current->fsuid; 117 + inode->i_gid = current->fsgid; 118 + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 119 + return inode; 120 + 121 + eexit_1: 122 + return ERR_PTR(error); 123 + } 124 + 125 + static struct file *kvmfs_file(struct inode *inode, void *private_data) 126 + { 127 + struct file *file = get_empty_filp(); 128 + 129 + if (!file) 130 + return ERR_PTR(-ENFILE); 131 + 132 + file->f_path.mnt = mntget(kvmfs_mnt); 133 + file->f_path.dentry = d_alloc_anon(inode); 134 + if (!file->f_path.dentry) 135 + return ERR_PTR(-ENOMEM); 136 + file->f_mapping = inode->i_mapping; 137 + 138 + file->f_pos = 0; 139 + file->f_flags = O_RDWR; 140 + file->f_op = inode->i_fop; 141 + file->f_mode = FMODE_READ | FMODE_WRITE; 142 + file->f_version = 0; 143 + file->private_data = private_data; 144 + return file; 145 + } 98 146 99 147 unsigned long segment_base(u16 selector) 100 148 { ··· 272 222 mutex_unlock(&vcpu->mutex); 273 223 } 274 224 275 - static int kvm_dev_open(struct inode *inode, struct file *filp) 225 + static struct kvm *kvm_create_vm(void) 276 226 { 277 227 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); 278 228 int i; 279 229 280 230 if (!kvm) 281 - return -ENOMEM; 231 + return ERR_PTR(-ENOMEM); 282 232 283 233 spin_lock_init(&kvm->lock); 284 234 INIT_LIST_HEAD(&kvm->active_mmu_pages); ··· 294 244 list_add(&kvm->vm_list, &vm_list); 295 245 spin_unlock(&kvm_lock); 296 246 } 297 - filp->private_data = kvm; 247 + return kvm; 248 + } 249 + 250 + static int kvm_dev_open(struct inode *inode, struct file *filp) 251 + { 298 252 return 0; 299 253 } 300 254 ··· 354 300 355 301 static int kvm_dev_release(struct inode *inode, struct file *filp) 356 302 { 357 - struct kvm *kvm = filp->private_data; 303 + return 0; 304 + } 358 305 306 + static void kvm_destroy_vm(struct kvm *kvm) 307 + { 359 308 spin_lock(&kvm_lock); 360 309 list_del(&kvm->vm_list); 361 310 spin_unlock(&kvm_lock); 362 311 kvm_free_vcpus(kvm); 363 312 kvm_free_physmem(kvm); 364 313 kfree(kvm); 314 + } 315 + 316 + static int kvm_vm_release(struct inode *inode, struct file *filp) 317 + { 318 + struct kvm *kvm = filp->private_data; 319 + 320 + kvm_destroy_vm(kvm); 365 321 return 0; 366 322 } 367 323 ··· 1964 1900 return r; 1965 1901 } 1966 1902 1967 - static long kvm_dev_ioctl(struct file *filp, 1968 - unsigned int ioctl, unsigned long arg) 1903 + static long kvm_vm_ioctl(struct file *filp, 1904 + unsigned int ioctl, unsigned long arg) 1969 1905 { 1970 1906 struct kvm *kvm = filp->private_data; 1971 1907 void __user *argp = (void __user *)arg; 1972 1908 int r = -EINVAL; 1973 1909 1974 1910 switch (ioctl) { 1975 - case KVM_GET_API_VERSION: 1976 - r = KVM_API_VERSION; 1977 - break; 1978 1911 case KVM_CREATE_VCPU: 1979 1912 r = kvm_dev_ioctl_create_vcpu(kvm, arg); 1980 1913 if (r) ··· 2113 2052 case KVM_SET_MSRS: 2114 2053 r = msr_io(kvm, argp, do_set_msr, 0); 2115 2054 break; 2055 + default: 2056 + ; 2057 + } 2058 + out: 2059 + return r; 2060 + } 2061 + 2062 + static struct page *kvm_vm_nopage(struct vm_area_struct *vma, 2063 + unsigned long address, 2064 + int *type) 2065 + { 2066 + struct kvm *kvm = vma->vm_file->private_data; 2067 + unsigned long pgoff; 2068 + struct kvm_memory_slot *slot; 2069 + struct page *page; 2070 + 2071 + *type = VM_FAULT_MINOR; 2072 + pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 2073 + slot = gfn_to_memslot(kvm, pgoff); 2074 + if (!slot) 2075 + return NOPAGE_SIGBUS; 2076 + page = gfn_to_page(slot, pgoff); 2077 + if (!page) 2078 + return NOPAGE_SIGBUS; 2079 + get_page(page); 2080 + return page; 2081 + } 2082 + 2083 + static struct vm_operations_struct kvm_vm_vm_ops = { 2084 + .nopage = kvm_vm_nopage, 2085 + }; 2086 + 2087 + static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) 2088 + { 2089 + vma->vm_ops = &kvm_vm_vm_ops; 2090 + return 0; 2091 + } 2092 + 2093 + static struct file_operations kvm_vm_fops = { 2094 + .release = kvm_vm_release, 2095 + .unlocked_ioctl = kvm_vm_ioctl, 2096 + .compat_ioctl = kvm_vm_ioctl, 2097 + .mmap = kvm_vm_mmap, 2098 + }; 2099 + 2100 + static int kvm_dev_ioctl_create_vm(void) 2101 + { 2102 + int fd, r; 2103 + struct inode *inode; 2104 + struct file *file; 2105 + struct kvm *kvm; 2106 + 2107 + inode = kvmfs_inode(&kvm_vm_fops); 2108 + if (IS_ERR(inode)) { 2109 + r = PTR_ERR(inode); 2110 + goto out1; 2111 + } 2112 + 2113 + kvm = kvm_create_vm(); 2114 + if (IS_ERR(kvm)) { 2115 + r = PTR_ERR(kvm); 2116 + goto out2; 2117 + } 2118 + 2119 + file = kvmfs_file(inode, kvm); 2120 + if (IS_ERR(file)) { 2121 + r = PTR_ERR(file); 2122 + goto out3; 2123 + } 2124 + 2125 + r = get_unused_fd(); 2126 + if (r < 0) 2127 + goto out4; 2128 + fd = r; 2129 + fd_install(fd, file); 2130 + 2131 + return fd; 2132 + 2133 + out4: 2134 + fput(file); 2135 + out3: 2136 + kvm_destroy_vm(kvm); 2137 + out2: 2138 + iput(inode); 2139 + out1: 2140 + return r; 2141 + } 2142 + 2143 + static long kvm_dev_ioctl(struct file *filp, 2144 + unsigned int ioctl, unsigned long arg) 2145 + { 2146 + void __user *argp = (void __user *)arg; 2147 + int r = -EINVAL; 2148 + 2149 + switch (ioctl) { 2150 + case KVM_GET_API_VERSION: 2151 + r = KVM_API_VERSION; 2152 + break; 2153 + case KVM_CREATE_VM: 2154 + r = kvm_dev_ioctl_create_vm(); 2155 + break; 2116 2156 case KVM_GET_MSR_INDEX_LIST: { 2117 2157 struct kvm_msr_list __user *user_msr_list = argp; 2118 2158 struct kvm_msr_list msr_list; ··· 2248 2086 return r; 2249 2087 } 2250 2088 2251 - static struct page *kvm_dev_nopage(struct vm_area_struct *vma, 2252 - unsigned long address, 2253 - int *type) 2254 - { 2255 - struct kvm *kvm = vma->vm_file->private_data; 2256 - unsigned long pgoff; 2257 - struct kvm_memory_slot *slot; 2258 - struct page *page; 2259 - 2260 - *type = VM_FAULT_MINOR; 2261 - pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 2262 - slot = gfn_to_memslot(kvm, pgoff); 2263 - if (!slot) 2264 - return NOPAGE_SIGBUS; 2265 - page = gfn_to_page(slot, pgoff); 2266 - if (!page) 2267 - return NOPAGE_SIGBUS; 2268 - get_page(page); 2269 - return page; 2270 - } 2271 - 2272 - static struct vm_operations_struct kvm_dev_vm_ops = { 2273 - .nopage = kvm_dev_nopage, 2274 - }; 2275 - 2276 - static int kvm_dev_mmap(struct file *file, struct vm_area_struct *vma) 2277 - { 2278 - vma->vm_ops = &kvm_dev_vm_ops; 2279 - return 0; 2280 - } 2281 - 2282 2089 static struct file_operations kvm_chardev_ops = { 2283 2090 .open = kvm_dev_open, 2284 2091 .release = kvm_dev_release, 2285 2092 .unlocked_ioctl = kvm_dev_ioctl, 2286 2093 .compat_ioctl = kvm_dev_ioctl, 2287 - .mmap = kvm_dev_mmap, 2288 2094 }; 2289 2095 2290 2096 static struct miscdevice kvm_dev = {
+9 -1
include/linux/kvm.h
··· 224 224 225 225 #define KVMIO 0xAE 226 226 227 + /* 228 + * ioctls for /dev/kvm fds: 229 + */ 227 230 #define KVM_GET_API_VERSION _IO(KVMIO, 1) 231 + #define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */ 232 + #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) 233 + 234 + /* 235 + * ioctls for VM fds 236 + */ 228 237 #define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) 229 238 #define KVM_GET_REGS _IOWR(KVMIO, 3, struct kvm_regs) 230 239 #define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) ··· 247 238 #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) 248 239 #define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) 249 240 #define KVM_SET_MSRS _IOWR(KVMIO, 14, struct kvm_msrs) 250 - #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) 251 241 252 242 #endif