Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v3.6-rc4 1415 lines 35 kB view raw
1/* 2 * VFIO core 3 * 4 * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 5 * Author: Alex Williamson <alex.williamson@redhat.com> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 * 11 * Derived from original vfio: 12 * Copyright 2010 Cisco Systems, Inc. All rights reserved. 13 * Author: Tom Lyon, pugs@cisco.com 14 */ 15 16#include <linux/cdev.h> 17#include <linux/compat.h> 18#include <linux/device.h> 19#include <linux/file.h> 20#include <linux/anon_inodes.h> 21#include <linux/fs.h> 22#include <linux/idr.h> 23#include <linux/iommu.h> 24#include <linux/list.h> 25#include <linux/module.h> 26#include <linux/mutex.h> 27#include <linux/sched.h> 28#include <linux/slab.h> 29#include <linux/string.h> 30#include <linux/uaccess.h> 31#include <linux/vfio.h> 32#include <linux/wait.h> 33 34#define DRIVER_VERSION "0.3" 35#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" 36#define DRIVER_DESC "VFIO - User Level meta-driver" 37 38static struct vfio { 39 struct class *class; 40 struct list_head iommu_drivers_list; 41 struct mutex iommu_drivers_lock; 42 struct list_head group_list; 43 struct idr group_idr; 44 struct mutex group_lock; 45 struct cdev group_cdev; 46 struct device *dev; 47 dev_t devt; 48 struct cdev cdev; 49 wait_queue_head_t release_q; 50} vfio; 51 52struct vfio_iommu_driver { 53 const struct vfio_iommu_driver_ops *ops; 54 struct list_head vfio_next; 55}; 56 57struct vfio_container { 58 struct kref kref; 59 struct list_head group_list; 60 struct mutex group_lock; 61 struct vfio_iommu_driver *iommu_driver; 62 void *iommu_data; 63}; 64 65struct vfio_group { 66 struct kref kref; 67 int minor; 68 atomic_t container_users; 69 struct iommu_group *iommu_group; 70 struct vfio_container *container; 71 struct list_head device_list; 72 struct mutex device_lock; 73 struct device *dev; 74 struct notifier_block nb; 75 struct list_head vfio_next; 76 struct list_head container_next; 77}; 78 79struct vfio_device { 80 struct kref kref; 81 struct device *dev; 82 const struct vfio_device_ops *ops; 83 struct vfio_group *group; 84 struct list_head group_next; 85 void *device_data; 86}; 87 88/** 89 * IOMMU driver registration 90 */ 91int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) 92{ 93 struct vfio_iommu_driver *driver, *tmp; 94 95 driver = kzalloc(sizeof(*driver), GFP_KERNEL); 96 if (!driver) 97 return -ENOMEM; 98 99 driver->ops = ops; 100 101 mutex_lock(&vfio.iommu_drivers_lock); 102 103 /* Check for duplicates */ 104 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { 105 if (tmp->ops == ops) { 106 mutex_unlock(&vfio.iommu_drivers_lock); 107 kfree(driver); 108 return -EINVAL; 109 } 110 } 111 112 list_add(&driver->vfio_next, &vfio.iommu_drivers_list); 113 114 mutex_unlock(&vfio.iommu_drivers_lock); 115 116 return 0; 117} 118EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); 119 120void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) 121{ 122 struct vfio_iommu_driver *driver; 123 124 mutex_lock(&vfio.iommu_drivers_lock); 125 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 126 if (driver->ops == ops) { 127 list_del(&driver->vfio_next); 128 mutex_unlock(&vfio.iommu_drivers_lock); 129 kfree(driver); 130 return; 131 } 132 } 133 mutex_unlock(&vfio.iommu_drivers_lock); 134} 135EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); 136 137/** 138 * Group minor allocation/free - both called with vfio.group_lock held 139 */ 140static int vfio_alloc_group_minor(struct vfio_group *group) 141{ 142 int ret, minor; 143 144again: 145 if (unlikely(idr_pre_get(&vfio.group_idr, GFP_KERNEL) == 0)) 146 return -ENOMEM; 147 148 /* index 0 is used by /dev/vfio/vfio */ 149 ret = idr_get_new_above(&vfio.group_idr, group, 1, &minor); 150 if (ret == -EAGAIN) 151 goto again; 152 if (ret || minor > MINORMASK) { 153 if (minor > MINORMASK) 154 idr_remove(&vfio.group_idr, minor); 155 return -ENOSPC; 156 } 157 158 return minor; 159} 160 161static void vfio_free_group_minor(int minor) 162{ 163 idr_remove(&vfio.group_idr, minor); 164} 165 166static int vfio_iommu_group_notifier(struct notifier_block *nb, 167 unsigned long action, void *data); 168static void vfio_group_get(struct vfio_group *group); 169 170/** 171 * Container objects - containers are created when /dev/vfio/vfio is 172 * opened, but their lifecycle extends until the last user is done, so 173 * it's freed via kref. Must support container/group/device being 174 * closed in any order. 175 */ 176static void vfio_container_get(struct vfio_container *container) 177{ 178 kref_get(&container->kref); 179} 180 181static void vfio_container_release(struct kref *kref) 182{ 183 struct vfio_container *container; 184 container = container_of(kref, struct vfio_container, kref); 185 186 kfree(container); 187} 188 189static void vfio_container_put(struct vfio_container *container) 190{ 191 kref_put(&container->kref, vfio_container_release); 192} 193 194/** 195 * Group objects - create, release, get, put, search 196 */ 197static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) 198{ 199 struct vfio_group *group, *tmp; 200 struct device *dev; 201 int ret, minor; 202 203 group = kzalloc(sizeof(*group), GFP_KERNEL); 204 if (!group) 205 return ERR_PTR(-ENOMEM); 206 207 kref_init(&group->kref); 208 INIT_LIST_HEAD(&group->device_list); 209 mutex_init(&group->device_lock); 210 atomic_set(&group->container_users, 0); 211 group->iommu_group = iommu_group; 212 213 group->nb.notifier_call = vfio_iommu_group_notifier; 214 215 /* 216 * blocking notifiers acquire a rwsem around registering and hold 217 * it around callback. Therefore, need to register outside of 218 * vfio.group_lock to avoid A-B/B-A contention. Our callback won't 219 * do anything unless it can find the group in vfio.group_list, so 220 * no harm in registering early. 221 */ 222 ret = iommu_group_register_notifier(iommu_group, &group->nb); 223 if (ret) { 224 kfree(group); 225 return ERR_PTR(ret); 226 } 227 228 mutex_lock(&vfio.group_lock); 229 230 minor = vfio_alloc_group_minor(group); 231 if (minor < 0) { 232 mutex_unlock(&vfio.group_lock); 233 kfree(group); 234 return ERR_PTR(minor); 235 } 236 237 /* Did we race creating this group? */ 238 list_for_each_entry(tmp, &vfio.group_list, vfio_next) { 239 if (tmp->iommu_group == iommu_group) { 240 vfio_group_get(tmp); 241 vfio_free_group_minor(minor); 242 mutex_unlock(&vfio.group_lock); 243 kfree(group); 244 return tmp; 245 } 246 } 247 248 dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.devt), minor), 249 group, "%d", iommu_group_id(iommu_group)); 250 if (IS_ERR(dev)) { 251 vfio_free_group_minor(minor); 252 mutex_unlock(&vfio.group_lock); 253 kfree(group); 254 return (struct vfio_group *)dev; /* ERR_PTR */ 255 } 256 257 group->minor = minor; 258 group->dev = dev; 259 260 list_add(&group->vfio_next, &vfio.group_list); 261 262 mutex_unlock(&vfio.group_lock); 263 264 return group; 265} 266 267/* called with vfio.group_lock held */ 268static void vfio_group_release(struct kref *kref) 269{ 270 struct vfio_group *group = container_of(kref, struct vfio_group, kref); 271 272 WARN_ON(!list_empty(&group->device_list)); 273 274 device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor)); 275 list_del(&group->vfio_next); 276 vfio_free_group_minor(group->minor); 277 278 mutex_unlock(&vfio.group_lock); 279 280 /* 281 * Unregister outside of lock. A spurious callback is harmless now 282 * that the group is no longer in vfio.group_list. 283 */ 284 iommu_group_unregister_notifier(group->iommu_group, &group->nb); 285 286 kfree(group); 287} 288 289static void vfio_group_put(struct vfio_group *group) 290{ 291 kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); 292} 293 294/* Assume group_lock or group reference is held */ 295static void vfio_group_get(struct vfio_group *group) 296{ 297 kref_get(&group->kref); 298} 299 300/* 301 * Not really a try as we will sleep for mutex, but we need to make 302 * sure the group pointer is valid under lock and get a reference. 303 */ 304static struct vfio_group *vfio_group_try_get(struct vfio_group *group) 305{ 306 struct vfio_group *target = group; 307 308 mutex_lock(&vfio.group_lock); 309 list_for_each_entry(group, &vfio.group_list, vfio_next) { 310 if (group == target) { 311 vfio_group_get(group); 312 mutex_unlock(&vfio.group_lock); 313 return group; 314 } 315 } 316 mutex_unlock(&vfio.group_lock); 317 318 return NULL; 319} 320 321static 322struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group) 323{ 324 struct vfio_group *group; 325 326 mutex_lock(&vfio.group_lock); 327 list_for_each_entry(group, &vfio.group_list, vfio_next) { 328 if (group->iommu_group == iommu_group) { 329 vfio_group_get(group); 330 mutex_unlock(&vfio.group_lock); 331 return group; 332 } 333 } 334 mutex_unlock(&vfio.group_lock); 335 336 return NULL; 337} 338 339static struct vfio_group *vfio_group_get_from_minor(int minor) 340{ 341 struct vfio_group *group; 342 343 mutex_lock(&vfio.group_lock); 344 group = idr_find(&vfio.group_idr, minor); 345 if (!group) { 346 mutex_unlock(&vfio.group_lock); 347 return NULL; 348 } 349 vfio_group_get(group); 350 mutex_unlock(&vfio.group_lock); 351 352 return group; 353} 354 355/** 356 * Device objects - create, release, get, put, search 357 */ 358static 359struct vfio_device *vfio_group_create_device(struct vfio_group *group, 360 struct device *dev, 361 const struct vfio_device_ops *ops, 362 void *device_data) 363{ 364 struct vfio_device *device; 365 int ret; 366 367 device = kzalloc(sizeof(*device), GFP_KERNEL); 368 if (!device) 369 return ERR_PTR(-ENOMEM); 370 371 kref_init(&device->kref); 372 device->dev = dev; 373 device->group = group; 374 device->ops = ops; 375 device->device_data = device_data; 376 377 ret = dev_set_drvdata(dev, device); 378 if (ret) { 379 kfree(device); 380 return ERR_PTR(ret); 381 } 382 383 /* No need to get group_lock, caller has group reference */ 384 vfio_group_get(group); 385 386 mutex_lock(&group->device_lock); 387 list_add(&device->group_next, &group->device_list); 388 mutex_unlock(&group->device_lock); 389 390 return device; 391} 392 393static void vfio_device_release(struct kref *kref) 394{ 395 struct vfio_device *device = container_of(kref, 396 struct vfio_device, kref); 397 struct vfio_group *group = device->group; 398 399 list_del(&device->group_next); 400 mutex_unlock(&group->device_lock); 401 402 dev_set_drvdata(device->dev, NULL); 403 404 kfree(device); 405 406 /* vfio_del_group_dev may be waiting for this device */ 407 wake_up(&vfio.release_q); 408} 409 410/* Device reference always implies a group reference */ 411static void vfio_device_put(struct vfio_device *device) 412{ 413 struct vfio_group *group = device->group; 414 kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock); 415 vfio_group_put(group); 416} 417 418static void vfio_device_get(struct vfio_device *device) 419{ 420 vfio_group_get(device->group); 421 kref_get(&device->kref); 422} 423 424static struct vfio_device *vfio_group_get_device(struct vfio_group *group, 425 struct device *dev) 426{ 427 struct vfio_device *device; 428 429 mutex_lock(&group->device_lock); 430 list_for_each_entry(device, &group->device_list, group_next) { 431 if (device->dev == dev) { 432 vfio_device_get(device); 433 mutex_unlock(&group->device_lock); 434 return device; 435 } 436 } 437 mutex_unlock(&group->device_lock); 438 return NULL; 439} 440 441/* 442 * Whitelist some drivers that we know are safe (no dma) or just sit on 443 * a device. It's not always practical to leave a device within a group 444 * driverless as it could get re-bound to something unsafe. 445 */ 446static const char * const vfio_driver_whitelist[] = { "pci-stub" }; 447 448static bool vfio_whitelisted_driver(struct device_driver *drv) 449{ 450 int i; 451 452 for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) { 453 if (!strcmp(drv->name, vfio_driver_whitelist[i])) 454 return true; 455 } 456 457 return false; 458} 459 460/* 461 * A vfio group is viable for use by userspace if all devices are either 462 * driver-less or bound to a vfio or whitelisted driver. We test the 463 * latter by the existence of a struct vfio_device matching the dev. 464 */ 465static int vfio_dev_viable(struct device *dev, void *data) 466{ 467 struct vfio_group *group = data; 468 struct vfio_device *device; 469 470 if (!dev->driver || vfio_whitelisted_driver(dev->driver)) 471 return 0; 472 473 device = vfio_group_get_device(group, dev); 474 if (device) { 475 vfio_device_put(device); 476 return 0; 477 } 478 479 return -EINVAL; 480} 481 482/** 483 * Async device support 484 */ 485static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev) 486{ 487 struct vfio_device *device; 488 489 /* Do we already know about it? We shouldn't */ 490 device = vfio_group_get_device(group, dev); 491 if (WARN_ON_ONCE(device)) { 492 vfio_device_put(device); 493 return 0; 494 } 495 496 /* Nothing to do for idle groups */ 497 if (!atomic_read(&group->container_users)) 498 return 0; 499 500 /* TODO Prevent device auto probing */ 501 WARN("Device %s added to live group %d!\n", dev_name(dev), 502 iommu_group_id(group->iommu_group)); 503 504 return 0; 505} 506 507static int vfio_group_nb_del_dev(struct vfio_group *group, struct device *dev) 508{ 509 struct vfio_device *device; 510 511 /* 512 * Expect to fall out here. If a device was in use, it would 513 * have been bound to a vfio sub-driver, which would have blocked 514 * in .remove at vfio_del_group_dev. Sanity check that we no 515 * longer track the device, so it's safe to remove. 516 */ 517 device = vfio_group_get_device(group, dev); 518 if (likely(!device)) 519 return 0; 520 521 WARN("Device %s removed from live group %d!\n", dev_name(dev), 522 iommu_group_id(group->iommu_group)); 523 524 vfio_device_put(device); 525 return 0; 526} 527 528static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev) 529{ 530 /* We don't care what happens when the group isn't in use */ 531 if (!atomic_read(&group->container_users)) 532 return 0; 533 534 return vfio_dev_viable(dev, group); 535} 536 537static int vfio_iommu_group_notifier(struct notifier_block *nb, 538 unsigned long action, void *data) 539{ 540 struct vfio_group *group = container_of(nb, struct vfio_group, nb); 541 struct device *dev = data; 542 543 /* 544 * Need to go through a group_lock lookup to get a reference or 545 * we risk racing a group being removed. Leave a WARN_ON for 546 * debuging, but if the group no longer exists, a spurious notify 547 * is harmless. 548 */ 549 group = vfio_group_try_get(group); 550 if (WARN_ON(!group)) 551 return NOTIFY_OK; 552 553 switch (action) { 554 case IOMMU_GROUP_NOTIFY_ADD_DEVICE: 555 vfio_group_nb_add_dev(group, dev); 556 break; 557 case IOMMU_GROUP_NOTIFY_DEL_DEVICE: 558 vfio_group_nb_del_dev(group, dev); 559 break; 560 case IOMMU_GROUP_NOTIFY_BIND_DRIVER: 561 pr_debug("%s: Device %s, group %d binding to driver\n", 562 __func__, dev_name(dev), 563 iommu_group_id(group->iommu_group)); 564 break; 565 case IOMMU_GROUP_NOTIFY_BOUND_DRIVER: 566 pr_debug("%s: Device %s, group %d bound to driver %s\n", 567 __func__, dev_name(dev), 568 iommu_group_id(group->iommu_group), dev->driver->name); 569 BUG_ON(vfio_group_nb_verify(group, dev)); 570 break; 571 case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER: 572 pr_debug("%s: Device %s, group %d unbinding from driver %s\n", 573 __func__, dev_name(dev), 574 iommu_group_id(group->iommu_group), dev->driver->name); 575 break; 576 case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER: 577 pr_debug("%s: Device %s, group %d unbound from driver\n", 578 __func__, dev_name(dev), 579 iommu_group_id(group->iommu_group)); 580 /* 581 * XXX An unbound device in a live group is ok, but we'd 582 * really like to avoid the above BUG_ON by preventing other 583 * drivers from binding to it. Once that occurs, we have to 584 * stop the system to maintain isolation. At a minimum, we'd 585 * want a toggle to disable driver auto probe for this device. 586 */ 587 break; 588 } 589 590 vfio_group_put(group); 591 return NOTIFY_OK; 592} 593 594/** 595 * VFIO driver API 596 */ 597int vfio_add_group_dev(struct device *dev, 598 const struct vfio_device_ops *ops, void *device_data) 599{ 600 struct iommu_group *iommu_group; 601 struct vfio_group *group; 602 struct vfio_device *device; 603 604 iommu_group = iommu_group_get(dev); 605 if (!iommu_group) 606 return -EINVAL; 607 608 group = vfio_group_get_from_iommu(iommu_group); 609 if (!group) { 610 group = vfio_create_group(iommu_group); 611 if (IS_ERR(group)) { 612 iommu_group_put(iommu_group); 613 return PTR_ERR(group); 614 } 615 } 616 617 device = vfio_group_get_device(group, dev); 618 if (device) { 619 WARN(1, "Device %s already exists on group %d\n", 620 dev_name(dev), iommu_group_id(iommu_group)); 621 vfio_device_put(device); 622 vfio_group_put(group); 623 iommu_group_put(iommu_group); 624 return -EBUSY; 625 } 626 627 device = vfio_group_create_device(group, dev, ops, device_data); 628 if (IS_ERR(device)) { 629 vfio_group_put(group); 630 iommu_group_put(iommu_group); 631 return PTR_ERR(device); 632 } 633 634 /* 635 * Added device holds reference to iommu_group and vfio_device 636 * (which in turn holds reference to vfio_group). Drop extra 637 * group reference used while acquiring device. 638 */ 639 vfio_group_put(group); 640 641 return 0; 642} 643EXPORT_SYMBOL_GPL(vfio_add_group_dev); 644 645/* Test whether a struct device is present in our tracking */ 646static bool vfio_dev_present(struct device *dev) 647{ 648 struct iommu_group *iommu_group; 649 struct vfio_group *group; 650 struct vfio_device *device; 651 652 iommu_group = iommu_group_get(dev); 653 if (!iommu_group) 654 return false; 655 656 group = vfio_group_get_from_iommu(iommu_group); 657 if (!group) { 658 iommu_group_put(iommu_group); 659 return false; 660 } 661 662 device = vfio_group_get_device(group, dev); 663 if (!device) { 664 vfio_group_put(group); 665 iommu_group_put(iommu_group); 666 return false; 667 } 668 669 vfio_device_put(device); 670 vfio_group_put(group); 671 iommu_group_put(iommu_group); 672 return true; 673} 674 675/* 676 * Decrement the device reference count and wait for the device to be 677 * removed. Open file descriptors for the device... */ 678void *vfio_del_group_dev(struct device *dev) 679{ 680 struct vfio_device *device = dev_get_drvdata(dev); 681 struct vfio_group *group = device->group; 682 struct iommu_group *iommu_group = group->iommu_group; 683 void *device_data = device->device_data; 684 685 vfio_device_put(device); 686 687 /* TODO send a signal to encourage this to be released */ 688 wait_event(vfio.release_q, !vfio_dev_present(dev)); 689 690 iommu_group_put(iommu_group); 691 692 return device_data; 693} 694EXPORT_SYMBOL_GPL(vfio_del_group_dev); 695 696/** 697 * VFIO base fd, /dev/vfio/vfio 698 */ 699static long vfio_ioctl_check_extension(struct vfio_container *container, 700 unsigned long arg) 701{ 702 struct vfio_iommu_driver *driver = container->iommu_driver; 703 long ret = 0; 704 705 switch (arg) { 706 /* No base extensions yet */ 707 default: 708 /* 709 * If no driver is set, poll all registered drivers for 710 * extensions and return the first positive result. If 711 * a driver is already set, further queries will be passed 712 * only to that driver. 713 */ 714 if (!driver) { 715 mutex_lock(&vfio.iommu_drivers_lock); 716 list_for_each_entry(driver, &vfio.iommu_drivers_list, 717 vfio_next) { 718 if (!try_module_get(driver->ops->owner)) 719 continue; 720 721 ret = driver->ops->ioctl(NULL, 722 VFIO_CHECK_EXTENSION, 723 arg); 724 module_put(driver->ops->owner); 725 if (ret > 0) 726 break; 727 } 728 mutex_unlock(&vfio.iommu_drivers_lock); 729 } else 730 ret = driver->ops->ioctl(container->iommu_data, 731 VFIO_CHECK_EXTENSION, arg); 732 } 733 734 return ret; 735} 736 737/* hold container->group_lock */ 738static int __vfio_container_attach_groups(struct vfio_container *container, 739 struct vfio_iommu_driver *driver, 740 void *data) 741{ 742 struct vfio_group *group; 743 int ret = -ENODEV; 744 745 list_for_each_entry(group, &container->group_list, container_next) { 746 ret = driver->ops->attach_group(data, group->iommu_group); 747 if (ret) 748 goto unwind; 749 } 750 751 return ret; 752 753unwind: 754 list_for_each_entry_continue_reverse(group, &container->group_list, 755 container_next) { 756 driver->ops->detach_group(data, group->iommu_group); 757 } 758 759 return ret; 760} 761 762static long vfio_ioctl_set_iommu(struct vfio_container *container, 763 unsigned long arg) 764{ 765 struct vfio_iommu_driver *driver; 766 long ret = -ENODEV; 767 768 mutex_lock(&container->group_lock); 769 770 /* 771 * The container is designed to be an unprivileged interface while 772 * the group can be assigned to specific users. Therefore, only by 773 * adding a group to a container does the user get the privilege of 774 * enabling the iommu, which may allocate finite resources. There 775 * is no unset_iommu, but by removing all the groups from a container, 776 * the container is deprivileged and returns to an unset state. 777 */ 778 if (list_empty(&container->group_list) || container->iommu_driver) { 779 mutex_unlock(&container->group_lock); 780 return -EINVAL; 781 } 782 783 mutex_lock(&vfio.iommu_drivers_lock); 784 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 785 void *data; 786 787 if (!try_module_get(driver->ops->owner)) 788 continue; 789 790 /* 791 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION, 792 * so test which iommu driver reported support for this 793 * extension and call open on them. We also pass them the 794 * magic, allowing a single driver to support multiple 795 * interfaces if they'd like. 796 */ 797 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) { 798 module_put(driver->ops->owner); 799 continue; 800 } 801 802 /* module reference holds the driver we're working on */ 803 mutex_unlock(&vfio.iommu_drivers_lock); 804 805 data = driver->ops->open(arg); 806 if (IS_ERR(data)) { 807 ret = PTR_ERR(data); 808 module_put(driver->ops->owner); 809 goto skip_drivers_unlock; 810 } 811 812 ret = __vfio_container_attach_groups(container, driver, data); 813 if (!ret) { 814 container->iommu_driver = driver; 815 container->iommu_data = data; 816 } else { 817 driver->ops->release(data); 818 module_put(driver->ops->owner); 819 } 820 821 goto skip_drivers_unlock; 822 } 823 824 mutex_unlock(&vfio.iommu_drivers_lock); 825skip_drivers_unlock: 826 mutex_unlock(&container->group_lock); 827 828 return ret; 829} 830 831static long vfio_fops_unl_ioctl(struct file *filep, 832 unsigned int cmd, unsigned long arg) 833{ 834 struct vfio_container *container = filep->private_data; 835 struct vfio_iommu_driver *driver; 836 void *data; 837 long ret = -EINVAL; 838 839 if (!container) 840 return ret; 841 842 driver = container->iommu_driver; 843 data = container->iommu_data; 844 845 switch (cmd) { 846 case VFIO_GET_API_VERSION: 847 ret = VFIO_API_VERSION; 848 break; 849 case VFIO_CHECK_EXTENSION: 850 ret = vfio_ioctl_check_extension(container, arg); 851 break; 852 case VFIO_SET_IOMMU: 853 ret = vfio_ioctl_set_iommu(container, arg); 854 break; 855 default: 856 if (driver) /* passthrough all unrecognized ioctls */ 857 ret = driver->ops->ioctl(data, cmd, arg); 858 } 859 860 return ret; 861} 862 863#ifdef CONFIG_COMPAT 864static long vfio_fops_compat_ioctl(struct file *filep, 865 unsigned int cmd, unsigned long arg) 866{ 867 arg = (unsigned long)compat_ptr(arg); 868 return vfio_fops_unl_ioctl(filep, cmd, arg); 869} 870#endif /* CONFIG_COMPAT */ 871 872static int vfio_fops_open(struct inode *inode, struct file *filep) 873{ 874 struct vfio_container *container; 875 876 container = kzalloc(sizeof(*container), GFP_KERNEL); 877 if (!container) 878 return -ENOMEM; 879 880 INIT_LIST_HEAD(&container->group_list); 881 mutex_init(&container->group_lock); 882 kref_init(&container->kref); 883 884 filep->private_data = container; 885 886 return 0; 887} 888 889static int vfio_fops_release(struct inode *inode, struct file *filep) 890{ 891 struct vfio_container *container = filep->private_data; 892 893 filep->private_data = NULL; 894 895 vfio_container_put(container); 896 897 return 0; 898} 899 900/* 901 * Once an iommu driver is set, we optionally pass read/write/mmap 902 * on to the driver, allowing management interfaces beyond ioctl. 903 */ 904static ssize_t vfio_fops_read(struct file *filep, char __user *buf, 905 size_t count, loff_t *ppos) 906{ 907 struct vfio_container *container = filep->private_data; 908 struct vfio_iommu_driver *driver = container->iommu_driver; 909 910 if (unlikely(!driver || !driver->ops->read)) 911 return -EINVAL; 912 913 return driver->ops->read(container->iommu_data, buf, count, ppos); 914} 915 916static ssize_t vfio_fops_write(struct file *filep, const char __user *buf, 917 size_t count, loff_t *ppos) 918{ 919 struct vfio_container *container = filep->private_data; 920 struct vfio_iommu_driver *driver = container->iommu_driver; 921 922 if (unlikely(!driver || !driver->ops->write)) 923 return -EINVAL; 924 925 return driver->ops->write(container->iommu_data, buf, count, ppos); 926} 927 928static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma) 929{ 930 struct vfio_container *container = filep->private_data; 931 struct vfio_iommu_driver *driver = container->iommu_driver; 932 933 if (unlikely(!driver || !driver->ops->mmap)) 934 return -EINVAL; 935 936 return driver->ops->mmap(container->iommu_data, vma); 937} 938 939static const struct file_operations vfio_fops = { 940 .owner = THIS_MODULE, 941 .open = vfio_fops_open, 942 .release = vfio_fops_release, 943 .read = vfio_fops_read, 944 .write = vfio_fops_write, 945 .unlocked_ioctl = vfio_fops_unl_ioctl, 946#ifdef CONFIG_COMPAT 947 .compat_ioctl = vfio_fops_compat_ioctl, 948#endif 949 .mmap = vfio_fops_mmap, 950}; 951 952/** 953 * VFIO Group fd, /dev/vfio/$GROUP 954 */ 955static void __vfio_group_unset_container(struct vfio_group *group) 956{ 957 struct vfio_container *container = group->container; 958 struct vfio_iommu_driver *driver; 959 960 mutex_lock(&container->group_lock); 961 962 driver = container->iommu_driver; 963 if (driver) 964 driver->ops->detach_group(container->iommu_data, 965 group->iommu_group); 966 967 group->container = NULL; 968 list_del(&group->container_next); 969 970 /* Detaching the last group deprivileges a container, remove iommu */ 971 if (driver && list_empty(&container->group_list)) { 972 driver->ops->release(container->iommu_data); 973 module_put(driver->ops->owner); 974 container->iommu_driver = NULL; 975 container->iommu_data = NULL; 976 } 977 978 mutex_unlock(&container->group_lock); 979 980 vfio_container_put(container); 981} 982 983/* 984 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or 985 * if there was no container to unset. Since the ioctl is called on 986 * the group, we know that still exists, therefore the only valid 987 * transition here is 1->0. 988 */ 989static int vfio_group_unset_container(struct vfio_group *group) 990{ 991 int users = atomic_cmpxchg(&group->container_users, 1, 0); 992 993 if (!users) 994 return -EINVAL; 995 if (users != 1) 996 return -EBUSY; 997 998 __vfio_group_unset_container(group); 999 1000 return 0; 1001} 1002 1003/* 1004 * When removing container users, anything that removes the last user 1005 * implicitly removes the group from the container. That is, if the 1006 * group file descriptor is closed, as well as any device file descriptors, 1007 * the group is free. 1008 */ 1009static void vfio_group_try_dissolve_container(struct vfio_group *group) 1010{ 1011 if (0 == atomic_dec_if_positive(&group->container_users)) 1012 __vfio_group_unset_container(group); 1013} 1014 1015static int vfio_group_set_container(struct vfio_group *group, int container_fd) 1016{ 1017 struct file *filep; 1018 struct vfio_container *container; 1019 struct vfio_iommu_driver *driver; 1020 int ret = 0; 1021 1022 if (atomic_read(&group->container_users)) 1023 return -EINVAL; 1024 1025 filep = fget(container_fd); 1026 if (!filep) 1027 return -EBADF; 1028 1029 /* Sanity check, is this really our fd? */ 1030 if (filep->f_op != &vfio_fops) { 1031 fput(filep); 1032 return -EINVAL; 1033 } 1034 1035 container = filep->private_data; 1036 WARN_ON(!container); /* fget ensures we don't race vfio_release */ 1037 1038 mutex_lock(&container->group_lock); 1039 1040 driver = container->iommu_driver; 1041 if (driver) { 1042 ret = driver->ops->attach_group(container->iommu_data, 1043 group->iommu_group); 1044 if (ret) 1045 goto unlock_out; 1046 } 1047 1048 group->container = container; 1049 list_add(&group->container_next, &container->group_list); 1050 1051 /* Get a reference on the container and mark a user within the group */ 1052 vfio_container_get(container); 1053 atomic_inc(&group->container_users); 1054 1055unlock_out: 1056 mutex_unlock(&container->group_lock); 1057 fput(filep); 1058 1059 return ret; 1060} 1061 1062static bool vfio_group_viable(struct vfio_group *group) 1063{ 1064 return (iommu_group_for_each_dev(group->iommu_group, 1065 group, vfio_dev_viable) == 0); 1066} 1067 1068static const struct file_operations vfio_device_fops; 1069 1070static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) 1071{ 1072 struct vfio_device *device; 1073 struct file *filep; 1074 int ret = -ENODEV; 1075 1076 if (0 == atomic_read(&group->container_users) || 1077 !group->container->iommu_driver || !vfio_group_viable(group)) 1078 return -EINVAL; 1079 1080 mutex_lock(&group->device_lock); 1081 list_for_each_entry(device, &group->device_list, group_next) { 1082 if (strcmp(dev_name(device->dev), buf)) 1083 continue; 1084 1085 ret = device->ops->open(device->device_data); 1086 if (ret) 1087 break; 1088 /* 1089 * We can't use anon_inode_getfd() because we need to modify 1090 * the f_mode flags directly to allow more than just ioctls 1091 */ 1092 ret = get_unused_fd(); 1093 if (ret < 0) { 1094 device->ops->release(device->device_data); 1095 break; 1096 } 1097 1098 filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, 1099 device, O_RDWR); 1100 if (IS_ERR(filep)) { 1101 put_unused_fd(ret); 1102 ret = PTR_ERR(filep); 1103 device->ops->release(device->device_data); 1104 break; 1105 } 1106 1107 /* 1108 * TODO: add an anon_inode interface to do this. 1109 * Appears to be missing by lack of need rather than 1110 * explicitly prevented. Now there's need. 1111 */ 1112 filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1113 1114 vfio_device_get(device); 1115 atomic_inc(&group->container_users); 1116 1117 fd_install(ret, filep); 1118 break; 1119 } 1120 mutex_unlock(&group->device_lock); 1121 1122 return ret; 1123} 1124 1125static long vfio_group_fops_unl_ioctl(struct file *filep, 1126 unsigned int cmd, unsigned long arg) 1127{ 1128 struct vfio_group *group = filep->private_data; 1129 long ret = -ENOTTY; 1130 1131 switch (cmd) { 1132 case VFIO_GROUP_GET_STATUS: 1133 { 1134 struct vfio_group_status status; 1135 unsigned long minsz; 1136 1137 minsz = offsetofend(struct vfio_group_status, flags); 1138 1139 if (copy_from_user(&status, (void __user *)arg, minsz)) 1140 return -EFAULT; 1141 1142 if (status.argsz < minsz) 1143 return -EINVAL; 1144 1145 status.flags = 0; 1146 1147 if (vfio_group_viable(group)) 1148 status.flags |= VFIO_GROUP_FLAGS_VIABLE; 1149 1150 if (group->container) 1151 status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET; 1152 1153 if (copy_to_user((void __user *)arg, &status, minsz)) 1154 return -EFAULT; 1155 1156 ret = 0; 1157 break; 1158 } 1159 case VFIO_GROUP_SET_CONTAINER: 1160 { 1161 int fd; 1162 1163 if (get_user(fd, (int __user *)arg)) 1164 return -EFAULT; 1165 1166 if (fd < 0) 1167 return -EINVAL; 1168 1169 ret = vfio_group_set_container(group, fd); 1170 break; 1171 } 1172 case VFIO_GROUP_UNSET_CONTAINER: 1173 ret = vfio_group_unset_container(group); 1174 break; 1175 case VFIO_GROUP_GET_DEVICE_FD: 1176 { 1177 char *buf; 1178 1179 buf = strndup_user((const char __user *)arg, PAGE_SIZE); 1180 if (IS_ERR(buf)) 1181 return PTR_ERR(buf); 1182 1183 ret = vfio_group_get_device_fd(group, buf); 1184 kfree(buf); 1185 break; 1186 } 1187 } 1188 1189 return ret; 1190} 1191 1192#ifdef CONFIG_COMPAT 1193static long vfio_group_fops_compat_ioctl(struct file *filep, 1194 unsigned int cmd, unsigned long arg) 1195{ 1196 arg = (unsigned long)compat_ptr(arg); 1197 return vfio_group_fops_unl_ioctl(filep, cmd, arg); 1198} 1199#endif /* CONFIG_COMPAT */ 1200 1201static int vfio_group_fops_open(struct inode *inode, struct file *filep) 1202{ 1203 struct vfio_group *group; 1204 1205 group = vfio_group_get_from_minor(iminor(inode)); 1206 if (!group) 1207 return -ENODEV; 1208 1209 if (group->container) { 1210 vfio_group_put(group); 1211 return -EBUSY; 1212 } 1213 1214 filep->private_data = group; 1215 1216 return 0; 1217} 1218 1219static int vfio_group_fops_release(struct inode *inode, struct file *filep) 1220{ 1221 struct vfio_group *group = filep->private_data; 1222 1223 filep->private_data = NULL; 1224 1225 vfio_group_try_dissolve_container(group); 1226 1227 vfio_group_put(group); 1228 1229 return 0; 1230} 1231 1232static const struct file_operations vfio_group_fops = { 1233 .owner = THIS_MODULE, 1234 .unlocked_ioctl = vfio_group_fops_unl_ioctl, 1235#ifdef CONFIG_COMPAT 1236 .compat_ioctl = vfio_group_fops_compat_ioctl, 1237#endif 1238 .open = vfio_group_fops_open, 1239 .release = vfio_group_fops_release, 1240}; 1241 1242/** 1243 * VFIO Device fd 1244 */ 1245static int vfio_device_fops_release(struct inode *inode, struct file *filep) 1246{ 1247 struct vfio_device *device = filep->private_data; 1248 1249 device->ops->release(device->device_data); 1250 1251 vfio_group_try_dissolve_container(device->group); 1252 1253 vfio_device_put(device); 1254 1255 return 0; 1256} 1257 1258static long vfio_device_fops_unl_ioctl(struct file *filep, 1259 unsigned int cmd, unsigned long arg) 1260{ 1261 struct vfio_device *device = filep->private_data; 1262 1263 if (unlikely(!device->ops->ioctl)) 1264 return -EINVAL; 1265 1266 return device->ops->ioctl(device->device_data, cmd, arg); 1267} 1268 1269static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, 1270 size_t count, loff_t *ppos) 1271{ 1272 struct vfio_device *device = filep->private_data; 1273 1274 if (unlikely(!device->ops->read)) 1275 return -EINVAL; 1276 1277 return device->ops->read(device->device_data, buf, count, ppos); 1278} 1279 1280static ssize_t vfio_device_fops_write(struct file *filep, 1281 const char __user *buf, 1282 size_t count, loff_t *ppos) 1283{ 1284 struct vfio_device *device = filep->private_data; 1285 1286 if (unlikely(!device->ops->write)) 1287 return -EINVAL; 1288 1289 return device->ops->write(device->device_data, buf, count, ppos); 1290} 1291 1292static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) 1293{ 1294 struct vfio_device *device = filep->private_data; 1295 1296 if (unlikely(!device->ops->mmap)) 1297 return -EINVAL; 1298 1299 return device->ops->mmap(device->device_data, vma); 1300} 1301 1302#ifdef CONFIG_COMPAT 1303static long vfio_device_fops_compat_ioctl(struct file *filep, 1304 unsigned int cmd, unsigned long arg) 1305{ 1306 arg = (unsigned long)compat_ptr(arg); 1307 return vfio_device_fops_unl_ioctl(filep, cmd, arg); 1308} 1309#endif /* CONFIG_COMPAT */ 1310 1311static const struct file_operations vfio_device_fops = { 1312 .owner = THIS_MODULE, 1313 .release = vfio_device_fops_release, 1314 .read = vfio_device_fops_read, 1315 .write = vfio_device_fops_write, 1316 .unlocked_ioctl = vfio_device_fops_unl_ioctl, 1317#ifdef CONFIG_COMPAT 1318 .compat_ioctl = vfio_device_fops_compat_ioctl, 1319#endif 1320 .mmap = vfio_device_fops_mmap, 1321}; 1322 1323/** 1324 * Module/class support 1325 */ 1326static char *vfio_devnode(struct device *dev, umode_t *mode) 1327{ 1328 return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); 1329} 1330 1331static int __init vfio_init(void) 1332{ 1333 int ret; 1334 1335 idr_init(&vfio.group_idr); 1336 mutex_init(&vfio.group_lock); 1337 mutex_init(&vfio.iommu_drivers_lock); 1338 INIT_LIST_HEAD(&vfio.group_list); 1339 INIT_LIST_HEAD(&vfio.iommu_drivers_list); 1340 init_waitqueue_head(&vfio.release_q); 1341 1342 vfio.class = class_create(THIS_MODULE, "vfio"); 1343 if (IS_ERR(vfio.class)) { 1344 ret = PTR_ERR(vfio.class); 1345 goto err_class; 1346 } 1347 1348 vfio.class->devnode = vfio_devnode; 1349 1350 ret = alloc_chrdev_region(&vfio.devt, 0, MINORMASK, "vfio"); 1351 if (ret) 1352 goto err_base_chrdev; 1353 1354 cdev_init(&vfio.cdev, &vfio_fops); 1355 ret = cdev_add(&vfio.cdev, vfio.devt, 1); 1356 if (ret) 1357 goto err_base_cdev; 1358 1359 vfio.dev = device_create(vfio.class, NULL, vfio.devt, NULL, "vfio"); 1360 if (IS_ERR(vfio.dev)) { 1361 ret = PTR_ERR(vfio.dev); 1362 goto err_base_dev; 1363 } 1364 1365 /* /dev/vfio/$GROUP */ 1366 cdev_init(&vfio.group_cdev, &vfio_group_fops); 1367 ret = cdev_add(&vfio.group_cdev, 1368 MKDEV(MAJOR(vfio.devt), 1), MINORMASK - 1); 1369 if (ret) 1370 goto err_groups_cdev; 1371 1372 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 1373 1374 /* 1375 * Attempt to load known iommu-drivers. This gives us a working 1376 * environment without the user needing to explicitly load iommu 1377 * drivers. 1378 */ 1379 request_module_nowait("vfio_iommu_type1"); 1380 1381 return 0; 1382 1383err_groups_cdev: 1384 device_destroy(vfio.class, vfio.devt); 1385err_base_dev: 1386 cdev_del(&vfio.cdev); 1387err_base_cdev: 1388 unregister_chrdev_region(vfio.devt, MINORMASK); 1389err_base_chrdev: 1390 class_destroy(vfio.class); 1391 vfio.class = NULL; 1392err_class: 1393 return ret; 1394} 1395 1396static void __exit vfio_cleanup(void) 1397{ 1398 WARN_ON(!list_empty(&vfio.group_list)); 1399 1400 idr_destroy(&vfio.group_idr); 1401 cdev_del(&vfio.group_cdev); 1402 device_destroy(vfio.class, vfio.devt); 1403 cdev_del(&vfio.cdev); 1404 unregister_chrdev_region(vfio.devt, MINORMASK); 1405 class_destroy(vfio.class); 1406 vfio.class = NULL; 1407} 1408 1409module_init(vfio_init); 1410module_exit(vfio_cleanup); 1411 1412MODULE_VERSION(DRIVER_VERSION); 1413MODULE_LICENSE("GPL v2"); 1414MODULE_AUTHOR(DRIVER_AUTHOR); 1415MODULE_DESCRIPTION(DRIVER_DESC);