Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v3.7-rc4 1414 lines 35 kB view raw
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

/*
 * Singleton driver state: the char devices backing /dev/vfio/vfio and
 * /dev/vfio/$GROUP, the list of registered iommu backend drivers, and
 * the list/idr of all vfio groups.
 */
static struct vfio {
	struct class *class;
	struct list_head iommu_drivers_list;	/* registered iommu backends */
	struct mutex iommu_drivers_lock;	/* protects iommu_drivers_list */
	struct list_head group_list;		/* all vfio_group objects */
	struct idr group_idr;			/* group minor -> vfio_group */
	struct mutex group_lock;		/* protects group_list + group_idr */
	struct cdev group_cdev;			/* /dev/vfio/$GROUP */
	struct device *dev;
	dev_t devt;
	struct cdev cdev;			/* /dev/vfio/vfio (minor 0) */
	wait_queue_head_t release_q;		/* woken when a vfio_device dies */
} vfio;

/* One entry per registered iommu backend (list node for iommu_drivers_list) */
struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops *ops;
	struct list_head vfio_next;
};

/*
 * A container is created per open of /dev/vfio/vfio; groups attach to it
 * and a single iommu driver may be set on it.  Freed via kref since
 * container/group/device fds may be closed in any order.
 */
struct vfio_container {
	struct kref kref;
	struct list_head group_list;		/* attached vfio_groups */
	struct mutex group_lock;		/* protects group_list + iommu state */
	struct vfio_iommu_driver *iommu_driver;
	void *iommu_data;			/* opaque state from driver->ops->open() */
};

/* One per iommu_group known to vfio; exposed as /dev/vfio/$GROUP */
struct vfio_group {
	struct kref kref;
	int minor;				/* char dev minor, from group_idr */
	atomic_t container_users;		/* group fd + open device fds */
	struct iommu_group *iommu_group;
	struct vfio_container *container;	/* non-NULL while attached */
	struct list_head device_list;		/* vfio_devices in this group */
	struct mutex device_lock;		/* protects device_list */
	struct device *dev;			/* the /dev/vfio/$GROUP device */
	struct notifier_block nb;		/* iommu group change notifications */
	struct list_head vfio_next;		/* node in vfio.group_list */
	struct list_head container_next;	/* node in container->group_list */
};

/* One per device handed to vfio by a bus driver via vfio_add_group_dev() */
struct vfio_device {
	struct kref kref;
	struct device *dev;
	const struct vfio_device_ops *ops;	/* bus driver callbacks */
	struct vfio_group *group;		/* device holds a group reference */
	struct list_head group_next;		/* node in group->device_list */
	void *device_data;			/* opaque bus driver state */
};

/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);

/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	int ret, minor;

again:
	/* pre-3.9 idr API: preload, then allocate, retrying on -EAGAIN */
	if (unlikely(idr_pre_get(&vfio.group_idr, GFP_KERNEL) == 0))
		return -ENOMEM;

	/* index 0 is used by /dev/vfio/vfio */
	ret = idr_get_new_above(&vfio.group_idr, group, 1, &minor);
	if (ret == -EAGAIN)
		goto again;
	if (ret || minor > MINORMASK) {
		if (minor > MINORMASK)
idr_remove(&vfio.group_idr, minor); 155 return -ENOSPC; 156 } 157 158 return minor; 159} 160 161static void vfio_free_group_minor(int minor) 162{ 163 idr_remove(&vfio.group_idr, minor); 164} 165 166static int vfio_iommu_group_notifier(struct notifier_block *nb, 167 unsigned long action, void *data); 168static void vfio_group_get(struct vfio_group *group); 169 170/** 171 * Container objects - containers are created when /dev/vfio/vfio is 172 * opened, but their lifecycle extends until the last user is done, so 173 * it's freed via kref. Must support container/group/device being 174 * closed in any order. 175 */ 176static void vfio_container_get(struct vfio_container *container) 177{ 178 kref_get(&container->kref); 179} 180 181static void vfio_container_release(struct kref *kref) 182{ 183 struct vfio_container *container; 184 container = container_of(kref, struct vfio_container, kref); 185 186 kfree(container); 187} 188 189static void vfio_container_put(struct vfio_container *container) 190{ 191 kref_put(&container->kref, vfio_container_release); 192} 193 194/** 195 * Group objects - create, release, get, put, search 196 */ 197static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) 198{ 199 struct vfio_group *group, *tmp; 200 struct device *dev; 201 int ret, minor; 202 203 group = kzalloc(sizeof(*group), GFP_KERNEL); 204 if (!group) 205 return ERR_PTR(-ENOMEM); 206 207 kref_init(&group->kref); 208 INIT_LIST_HEAD(&group->device_list); 209 mutex_init(&group->device_lock); 210 atomic_set(&group->container_users, 0); 211 group->iommu_group = iommu_group; 212 213 group->nb.notifier_call = vfio_iommu_group_notifier; 214 215 /* 216 * blocking notifiers acquire a rwsem around registering and hold 217 * it around callback. Therefore, need to register outside of 218 * vfio.group_lock to avoid A-B/B-A contention. Our callback won't 219 * do anything unless it can find the group in vfio.group_list, so 220 * no harm in registering early. 
221 */ 222 ret = iommu_group_register_notifier(iommu_group, &group->nb); 223 if (ret) { 224 kfree(group); 225 return ERR_PTR(ret); 226 } 227 228 mutex_lock(&vfio.group_lock); 229 230 minor = vfio_alloc_group_minor(group); 231 if (minor < 0) { 232 mutex_unlock(&vfio.group_lock); 233 kfree(group); 234 return ERR_PTR(minor); 235 } 236 237 /* Did we race creating this group? */ 238 list_for_each_entry(tmp, &vfio.group_list, vfio_next) { 239 if (tmp->iommu_group == iommu_group) { 240 vfio_group_get(tmp); 241 vfio_free_group_minor(minor); 242 mutex_unlock(&vfio.group_lock); 243 kfree(group); 244 return tmp; 245 } 246 } 247 248 dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.devt), minor), 249 group, "%d", iommu_group_id(iommu_group)); 250 if (IS_ERR(dev)) { 251 vfio_free_group_minor(minor); 252 mutex_unlock(&vfio.group_lock); 253 kfree(group); 254 return (struct vfio_group *)dev; /* ERR_PTR */ 255 } 256 257 group->minor = minor; 258 group->dev = dev; 259 260 list_add(&group->vfio_next, &vfio.group_list); 261 262 mutex_unlock(&vfio.group_lock); 263 264 return group; 265} 266 267/* called with vfio.group_lock held */ 268static void vfio_group_release(struct kref *kref) 269{ 270 struct vfio_group *group = container_of(kref, struct vfio_group, kref); 271 272 WARN_ON(!list_empty(&group->device_list)); 273 274 device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor)); 275 list_del(&group->vfio_next); 276 vfio_free_group_minor(group->minor); 277 278 mutex_unlock(&vfio.group_lock); 279 280 /* 281 * Unregister outside of lock. A spurious callback is harmless now 282 * that the group is no longer in vfio.group_list. 
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);

	kfree(group);
}

/* Drop a group reference; last put runs vfio_group_release() with group_lock */
static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}

/*
 * Not really a try as we will sleep for mutex, but we need to make
 * sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

/* Look up (and reference) the vfio_group wrapping an iommu_group, if any */
static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

/* Look up (and reference) a group by its char dev minor, used at open() */
static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;
	int ret;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;

	/* stash the vfio_device in drvdata so vfio_del_group_dev can find it */
	ret = dev_set_drvdata(dev, device);
	if (ret) {
		kfree(device);
		return ERR_PTR(ret);
	}

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

/* Called via kref_put_mutex with group->device_lock held; drops the lock */
static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
static void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

/* Find (and reference) the vfio_device in @group backing @dev, if tracked */
static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Whitelist some drivers that we
know are safe (no dma) or just sit on 443 * a device. It's not always practical to leave a device within a group 444 * driverless as it could get re-bound to something unsafe. 445 */ 446static const char * const vfio_driver_whitelist[] = { "pci-stub" }; 447 448static bool vfio_whitelisted_driver(struct device_driver *drv) 449{ 450 int i; 451 452 for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) { 453 if (!strcmp(drv->name, vfio_driver_whitelist[i])) 454 return true; 455 } 456 457 return false; 458} 459 460/* 461 * A vfio group is viable for use by userspace if all devices are either 462 * driver-less or bound to a vfio or whitelisted driver. We test the 463 * latter by the existence of a struct vfio_device matching the dev. 464 */ 465static int vfio_dev_viable(struct device *dev, void *data) 466{ 467 struct vfio_group *group = data; 468 struct vfio_device *device; 469 470 if (!dev->driver || vfio_whitelisted_driver(dev->driver)) 471 return 0; 472 473 device = vfio_group_get_device(group, dev); 474 if (device) { 475 vfio_device_put(device); 476 return 0; 477 } 478 479 return -EINVAL; 480} 481 482/** 483 * Async device support 484 */ 485static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev) 486{ 487 struct vfio_device *device; 488 489 /* Do we already know about it? We shouldn't */ 490 device = vfio_group_get_device(group, dev); 491 if (WARN_ON_ONCE(device)) { 492 vfio_device_put(device); 493 return 0; 494 } 495 496 /* Nothing to do for idle groups */ 497 if (!atomic_read(&group->container_users)) 498 return 0; 499 500 /* TODO Prevent device auto probing */ 501 WARN("Device %s added to live group %d!\n", dev_name(dev), 502 iommu_group_id(group->iommu_group)); 503 504 return 0; 505} 506 507static int vfio_group_nb_del_dev(struct vfio_group *group, struct device *dev) 508{ 509 struct vfio_device *device; 510 511 /* 512 * Expect to fall out here. 
If a device was in use, it would 513 * have been bound to a vfio sub-driver, which would have blocked 514 * in .remove at vfio_del_group_dev. Sanity check that we no 515 * longer track the device, so it's safe to remove. 516 */ 517 device = vfio_group_get_device(group, dev); 518 if (likely(!device)) 519 return 0; 520 521 WARN("Device %s removed from live group %d!\n", dev_name(dev), 522 iommu_group_id(group->iommu_group)); 523 524 vfio_device_put(device); 525 return 0; 526} 527 528static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev) 529{ 530 /* We don't care what happens when the group isn't in use */ 531 if (!atomic_read(&group->container_users)) 532 return 0; 533 534 return vfio_dev_viable(dev, group); 535} 536 537static int vfio_iommu_group_notifier(struct notifier_block *nb, 538 unsigned long action, void *data) 539{ 540 struct vfio_group *group = container_of(nb, struct vfio_group, nb); 541 struct device *dev = data; 542 543 /* 544 * Need to go through a group_lock lookup to get a reference or 545 * we risk racing a group being removed. Leave a WARN_ON for 546 * debuging, but if the group no longer exists, a spurious notify 547 * is harmless. 
	 */
	group = vfio_group_try_get(group);
	if (WARN_ON(!group))
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		vfio_group_nb_del_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		/* a non-vfio, non-whitelisted driver in a live group breaks
		 * isolation; halting is the only way to maintain it */
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */
		break;
	}

	vfio_group_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
/*
 * Register @dev with vfio: find or create the vfio_group wrapping its
 * iommu_group and create a vfio_device tracked within it.  Called by
 * vfio bus drivers (e.g. vfio-pci) from their probe path.
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		iommu_group_put(iommu_group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		iommu_group_put(iommu_group);
		return PTR_ERR(device);
	}

	/*
	 * Added device holds reference to iommu_group and vfio_device
	 * (which in turn holds reference to vfio_group).  Drop extra
	 * group reference used while acquiring device.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);

/* Test whether a struct device is present in our tracking */
static bool vfio_dev_present(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return false;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		iommu_group_put(iommu_group);
		return false;
	}

	device = vfio_group_get_device(group, dev);
	if (!device) {
		vfio_group_put(group);
		iommu_group_put(iommu_group);
		return false;
	}

	vfio_device_put(device);
	vfio_group_put(group);
	iommu_group_put(iommu_group);
	return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
/* Returns the device_data that was passed to vfio_add_group_dev() so the
 * bus driver can free it.  Blocks until all users have released the device. */
void *vfio_del_group_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	struct iommu_group *iommu_group = group->iommu_group;
	void *device_data = device->device_data;

	vfio_device_put(device);

	/* TODO send a signal to encourage this to be released */
	wait_event(vfio.release_q, !vfio_dev_present(dev));

	iommu_group_put(iommu_group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver = container->iommu_driver;
	long ret = 0;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	return ret;
}

/* hold container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* detach everything attached so far, in reverse order */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	mutex_lock(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		mutex_unlock(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		/* module reference holds the driver we're working on */
		mutex_unlock(&vfio.iommu_drivers_lock);

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			goto skip_drivers_unlock;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (!ret) {
			container->iommu_driver = driver;
			container->iommu_data = data;
		} else {
			driver->ops->release(data);
			module_put(driver->ops->owner);
		}

		/* drivers_lock already dropped above; skip its unlock */
		goto skip_drivers_unlock;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
skip_drivers_unlock:
	mutex_unlock(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	driver = container->iommu_driver;
	data = container->iommu_data;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

/* Each open of /dev/vfio/vfio creates a fresh, empty container */
static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	mutex_init(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	/* groups/devices may still hold references; kref handles the rest */
	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->read))
		return -EINVAL;

	return driver->ops->read(container->iommu_data, buf, count, ppos);
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->write))
		return -EINVAL;

	return driver->ops->write(container->iommu_data, buf, count, ppos);
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->mmap))
		return -EINVAL;

	return driver->ops->mmap(container->iommu_data, vma);
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_fops_compat_ioctl,
#endif
	.mmap		= vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
/* Detach @group from its container; caller holds the last container user */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	mutex_lock(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	mutex_unlock(&container->group_lock);

	/* drop the reference taken in vfio_group_set_container() */
	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

/* VFIO_GROUP_SET_CONTAINER: attach this group to the container fd */
static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd?
	 */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	mutex_lock(&container->group_lock);

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	mutex_unlock(&container->group_lock);
	fdput(f);
	return ret;
}

/* True if every device in the group passes vfio_dev_viable() */
static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static const struct file_operations vfio_device_fops;

/*
 * VFIO_GROUP_GET_DEVICE_FD: look up the device named @buf in the group,
 * open it via the bus driver and return a new anon-inode fd for it.
 */
static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret = -ENODEV;

	/* device fds are only handed out from a live, viable group */
	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (strcmp(dev_name(device->dev), buf))
			continue;

		ret = device->ops->open(device->device_data);
		if (ret)
			break;
		/*
		 * We can't use anon_inode_getfd() because we need to modify
		 * the f_mode flags directly to allow more than just ioctls
		 */
		ret = get_unused_fd();
		if (ret < 0) {
			device->ops->release(device->device_data);
			break;
		}

		filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
					   device, O_RDWR);
		if (IS_ERR(filep)) {
			put_unused_fd(ret);
			ret = PTR_ERR(filep);
			device->ops->release(device->device_data);
			break;
		}

		/*
		 * TODO: add an anon_inode interface to do this.
		 * Appears to be missing by lack of need rather than
		 * explicitly prevented.  Now there's need.
		 */
		filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

		/* the new fd pins both the device and the container */
		vfio_device_get(device);
		atomic_inc(&group->container_users);

		fd_install(ret, filep);
		break;
	}
	mutex_unlock(&group->device_lock);

	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
1185 } 1186 } 1187 1188 return ret; 1189} 1190 1191#ifdef CONFIG_COMPAT 1192static long vfio_group_fops_compat_ioctl(struct file *filep, 1193 unsigned int cmd, unsigned long arg) 1194{ 1195 arg = (unsigned long)compat_ptr(arg); 1196 return vfio_group_fops_unl_ioctl(filep, cmd, arg); 1197} 1198#endif /* CONFIG_COMPAT */ 1199 1200static int vfio_group_fops_open(struct inode *inode, struct file *filep) 1201{ 1202 struct vfio_group *group; 1203 1204 group = vfio_group_get_from_minor(iminor(inode)); 1205 if (!group) 1206 return -ENODEV; 1207 1208 if (group->container) { 1209 vfio_group_put(group); 1210 return -EBUSY; 1211 } 1212 1213 filep->private_data = group; 1214 1215 return 0; 1216} 1217 1218static int vfio_group_fops_release(struct inode *inode, struct file *filep) 1219{ 1220 struct vfio_group *group = filep->private_data; 1221 1222 filep->private_data = NULL; 1223 1224 vfio_group_try_dissolve_container(group); 1225 1226 vfio_group_put(group); 1227 1228 return 0; 1229} 1230 1231static const struct file_operations vfio_group_fops = { 1232 .owner = THIS_MODULE, 1233 .unlocked_ioctl = vfio_group_fops_unl_ioctl, 1234#ifdef CONFIG_COMPAT 1235 .compat_ioctl = vfio_group_fops_compat_ioctl, 1236#endif 1237 .open = vfio_group_fops_open, 1238 .release = vfio_group_fops_release, 1239}; 1240 1241/** 1242 * VFIO Device fd 1243 */ 1244static int vfio_device_fops_release(struct inode *inode, struct file *filep) 1245{ 1246 struct vfio_device *device = filep->private_data; 1247 1248 device->ops->release(device->device_data); 1249 1250 vfio_group_try_dissolve_container(device->group); 1251 1252 vfio_device_put(device); 1253 1254 return 0; 1255} 1256 1257static long vfio_device_fops_unl_ioctl(struct file *filep, 1258 unsigned int cmd, unsigned long arg) 1259{ 1260 struct vfio_device *device = filep->private_data; 1261 1262 if (unlikely(!device->ops->ioctl)) 1263 return -EINVAL; 1264 1265 return device->ops->ioctl(device->device_data, cmd, arg); 1266} 1267 1268static ssize_t 
vfio_device_fops_read(struct file *filep, char __user *buf, 1269 size_t count, loff_t *ppos) 1270{ 1271 struct vfio_device *device = filep->private_data; 1272 1273 if (unlikely(!device->ops->read)) 1274 return -EINVAL; 1275 1276 return device->ops->read(device->device_data, buf, count, ppos); 1277} 1278 1279static ssize_t vfio_device_fops_write(struct file *filep, 1280 const char __user *buf, 1281 size_t count, loff_t *ppos) 1282{ 1283 struct vfio_device *device = filep->private_data; 1284 1285 if (unlikely(!device->ops->write)) 1286 return -EINVAL; 1287 1288 return device->ops->write(device->device_data, buf, count, ppos); 1289} 1290 1291static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) 1292{ 1293 struct vfio_device *device = filep->private_data; 1294 1295 if (unlikely(!device->ops->mmap)) 1296 return -EINVAL; 1297 1298 return device->ops->mmap(device->device_data, vma); 1299} 1300 1301#ifdef CONFIG_COMPAT 1302static long vfio_device_fops_compat_ioctl(struct file *filep, 1303 unsigned int cmd, unsigned long arg) 1304{ 1305 arg = (unsigned long)compat_ptr(arg); 1306 return vfio_device_fops_unl_ioctl(filep, cmd, arg); 1307} 1308#endif /* CONFIG_COMPAT */ 1309 1310static const struct file_operations vfio_device_fops = { 1311 .owner = THIS_MODULE, 1312 .release = vfio_device_fops_release, 1313 .read = vfio_device_fops_read, 1314 .write = vfio_device_fops_write, 1315 .unlocked_ioctl = vfio_device_fops_unl_ioctl, 1316#ifdef CONFIG_COMPAT 1317 .compat_ioctl = vfio_device_fops_compat_ioctl, 1318#endif 1319 .mmap = vfio_device_fops_mmap, 1320}; 1321 1322/** 1323 * Module/class support 1324 */ 1325static char *vfio_devnode(struct device *dev, umode_t *mode) 1326{ 1327 return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); 1328} 1329 1330static int __init vfio_init(void) 1331{ 1332 int ret; 1333 1334 idr_init(&vfio.group_idr); 1335 mutex_init(&vfio.group_lock); 1336 mutex_init(&vfio.iommu_drivers_lock); 1337 INIT_LIST_HEAD(&vfio.group_list); 
1338 INIT_LIST_HEAD(&vfio.iommu_drivers_list); 1339 init_waitqueue_head(&vfio.release_q); 1340 1341 vfio.class = class_create(THIS_MODULE, "vfio"); 1342 if (IS_ERR(vfio.class)) { 1343 ret = PTR_ERR(vfio.class); 1344 goto err_class; 1345 } 1346 1347 vfio.class->devnode = vfio_devnode; 1348 1349 ret = alloc_chrdev_region(&vfio.devt, 0, MINORMASK, "vfio"); 1350 if (ret) 1351 goto err_base_chrdev; 1352 1353 cdev_init(&vfio.cdev, &vfio_fops); 1354 ret = cdev_add(&vfio.cdev, vfio.devt, 1); 1355 if (ret) 1356 goto err_base_cdev; 1357 1358 vfio.dev = device_create(vfio.class, NULL, vfio.devt, NULL, "vfio"); 1359 if (IS_ERR(vfio.dev)) { 1360 ret = PTR_ERR(vfio.dev); 1361 goto err_base_dev; 1362 } 1363 1364 /* /dev/vfio/$GROUP */ 1365 cdev_init(&vfio.group_cdev, &vfio_group_fops); 1366 ret = cdev_add(&vfio.group_cdev, 1367 MKDEV(MAJOR(vfio.devt), 1), MINORMASK - 1); 1368 if (ret) 1369 goto err_groups_cdev; 1370 1371 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 1372 1373 /* 1374 * Attempt to load known iommu-drivers. This gives us a working 1375 * environment without the user needing to explicitly load iommu 1376 * drivers. 
1377 */ 1378 request_module_nowait("vfio_iommu_type1"); 1379 1380 return 0; 1381 1382err_groups_cdev: 1383 device_destroy(vfio.class, vfio.devt); 1384err_base_dev: 1385 cdev_del(&vfio.cdev); 1386err_base_cdev: 1387 unregister_chrdev_region(vfio.devt, MINORMASK); 1388err_base_chrdev: 1389 class_destroy(vfio.class); 1390 vfio.class = NULL; 1391err_class: 1392 return ret; 1393} 1394 1395static void __exit vfio_cleanup(void) 1396{ 1397 WARN_ON(!list_empty(&vfio.group_list)); 1398 1399 idr_destroy(&vfio.group_idr); 1400 cdev_del(&vfio.group_cdev); 1401 device_destroy(vfio.class, vfio.devt); 1402 cdev_del(&vfio.cdev); 1403 unregister_chrdev_region(vfio.devt, MINORMASK); 1404 class_destroy(vfio.class); 1405 vfio.class = NULL; 1406} 1407 1408module_init(vfio_init); 1409module_exit(vfio_cleanup); 1410 1411MODULE_VERSION(DRIVER_VERSION); 1412MODULE_LICENSE("GPL v2"); 1413MODULE_AUTHOR(DRIVER_AUTHOR); 1414MODULE_DESCRIPTION(DRIVER_DESC);