Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
drivers/vfio/vfio.c at v5.12-rc2
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct idr			group_idr;
	struct mutex			group_lock;
	struct cdev			group_cdev;
	dev_t				group_devt;
	wait_queue_head_t		release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

struct vfio_unbound_dev {
	struct device			*dev;
	struct list_head		unbound_next;
};

struct vfio_group {
	struct kref			kref;
	int				minor;
	atomic_t			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct device			*dev;
	struct notifier_block		nb;
	struct list_head		vfio_next;
	struct list_head		container_next;
	struct list_head		unbound_list;
	struct mutex			unbound_lock;
	atomic_t			opened;
	wait_queue_head_t		container_q;
	bool				noiommu;
	unsigned int			dev_counter;
	struct kvm			*kvm;
	struct blocking_notifier_head	notifier;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions, any use cases other than acquiring the first
 * reference for the purpose of calling vfio_add_group_dev() or removing
 * that symmetric reference after vfio_del_group_dev() should use the raw
 * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
	struct iommu_group *group;
	int __maybe_unused ret;

	group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
	/*
	 * With noiommu enabled, an IOMMU group will be created for a device
	 * that doesn't already have one and doesn't have an iommu_ops on their
	 * bus.  We set iommudata simply to be able to identify these groups
	 * as special use and for reclamation later.
	 */
	if (group || !noiommu || iommu_present(dev->bus))
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return NULL;

	iommu_group_set_name(group, "vfio-noiommu");
	iommu_group_set_iommudata(group, &noiommu, NULL);
	ret = iommu_group_add_device(group, dev);
	if (ret) {
		iommu_group_put(group);
		return NULL;
	}

	/*
	 * Where to taint?  At this point we've added an IOMMU group for a
	 * device that is not backed by iommu_ops, therefore any iommu_
	 * callback using iommu_ops can legitimately Oops.  So, while we may
	 * be about to give a DMA capable device to a user without IOMMU
	 * protection, which is clearly taint-worthy, let's go ahead and do
	 * it here.
	 */
	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

	return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);

void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	if (iommu_group_get_iommudata(group) == &noiommu)
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
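/*
 * Editor's illustration (not part of the original file): a minimal sketch of
 * how a VFIO bus driver's probe/remove paths pair these helpers around
 * vfio_add_group_dev()/vfio_del_group_dev(), per the comment above.  The
 * driver name, ops table and private data are hypothetical placeholders.
 */
#if 0	/* illustrative only */
static int example_vfio_probe(struct device *dev)
{
	struct iommu_group *group;
	int ret;

	/* first reference; may create a noiommu group if enabled */
	group = vfio_iommu_group_get(dev);
	if (!group)
		return -EINVAL;

	ret = vfio_add_group_dev(dev, &example_vfio_dev_ops, example_data);
	if (ret)
		vfio_iommu_group_put(group, dev);	/* undo on failure */

	return ret;
}

static void example_vfio_remove(struct device *dev)
{
	void *data = vfio_del_group_dev(dev);

	/* drop the symmetric reference taken in probe */
	vfio_iommu_group_put(dev->iommu_group, dev);
	kfree(data);
}
#endif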
#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group)
{
	return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
#endif


/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
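/*
 * Editor's illustration (not part of the original file): how an IOMMU
 * backend such as vfio_iommu_type1 registers its ops with VFIO core at
 * module load, modeled on the vfio_noiommu_ops table above.  The ops
 * table name is a hypothetical placeholder.
 */
#if 0	/* illustrative only */
static int __init example_iommu_backend_init(void)
{
	/* becomes selectable via VFIO_SET_IOMMU once registered */
	return vfio_register_iommu_driver(&example_iommu_driver_ops);
}

static void __exit example_iommu_backend_exit(void)
{
	vfio_unregister_iommu_driver(&example_iommu_driver_ops);
}

module_init(example_iommu_backend_init);
module_exit(example_iommu_backend_exit);
#endif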
/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	init_waitqueue_head(&group->container_q);
	group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU
	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * blocking notifiers acquire a rwsem around registering and hold
	 * it around callback.  Therefore, need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention.  Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ? "noiommu-" : "",
"noiommu-" : "", 383 iommu_group_id(iommu_group)); 384 if (IS_ERR(dev)) { 385 vfio_free_group_minor(minor); 386 vfio_group_unlock_and_free(group); 387 return ERR_CAST(dev); 388 } 389 390 group->minor = minor; 391 group->dev = dev; 392 393 list_add(&group->vfio_next, &vfio.group_list); 394 395 mutex_unlock(&vfio.group_lock); 396 397 return group; 398} 399 400/* called with vfio.group_lock held */ 401static void vfio_group_release(struct kref *kref) 402{ 403 struct vfio_group *group = container_of(kref, struct vfio_group, kref); 404 struct vfio_unbound_dev *unbound, *tmp; 405 struct iommu_group *iommu_group = group->iommu_group; 406 407 WARN_ON(!list_empty(&group->device_list)); 408 WARN_ON(group->notifier.head); 409 410 list_for_each_entry_safe(unbound, tmp, 411 &group->unbound_list, unbound_next) { 412 list_del(&unbound->unbound_next); 413 kfree(unbound); 414 } 415 416 device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor)); 417 list_del(&group->vfio_next); 418 vfio_free_group_minor(group->minor); 419 vfio_group_unlock_and_free(group); 420 iommu_group_put(iommu_group); 421} 422 423static void vfio_group_put(struct vfio_group *group) 424{ 425 kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); 426} 427 428struct vfio_group_put_work { 429 struct work_struct work; 430 struct vfio_group *group; 431}; 432 433static void vfio_group_put_bg(struct work_struct *work) 434{ 435 struct vfio_group_put_work *do_work; 436 437 do_work = container_of(work, struct vfio_group_put_work, work); 438 439 vfio_group_put(do_work->group); 440 kfree(do_work); 441} 442 443static void vfio_group_schedule_put(struct vfio_group *group) 444{ 445 struct vfio_group_put_work *do_work; 446 447 do_work = kmalloc(sizeof(*do_work), GFP_KERNEL); 448 if (WARN_ON(!do_work)) 449 return; 450 451 INIT_WORK(&do_work->work, vfio_group_put_bg); 452 do_work->group = group; 453 schedule_work(&do_work->work); 454} 455 456/* Assume group_lock or group reference is held */ 457static void vfio_group_get(struct vfio_group *group) 458{ 459 kref_get(&group->kref); 460} 461 462/* 463 * Not really a try as we will sleep for mutex, but we need to make 464 * sure the group pointer is valid under lock and get a reference. 
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return NULL;

	group = vfio_group_get_from_iommu(iommu_group);
	iommu_group_put(iommu_group);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	group->dev_counter++;
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	group->dev_counter--;
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, ie. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge.  Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream.  Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_allowed[] = { "pci-stub" };

static bool vfio_dev_driver_allowed(struct device *dev,
				    struct device_driver *drv)
{
	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
			return true;
	}

	return match_string(vfio_driver_allowed,
			    ARRAY_SIZE(vfio_driver_allowed),
			    drv->name) >= 0;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to an otherwise allowed driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = READ_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	if (!ret || !drv || vfio_dev_driver_allowed(dev, drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	dev_WARN(dev, "Device added to live group %d!\n",
		 iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed.  Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here.  If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused.  If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		dev_dbg(dev, "%s: group %d binding to driver\n", __func__,
			iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		dev_dbg(dev, "%s: group %d bound to driver %s\n", __func__,
			iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		dev_dbg(dev, "%s: group %d unbinding from driver %s\n",
			__func__, iommu_group_id(group->iommu_group),
			dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		dev_dbg(dev, "%s: group %d unbound from driver\n", __func__,
			iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */

		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	/*
	 * If we're the last reference to the group, the group will be
	 * released, which includes unregistering the iommu group notifier.
	 * We hold a read-lock on that notifier list, unregistering needs
	 * a write-lock... deadlock.  Release our reference asynchronously
	 * to avoid that situation.
	 */
	vfio_group_schedule_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group.  A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		dev_WARN(dev, "Device already exists on group %d\n",
			 iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference.  The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);

/**
 * Get a reference to the vfio_device for a device.  Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct vfio_group *group;
	struct vfio_device *device;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return NULL;

	device = vfio_group_get_device(group, dev);
	vfio_group_put(group);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it->device_data, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret) {
			device = it;
			vfio_device_get(device);
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);
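/*
 * Editor's illustration (not part of the original file): a sketch of how an
 * external consumer can safely translate a struct device into the
 * driver-private data registered above, respecting the reference rule in
 * the comment on vfio_device_data().  example_consume() is hypothetical.
 */
#if 0	/* illustrative only */
static void example_use_device_data(struct device *dev)
{
	struct vfio_device *device = vfio_device_get_from_dev(dev);

	if (!device)
		return;

	/* device_data is only guaranteed stable while the reference is held */
	example_consume(vfio_device_data(device));

	vfio_device_put(device);
}
#endif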
/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	bool interrupted = false;

	/*
	 * The group exists so long as we have a device reference.  Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	/*
	 * When the device is removed from the group, the group suddenly
	 * becomes non-viable; the device has a driver (until the unbind
	 * completes), but it's not present in the group.  This is bad news
	 * for any external users that need to re-acquire a group reference
	 * in order to match and release their existing reference.  To
	 * solve this, we track such devices on the unbound_list to bridge
	 * the gap until they're fully unbound.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	vfio_device_put(device);

	/*
	 * If the device is still present in the group after the above
	 * 'put', then it is in use and we need to request it from the
	 * bus driver.  The driver may in turn need to request the
	 * device from the user.  We send the request on an arbitrary
	 * interval with counter to allow the driver to take escalating
	 * measures to release the device if it has the ability to do so.
	 */
	add_wait_queue(&vfio.release_q, &wait);

	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		if (interrupted) {
			wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10);
		} else {
			wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10);
			if (signal_pending(current)) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}

	} while (1);

	remove_wait_queue(&vfio.release_q, &wait);
	/*
	 * In order to support multiple devices per group, devices can be
	 * plucked from the group while other devices in the group are still
	 * in use.  The container persists with this group and those remaining
	 * devices still attached.  If the user creates an isolation violation
	 * by binding this device to another driver while the group is still in
	 * use, that's their fault.  However, in the case of removing the last,
	 * or potentially the only, device in the group there can be no other
	 * in-use devices in the group.  The user has done their due diligence
	 * and we should lay no claims to those devices.  In order to do that,
	 * we need to make sure the group is detached from the container.
	 * Without this stall, we're potentially racing with a user process
	 * that may attempt to immediately bind this device to another driver.
	 */
	if (list_empty(&group->device_list))
		wait_event(group->container_q, !group->container);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
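/*
 * Editor's illustration (not part of the original file): the escalation
 * contract of ops->request as driven by the wait loop above, sketched after
 * the way vfio-pci relays requests to a user-registered eventfd.  The state
 * structure and its field names are hypothetical.
 */
#if 0	/* illustrative only */
static void example_vfio_request(void *device_data, unsigned int count)
{
	struct example_state *state = device_data;

	mutex_lock(&state->igate);
	if (state->req_trigger) {
		if (!(count % 10))	/* rate-limit the nagging */
			dev_notice(state->dev,
				   "Relaying device request to user (#%u)\n",
				   count);
		eventfd_signal(state->req_trigger, 1);
	} else if (count == 0) {
		dev_warn(state->dev,
			 "No device request channel registered, blocked until released by user\n");
	}
	mutex_unlock(&state->igate);
}
#endif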
/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * Only noiommu containers can use vfio-noiommu and noiommu
		 * containers can only use vfio-noiommu.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
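/*
 * Editor's illustration (not part of the original file): the userspace side
 * of the container setup implemented above, as a minimal sketch.  Note that
 * VFIO_SET_IOMMU only succeeds after at least one group has been added to
 * the container (see vfio_ioctl_set_iommu above); VFIO_TYPE1_IOMMU is
 * assumed to be the extension of the registered backend.
 */
#if 0	/* illustrative only, userspace */
	int container = open("/dev/vfio/vfio", O_RDWR);

	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
		/* unknown API version */;

	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
		/* backend not present */;

	/* ... VFIO_GROUP_SET_CONTAINER on a group fd must happen here ... */

	if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU))
		/* no group attached yet, or no backend accepted the request */;
#endif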
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (driver && driver->ops->notify)
		driver->ops->notify(container->iommu_data,
				    VFIO_IOMMU_CONTAINER_CLOSE);

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	return ret;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	wake_up(&group->container_q);
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
		goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static int vfio_group_add_container_user(struct vfio_group *group)
{
	if (!atomic_inc_not_zero(&group->container_users))
		return -EINVAL;

	if (group->noiommu) {
		atomic_dec(&group->container_users);
		return -EPERM;
	}
	if (!group->container->iommu_driver || !vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return -EINVAL;
	}

	return 0;
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	if (group->noiommu)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));

	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Warn if previous user didn't cleanup and re-init to drop them */
	if (WARN_ON(group->notifier.head))
		BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
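/*
 * Editor's illustration (not part of the original file): the userspace side
 * of the group and device fds implemented above, continuing the container
 * sketch earlier.  The group number and device name are placeholders.
 */
#if 0	/* illustrative only, userspace */
	struct vfio_group_status status = { .argsz = sizeof(status) };
	int group, device;

	/* chardev created by vfio_create_group() above */
	group = open("/dev/vfio/26", O_RDWR);

	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
		/* some device in the group is bound to a disallowed driver */;

	/* attach the group to a container before SET_IOMMU */
	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);

	/* get a device fd, matched by name via vfio_device_get_from_name() */
	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
#endif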
/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * The protocol includes:
 *  1. do normal VFIO init operation:
 *	- opening a new container;
 *	- attaching group(s) to it;
 *	- setting an IOMMU driver for a container.
 * When IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *	- the group is initialized;
 *	- IOMMU is set for it.
 * If both checks passed, vfio_group_get_external_user()
 * increments the container user counter to prevent
 * the VFIO group from disposal before KVM exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to know an IOMMU ID.
 *
 * 4. When the external KVM finishes, it calls
 * vfio_group_put_external_user() to release the VFIO group.
 * This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;
	int ret;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	ret = vfio_group_add_container_user(group);
	if (ret)
		return ERR_PTR(ret);

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

/**
 * External user API, exported by symbols to be linked dynamically.
 * The external user passes in a device pointer
 * to verify that:
 *	- A VFIO group is associated with the device;
 *	- IOMMU is set for the group.
 * If both checks passed, vfio_group_get_external_user_from_dev()
 * increments the container user counter to prevent the VFIO group
 * from disposal before the external user exits and returns the pointer
 * to the VFIO group.
 *
 * When the external user finishes using the VFIO group, it calls
 * vfio_group_put_external_user() to release the VFIO group and
 * decrement the container user counter.
 *
 * @dev [in]	: device
 * Return error PTR or pointer to VFIO group.
 */

struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev)
{
	struct vfio_group *group;
	int ret;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	ret = vfio_group_add_container_user(group);
	if (ret) {
		vfio_group_put(group);
		return ERR_PTR(ret);
	}

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

bool vfio_external_group_match_file(struct vfio_group *test_group,
				    struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	return (filep->f_op == &vfio_group_fops) && (group == test_group);
}
EXPORT_SYMBOL_GPL(vfio_external_group_match_file);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
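/*
 * Editor's illustration (not part of the original file): the protocol above
 * from the consumer's side, as a KVM-style module might implement it when
 * userspace hands it a group fd.  Error handling is elided; the function
 * name is a placeholder.
 */
#if 0	/* illustrative only */
static struct vfio_group *example_attach_group(int group_fd)
{
	struct fd f = fdget(group_fd);
	struct vfio_group *group;

	/* verifies the group is initialized and an IOMMU is set */
	group = vfio_group_get_external_user(f.file);
	fdput(f);
	if (IS_ERR(group))
		return group;

	pr_info("consuming IOMMU group %d\n",
		vfio_external_user_iommu_id(group));

	/* later: vfio_group_put_external_user(group) */
	return group;
}
#endif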
/**
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_shift() should be called to fixup the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);

int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header;

	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(header))
		return PTR_ERR(header);

	memcpy(header + 1, cap + 1, size - sizeof(*header));

	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);
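/*
 * Editor's illustration (not part of the original file): how a sub-module
 * ioctl handler might build a capability chain with the helpers above before
 * copying it back to userspace, loosely following the pattern used for
 * VFIO_IOMMU_GET_INFO.  The 'info' struct, 'arg' and the chosen capability
 * are placeholders for whatever the caller actually reports.
 */
#if 0	/* illustrative only */
	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
	struct vfio_iommu_type1_info_cap_iova_range cap_iovas = {
		.header.id = VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE,
		.header.version = 1,
	};
	int ret;

	ret = vfio_info_add_capability(&caps, &cap_iovas.header,
				       sizeof(cap_iovas));
	if (!ret && caps.size) {
		/*
		 * Chain offsets are buffer-relative; shift them past the
		 * info struct that precedes the chain in the user buffer.
		 */
		vfio_info_cap_shift(&caps, sizeof(info));
		if (copy_to_user((void __user *)arg + sizeof(info),
				 caps.buf, caps.size))
			ret = -EFAULT;
		kfree(caps.buf);
	}
#endif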
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);

/*
 * Pin a set of guest PFNs and return their associated host PFNs for local
 * domain only.
 * @dev [in]	 : device
 * @user_pfn [in]: array of user/guest PFNs to be pinned.
 * @npage [in]	 : count of elements in user_pfn array.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]	 : protection flags
 * @phys_pfn[out]: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
		   int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	if (group->dev_counter > 1) {
		ret = -EINVAL;
		goto err_pin_pages;
	}

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_pin_pages;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data,
					     group->iommu_group, user_pfn,
					     npage, prot, phys_pfn);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

err_pin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin a set of host PFNs for local domain only.
 * @dev [in]	 : device
 * @user_pfn [in]: array of user/guest PFNs to be unpinned.  Number of
 *		   user/guest PFNs should not be greater than
 *		   VFIO_PIN_PAGES_MAX_ENTRIES.
 * @npage [in]	 : count of elements in user_pfn array.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_unpin_pages;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
					       npage);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

err_unpin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unpin_pages);
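/*
 * Editor's illustration (not part of the original file): how an mdev vendor
 * driver might pin a single guest page for DMA and unpin it when done.  The
 * surrounding driver context ('mdev', 'gpa') is hypothetical.
 */
#if 0	/* illustrative only */
	unsigned long gfn = gpa >> PAGE_SHIFT, hpfn;
	int ret;

	ret = vfio_pin_pages(mdev_dev(mdev), &gfn, 1,
			     IOMMU_READ | IOMMU_WRITE, &hpfn);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	/* ... program hardware with host PFN 'hpfn' ... */

	vfio_unpin_pages(mdev_dev(mdev), &gfn, 1);
#endif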

/*
 * Pin a set of guest IOVA PFNs and return their associated host PFNs for a
 * VFIO group.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * to prevent the VFIO group from being disposed of in the middle of the
 * call.  The caller may keep the reference to the VFIO group across several
 * calls into this interface.  When finished with the VFIO group, the caller
 * must release it by calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova_pfn [in]	: array of user/guest IOVA PFNs to be pinned.
 * @npage [in]		: count of elements in user_iova_pfn array.
 *			  This count should not be greater than
 *			  VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]		: protection flags
 * @phys_pfn [out]	: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_group_pin_pages(struct vfio_group *group,
			 unsigned long *user_iova_pfn, int npage,
			 int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!group || !user_iova_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (group->dev_counter > 1)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data,
					     group->iommu_group, user_iova_pfn,
					     npage, prot, phys_pfn);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_group_pin_pages);

/*
 * Unpin a set of guest IOVA PFNs for a VFIO group.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * to prevent the VFIO group from being disposed of in the middle of the
 * call.  The caller may keep the reference to the VFIO group across several
 * calls into this interface.  When finished with the VFIO group, the caller
 * must release it by calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova_pfn [in]	: array of user/guest IOVA PFNs to be unpinned.
 * @npage [in]		: count of elements in user_iova_pfn array.
 *			  This count should not be greater than
 *			  VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_group_unpin_pages(struct vfio_group *group,
			   unsigned long *user_iova_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!group || !user_iova_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		ret = driver->ops->unpin_pages(container->iommu_data,
					       user_iova_pfn, npage);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_group_unpin_pages);
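
/*
 * Illustrative sketch only: an external user takes a group reference as
 * required by the comments above, pins one IOVA PFN, and drops the
 * reference.  In practice the reference would be held across many calls.
 * The wrapper name is hypothetical; the exported helpers are real.
 */
#if 0	/* example, not compiled */
static int example_group_pin(struct device *dev, unsigned long iova_pfn,
			     unsigned long *hpfn)
{
	struct vfio_group *group;
	int ret;

	/* take an external-user reference so the group cannot go away */
	group = vfio_group_get_external_user_from_dev(dev);
	if (IS_ERR(group))
		return PTR_ERR(group);

	ret = vfio_group_pin_pages(group, &iova_pfn, 1,
				   IOMMU_READ | IOMMU_WRITE, hpfn);

	vfio_group_put_external_user(group);
	return ret == 1 ? 0 : ret;
}
#endif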

/*
 * This interface allows the CPUs to perform some sort of virtual DMA on
 * behalf of the device.
 *
 * The CPU reads from or writes to a kernel buffer, with the other end of
 * the transfer being a range of IOVAs that map user space memory.
 *
 * As the read/write of user space memory is conducted via the CPUs and is
 * not a real device DMA, it is not necessary to pin the user space memory.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * to prevent the VFIO group from being disposed of in the middle of the
 * call.  The caller may keep the reference to the VFIO group across several
 * calls into this interface.  When finished with the VFIO group, the caller
 * must release it by calling vfio_group_put_external_user().
 *
 * @group [in]	   : VFIO group
 * @user_iova [in] : base IOVA of a user space buffer
 * @data [in]	   : pointer to kernel buffer
 * @len [in]	   : kernel buffer length
 * @write	   : indicate read or write
 * Return error code on failure or 0 on success.
 */
int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
		void *data, size_t len, bool write)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (!group || !data || len <= 0)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;

	if (likely(driver && driver->ops->dma_rw))
		ret = driver->ops->dma_rw(container->iommu_data,
					  user_iova, data, len, write);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_dma_rw);
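
/*
 * Illustrative sketch only: reading a guest-visible descriptor out of user
 * memory through the group's IOMMU mapping, without pinning.  The wrapper
 * name is hypothetical.
 */
#if 0	/* example, not compiled */
static int example_read_desc(struct vfio_group *group, dma_addr_t iova,
			     void *buf, size_t len)
{
	/* write=false: copy from the user's IOVA range into buf */
	return vfio_dma_rw(group, iova, buf, len, false);
}
#endif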

static int vfio_register_iommu_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->register_notifier))
		ret = driver->ops->register_notifier(container->iommu_data,
						     events, nb);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_iommu_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unregister_notifier))
		ret = driver->ops->unregister_notifier(container->iommu_data,
						       nb);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

	return ret;
}

void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
{
	group->kvm = kvm;
	blocking_notifier_call_chain(&group->notifier,
				VFIO_GROUP_NOTIFY_SET_KVM, kvm);
}
EXPORT_SYMBOL_GPL(vfio_group_set_kvm);

static int vfio_register_group_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	int ret;
	bool set_kvm = false;

	if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
		set_kvm = true;

	/* clear known events */
	*events &= ~VFIO_GROUP_NOTIFY_SET_KVM;

	/* refuse to continue if any unknown events remain */
	if (*events)
		return -EINVAL;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	ret = blocking_notifier_chain_register(&group->notifier, nb);

	/*
	 * The attach of the kvm and vfio_group may already have happened,
	 * so replay it here once upon registration.
	 */
	if (!ret && set_kvm && group->kvm)
		blocking_notifier_call_chain(&group->notifier,
					VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);

	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_group_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	ret = blocking_notifier_chain_unregister(&group->notifier, nb);

	vfio_group_try_dissolve_container(group);

	return ret;
}

int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
			   unsigned long *events, struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb || !events || (*events == 0))
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_register_iommu_notifier(group, events, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_register_group_notifier(group, events, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_register_notifier);

int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
			     struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb)
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_unregister_iommu_notifier(group, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_unregister_group_notifier(group, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unregister_notifier);
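
/*
 * Illustrative sketch only: a vendor driver subscribing to KVM association
 * changes for its device via vfio_register_notifier().  The callback and
 * registration wrapper are hypothetical; the exported helper, notify type,
 * and event bit are real.
 */
#if 0	/* example, not compiled */
static int example_group_notify(struct notifier_block *nb,
				unsigned long action, void *data)
{
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		struct kvm *kvm = data;	/* NULL when the KVM goes away */

		/* ... cache or drop the kvm pointer ... */
	}
	return NOTIFY_OK;
}

static int example_subscribe(struct device *dev, struct notifier_block *nb)
{
	unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;

	nb->notifier_call = example_group_notify;
	/* events is an in/out mask; unsupported bits cause -EINVAL */
	return vfio_register_notifier(dev, VFIO_GROUP_NOTIFY, &events, nb);
}
#endif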

struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	if (!group)
		return ERR_PTR(-EINVAL);

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->group_iommu_domain))
		return driver->ops->group_iommu_domain(container->iommu_data,
						       group->iommu_group);

	return ERR_PTR(-ENOTTY);
}
EXPORT_SYMBOL_GPL(vfio_group_iommu_domain);

/*
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK + 1);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");