Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
VFIO core, at v5.5 (2223 lines, 57 kB)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct idr			group_idr;
	struct mutex			group_lock;
	struct cdev			group_cdev;
	dev_t				group_devt;
	wait_queue_head_t		release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

struct vfio_unbound_dev {
	struct device			*dev;
	struct list_head		unbound_next;
};

struct vfio_group {
	struct kref			kref;
	int				minor;
	atomic_t			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct device			*dev;
	struct notifier_block		nb;
	struct list_head		vfio_next;
	struct list_head		container_next;
	struct list_head		unbound_list;
	struct mutex			unbound_lock;
	atomic_t			opened;
	wait_queue_head_t		container_q;
	bool				noiommu;
	struct kvm			*kvm;
	struct blocking_notifier_head	notifier;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif

/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions; any use cases other than acquiring the first
 * reference for the purpose of calling vfio_add_group_dev() or removing
 * that symmetric reference after vfio_del_group_dev() should use the raw
 * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
	struct iommu_group *group;
	int __maybe_unused ret;

	group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
	/*
	 * With noiommu enabled, an IOMMU group will be created for a device
	 * that doesn't already have one and doesn't have an iommu_ops on its
	 * bus. We set iommudata simply to be able to identify these groups
	 * as special use and for reclamation later.
	 */
	if (group || !noiommu || iommu_present(dev->bus))
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return NULL;

	iommu_group_set_name(group, "vfio-noiommu");
	iommu_group_set_iommudata(group, &noiommu, NULL);
	ret = iommu_group_add_device(group, dev);
	if (ret) {
		iommu_group_put(group);
		return NULL;
	}

	/*
	 * Where to taint? At this point we've added an IOMMU group for a
	 * device that is not backed by iommu_ops, therefore any iommu_
	 * callback using iommu_ops can legitimately Oops. So, while we may
	 * be about to give a DMA capable device to a user without IOMMU
	 * protection, which is clearly taint-worthy, let's go ahead and do
	 * it here.
	 */
	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

	return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);

void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	if (iommu_group_get_iommudata(group) == &noiommu)
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);

#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group)
{
	return iommu_group_get_iommudata(iommu_group) == &noiommu ?
			0 : -EINVAL;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
#endif


/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
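
/*
 * Example: a minimal sketch of how an IOMMU backend might register itself
 * with the functions above. The shape mirrors vfio_noiommu_ops earlier in
 * this file; the my_iommu_* names are hypothetical stand-ins for a real
 * backend such as vfio_iommu_type1.
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_ops = {
 *		.name		= "my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_iommu_open,
 *		.release	= my_iommu_release,
 *		.ioctl		= my_iommu_ioctl,
 *		.attach_group	= my_iommu_attach_group,
 *		.detach_group	= my_iommu_detach_group,
 *	};
 *
 *	static int __init my_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 *
 *	static void __exit my_iommu_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_ops);
 *	}
 */
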
/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref. Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock. A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	init_waitqueue_head(&group->container_q);
	group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU
	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * blocking notifiers acquire a rwsem around registering and hold
	 * it around callback. Therefore, need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention. Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ?
"noiommu-" : "", 382 iommu_group_id(iommu_group)); 383 if (IS_ERR(dev)) { 384 vfio_free_group_minor(minor); 385 vfio_group_unlock_and_free(group); 386 return ERR_CAST(dev); 387 } 388 389 group->minor = minor; 390 group->dev = dev; 391 392 list_add(&group->vfio_next, &vfio.group_list); 393 394 mutex_unlock(&vfio.group_lock); 395 396 return group; 397} 398 399/* called with vfio.group_lock held */ 400static void vfio_group_release(struct kref *kref) 401{ 402 struct vfio_group *group = container_of(kref, struct vfio_group, kref); 403 struct vfio_unbound_dev *unbound, *tmp; 404 struct iommu_group *iommu_group = group->iommu_group; 405 406 WARN_ON(!list_empty(&group->device_list)); 407 WARN_ON(group->notifier.head); 408 409 list_for_each_entry_safe(unbound, tmp, 410 &group->unbound_list, unbound_next) { 411 list_del(&unbound->unbound_next); 412 kfree(unbound); 413 } 414 415 device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor)); 416 list_del(&group->vfio_next); 417 vfio_free_group_minor(group->minor); 418 vfio_group_unlock_and_free(group); 419 iommu_group_put(iommu_group); 420} 421 422static void vfio_group_put(struct vfio_group *group) 423{ 424 kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); 425} 426 427struct vfio_group_put_work { 428 struct work_struct work; 429 struct vfio_group *group; 430}; 431 432static void vfio_group_put_bg(struct work_struct *work) 433{ 434 struct vfio_group_put_work *do_work; 435 436 do_work = container_of(work, struct vfio_group_put_work, work); 437 438 vfio_group_put(do_work->group); 439 kfree(do_work); 440} 441 442static void vfio_group_schedule_put(struct vfio_group *group) 443{ 444 struct vfio_group_put_work *do_work; 445 446 do_work = kmalloc(sizeof(*do_work), GFP_KERNEL); 447 if (WARN_ON(!do_work)) 448 return; 449 450 INIT_WORK(&do_work->work, vfio_group_put_bg); 451 do_work->group = group; 452 schedule_work(&do_work->work); 453} 454 455/* Assume group_lock or group reference is held */ 456static void vfio_group_get(struct vfio_group *group) 457{ 458 kref_get(&group->kref); 459} 460 461/* 462 * Not really a try as we will sleep for mutex, but we need to make 463 * sure the group pointer is valid under lock and get a reference. 
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return NULL;

	group = vfio_group_get_from_iommu(iommu_group);
	iommu_group_put(iommu_group);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, ie. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge. Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream. Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
			return true;
	}

	return match_string(vfio_driver_whitelist,
			    ARRAY_SIZE(vfio_driver_whitelist),
			    drv->name) >= 0;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver. The first is to test whether the device exists in the vfio
 * group. The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = READ_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	dev_WARN(dev, "Device added to live group %d!\n",
		 iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed. Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here. If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused. If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		dev_dbg(dev, "%s: group %d binding to driver\n", __func__,
			iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		dev_dbg(dev, "%s: group %d bound to driver %s\n", __func__,
			iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		dev_dbg(dev, "%s: group %d unbinding from driver %s\n",
			__func__, iommu_group_id(group->iommu_group),
			dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		dev_dbg(dev, "%s: group %d unbound from driver\n", __func__,
			iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it. Once that occurs, we have to
		 * stop the system to maintain isolation. At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */

		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	/*
	 * If we're the last reference to the group, the group will be
	 * released, which includes unregistering the iommu group notifier.
	 * We hold a read-lock on that notifier list, unregistering needs
	 * a write-lock... deadlock. Release our reference asynchronously
	 * to avoid that situation.
	 */
	vfio_group_schedule_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group. A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		dev_WARN(dev, "Device already exists on group %d\n",
			 iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference. The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);

/**
 * Get a reference to the vfio_device for a device. Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct vfio_group *group;
	struct vfio_device *device;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return NULL;

	device = vfio_group_get_device(group, dev);
	vfio_group_put(group);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = NULL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		if (!strcmp(dev_name(it->dev), buf)) {
			device = it;
			vfio_device_get(device);
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);

/*
 * Decrement the device reference count and wait for the device to be
 * removed. Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	bool interrupted = false;

	/*
	 * The group exists so long as we have a device reference. Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	/*
	 * When the device is removed from the group, the group suddenly
	 * becomes non-viable; the device has a driver (until the unbind
	 * completes), but it's not present in the group. This is bad news
	 * for any external users that need to re-acquire a group reference
	 * in order to match and release their existing reference. To
	 * solve this, we track such devices on the unbound_list to bridge
	 * the gap until they're fully unbound.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	vfio_device_put(device);

	/*
	 * If the device is still present in the group after the above
	 * 'put', then it is in use and we need to request it from the
	 * bus driver. The driver may in turn need to request the
	 * device from the user. We send the request on an arbitrary
	 * interval with counter to allow the driver to take escalating
	 * measures to release the device if it has the ability to do so.
	 */
	add_wait_queue(&vfio.release_q, &wait);

	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		if (interrupted) {
			wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10);
		} else {
			wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10);
			if (signal_pending(current)) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}

	} while (1);

	remove_wait_queue(&vfio.release_q, &wait);
	/*
	 * In order to support multiple devices per group, devices can be
	 * plucked from the group while other devices in the group are still
	 * in use. The container persists with this group and those remaining
	 * devices still attached. If the user creates an isolation violation
	 * by binding this device to another driver while the group is still in
	 * use, that's their fault. However, in the case of removing the last,
	 * or potentially the only, device in the group there can be no other
	 * in-use devices in the group. The user has done their due diligence
	 * and we should lay no claims to those devices. In order to do that,
	 * we need to make sure the group is detached from the container.
	 * Without this stall, we're potentially racing with a user process
	 * that may attempt to immediately bind this device to another driver.
	 */
	if (list_empty(&group->device_list))
		wait_event(group->container_q, !group->container);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
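
/*
 * Example: a sketch of the probe/remove pairing the comments above expect
 * from a VFIO bus driver. The my_* names and my_vfio_dev_ops are
 * hypothetical; vfio-pci follows this same pattern.
 *
 *	static int my_probe(struct my_pdev *pdev)
 *	{
 *		struct iommu_group *group;
 *		int ret;
 *
 *		group = vfio_iommu_group_get(&pdev->dev);
 *		if (!group)
 *			return -EINVAL;
 *
 *		ret = vfio_add_group_dev(&pdev->dev, &my_vfio_dev_ops, pdev);
 *		if (ret)
 *			vfio_iommu_group_put(group, &pdev->dev);
 *
 *		return ret;
 *	}
 *
 *	static void my_remove(struct my_pdev *pdev)
 *	{
 *		void *data = vfio_del_group_dev(&pdev->dev);
 *
 *		if (data)
 *			vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
 *	}
 */
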
/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result. If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users. Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources. There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * Only noiommu containers can use vfio-noiommu and noiommu
		 * containers can only use vfio-noiommu.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them. We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	return ret;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	wake_up(&group->container_q);
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset. Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container. That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
		goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static int vfio_group_add_container_user(struct vfio_group *group)
{
	if (!atomic_inc_not_zero(&group->container_users))
		return -EINVAL;

	if (group->noiommu) {
		atomic_dec(&group->container_users);
		return -EPERM;
	}
	if (!group->container->iommu_driver || !vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return -EINVAL;
	}

	return 0;
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (!device)
		return -ENODEV;

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented. Now there's need.
	 */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	if (group->noiommu)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));

	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Warn if previous user didn't cleanup and re-init to drop them */
	if (WARN_ON(group->notifier.head))
		BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
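
/*
 * Example: a sketch of the userspace side of the three fds implemented
 * above, adapted from Documentation/vfio.txt. It assumes IOMMU group 26,
 * the type1 IOMMU backend and device 0000:06:0d.0; error checking is
 * mostly elided.
 *
 *	int container, group, device;
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *
 *	container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		return;		(unknown API version)
 *
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		return;		(type1 not supported)
 *
 *	group = open("/dev/vfio/26", O_RDWR);
 *
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *		return;		(not all group devices bound for vfio)
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */
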
/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * The protocol includes:
 *  1. do normal VFIO init operation:
 *	- opening a new container;
 *	- attaching group(s) to it;
 *	- setting an IOMMU driver for a container.
 * When IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *	- the group is initialized;
 *	- IOMMU is set for it.
 * If both checks passed, vfio_group_get_external_user()
 * increments the container user counter to prevent
 * the VFIO group from disposal before KVM exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to know an IOMMU ID.
 *
 * 4. When the external KVM finishes, it calls
 * vfio_group_put_external_user() to release the VFIO group.
 * This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;
	int ret;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	ret = vfio_group_add_container_user(group);
	if (ret)
		return ERR_PTR(ret);

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

bool vfio_external_group_match_file(struct vfio_group *test_group,
				    struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	return (filep->f_op == &vfio_group_fops) && (group == test_group);
}
EXPORT_SYMBOL_GPL(vfio_external_group_match_file);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
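
/*
 * Example: a sketch of the protocol above from the external user's side;
 * the kvm-vfio device in virt/kvm is the in-tree consumer. "filep" is the
 * group file userspace handed over; error handling is elided.
 *
 *	struct vfio_group *grp = vfio_group_get_external_user(filep);
 *	int id;
 *
 *	if (IS_ERR(grp))
 *		return PTR_ERR(grp);
 *
 *	id = vfio_external_user_iommu_id(grp);
 *	(... match or track the group by its IOMMU ID ...)
 *
 *	vfio_group_put_external_user(grp);
 */
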
/**
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability. A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_shift() should be called to fixup the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);

int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header;

	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(header))
		return PTR_ERR(header);

	memcpy(header + 1, cap + 1, size - sizeof(*header));

	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);
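
/*
 * Example: a sketch of how a sub-driver might build and report a
 * capability chain with the helpers above. struct my_cap, MY_CAP_ID and
 * the vfio_device_info offset are hypothetical; the vfio-pci region info
 * capabilities follow this pattern.
 *
 *	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
 *	struct vfio_info_cap_header *header;
 *
 *	header = vfio_info_cap_add(&caps, sizeof(struct my_cap),
 *				   MY_CAP_ID, 1);
 *	if (IS_ERR(header))
 *		return PTR_ERR(header);
 *	(... fill the my_cap fields that follow the header ...)
 *
 *	Because the chain offsets are relative to the start of caps.buf,
 *	shift them by where the chain will sit in the user buffer before
 *	copying it out:
 *
 *	vfio_info_cap_shift(&caps, sizeof(struct vfio_device_info));
 *	copy_to_user((void __user *)arg + sizeof(struct vfio_device_info),
 *		     caps.buf, caps.size);
 *	kfree(caps.buf);
 */
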
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);

/*
 * Pin a set of guest PFNs and return their associated host PFNs for local
 * domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be pinned.
 * @npage [in]   : count of elements in user_pfn array. This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @phys_pfn[out]: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
		   int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_pin_pages;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
					     npage, prot, phys_pfn);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

err_pin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin set of host PFNs for local domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest
 *		   PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @npage [in]   : count of elements in user_pfn array. This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_unpin_pages;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
					       npage);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

err_unpin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unpin_pages);
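
/*
 * Example: a sketch of a mediated-device vendor driver pinning one guest
 * page for DMA and releasing it again. The my_mdev name and single-page
 * batch are hypothetical; callers like gvt-g batch many PFNs per call.
 *
 *	unsigned long gfn = ...;	(user/guest pfn)
 *	unsigned long hpfn;
 *	int ret;
 *
 *	ret = vfio_pin_pages(mdev_dev(my_mdev), &gfn, 1,
 *			     IOMMU_READ | IOMMU_WRITE, &hpfn);
 *	if (ret != 1)
 *		(... error, or fewer pages pinned than requested ...)
 *
 *	(... program the device to DMA to/from pfn_to_page(hpfn) ...)
 *
 *	vfio_unpin_pages(mdev_dev(my_mdev), &gfn, 1);
 */
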
static int vfio_register_iommu_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->register_notifier))
		ret = driver->ops->register_notifier(container->iommu_data,
						     events, nb);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_iommu_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unregister_notifier))
		ret = driver->ops->unregister_notifier(container->iommu_data,
						       nb);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

	return ret;
}

void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
{
	group->kvm = kvm;
	blocking_notifier_call_chain(&group->notifier,
				     VFIO_GROUP_NOTIFY_SET_KVM, kvm);
}
EXPORT_SYMBOL_GPL(vfio_group_set_kvm);

static int vfio_register_group_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	int ret;
	bool set_kvm = false;

	if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
		set_kvm = true;

	/* clear known events */
	*events &= ~VFIO_GROUP_NOTIFY_SET_KVM;

	/* refuse to continue if still events remaining */
	if (*events)
		return -EINVAL;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	ret = blocking_notifier_chain_register(&group->notifier, nb);

	/*
	 * The attaching of kvm and vfio_group might already happen, so
	 * here we replay once upon registration.
	 */
	if (!ret && set_kvm && group->kvm)
		blocking_notifier_call_chain(&group->notifier,
					     VFIO_GROUP_NOTIFY_SET_KVM,
					     group->kvm);

	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_group_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	ret = blocking_notifier_chain_unregister(&group->notifier, nb);

	vfio_group_try_dissolve_container(group);

	return ret;
}

int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
			   unsigned long *events, struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb || !events || (*events == 0))
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_register_iommu_notifier(group, events, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_register_group_notifier(group, events, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_register_notifier);

int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
			     struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb)
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_unregister_iommu_notifier(group, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_unregister_group_notifier(group, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unregister_notifier);
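
/*
 * Example: a sketch of how a vendor driver that pins pages might watch for
 * DMA unmaps using the notifier interface above. The my_* names are
 * hypothetical; the event and payload come from <linux/vfio.h>.
 *
 *	static int my_iommu_notifier(struct notifier_block *nb,
 *				     unsigned long action, void *data)
 *	{
 *		if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
 *			struct vfio_iommu_type1_dma_unmap *unmap = data;
 *
 *			(... unpin pages in [unmap->iova, +unmap->size) ...)
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 *
 *	my_nb.notifier_call = my_iommu_notifier;
 *	ret = vfio_register_notifier(dev, VFIO_IOMMU_NOTIFY, &events, &my_nb);
 *	(...)
 *	vfio_unregister_notifier(dev, VFIO_IOMMU_NOTIFY, &my_nb);
 */
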
/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK + 1);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");