Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

vfio: Move vfio group specific code into group.c

This prepares for compiling out the vfio group code once the vfio device
cdev is added. After this change, vfio_main.c contains no vfio_group
decoding code and no device->group references.

No functional change is intended.

Link: https://lore.kernel.org/r/20221201145535.589687-11-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Yu He <yu.he@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Authored by Yi Liu, committed by Jason Gunthorpe (9eefba80 8da7a0e7)

4 changed files: +907 -870
drivers/vfio/Makefile: +1
···
 obj-$(CONFIG_VFIO) += vfio.o

 vfio-y += vfio_main.o \
+	   group.o \
	   iova_bitmap.o
 vfio-$(CONFIG_IOMMUFD) += iommufd.o
 vfio-$(CONFIG_VFIO_CONTAINER) += container.o
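The end goal named in the commit message, compiling the group code out entirely once a device cdev exists, would hinge on exactly this kind of Makefile split. As a hedged illustration only (CONFIG_VFIO_GROUP is an assumed, hypothetical symbol, not part of this commit), the object list could become conditional and vfio.h could stub out the cross-file API:

/*
 * Hypothetical sketch, not in this commit: guard the group API so that
 * group.o can be dropped from the build. The Makefile side would be
 * something like: vfio-$(CONFIG_VFIO_GROUP) += group.o
 */
#if IS_ENABLED(CONFIG_VFIO_GROUP)
int vfio_device_set_group(struct vfio_device *device,
			  enum vfio_group_type type);
void vfio_device_remove_group(struct vfio_device *device);
#else
static inline int vfio_device_set_group(struct vfio_device *device,
					enum vfio_group_type type)
{
	return 0;	/* no group layer: nothing to set up */
}
static inline void vfio_device_remove_group(struct vfio_device *device)
{
}
#endif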
drivers/vfio/group.c: +877 (new file)
··· (new file; all 877 lines below are additions)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/vfio.h>
#include <linux/iommufd.h>
#include <linux/anon_inodes.h>
#include "vfio.h"

static struct vfio {
	struct class			*class;
	struct list_head		group_list;
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;
	dev_t				group_devt;
} vfio;

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get_registration(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static bool vfio_group_has_iommu(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);
	/*
	 * There can only be users if there is a container, and if there is a
	 * container there must be users.
	 */
	WARN_ON(!group->container != !group->container_users);

	return group->container || group->iommufd;
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_ioctl_unset_container(struct vfio_group *group)
{
	int ret = 0;

	mutex_lock(&group->group_lock);
	if (!vfio_group_has_iommu(group)) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (group->container) {
		if (group->container_users != 1) {
			ret = -EBUSY;
			goto out_unlock;
		}
		vfio_group_detach_container(group);
	}
	if (group->iommufd) {
		iommufd_ctx_put(group->iommufd);
		group->iommufd = NULL;
	}

out_unlock:
	mutex_unlock(&group->group_lock);
	return ret;
}

static int vfio_group_ioctl_set_container(struct vfio_group *group,
					  int __user *arg)
{
	struct vfio_container *container;
	struct iommufd_ctx *iommufd;
	struct fd f;
	int ret;
	int fd;

	if (get_user(fd, arg))
		return -EFAULT;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	mutex_lock(&group->group_lock);
	if (vfio_group_has_iommu(group)) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (!group->iommu_group) {
		ret = -ENODEV;
		goto out_unlock;
	}

	container = vfio_container_from_file(f.file);
	if (container) {
		ret = vfio_container_attach_group(container, group);
		goto out_unlock;
	}

	iommufd = iommufd_ctx_from_file(f.file);
	if (!IS_ERR(iommufd)) {
		u32 ioas_id;

		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
		if (ret) {
			iommufd_ctx_put(group->iommufd);
			goto out_unlock;
		}

		group->iommufd = iommufd;
		goto out_unlock;
	}

	/* The FD passed is not recognized. */
	ret = -EBADFD;

out_unlock:
	mutex_unlock(&group->group_lock);
	fdput(f);
	return ret;
}

static int vfio_device_group_open(struct vfio_device *device)
{
	int ret;

	mutex_lock(&device->group->group_lock);
	if (!vfio_group_has_iommu(device->group)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Here we pass the KVM pointer with the group under the lock.  If the
	 * device driver will use it, it must obtain a reference and release it
	 * during close_device.
	 */
	ret = vfio_device_open(device, device->group->iommufd,
			       device->group->kvm);

out_unlock:
	mutex_unlock(&device->group->group_lock);
	return ret;
}

void vfio_device_group_close(struct vfio_device *device)
{
	mutex_lock(&device->group->group_lock);
	vfio_device_close(device, device->group->iommufd);
	mutex_unlock(&device->group->group_lock);
}

static struct file *vfio_device_open_file(struct vfio_device *device)
{
	struct file *filep;
	int ret;

	ret = vfio_device_group_open(device);
	if (ret)
		goto err_out;

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	vfio_device_group_close(device);
err_out:
	return ERR_PTR(ret);
}

static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
					  char __user *arg)
{
	struct vfio_device *device;
	struct file *filep;
	char *buf;
	int fdno;
	int ret;

	buf = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	device = vfio_device_get_from_name(group, buf);
	kfree(buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	filep = vfio_device_open_file(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put_registration(device);
	return ret;
}

static int vfio_group_ioctl_get_status(struct vfio_group *group,
				       struct vfio_group_status __user *arg)
{
	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
	struct vfio_group_status status;

	if (copy_from_user(&status, arg, minsz))
		return -EFAULT;

	if (status.argsz < minsz)
		return -EINVAL;

	status.flags = 0;

	mutex_lock(&group->group_lock);
	if (!group->iommu_group) {
		mutex_unlock(&group->group_lock);
		return -ENODEV;
	}

	/*
	 * With the container FD the iommu_group_claim_dma_owner() is done
	 * during SET_CONTAINER but for IOMMFD this is done during
	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
	 * to viability.
	 */
	if (vfio_group_has_iommu(group))
		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
				VFIO_GROUP_FLAGS_VIABLE;
	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
	mutex_unlock(&group->group_lock);

	if (copy_to_user(arg, &status, minsz))
		return -EFAULT;
	return 0;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	void __user *uarg = (void __user *)arg;

	switch (cmd) {
	case VFIO_GROUP_GET_DEVICE_FD:
		return vfio_group_ioctl_get_device_fd(group, uarg);
	case VFIO_GROUP_GET_STATUS:
		return vfio_group_ioctl_get_status(group, uarg);
	case VFIO_GROUP_SET_CONTAINER:
		return vfio_group_ioctl_set_container(group, uarg);
	case VFIO_GROUP_UNSET_CONTAINER:
		return vfio_group_ioctl_unset_container(group);
	default:
		return -ENOTTY;
	}
}

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	mutex_lock(&group->group_lock);

	/*
	 * drivers can be zero if this races with vfio_device_remove_group(),
	 * it will be stable at 0 under the group rwsem
	 */
	if (refcount_read(&group->drivers) == 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto out_unlock;
	}

	/*
	 * Do we need multiple instances of the group open?  Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto out_unlock;
	}
	group->opened_file = filep;
	filep->private_data = group;
	ret = 0;
out_unlock:
	mutex_unlock(&group->group_lock);
	return ret;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	mutex_lock(&group->group_lock);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container)
		vfio_group_detach_container(group);
	if (group->iommufd) {
		iommufd_ctx_put(group->iommufd);
		group->iommufd = NULL;
	}
	group->opened_file = NULL;
	mutex_unlock(&group->group_lock);
	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

/*
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *
vfio_group_find_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	lockdep_assert_held(&vfio.group_lock);

	/*
	 * group->iommu_group from the vfio.group_list cannot be NULL
	 * under the vfio.group_lock.
	 */
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group)
			return group;
	}
	return NULL;
}

static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	mutex_destroy(&group->group_lock);
	WARN_ON(group->iommu_group);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}

static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->drivers, 1);
	mutex_init(&group->group_lock);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}

static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	lockdep_assert_held(&vfio.group_lock);

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	return group;

err_put:
	put_device(&group->dev);
	return ret;
}

static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	mutex_lock(&vfio.group_lock);
	group = vfio_create_group(iommu_group, type);
	mutex_unlock(&vfio.group_lock);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}

static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			mutex_unlock(&group->device_lock);
			return true;
		}
	}
	mutex_unlock(&group->device_lock);
	return false;
}

static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group && vfio_noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}

	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	mutex_lock(&vfio.group_lock);
	group = vfio_group_find_from_iommu(iommu_group);
	if (group) {
		if (WARN_ON(vfio_group_has_device(group, dev)))
			group = ERR_PTR(-EINVAL);
		else
			refcount_inc(&group->drivers);
	} else {
		group = vfio_create_group(iommu_group, VFIO_IOMMU);
	}
	mutex_unlock(&vfio.group_lock);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}

int vfio_device_set_group(struct vfio_device *device,
			  enum vfio_group_type type)
{
	struct vfio_group *group;

	if (type == VFIO_IOMMU)
		group = vfio_group_find_or_alloc(device->dev);
	else
		group = vfio_noiommu_group_alloc(device->dev, type);

	if (IS_ERR(group))
		return PTR_ERR(group);

	/* Our reference on group is moved to the device */
	device->group = group;
	return 0;
}

void vfio_device_remove_group(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	struct iommu_group *iommu_group;

	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
	if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
		return;
	list_del(&group->vfio_next);

	/*
	 * We could concurrently probe another driver in the group that might
	 * race vfio_device_remove_group() with vfio_get_group(), so we have to
	 * ensure that the sysfs is all cleaned up under lock otherwise the
	 * cdev_device_add() will fail due to the name aready existing.
	 */
	cdev_device_del(&group->cdev, &group->dev);

	mutex_lock(&group->group_lock);
	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the device. Since
	 * all pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->notifier.head);

	/*
	 * Revoke all users of group->iommu_group. At this point we know there
	 * are no devices active because we are unplugging the last one. Setting
	 * iommu_group to NULL blocks all new users.
	 */
	if (group->container)
		vfio_group_detach_container(group);
	iommu_group = group->iommu_group;
	group->iommu_group = NULL;
	mutex_unlock(&group->group_lock);
	mutex_unlock(&vfio.group_lock);

	iommu_group_put(iommu_group);
	put_device(&group->dev);
}

void vfio_device_group_register(struct vfio_device *device)
{
	mutex_lock(&device->group->device_lock);
	list_add(&device->group_next, &device->group->device_list);
	mutex_unlock(&device->group->device_lock);
}

void vfio_device_group_unregister(struct vfio_device *device)
{
	mutex_lock(&device->group->device_lock);
	list_del(&device->group_next);
	mutex_unlock(&device->group->device_lock);
}

int vfio_device_group_use_iommu(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	int ret = 0;

	lockdep_assert_held(&group->group_lock);

	if (WARN_ON(!group->container))
		return -EINVAL;

	ret = vfio_group_use_container(group);
	if (ret)
		return ret;
	vfio_device_container_register(device);
	return 0;
}

void vfio_device_group_unuse_iommu(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held(&group->group_lock);

	if (WARN_ON(!group->container))
		return;

	vfio_device_container_unregister(device);
	vfio_group_unuse_container(group);
}

bool vfio_device_has_container(struct vfio_device *device)
{
	return device->group->container;
}

/**
 * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
 * @file: VFIO group file
 *
 * The returned iommu_group is valid as long as a ref is held on the file. This
 * returns a reference on the group. This function is deprecated, only the SPAPR
 * path in kvm should call it.
 */
struct iommu_group *vfio_file_iommu_group(struct file *file)
{
	struct vfio_group *group = file->private_data;
	struct iommu_group *iommu_group = NULL;

	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
		return NULL;

	if (!vfio_file_is_group(file))
		return NULL;

	mutex_lock(&group->group_lock);
	if (group->iommu_group) {
		iommu_group = group->iommu_group;
		iommu_group_ref_get(iommu_group);
	}
	mutex_unlock(&group->group_lock);
	return iommu_group;
}
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);

/**
 * vfio_file_is_group - True if the file is usable with VFIO aPIS
 * @file: VFIO group file
 */
bool vfio_file_is_group(struct file *file)
{
	return file->f_op == &vfio_group_fops;
}
EXPORT_SYMBOL_GPL(vfio_file_is_group);

/**
 * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
 *        is always CPU cache coherent
 * @file: VFIO group file
 *
 * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
 * bit in DMA transactions. A return of false indicates that the user has
 * rights to access additional instructions such as wbinvd on x86.
 */
bool vfio_file_enforced_coherent(struct file *file)
{
	struct vfio_group *group = file->private_data;
	struct vfio_device *device;
	bool ret = true;

	if (!vfio_file_is_group(file))
		return true;

	/*
	 * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
	 * any domain later attached to it will also not support it. If the cap
	 * is set then the iommu_domain eventually attached to the device/group
	 * must use a domain with enforce_cache_coherency().
	 */
	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (!device_iommu_capable(device->dev,
					  IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
			ret = false;
			break;
		}
	}
	mutex_unlock(&group->device_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);

/**
 * vfio_file_set_kvm - Link a kvm with VFIO drivers
 * @file: VFIO group file
 * @kvm: KVM to link
 *
 * When a VFIO device is first opened the KVM will be available in
 * device->kvm if one was associated with the group.
 */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = file->private_data;

	if (!vfio_file_is_group(file))
		return;

	mutex_lock(&group->group_lock);
	group->kvm = kvm;
	mutex_unlock(&group->group_lock);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);

/**
 * vfio_file_has_dev - True if the VFIO file is a handle for device
 * @file: VFIO file to check
 * @device: Device that must be part of the file
 *
 * Returns true if given file has permission to manipulate the given device.
 */
bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
	struct vfio_group *group = file->private_data;

	if (!vfio_file_is_group(file))
		return false;

	return group == device->group;
}
EXPORT_SYMBOL_GPL(vfio_file_has_dev);

static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

int __init vfio_group_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	INIT_LIST_HEAD(&vfio.group_list);

	ret = vfio_container_init();
	if (ret)
		return ret;

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_group_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;
	return 0;

err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_group_class:
	vfio_container_cleanup();
	return ret;
}

void vfio_group_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	vfio_container_cleanup();
}
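For orientation, here is roughly how the ioctls now implemented in group.c are exercised from userspace, following the flow in Documentation/driver-api/vfio.rst. The group number "26" and the device name "0000:06:0d.0" are illustrative placeholders, and error checking is omitted for brevity:

/* Minimal userspace sketch of the /dev/vfio/$GROUP ioctls handled in
 * group.c (abbreviated; a real user would also check VFIO_GET_API_VERSION
 * and every return value). */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int main(void)
{
	struct vfio_group_status status = { .argsz = sizeof(status) };
	int container, group, device;

	container = open("/dev/vfio/vfio", O_RDWR);
	group = open("/dev/vfio/26", O_RDWR);	/* vfio_group_fops_open() */

	/* vfio_group_ioctl_get_status(): viability check */
	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
		return 1;	/* some devices in the group are not bound to vfio */

	/* vfio_group_ioctl_set_container() */
	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);

	/* vfio_group_ioctl_get_device_fd() -> vfio_device_open_file() */
	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
	printf("device fd: %d\n", device);
	return 0;
}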
drivers/vfio/vfio.h: +22
···
 #ifndef __VFIO_VFIO_H__
 #define __VFIO_VFIO_H__

+#include <linux/file.h>
 #include <linux/device.h>
 #include <linux/cdev.h>
 #include <linux/module.h>
···
 struct iommu_group;
 struct vfio_device;
 struct vfio_container;
+
+void vfio_device_put_registration(struct vfio_device *device);
+bool vfio_device_try_get_registration(struct vfio_device *device);
+int vfio_device_open(struct vfio_device *device,
+		     struct iommufd_ctx *iommufd, struct kvm *kvm);
+void vfio_device_close(struct vfio_device *device,
+		       struct iommufd_ctx *iommufd);
+
+extern const struct file_operations vfio_device_fops;

 enum vfio_group_type {
 	/*
···
 	struct blocking_notifier_head	notifier;
 	struct iommufd_ctx		*iommufd;
 };
+
+int vfio_device_set_group(struct vfio_device *device,
+			  enum vfio_group_type type);
+void vfio_device_remove_group(struct vfio_device *device);
+void vfio_device_group_register(struct vfio_device *device);
+void vfio_device_group_unregister(struct vfio_device *device);
+int vfio_device_group_use_iommu(struct vfio_device *device);
+void vfio_device_group_unuse_iommu(struct vfio_device *device);
+void vfio_device_group_close(struct vfio_device *device);
+bool vfio_device_has_container(struct vfio_device *device);
+int __init vfio_group_init(void);
+void vfio_group_cleanup(void);

 #if IS_ENABLED(CONFIG_VFIO_CONTAINER)
 /* events for the backend driver notify callback */
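With these declarations in place, vfio_main.c reaches the group layer only through this header. A simplified sketch of the registration path (example_register_dev is a hypothetical name, and the body is an abridgment of the assumed shape of __vfio_register_dev() in vfio_main.c, not verbatim kernel code):

static int example_register_dev(struct vfio_device *device,
				enum vfio_group_type type)
{
	int ret;

	/* Find or create the vfio_group and take a reference on it. */
	ret = vfio_device_set_group(device, type);
	if (ret)
		return ret;

	/* ... core device setup (device_add(), refcounting) elided ... */

	/* Publish the device on the group's device_list. */
	vfio_device_group_register(device);
	return 0;
}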
drivers/vfio/vfio_main.c: +7 -870
···
 #include <linux/cdev.h>
 #include <linux/compat.h>
 #include <linux/device.h>
-#include <linux/file.h>
-#include <linux/anon_inodes.h>
 #include <linux/fs.h>
 #include <linux/idr.h>
 #include <linux/iommu.h>
···
 #define DRIVER_DESC	"VFIO - User Level meta-driver"

 static struct vfio {
-	struct class			*class;
-	struct list_head		group_list;
-	struct mutex			group_lock; /* locks group_list */
-	struct ida			group_ida;
-	dev_t				group_devt;
 	struct class			*device_class;
 	struct ida			device_ida;
 } vfio;

 static DEFINE_XARRAY(vfio_device_set_xa);
-static const struct file_operations vfio_group_fops;

 int vfio_assign_device_set(struct vfio_device *device, void *set_id)
 {
···
 EXPORT_SYMBOL_GPL(vfio_device_set_open_count);

 /*
- * Group objects - create, release, get, put, search
- */
-static struct vfio_group *
-vfio_group_find_from_iommu(struct iommu_group *iommu_group)
-{
-	struct vfio_group *group;
-
-	lockdep_assert_held(&vfio.group_lock);
-
-	/*
-	 * group->iommu_group from the vfio.group_list cannot be NULL
-	 * under the vfio.group_lock.
-	 */
-	list_for_each_entry(group, &vfio.group_list, vfio_next) {
-		if (group->iommu_group == iommu_group)
-			return group;
-	}
-	return NULL;
-}
-
-static void vfio_group_release(struct device *dev)
-{
-	struct vfio_group *group = container_of(dev, struct vfio_group, dev);
-
-	mutex_destroy(&group->device_lock);
-	mutex_destroy(&group->group_lock);
-	WARN_ON(group->iommu_group);
-	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
-	kfree(group);
-}
-
-static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
-					   enum vfio_group_type type)
-{
-	struct vfio_group *group;
-	int minor;
-
-	group = kzalloc(sizeof(*group), GFP_KERNEL);
-	if (!group)
-		return ERR_PTR(-ENOMEM);
-
-	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
-	if (minor < 0) {
-		kfree(group);
-		return ERR_PTR(minor);
-	}
-
-	device_initialize(&group->dev);
-	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
-	group->dev.class = vfio.class;
-	group->dev.release = vfio_group_release;
-	cdev_init(&group->cdev, &vfio_group_fops);
-	group->cdev.owner = THIS_MODULE;
-
-	refcount_set(&group->drivers, 1);
-	mutex_init(&group->group_lock);
-	INIT_LIST_HEAD(&group->device_list);
-	mutex_init(&group->device_lock);
-	group->iommu_group = iommu_group;
-	/* put in vfio_group_release() */
-	iommu_group_ref_get(iommu_group);
-	group->type = type;
-	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
-
-	return group;
-}
-
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-		enum vfio_group_type type)
-{
-	struct vfio_group *group;
-	struct vfio_group *ret;
-	int err;
-
-	lockdep_assert_held(&vfio.group_lock);
-
-	group = vfio_group_alloc(iommu_group, type);
-	if (IS_ERR(group))
-		return group;
-
-	err = dev_set_name(&group->dev, "%s%d",
-			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
-			   iommu_group_id(iommu_group));
-	if (err) {
-		ret = ERR_PTR(err);
-		goto err_put;
-	}
-
-	err = cdev_device_add(&group->cdev, &group->dev);
-	if (err) {
-		ret = ERR_PTR(err);
-		goto err_put;
-	}
-
-	list_add(&group->vfio_next, &vfio.group_list);
-
-	return group;
-
-err_put:
-	put_device(&group->dev);
-	return ret;
-}
-
-static void vfio_device_remove_group(struct vfio_device *device)
-{
-	struct vfio_group *group = device->group;
-	struct iommu_group *iommu_group;
-
-	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
-		iommu_group_remove_device(device->dev);
-
-	/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
-	if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
-		return;
-	list_del(&group->vfio_next);
-
-	/*
-	 * We could concurrently probe another driver in the group that might
-	 * race vfio_device_remove_group() with vfio_get_group(), so we have to
-	 * ensure that the sysfs is all cleaned up under lock otherwise the
-	 * cdev_device_add() will fail due to the name aready existing.
-	 */
-	cdev_device_del(&group->cdev, &group->dev);
-
-	mutex_lock(&group->group_lock);
-	/*
-	 * These data structures all have paired operations that can only be
-	 * undone when the caller holds a live reference on the device. Since
-	 * all pairs must be undone these WARN_ON's indicate some caller did not
-	 * properly hold the group reference.
-	 */
-	WARN_ON(!list_empty(&group->device_list));
-	WARN_ON(group->notifier.head);
-
-	/*
-	 * Revoke all users of group->iommu_group. At this point we know there
-	 * are no devices active because we are unplugging the last one. Setting
-	 * iommu_group to NULL blocks all new users.
-	 */
-	if (group->container)
-		vfio_group_detach_container(group);
-	iommu_group = group->iommu_group;
-	group->iommu_group = NULL;
-	mutex_unlock(&group->group_lock);
-	mutex_unlock(&vfio.group_lock);
-
-	iommu_group_put(iommu_group);
-	put_device(&group->dev);
-}
-
-/*
  * Device objects - create, release, get, put, search
  */
 /* Device reference always implies a group reference */
-static void vfio_device_put_registration(struct vfio_device *device)
+void vfio_device_put_registration(struct vfio_device *device)
 {
 	if (refcount_dec_and_test(&device->refcount))
 		complete(&device->comp);
 }

-static bool vfio_device_try_get_registration(struct vfio_device *device)
+bool vfio_device_try_get_registration(struct vfio_device *device)
 {
 	return refcount_inc_not_zero(&device->refcount);
 }
···
 }
 EXPORT_SYMBOL_GPL(vfio_free_device);

-static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
-		enum vfio_group_type type)
-{
-	struct iommu_group *iommu_group;
-	struct vfio_group *group;
-	int ret;
-
-	iommu_group = iommu_group_alloc();
-	if (IS_ERR(iommu_group))
-		return ERR_CAST(iommu_group);
-
-	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
-	if (ret)
-		goto out_put_group;
-	ret = iommu_group_add_device(iommu_group, dev);
-	if (ret)
-		goto out_put_group;
-
-	mutex_lock(&vfio.group_lock);
-	group = vfio_create_group(iommu_group, type);
-	mutex_unlock(&vfio.group_lock);
-	if (IS_ERR(group)) {
-		ret = PTR_ERR(group);
-		goto out_remove_device;
-	}
-	iommu_group_put(iommu_group);
-	return group;
-
-out_remove_device:
-	iommu_group_remove_device(dev);
-out_put_group:
-	iommu_group_put(iommu_group);
-	return ERR_PTR(ret);
-}
-
-static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
-{
-	struct vfio_device *device;
-
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(device, &group->device_list, group_next) {
-		if (device->dev == dev) {
-			mutex_unlock(&group->device_lock);
-			return true;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-	return false;
-}
-
-static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
-{
-	struct iommu_group *iommu_group;
-	struct vfio_group *group;
-
-	iommu_group = iommu_group_get(dev);
-	if (!iommu_group && vfio_noiommu) {
-		/*
-		 * With noiommu enabled, create an IOMMU group for devices that
-		 * don't already have one, implying no IOMMU hardware/driver
-		 * exists.  Taint the kernel because we're about to give a DMA
-		 * capable device to a user without IOMMU protection.
-		 */
-		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
-		if (!IS_ERR(group)) {
-			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
-			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
-		}
-		return group;
-	}
-
-	if (!iommu_group)
-		return ERR_PTR(-EINVAL);
-
-	/*
-	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
-	 * restore cache coherency. It has to be checked here because it is only
-	 * valid for cases where we are using iommu groups.
-	 */
-	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
-		iommu_group_put(iommu_group);
-		return ERR_PTR(-EINVAL);
-	}
-
-	mutex_lock(&vfio.group_lock);
-	group = vfio_group_find_from_iommu(iommu_group);
-	if (group) {
-		if (WARN_ON(vfio_group_has_device(group, dev)))
-			group = ERR_PTR(-EINVAL);
-		else
-			refcount_inc(&group->drivers);
-	} else {
-		group = vfio_create_group(iommu_group, VFIO_IOMMU);
-	}
-	mutex_unlock(&vfio.group_lock);
-
-	/* The vfio_group holds a reference to the iommu_group */
-	iommu_group_put(iommu_group);
-	return group;
-}
-
-static void vfio_device_group_register(struct vfio_device *device)
-{
-	mutex_lock(&device->group->device_lock);
-	list_add(&device->group_next, &device->group->device_list);
-	mutex_unlock(&device->group->device_lock);
-}
-
-static void vfio_device_group_unregister(struct vfio_device *device)
-{
-	mutex_lock(&device->group->device_lock);
-	list_del(&device->group_next);
-	mutex_unlock(&device->group->device_lock);
-}
-
-static int vfio_device_set_group(struct vfio_device *device,
-				 enum vfio_group_type type)
-{
-	struct vfio_group *group;
-
-	if (type == VFIO_IOMMU)
-		group = vfio_group_find_or_alloc(device->dev);
-	else
-		group = vfio_noiommu_group_alloc(device->dev, type);
-
-	if (IS_ERR(group))
-		return PTR_ERR(group);
-
-	/* Our reference on group is moved to the device */
-	device->group = group;
-	return 0;
-}
-
 static int __vfio_register_dev(struct vfio_device *device,
 		enum vfio_group_type type)
 {
···
 }
 EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);

-static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
-						     char *buf)
-{
-	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
-
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(it, &group->device_list, group_next) {
-		int ret;
-
-		if (it->ops->match) {
-			ret = it->ops->match(it, buf);
-			if (ret < 0) {
-				device = ERR_PTR(ret);
-				break;
-			}
-		} else {
-			ret = !strcmp(dev_name(it->dev), buf);
-		}
-
-		if (ret && vfio_device_try_get_registration(it)) {
-			device = it;
-			break;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-
-	return device;
-}
-
 /*
  * Decrement the device reference count and wait for the device to be
  * removed.  Open file descriptors for the device... */
···
 }
 EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);

-/*
- * VFIO Group fd, /dev/vfio/$GROUP
- */
-static bool vfio_group_has_iommu(struct vfio_group *group)
-{
-	lockdep_assert_held(&group->group_lock);
-	/*
-	 * There can only be users if there is a container, and if there is a
-	 * container there must be users.
-	 */
-	WARN_ON(!group->container != !group->container_users);
-
-	return group->container || group->iommufd;
-}
-
-/*
- * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
- * if there was no container to unset.  Since the ioctl is called on
- * the group, we know that still exists, therefore the only valid
- * transition here is 1->0.
- */
-static int vfio_group_ioctl_unset_container(struct vfio_group *group)
-{
-	int ret = 0;
-
-	mutex_lock(&group->group_lock);
-	if (!vfio_group_has_iommu(group)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-	if (group->container) {
-		if (group->container_users != 1) {
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-		vfio_group_detach_container(group);
-	}
-	if (group->iommufd) {
-		iommufd_ctx_put(group->iommufd);
-		group->iommufd = NULL;
-	}
-
-out_unlock:
-	mutex_unlock(&group->group_lock);
-	return ret;
-}
-
-static int vfio_group_ioctl_set_container(struct vfio_group *group,
-					  int __user *arg)
-{
-	struct vfio_container *container;
-	struct iommufd_ctx *iommufd;
-	struct fd f;
-	int ret;
-	int fd;
-
-	if (get_user(fd, arg))
-		return -EFAULT;
-
-	f = fdget(fd);
-	if (!f.file)
-		return -EBADF;
-
-	mutex_lock(&group->group_lock);
-	if (vfio_group_has_iommu(group)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-	if (!group->iommu_group) {
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	container = vfio_container_from_file(f.file);
-	if (container) {
-		ret = vfio_container_attach_group(container, group);
-		goto out_unlock;
-	}
-
-	iommufd = iommufd_ctx_from_file(f.file);
-	if (!IS_ERR(iommufd)) {
-		u32 ioas_id;
-
-		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
-		if (ret) {
-			iommufd_ctx_put(group->iommufd);
-			goto out_unlock;
-		}
-
-		group->iommufd = iommufd;
-		goto out_unlock;
-	}
-
-	/* The FD passed is not recognized. */
-	ret = -EBADFD;
-
-out_unlock:
-	mutex_unlock(&group->group_lock);
-	fdput(f);
-	return ret;
-}
-
-static const struct file_operations vfio_device_fops;
-
 /* true if the vfio_device has open_device() called but not close_device() */
 static bool vfio_assert_device_open(struct vfio_device *device)
 {
 	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
-}
-
-static int vfio_device_group_use_iommu(struct vfio_device *device)
-{
-	struct vfio_group *group = device->group;
-	int ret = 0;
-
-	lockdep_assert_held(&group->group_lock);
-
-	if (WARN_ON(!group->container))
-		return -EINVAL;
-
-	ret = vfio_group_use_container(group);
-	if (ret)
-		return ret;
-	vfio_device_container_register(device);
-	return 0;
-}
-
-static void vfio_device_group_unuse_iommu(struct vfio_device *device)
-{
-	struct vfio_group *group = device->group;
-
-	lockdep_assert_held(&group->group_lock);
-
-	if (WARN_ON(!group->container))
-		return;
-
-	vfio_device_container_unregister(device);
-	vfio_group_unuse_container(group);
 }

 static int vfio_device_first_open(struct vfio_device *device,
···
 	module_put(device->dev->driver->owner);
 }

-static int vfio_device_open(struct vfio_device *device,
-			    struct iommufd_ctx *iommufd, struct kvm *kvm)
+int vfio_device_open(struct vfio_device *device,
+		     struct iommufd_ctx *iommufd, struct kvm *kvm)
 {
 	int ret = 0;
···
 	return ret;
 }

-static void vfio_device_close(struct vfio_device *device,
-			      struct iommufd_ctx *iommufd)
+void vfio_device_close(struct vfio_device *device,
+		       struct iommufd_ctx *iommufd)
 {
 	mutex_lock(&device->dev_set->lock);
 	vfio_assert_device_open(device);
···
 	device->open_count--;
 	mutex_unlock(&device->dev_set->lock);
 }
-
-static int vfio_device_group_open(struct vfio_device *device)
-{
-	int ret;
-
-	mutex_lock(&device->group->group_lock);
-	if (!vfio_group_has_iommu(device->group)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
-	/*
-	 * Here we pass the KVM pointer with the group under the lock.  If the
-	 * device driver will use it, it must obtain a reference and release it
-	 * during close_device.
-	 */
-	ret = vfio_device_open(device, device->group->iommufd,
-			       device->group->kvm);
-
-out_unlock:
-	mutex_unlock(&device->group->group_lock);
-	return ret;
-}
-
-static void vfio_device_group_close(struct vfio_device *device)
-{
-	mutex_lock(&device->group->group_lock);
-	vfio_device_close(device, device->group->iommufd);
-	mutex_unlock(&device->group->group_lock);
-}
-
-static struct file *vfio_device_open_file(struct vfio_device *device)
-{
-	struct file *filep;
-	int ret;
-
-	ret = vfio_device_group_open(device);
-	if (ret)
-		goto err_out;
-
-	/*
-	 * We can't use anon_inode_getfd() because we need to modify
-	 * the f_mode flags directly to allow more than just ioctls
-	 */
-	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
-				   device, O_RDWR);
-	if (IS_ERR(filep)) {
-		ret = PTR_ERR(filep);
-		goto err_close_device;
-	}
-
-	/*
-	 * TODO: add an anon_inode interface to do this.
-	 * Appears to be missing by lack of need rather than
-	 * explicitly prevented.  Now there's need.
-	 */
-	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
-
-	if (device->group->type == VFIO_NO_IOMMU)
-		dev_warn(device->dev, "vfio-noiommu device opened by user "
-			 "(%s:%d)\n", current->comm, task_pid_nr(current));
-	/*
-	 * On success the ref of device is moved to the file and
-	 * put in vfio_device_fops_release()
-	 */
-	return filep;
-
-err_close_device:
-	vfio_device_group_close(device);
-err_out:
-	return ERR_PTR(ret);
-}
-
-static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
-					  char __user *arg)
-{
-	struct vfio_device *device;
-	struct file *filep;
-	char *buf;
-	int fdno;
-	int ret;
-
-	buf = strndup_user(arg, PAGE_SIZE);
-	if (IS_ERR(buf))
-		return PTR_ERR(buf);
-
-	device = vfio_device_get_from_name(group, buf);
-	kfree(buf);
-	if (IS_ERR(device))
-		return PTR_ERR(device);
-
-	fdno = get_unused_fd_flags(O_CLOEXEC);
-	if (fdno < 0) {
-		ret = fdno;
-		goto err_put_device;
-	}
-
-	filep = vfio_device_open_file(device);
-	if (IS_ERR(filep)) {
-		ret = PTR_ERR(filep);
-		goto err_put_fdno;
-	}
-
-	fd_install(fdno, filep);
-	return fdno;
-
-err_put_fdno:
-	put_unused_fd(fdno);
-err_put_device:
-	vfio_device_put_registration(device);
-	return ret;
-}
-
-static int vfio_group_ioctl_get_status(struct vfio_group *group,
-				       struct vfio_group_status __user *arg)
-{
-	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
-	struct vfio_group_status status;
-
-	if (copy_from_user(&status, arg, minsz))
-		return -EFAULT;
-
-	if (status.argsz < minsz)
-		return -EINVAL;
-
-	status.flags = 0;
-
-	mutex_lock(&group->group_lock);
-	if (!group->iommu_group) {
-		mutex_unlock(&group->group_lock);
-		return -ENODEV;
-	}
-
-	/*
-	 * With the container FD the iommu_group_claim_dma_owner() is done
-	 * during SET_CONTAINER but for IOMMFD this is done during
-	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
-	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
-	 * to viability.
-	 */
-	if (vfio_group_has_iommu(group))
-		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
-				VFIO_GROUP_FLAGS_VIABLE;
-	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
-		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
-	mutex_unlock(&group->group_lock);
-
-	if (copy_to_user(arg, &status, minsz))
-		return -EFAULT;
-	return 0;
-}
-
-static long vfio_group_fops_unl_ioctl(struct file *filep,
-				      unsigned int cmd, unsigned long arg)
-{
-	struct vfio_group *group = filep->private_data;
-	void __user *uarg = (void __user *)arg;
-
-	switch (cmd) {
-	case VFIO_GROUP_GET_DEVICE_FD:
-		return vfio_group_ioctl_get_device_fd(group, uarg);
-	case VFIO_GROUP_GET_STATUS:
-		return vfio_group_ioctl_get_status(group, uarg);
-	case VFIO_GROUP_SET_CONTAINER:
-		return vfio_group_ioctl_set_container(group, uarg);
-	case VFIO_GROUP_UNSET_CONTAINER:
-		return vfio_group_ioctl_unset_container(group);
-	default:
-		return -ENOTTY;
-	}
-}
-
-static int vfio_group_fops_open(struct inode *inode, struct file *filep)
-{
-	struct vfio_group *group =
-		container_of(inode->i_cdev, struct vfio_group, cdev);
-	int ret;
-
-	mutex_lock(&group->group_lock);
-
-	/*
-	 * drivers can be zero if this races with vfio_device_remove_group(),
-	 * it will be stable at 0 under the group rwsem
-	 */
-	if (refcount_read(&group->drivers) == 0) {
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
-		ret = -EPERM;
-		goto out_unlock;
-	}
-
-	/*
-	 * Do we need multiple instances of the group open?  Seems not.
-	 */
-	if (group->opened_file) {
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-	group->opened_file = filep;
-	filep->private_data = group;
-	ret = 0;
-out_unlock:
-	mutex_unlock(&group->group_lock);
-	return ret;
-}
-
-static int vfio_group_fops_release(struct inode *inode, struct file *filep)
-{
-	struct vfio_group *group = filep->private_data;
-
-	filep->private_data = NULL;
-
-	mutex_lock(&group->group_lock);
-	/*
-	 * Device FDs hold a group file reference, therefore the group release
-	 * is only called when there are no open devices.
-	 */
-	WARN_ON(group->notifier.head);
-	if (group->container)
-		vfio_group_detach_container(group);
-	if (group->iommufd) {
-		iommufd_ctx_put(group->iommufd);
-		group->iommufd = NULL;
-	}
-	group->opened_file = NULL;
-	mutex_unlock(&group->group_lock);
-	return 0;
-}
-
-static const struct file_operations vfio_group_fops = {
-	.owner		= THIS_MODULE,
-	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
-	.compat_ioctl	= compat_ptr_ioctl,
-	.open		= vfio_group_fops_open,
-	.release	= vfio_group_fops_release,
-};

 /*
  * Wrapper around pm_runtime_resume_and_get().
···
 	return device->ops->mmap(device, vma);
 }

-static const struct file_operations vfio_device_fops = {
+const struct file_operations vfio_device_fops = {
 	.owner		= THIS_MODULE,
 	.release	= vfio_device_fops_release,
 	.read		= vfio_device_fops_read,
···
 	.compat_ioctl	= compat_ptr_ioctl,
 	.mmap		= vfio_device_fops_mmap,
 };
-
-/**
- * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
- * @file: VFIO group file
- *
- * The returned iommu_group is valid as long as a ref is held on the file. This
- * returns a reference on the group. This function is deprecated, only the SPAPR
- * path in kvm should call it.
- */
-struct iommu_group *vfio_file_iommu_group(struct file *file)
-{
-	struct vfio_group *group = file->private_data;
-	struct iommu_group *iommu_group = NULL;
-
-	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
-		return NULL;
-
-	if (!vfio_file_is_group(file))
-		return NULL;
-
-	mutex_lock(&group->group_lock);
-	if (group->iommu_group) {
-		iommu_group = group->iommu_group;
-		iommu_group_ref_get(iommu_group);
-	}
-	mutex_unlock(&group->group_lock);
-	return iommu_group;
-}
-EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
-
-/**
- * vfio_file_is_group - True if the file is usable with VFIO aPIS
- * @file: VFIO group file
- */
-bool vfio_file_is_group(struct file *file)
-{
-	return file->f_op == &vfio_group_fops;
-}
-EXPORT_SYMBOL_GPL(vfio_file_is_group);
-
-/**
- * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
- *        is always CPU cache coherent
- * @file: VFIO group file
- *
- * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
- * bit in DMA transactions. A return of false indicates that the user has
- * rights to access additional instructions such as wbinvd on x86.
- */
-bool vfio_file_enforced_coherent(struct file *file)
-{
-	struct vfio_group *group = file->private_data;
-	struct vfio_device *device;
-	bool ret = true;
-
-	if (!vfio_file_is_group(file))
-		return true;
-
-	/*
-	 * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
-	 * any domain later attached to it will also not support it. If the cap
-	 * is set then the iommu_domain eventually attached to the device/group
-	 * must use a domain with enforce_cache_coherency().
-	 */
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(device, &group->device_list, group_next) {
-		if (!device_iommu_capable(device->dev,
-					  IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
-			ret = false;
-			break;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
-
-/**
- * vfio_file_set_kvm - Link a kvm with VFIO drivers
- * @file: VFIO group file
- * @kvm: KVM to link
- *
- * When a VFIO device is first opened the KVM will be available in
- * device->kvm if one was associated with the group.
- */
-void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
-{
-	struct vfio_group *group = file->private_data;
-
-	if (!vfio_file_is_group(file))
-		return;
-
-	mutex_lock(&group->group_lock);
-	group->kvm = kvm;
-	mutex_unlock(&group->group_lock);
-}
-EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
-
-/**
- * vfio_file_has_dev - True if the VFIO file is a handle for device
- * @file: VFIO file to check
- * @device: Device that must be part of the file
- *
- * Returns true if given file has permission to manipulate the given device.
- */
-bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
-{
-	struct vfio_group *group = file->private_data;
-
-	if (!vfio_file_is_group(file))
-		return false;
-
-	return group == device->group;
-}
-EXPORT_SYMBOL_GPL(vfio_file_has_dev);

 /*
  * Sub-module support
···
 	return 0;
 }
 EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
-
-static bool vfio_device_has_container(struct vfio_device *device)
-{
-	return device->group->container;
-}

 /*
  * Pin contiguous user pages and return their associated host pages for local
···
 /*
  * Module/class support
  */
-static char *vfio_devnode(struct device *dev, umode_t *mode)
-{
-	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
-}
-
-static int __init vfio_group_init(void)
-{
-	int ret;
-
-	ida_init(&vfio.group_ida);
-	mutex_init(&vfio.group_lock);
-	INIT_LIST_HEAD(&vfio.group_list);
-
-	ret = vfio_container_init();
-	if (ret)
-		return ret;
-
-	/* /dev/vfio/$GROUP */
-	vfio.class = class_create(THIS_MODULE, "vfio");
-	if (IS_ERR(vfio.class)) {
-		ret = PTR_ERR(vfio.class);
-		goto err_group_class;
-	}
-
-	vfio.class->devnode = vfio_devnode;
-
-	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
-	if (ret)
-		goto err_alloc_chrdev;
-	return 0;
-
-err_alloc_chrdev:
-	class_destroy(vfio.class);
-	vfio.class = NULL;
-err_group_class:
-	vfio_container_cleanup();
-	return ret;
-}
-
-static void vfio_group_cleanup(void)
-{
-	WARN_ON(!list_empty(&vfio.group_list));
-	ida_destroy(&vfio.group_ida);
-	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
-	class_destroy(vfio.class);
-	vfio.class = NULL;
-	vfio_container_cleanup();
-}
-
 static int __init vfio_init(void)
 {
 	int ret;
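Finally, the new entry points pair up at module init and exit. A minimal sketch of the ordering implied by the API above (abridged; the real vfio_init() also sets up vfio.device_class and the rest of the device-side state):

static int __init vfio_init(void)
{
	int ret;

	/* vfio_group_init() also runs vfio_container_init() internally. */
	ret = vfio_group_init();
	if (ret)
		return ret;

	/* ... device class creation and remaining init elided ... */
	return 0;
}

static void __exit vfio_cleanup(void)
{
	/* Mirrors init: vfio_group_cleanup() also cleans up the container. */
	vfio_group_cleanup();
}

module_init(vfio_init);
module_exit(vfio_cleanup);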