/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
                                                             int assigned_dev_id)
{
        struct list_head *ptr;
        struct kvm_assigned_dev_kernel *match;

        list_for_each(ptr, head) {
                match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
                if (match->assigned_dev_id == assigned_dev_id)
                        return match;
        }
        return NULL;
}

static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
                                    *assigned_dev, int irq)
{
        int i, index;
        struct msix_entry *host_msix_entries;

        host_msix_entries = assigned_dev->host_msix_entries;

        index = -1;
        for (i = 0; i < assigned_dev->entries_nr; i++)
                if (irq == host_msix_entries[i].vector) {
                        index = i;
                        break;
                }
        if (index < 0)
                printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");

        return index;
}

static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int ret;

        spin_lock(&assigned_dev->intx_lock);
        if (pci_check_and_mask_intx(assigned_dev->dev)) {
                assigned_dev->host_irq_disabled = true;
                ret = IRQ_WAKE_THREAD;
        } else
                ret = IRQ_NONE;
        spin_unlock(&assigned_dev->intx_lock);

        return ret;
}

static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
                                 int vector)
{
        if (unlikely(assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_GUEST_INTX)) {
                spin_lock(&assigned_dev->intx_mask_lock);
                if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
                        kvm_set_irq(assigned_dev->kvm,
                                    assigned_dev->irq_source_id, vector, 1);
                spin_unlock(&assigned_dev->intx_mask_lock);
        } else
                kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                            vector, 1);
}

static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                spin_lock_irq(&assigned_dev->intx_lock);
                disable_irq_nosync(irq);
                assigned_dev->host_irq_disabled = true;
                spin_unlock_irq(&assigned_dev->intx_lock);
        }

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}

#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
                                       assigned_dev->irq_source_id,
                                       assigned_dev->guest_irq, 1);
        return unlikely(ret == -EWOULDBLOCK) ?
                IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}
#endif

#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int index = find_index_from_host_irq(assigned_dev, irq);
        u32 vector;
        int ret = 0;

        if (index >= 0) {
                vector = assigned_dev->guest_msix_entries[index].vector;
                ret = kvm_set_irq_inatomic(assigned_dev->kvm,
                                           assigned_dev->irq_source_id,
                                           vector, 1);
        }

        return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int index = find_index_from_host_irq(assigned_dev, irq);
        u32 vector;

        if (index >= 0) {
                vector = assigned_dev->guest_msix_entries[index].vector;
                kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
        }

        return IRQ_HANDLED;
}
#endif

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
        struct kvm_assigned_dev_kernel *dev =
                container_of(kian, struct kvm_assigned_dev_kernel,
                             ack_notifier);

        kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);

        spin_lock(&dev->intx_mask_lock);

        if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
                bool reassert = false;

                spin_lock_irq(&dev->intx_lock);
                /*
                 * The guest IRQ may be shared so this ack can come from an
                 * IRQ for another guest device.
                 */
                if (dev->host_irq_disabled) {
                        if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
                                enable_irq(dev->host_irq);
                        else if (!pci_check_and_unmask_intx(dev->dev))
                                reassert = true;
                        dev->host_irq_disabled = reassert;
                }
                spin_unlock_irq(&dev->intx_lock);

                if (reassert)
                        kvm_set_irq(dev->kvm, dev->irq_source_id,
                                    dev->guest_irq, 1);
        }

        spin_unlock(&dev->intx_mask_lock);
}

static void deassign_guest_irq(struct kvm *kvm,
                               struct kvm_assigned_dev_kernel *assigned_dev)
{
        if (assigned_dev->ack_notifier.gsi != -1)
                kvm_unregister_irq_ack_notifier(kvm,
                                                &assigned_dev->ack_notifier);

        kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                    assigned_dev->guest_irq, 0);

        if (assigned_dev->irq_source_id != -1)
                kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
        assigned_dev->irq_source_id = -1;
        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* This function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
                              struct kvm_assigned_dev_kernel *assigned_dev)
{
        /*
         * We disable the irq here to prevent further events.
         *
         * Note that this may result in a nested disable if the interrupt type
         * is INTx, but that is fine since we are about to free it.
         *
         * If this function is called as part of VM destruction, make sure the
         * kvm state is still valid at this point, because we may also have to
         * wait for a currently running IRQ handler.
         */
        if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
                int i;
                for (i = 0; i < assigned_dev->entries_nr; i++)
                        disable_irq(assigned_dev->host_msix_entries[i].vector);

                for (i = 0; i < assigned_dev->entries_nr; i++)
                        free_irq(assigned_dev->host_msix_entries[i].vector,
                                 assigned_dev);

                assigned_dev->entries_nr = 0;
                kfree(assigned_dev->host_msix_entries);
                kfree(assigned_dev->guest_msix_entries);
                pci_disable_msix(assigned_dev->dev);
        } else {
                /* Deal with MSI and INTx */
                if ((assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_HOST_INTX) &&
                    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        spin_lock_irq(&assigned_dev->intx_lock);
                        pci_intx(assigned_dev->dev, false);
                        spin_unlock_irq(&assigned_dev->intx_lock);
                        synchronize_irq(assigned_dev->host_irq);
                } else
                        disable_irq(assigned_dev->host_irq);

                free_irq(assigned_dev->host_irq, assigned_dev);

                if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
                        pci_disable_msi(assigned_dev->dev);
        }

        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

static int kvm_deassign_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *assigned_dev,
                            unsigned long irq_requested_type)
{
        unsigned long guest_irq_type, host_irq_type;

        if (!irqchip_in_kernel(kvm))
                return -EINVAL;
        /* no irq assignment to deassign */
        if (!assigned_dev->irq_requested_type)
                return -ENXIO;

        host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
        guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

        if (host_irq_type)
                deassign_host_irq(kvm, assigned_dev);
        if (guest_irq_type)
                deassign_guest_irq(kvm, assigned_dev);

        return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
                                  struct kvm_assigned_dev_kernel *assigned_dev)
{
        kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
                                     struct kvm_assigned_dev_kernel
                                     *assigned_dev)
{
        kvm_free_assigned_irq(kvm, assigned_dev);

        pci_reset_function(assigned_dev->dev);
        if (pci_load_and_free_saved_state(assigned_dev->dev,
                                          &assigned_dev->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&assigned_dev->dev->dev));
        else
                pci_restore_state(assigned_dev->dev);

        assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;

        pci_release_regions(assigned_dev->dev);
        pci_disable_device(assigned_dev->dev);
        pci_dev_put(assigned_dev->dev);

        list_del(&assigned_dev->list);
        kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
        struct list_head *ptr, *ptr2;
        struct kvm_assigned_dev_kernel *assigned_dev;

        list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
                assigned_dev = list_entry(ptr,
                                          struct kvm_assigned_dev_kernel,
                                          list);

                kvm_free_assigned_device(kvm, assigned_dev);
        }
}

static int assigned_device_enable_host_intx(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        irq_handler_t irq_handler;
        unsigned long flags;

        dev->host_irq = dev->dev->irq;

        /*
         * We can only share the IRQ line with other host devices if we are
         * able to disable the IRQ source at device-level - independently of
         * the guest driver.
         * Otherwise host devices may suffer from unbounded
         * IRQ latencies when the guest keeps the line asserted.
         */
        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                irq_handler = kvm_assigned_dev_intx;
                flags = IRQF_SHARED;
        } else {
                irq_handler = NULL;
                flags = IRQF_ONESHOT;
        }
        if (request_threaded_irq(dev->host_irq, irq_handler,
                                 kvm_assigned_dev_thread_intx, flags,
                                 dev->irq_name, dev))
                return -EIO;

        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                spin_lock_irq(&dev->intx_lock);
                pci_intx(dev->dev, true);
                spin_unlock_irq(&dev->intx_lock);
        }
        return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
                                           struct kvm_assigned_dev_kernel *dev)
{
        int r;

        if (!dev->dev->msi_enabled) {
                r = pci_enable_msi(dev->dev);
                if (r)
                        return r;
        }

        dev->host_irq = dev->dev->irq;
        if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
                                 kvm_assigned_dev_thread_msi, 0,
                                 dev->irq_name, dev)) {
                pci_disable_msi(dev->dev);
                return -EIO;
        }

        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        int i, r = -EINVAL;

        /* host_msix_entries and guest_msix_entries should have been
         * initialized */
        if (dev->entries_nr == 0)
                return r;

        r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
        if (r)
                return r;

        for (i = 0; i < dev->entries_nr; i++) {
                r = request_threaded_irq(dev->host_msix_entries[i].vector,
                                         kvm_assigned_dev_msix,
                                         kvm_assigned_dev_thread_msix,
                                         0, dev->irq_name, dev);
                if (r)
                        goto err;
        }

        return 0;
err:
        for (i -= 1; i >= 0; i--)
                free_irq(dev->host_msix_entries[i].vector, dev);
        pci_disable_msix(dev->dev);
        return r;
}

#endif

static int assigned_device_enable_guest_intx(struct kvm *kvm,
                                             struct kvm_assigned_dev_kernel *dev,
                                             struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = irq->guest_irq;
        return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev,
                                            struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
                                             struct kvm_assigned_dev_kernel *dev,
                                             struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

static int assign_host_irq(struct kvm *kvm,
                           struct kvm_assigned_dev_kernel *dev,
                           __u32 host_irq_type)
{
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
                return r;

        snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
                 pci_name(dev->dev));

        switch (host_irq_type) {
        case KVM_DEV_IRQ_HOST_INTX:
                r = assigned_device_enable_host_intx(kvm, dev);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_HOST_MSI:
                r = assigned_device_enable_host_msi(kvm, dev);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_HOST_MSIX:
                r = assigned_device_enable_host_msix(kvm, dev);
                break;
#endif
        default:
                r = -EINVAL;
        }
        dev->host_irq_disabled = false;

        if (!r)
                dev->irq_requested_type |= host_irq_type;

        return r;
}

static int assign_guest_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *dev,
                            struct kvm_assigned_irq *irq,
                            unsigned long guest_irq_type)
{
        int id;
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
                return r;

        id = kvm_request_irq_source_id(kvm);
        if (id < 0)
                return id;

        dev->irq_source_id = id;

        switch (guest_irq_type) {
        case KVM_DEV_IRQ_GUEST_INTX:
                r = assigned_device_enable_guest_intx(kvm, dev, irq);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_GUEST_MSI:
                r = assigned_device_enable_guest_msi(kvm, dev, irq);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_GUEST_MSIX:
                r = assigned_device_enable_guest_msix(kvm, dev, irq);
                break;
#endif
        default:
                r = -EINVAL;
        }

        if (!r) {
                dev->irq_requested_type |= guest_irq_type;
                if (dev->ack_notifier.gsi != -1)
                        kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
        } else
                kvm_free_irq_source_id(kvm, dev->irq_source_id);

        return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                                   struct kvm_assigned_irq *assigned_irq)
{
        int r = -EINVAL;
        struct kvm_assigned_dev_kernel *match;
        unsigned long host_irq_type, guest_irq_type;

        if (!irqchip_in_kernel(kvm))
                return r;

        mutex_lock(&kvm->lock);
        r = -ENODEV;
        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
        guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

        r = -EINVAL;
        /* can only assign one type at a time */
        if (hweight_long(host_irq_type) > 1)
                goto out;
        if (hweight_long(guest_irq_type) > 1)
                goto out;
        if (host_irq_type == 0 && guest_irq_type == 0)
                goto out;

        r = 0;
        if (host_irq_type)
                r = assign_host_irq(kvm, match, host_irq_type);
        if (r)
                goto out;

        if (guest_irq_type)
                r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
                                         struct kvm_assigned_irq
                                         *assigned_irq)
{
        int r = -ENODEV;
        struct kvm_assigned_dev_kernel *match;
        unsigned long irq_type;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
                                          KVM_DEV_IRQ_GUEST_MASK);
        r = kvm_deassign_irq(kvm, match, irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device. To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs. PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file. We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
        int i;
        bool bar_found = false;

        for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
                char *kpath, *syspath;
                struct path path;
                struct inode *inode;
                int r;

                if (!pci_resource_len(dev, i))
                        continue;

                kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
                if (!kpath)
                        return -ENOMEM;

                /* Per sysfs-rules, sysfs is always at /sys */
                syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
                kfree(kpath);
                if (!syspath)
                        return -ENOMEM;

                r = kern_path(syspath, LOOKUP_FOLLOW, &path);
                kfree(syspath);
                if (r)
                        return r;

                inode = path.dentry->d_inode;

                r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
                path_put(&path);
                if (r)
                        return r;

                bar_found = true;
        }

        /* If no resources, probably something special */
        if (!bar_found)
                return -EPERM;

        return 0;
#else
        return -EINVAL; /* No way to control the device without sysfs */
#endif
}

static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
                                      struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0, idx;
        struct kvm_assigned_dev_kernel *match;
        struct pci_dev *dev;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        idx = srcu_read_lock(&kvm->srcu);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (match) {
                /* device already assigned */
                r = -EEXIST;
                goto out;
        }

        match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
        if (match == NULL) {
                printk(KERN_INFO "%s: Couldn't allocate memory\n",
                       __func__);
                r = -ENOMEM;
                goto out;
        }
        dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
                                          assigned_dev->busnr,
                                          assigned_dev->devfn);
        if (!dev) {
                printk(KERN_INFO "%s: host device not found\n", __func__);
                r = -EINVAL;
                goto out_free;
        }

        /* Don't allow bridges to be assigned */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
                r = -EPERM;
                goto out_put;
        }

        r = probe_sysfs_permissions(dev);
        if (r)
                goto out_put;

        if (pci_enable_device(dev)) {
                printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
                r = -EBUSY;
                goto out_put;
        }
        r = pci_request_regions(dev, "kvm_assigned_device");
        if (r) {
                printk(KERN_INFO "%s: Could not get access to device regions\n",
                       __func__);
                goto out_disable;
        }

        pci_reset_function(dev);
        pci_save_state(dev);
        match->pci_saved_state = pci_store_saved_state(dev);
        if (!match->pci_saved_state)
                printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
                       __func__, dev_name(&dev->dev));

        if (!pci_intx_mask_supported(dev))
                assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

        match->assigned_dev_id = assigned_dev->assigned_dev_id;
        match->host_segnr = assigned_dev->segnr;
        match->host_busnr = assigned_dev->busnr;
        match->host_devfn = assigned_dev->devfn;
        match->flags = assigned_dev->flags;
        match->dev = dev;
        spin_lock_init(&match->intx_lock);
        spin_lock_init(&match->intx_mask_lock);
        match->irq_source_id = -1;
        match->kvm = kvm;
        match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

        list_add(&match->list, &kvm->arch.assigned_dev_head);

        if (!kvm->arch.iommu_domain) {
                r =
                    kvm_iommu_map_guest(kvm);
                if (r)
                        goto out_list_del;
        }
        r = kvm_assign_device(kvm, match);
        if (r)
                goto out_list_del;

out:
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
out_list_del:
        if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&dev->dev));
        list_del(&match->list);
        pci_release_regions(dev);
out_disable:
        pci_disable_device(dev);
out_put:
        pci_dev_put(dev);
out_free:
        kfree(match);
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
                                        struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                printk(KERN_INFO "%s: device hasn't been assigned before, "
                       "so cannot be deassigned\n", __func__);
                r = -EINVAL;
                goto out;
        }

        kvm_deassign_device(kvm, match);

        kvm_free_assigned_device(kvm, match);

out:
        mutex_unlock(&kvm->lock);
        return r;
}


#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
                                    struct kvm_assigned_msix_nr *entry_nr)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                     entry_nr->assigned_dev_id);
        if (!adev) {
                r = -EINVAL;
                goto msix_nr_out;
        }

        if (adev->entries_nr == 0) {
                adev->entries_nr = entry_nr->entry_nr;
                if (adev->entries_nr == 0 ||
                    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
                        r = -EINVAL;
                        goto msix_nr_out;
                }

                adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
                                                  entry_nr->entry_nr,
                                                  GFP_KERNEL);
                if (!adev->host_msix_entries) {
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
                adev->guest_msix_entries =
                        kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
                                GFP_KERNEL);
                if (!adev->guest_msix_entries) {
                        kfree(adev->host_msix_entries);
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
        } else /* Not allowed to set the MSI-X entry count twice */
                r = -EINVAL;
msix_nr_out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
                                       struct kvm_assigned_msix_entry *entry)
{
        int r = 0, i;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                     entry->assigned_dev_id);

        if (!adev) {
                r = -EINVAL;
                goto msix_entry_out;
        }

        for (i = 0; i < adev->entries_nr; i++)
                if (adev->guest_msix_entries[i].vector == 0 ||
                    adev->guest_msix_entries[i].entry == entry->entry) {
                        adev->guest_msix_entries[i].entry = entry->entry;
                        adev->guest_msix_entries[i].vector = entry->gsi;
                        adev->host_msix_entries[i].entry = entry->entry;
                        break;
                }
        if (i == adev->entries_nr) {
                r = -ENOSPC;
                goto msix_entry_out;
        }

msix_entry_out:
        mutex_unlock(&kvm->lock);

        return r;
}
#endif

static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
                                         struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match =
            kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                  assigned_dev->assigned_dev_id);
        if (!match) {
                r = -ENODEV;
                goto out;
        }

        spin_lock(&match->intx_mask_lock);

        match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
        match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

        if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
                if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
                        kvm_set_irq(match->kvm, match->irq_source_id,
                                    match->guest_irq, 0);
                        /*
                         * Masking at hardware-level is performed on demand,
                         * i.e. when an IRQ actually arrives at the host.
                         */
                } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        /*
                         * Unmask the IRQ line if required. Unmasking at
                         * device level will be performed by user space.
                         */
                        spin_lock_irq(&match->intx_lock);
                        if (match->host_irq_disabled) {
                                enable_irq(match->host_irq);
                                match->host_irq_disabled = false;
                        }
                        spin_unlock_irq(&match->intx_lock);
                }
        }

        spin_unlock(&match->intx_mask_lock);

out:
        mutex_unlock(&kvm->lock);
        return r;
}

long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
                                  unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        int r;

        switch (ioctl) {
        case KVM_ASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_IRQ: {
                r = -EOPNOTSUPP;
                break;
        }
        case KVM_ASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
#ifdef KVM_CAP_IRQ_ROUTING
        case KVM_SET_GSI_ROUTING: {
                struct kvm_irq_routing routing;
                struct kvm_irq_routing __user *urouting;
                struct kvm_irq_routing_entry *entries;

                r = -EFAULT;
                if (copy_from_user(&routing, argp, sizeof(routing)))
                        goto out;
                r = -EINVAL;
                if (routing.nr >= KVM_MAX_IRQ_ROUTES)
                        goto out;
                if (routing.flags)
                        goto out;
                r = -ENOMEM;
                entries = vmalloc(routing.nr * sizeof(*entries));
                if (!entries)
                        goto out;
                r = -EFAULT;
                urouting = argp;
                if (copy_from_user(entries, urouting->entries,
                                   routing.nr * sizeof(*entries)))
                        goto out_free_irq_routing;
                r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                        routing.flags);
        out_free_irq_routing:
                vfree(entries);
                break;
        }
#endif /* KVM_CAP_IRQ_ROUTING */
#ifdef __KVM_HAVE_MSIX
        case KVM_ASSIGN_SET_MSIX_NR: {
                struct kvm_assigned_msix_nr entry_nr;
                r = -EFAULT;
                if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
                        goto out;
                r = kvm_vm_ioctl_set_msix_nr(kvm,
                                             &entry_nr);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_SET_MSIX_ENTRY: {
                struct kvm_assigned_msix_entry entry;
                r = -EFAULT;
                if (copy_from_user(&entry, argp, sizeof entry))
                        goto out;
                r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
                if (r)
                        goto out;
                break;
        }
#endif
        case KVM_ASSIGN_SET_INTX_MASK: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
                break;
        }
        default:
                r = -ENOTTY;
                break;
        }
out:
        return r;
}