/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
				    *assigned_dev, int irq)
{
	int i, index;
	struct msix_entry *host_msix_entries;

	host_msix_entries = assigned_dev->host_msix_entries;

	index = -1;
	for (i = 0; i < assigned_dev->entries_nr; i++)
		if (irq == host_msix_entries[i].vector) {
			index = i;
			break;
		}
	if (index < 0)
		printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

	return index;
}

static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret;

	spin_lock(&assigned_dev->intx_lock);
	if (pci_check_and_mask_intx(assigned_dev->dev)) {
		assigned_dev->host_irq_disabled = true;
		ret = IRQ_WAKE_THREAD;
	} else
		ret = IRQ_NONE;
	spin_unlock(&assigned_dev->intx_lock);

	return ret;
}

static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
				 int vector)
{
	if (unlikely(assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_GUEST_INTX)) {
		spin_lock(&assigned_dev->intx_mask_lock);
		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id, vector, 1);
		spin_unlock(&assigned_dev->intx_mask_lock);
	} else
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    vector, 1);
}

static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
		spin_lock_irq(&assigned_dev->intx_lock);
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
		spin_unlock_irq(&assigned_dev->intx_lock);
	}

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}
#endif

#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
	}

	return IRQ_HANDLED;
}
#endif
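/*
 * The handlers above run as threaded interrupts: only INTx with PCI 2.3
 * mask support installs a hard handler, which masks the device and
 * returns IRQ_WAKE_THREAD. In every mode the threaded handler injects
 * the event into the guest via kvm_assigned_dev_raise_guest_irq().
 */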
/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev =
		container_of(kian, struct kvm_assigned_dev_kernel,
			     ack_notifier);

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);

	spin_lock(&dev->intx_mask_lock);

	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
		bool reassert = false;

		spin_lock_irq(&dev->intx_lock);
		/*
		 * The guest IRQ may be shared so this ack can come from an
		 * IRQ for another guest device.
		 */
		if (dev->host_irq_disabled) {
			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
				enable_irq(dev->host_irq);
			else if (!pci_check_and_unmask_intx(dev->dev))
				reassert = true;
			dev->host_irq_disabled = reassert;
		}
		spin_unlock_irq(&dev->intx_lock);

		if (reassert)
			kvm_set_irq(dev->kvm, dev->irq_source_id,
				    dev->guest_irq, 1);
	}

	spin_unlock(&dev->intx_mask_lock);
}

static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	if (assigned_dev->ack_notifier.gsi != -1)
		kvm_unregister_irq_ack_notifier(kvm,
						&assigned_dev->ack_notifier);

	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 0);

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}
/* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * We disable the irq here to prevent further events.
	 *
	 * Note that this may result in a nested disable if the interrupt
	 * type is INTx, but that is fine since we are going to free it.
	 *
	 * If this function is part of VM destruction, ensure that the kvm
	 * state is still valid at this point, since we may also have to
	 * wait for a currently running IRQ handler.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq(assigned_dev->host_msix_entries[i].vector);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		if ((assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_HOST_INTX) &&
		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			spin_lock_irq(&assigned_dev->intx_lock);
			pci_intx(assigned_dev->dev, false);
			spin_unlock_irq(&assigned_dev->intx_lock);
			synchronize_irq(assigned_dev->host_irq);
		} else
			disable_irq(assigned_dev->host_irq);

		free_irq(assigned_dev->host_irq, assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

static int kvm_deassign_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *assigned_dev,
			    unsigned long irq_requested_type)
{
	unsigned long guest_irq_type, host_irq_type;

	if (!irqchip_in_kernel(kvm))
		return -EINVAL;
	/* no irq assignment to deassign */
	if (!assigned_dev->irq_requested_type)
		return -ENXIO;

	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

	if (host_irq_type)
		deassign_host_irq(kvm, assigned_dev);
	if (guest_irq_type)
		deassign_guest_irq(kvm, assigned_dev);

	return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
				  struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	kvm_free_assigned_irq(kvm, assigned_dev);

	pci_reset_function(assigned_dev->dev);
	if (pci_load_and_free_saved_state(assigned_dev->dev,
					  &assigned_dev->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&assigned_dev->dev->dev));
	else
		pci_restore_state(assigned_dev->dev);

	assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *ptr, *ptr2;
	struct kvm_assigned_dev_kernel *assigned_dev;

	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
		assigned_dev = list_entry(ptr,
					  struct kvm_assigned_dev_kernel,
					  list);

		kvm_free_assigned_device(kvm, assigned_dev);
	}
}
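/*
 * Set up host-side INTx delivery for an assigned device. With PCI 2.3
 * INTx masking the line can be shared with other host devices; otherwise
 * the IRQ is requested exclusively and handled as a oneshot threaded
 * interrupt.
 */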
static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	irq_handler_t irq_handler;
	unsigned long flags;

	dev->host_irq = dev->dev->irq;

	/*
	 * We can only share the IRQ line with other host devices if we are
	 * able to disable the IRQ source at device level - independently of
	 * the guest driver. Otherwise host devices may suffer from unbounded
	 * IRQ latencies when the guest keeps the line asserted.
	 */
	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		irq_handler = kvm_assigned_dev_intx;
		flags = IRQF_SHARED;
	} else {
		irq_handler = NULL;
		flags = IRQF_ONESHOT;
	}
	if (request_threaded_irq(dev->host_irq, irq_handler,
				 kvm_assigned_dev_thread_intx, flags,
				 dev->irq_name, dev))
		return -EIO;

	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		spin_lock_irq(&dev->intx_lock);
		pci_intx(dev->dev, true);
		spin_unlock_irq(&dev->intx_lock);
	}
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_threaded_irq(dev->host_irq, NULL,
				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_threaded_irq(dev->host_msix_entries[i].vector,
					 NULL, kvm_assigned_dev_thread_msix,
					 0, dev->irq_name, dev);
		if (r)
			goto err;
	}

	return 0;
err:
	for (i -= 1; i >= 0; i--)
		free_irq(dev->host_msix_entries[i].vector, dev);
	pci_disable_msix(dev->dev);
	return r;
}
#endif

static int assigned_device_enable_guest_intx(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = irq->guest_irq;
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

static int assign_host_irq(struct kvm *kvm,
			   struct kvm_assigned_dev_kernel *dev,
			   __u32 host_irq_type)
{
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
		return r;

	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
		 pci_name(dev->dev));

	switch (host_irq_type) {
	case KVM_DEV_IRQ_HOST_INTX:
		r = assigned_device_enable_host_intx(kvm, dev);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_HOST_MSI:
		r = assigned_device_enable_host_msi(kvm, dev);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_HOST_MSIX:
		r = assigned_device_enable_host_msix(kvm, dev);
		break;
#endif
	default:
		r = -EINVAL;
	}
	dev->host_irq_disabled = false;

	if (!r)
		dev->irq_requested_type |= host_irq_type;

	return r;
}
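/*
 * Set up the guest side of an assigned interrupt: allocate an IRQ source
 * id and record the guest GSI to raise. Only INTx registers an ack
 * notifier; MSI and MSI-X are edge triggered and never need to be
 * re-armed after an ack.
 */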
static int assign_guest_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *dev,
			    struct kvm_assigned_irq *irq,
			    unsigned long guest_irq_type)
{
	int id;
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
		return r;

	id = kvm_request_irq_source_id(kvm);
	if (id < 0)
		return id;

	dev->irq_source_id = id;

	switch (guest_irq_type) {
	case KVM_DEV_IRQ_GUEST_INTX:
		r = assigned_device_enable_guest_intx(kvm, dev, irq);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_GUEST_MSI:
		r = assigned_device_enable_guest_msi(kvm, dev, irq);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_GUEST_MSIX:
		r = assigned_device_enable_guest_msix(kvm, dev, irq);
		break;
#endif
	default:
		r = -EINVAL;
	}

	if (!r) {
		dev->irq_requested_type |= guest_irq_type;
		if (dev->ack_notifier.gsi != -1)
			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
	} else
		kvm_free_irq_source_id(kvm, dev->irq_source_id);

	return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq *assigned_irq)
{
	int r = -EINVAL;
	struct kvm_assigned_dev_kernel *match;
	unsigned long host_irq_type, guest_irq_type;

	if (!irqchip_in_kernel(kvm))
		return r;

	mutex_lock(&kvm->lock);
	r = -ENODEV;
	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

	r = -EINVAL;
	/* can only assign one type at a time */
	if (hweight_long(host_irq_type) > 1)
		goto out;
	if (hweight_long(guest_irq_type) > 1)
		goto out;
	if (host_irq_type == 0 && guest_irq_type == 0)
		goto out;

	r = 0;
	if (host_irq_type)
		r = assign_host_irq(kvm, match, host_irq_type);
	if (r)
		goto out;

	if (guest_irq_type)
		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
					 struct kvm_assigned_irq
					 *assigned_irq)
{
	int r = -ENODEV;
	struct kvm_assigned_dev_kernel *match;
	unsigned long irq_type;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
					  KVM_DEV_IRQ_GUEST_MASK);
	r = kvm_deassign_irq(kvm, match, irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}
/*
 * We want to test whether the caller has been granted permissions to
 * use this device. To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs. PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file. We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
	int i;
	bool bar_found = false;

	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
		char *kpath, *syspath;
		struct path path;
		struct inode *inode;
		int r;

		if (!pci_resource_len(dev, i))
			continue;

		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
		if (!kpath)
			return -ENOMEM;

		/* Per sysfs-rules, sysfs is always at /sys */
		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
		kfree(kpath);
		if (!syspath)
			return -ENOMEM;

		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
		kfree(syspath);
		if (r)
			return r;

		inode = path.dentry->d_inode;

		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
		path_put(&path);
		if (r)
			return r;

		bar_found = true;
	}

	/* If no resources, probably something special */
	if (!bar_found)
		return -EPERM;

	return 0;
#else
	return -EINVAL; /* No way to control the device without sysfs */
#endif
}
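/*
 * KVM_ASSIGN_PCI_DEVICE: take ownership of a host PCI device for a VM.
 * The device must be a plain PCI function (bridges are rejected), the
 * caller needs sysfs access to its BAR resources, and
 * KVM_DEV_ASSIGN_ENABLE_IOMMU is mandatory so that the device ends up
 * isolated in the VM's IOMMU domain.
 */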
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0, idx;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;
	u8 header_type;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	idx = srcu_read_lock(&kvm->srcu);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
					  assigned_dev->busnr,
					  assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	/* Don't allow bridges to be assigned */
	pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
	if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) {
		r = -EPERM;
		goto out_put;
	}

	r = probe_sysfs_permissions(dev);
	if (r)
		goto out_put;

	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	pci_reset_function(dev);
	pci_save_state(dev);
	match->pci_saved_state = pci_store_saved_state(dev);
	if (!match->pci_saved_state)
		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
		       __func__, dev_name(&dev->dev));

	if (!pci_intx_mask_supported(dev))
		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_segnr = assigned_dev->segnr;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->intx_lock);
	spin_lock_init(&match->intx_mask_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (!kvm->arch.iommu_domain) {
		r = kvm_iommu_map_guest(kvm);
		if (r)
			goto out_list_del;
	}
	r = kvm_assign_device(kvm, match);
	if (r)
		goto out_list_del;

out:
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&dev->dev));
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
					struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		printk(KERN_INFO "%s: device hasn't been assigned before, "
		       "so cannot be deassigned\n", __func__);
		r = -EINVAL;
		goto out;
	}

	kvm_deassign_device(kvm, match);

	kvm_free_assigned_device(kvm, match);

out:
	mutex_unlock(&kvm->lock);
	return r;
}


#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
				    struct kvm_assigned_msix_nr *entry_nr)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				     entry_nr->assigned_dev_id);
	if (!adev) {
		r = -EINVAL;
		goto msix_nr_out;
	}

	if (adev->entries_nr == 0) {
		adev->entries_nr = entry_nr->entry_nr;
		if (adev->entries_nr == 0 ||
		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
			r = -EINVAL;
			goto msix_nr_out;
		}

		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
						  entry_nr->entry_nr,
						  GFP_KERNEL);
		if (!adev->host_msix_entries) {
			r = -ENOMEM;
			goto msix_nr_out;
		}
		adev->guest_msix_entries =
			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
				GFP_KERNEL);
		if (!adev->guest_msix_entries) {
			kfree(adev->host_msix_entries);
			r = -ENOMEM;
			goto msix_nr_out;
		}
	} else /* Not allowed to set the MSI-X entry number twice */
		r = -EINVAL;
msix_nr_out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
				       struct kvm_assigned_msix_entry *entry)
{
	int r = 0, i;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				     entry->assigned_dev_id);

	if (!adev) {
		r = -EINVAL;
		goto msix_entry_out;
	}

	for (i = 0; i < adev->entries_nr; i++)
		if (adev->guest_msix_entries[i].vector == 0 ||
		    adev->guest_msix_entries[i].entry == entry->entry) {
			adev->guest_msix_entries[i].entry = entry->entry;
			adev->guest_msix_entries[i].vector = entry->gsi;
			adev->host_msix_entries[i].entry = entry->entry;
			break;
		}
	if (i == adev->entries_nr) {
		r = -ENOSPC;
		goto msix_entry_out;
	}

msix_entry_out:
	mutex_unlock(&kvm->lock);

	return r;
}
#endif
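/*
 * KVM_ASSIGN_SET_INTX_MASK: update the virtual INTx mask state. Masking
 * deasserts the guest IRQ; host-side masking then happens lazily when
 * the next interrupt arrives. Unmasking re-enables a disabled host line,
 * except in the PCI 2.3 case where device-level unmasking is left to
 * user space.
 */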
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
					 struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		r = -ENODEV;
		goto out;
	}

	spin_lock(&match->intx_mask_lock);

	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
			kvm_set_irq(match->kvm, match->irq_source_id,
				    match->guest_irq, 0);
			/*
			 * Masking at hardware-level is performed on demand,
			 * i.e. when an IRQ actually arrives at the host.
			 */
		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			/*
			 * Unmask the IRQ line if required. Unmasking at
			 * device level will be performed by user space.
			 */
			spin_lock_irq(&match->intx_lock);
			if (match->host_irq_disabled) {
				enable_irq(match->host_irq);
				match->host_irq_disabled = false;
			}
			spin_unlock_irq(&match->intx_lock);
		}
	}

	spin_unlock(&match->intx_mask_lock);

out:
	mutex_unlock(&kvm->lock);
	return r;
}
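/*
 * Dispatcher for the device assignment ioctls issued on a VM fd.
 *
 * A minimal sketch of the userspace call sequence for an INTx device,
 * with illustrative field values and error handling omitted:
 *
 *	struct kvm_assigned_pci_dev dev = {
 *		.assigned_dev_id = 1,
 *		.segnr = 0, .busnr = 1, .devfn = 0,
 *		.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU,
 *	};
 *	ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
 *
 *	struct kvm_assigned_irq irq = {
 *		.assigned_dev_id = 1,
 *		.guest_irq = 10,
 *		.flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX,
 *	};
 *	ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
 */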
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
#ifdef KVM_CAP_IRQ_ROUTING
	case KVM_SET_GSI_ROUTING: {
		struct kvm_irq_routing routing;
		struct kvm_irq_routing __user *urouting;
		struct kvm_irq_routing_entry *entries;

		r = -EFAULT;
		if (copy_from_user(&routing, argp, sizeof(routing)))
			goto out;
		r = -EINVAL;
		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
			goto out;
		if (routing.flags)
			goto out;
		r = -ENOMEM;
		entries = vmalloc(routing.nr * sizeof(*entries));
		if (!entries)
			goto out;
		r = -EFAULT;
		urouting = argp;
		if (copy_from_user(entries, urouting->entries,
				   routing.nr * sizeof(*entries)))
			goto out_free_irq_routing;
		r = kvm_set_irq_routing(kvm, entries, routing.nr,
					routing.flags);
	out_free_irq_routing:
		vfree(entries);
		break;
	}
#endif /* KVM_CAP_IRQ_ROUTING */
#ifdef __KVM_HAVE_MSIX
	case KVM_ASSIGN_SET_MSIX_NR: {
		struct kvm_assigned_msix_nr entry_nr;
		r = -EFAULT;
		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
			goto out;
		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_ENTRY: {
		struct kvm_assigned_msix_entry entry;
		r = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof entry))
			goto out;
		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
		if (r)
			goto out;
		break;
	}
#endif
	case KVM_ASSIGN_SET_INTX_MASK: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
		break;
	}
	default:
		r = -ENOTTY;
		break;
	}
out:
	return r;
}