Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
drivers/iommu/amd_iommu_v2.c at v4.19 · 1020 lines · 23 kB

/*
 * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/iommu.h>
#include <linux/wait.h>
#include <linux/pci.h>
#include <linux/gfp.h>

#include "amd_iommu_types.h"
#include "amd_iommu_proto.h"

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>");

#define MAX_DEVICES		0x10000
#define PRI_QUEUE_SIZE		512

struct pri_queue {
	atomic_t inflight;
	bool finish;
	int status;
};

struct pasid_state {
	struct list_head list;			/* For global state-list */
	atomic_t count;				/* Reference count */
	unsigned mmu_notifier_count;		/* Counting nested mmu_notifier
						   calls */
	struct mm_struct *mm;			/* mm_struct for the faults */
	struct mmu_notifier mn;			/* mmu_notifier handle */
	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
	struct device_state *device_state;	/* Link to our device_state */
	int pasid;				/* PASID index */
	bool invalid;				/* Used during setup and
						   teardown of the pasid */
	spinlock_t lock;			/* Protect pri_queues and
						   mmu_notifier_count */
	wait_queue_head_t wq;			/* To wait for count == 0 */
};

struct device_state {
	struct list_head list;
	u16 devid;
	atomic_t count;
	struct pci_dev *pdev;
	struct pasid_state **states;
	struct iommu_domain *domain;
	int pasid_levels;
	int max_pasids;
	amd_iommu_invalid_ppr_cb inv_ppr_cb;
	amd_iommu_invalidate_ctx inv_ctx_cb;
	spinlock_t lock;
	wait_queue_head_t wq;
};

struct fault {
	struct work_struct work;
	struct device_state *dev_state;
	struct pasid_state *state;
	struct mm_struct *mm;
	u64 address;
	u16 devid;
	u16 pasid;
	u16 tag;
	u16 finish;
	u16 flags;
};

static LIST_HEAD(state_list);
static spinlock_t state_lock;

static struct workqueue_struct *iommu_wq;

static void free_pasid_states(struct device_state *dev_state);

static u16 device_id(struct pci_dev *pdev)
{
	u16 devid;

	devid = pdev->bus->number;
	devid = (devid << 8) | pdev->devfn;

	return devid;
}
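
/*
 * Worked example (illustrative, not from the original source): for PCI
 * device 0000:07:00.2 the bus number is 0x07 and devfn is
 * (slot << 3 | fn) = 0x02, so device_id() returns
 * (0x07 << 8) | 0x02 = 0x0702 -- the 16-bit requester ID used to index
 * the per-device state below.
 */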

static struct device_state *__get_device_state(u16 devid)
{
	struct device_state *dev_state;

	list_for_each_entry(dev_state, &state_list, list) {
		if (dev_state->devid == devid)
			return dev_state;
	}

	return NULL;
}

static struct device_state *get_device_state(u16 devid)
{
	struct device_state *dev_state;
	unsigned long flags;

	spin_lock_irqsave(&state_lock, flags);
	dev_state = __get_device_state(devid);
	if (dev_state != NULL)
		atomic_inc(&dev_state->count);
	spin_unlock_irqrestore(&state_lock, flags);

	return dev_state;
}

static void free_device_state(struct device_state *dev_state)
{
	struct iommu_group *group;

	/*
	 * First detach device from domain - No more PRI requests will arrive
	 * from that device after it is unbound from the IOMMUv2 domain.
	 */
	group = iommu_group_get(&dev_state->pdev->dev);
	if (WARN_ON(!group))
		return;

	iommu_detach_group(dev_state->domain, group);

	iommu_group_put(group);

	/* Everything is down now, free the IOMMUv2 domain */
	iommu_domain_free(dev_state->domain);

	/* Finally get rid of the device-state */
	kfree(dev_state);
}

static void put_device_state(struct device_state *dev_state)
{
	if (atomic_dec_and_test(&dev_state->count))
		wake_up(&dev_state->wq);
}

/* Must be called under dev_state->lock */
static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
						  int pasid, bool alloc)
{
	struct pasid_state **root, **ptr;
	int level, index;

	level = dev_state->pasid_levels;
	root  = dev_state->states;

	while (true) {

		index = (pasid >> (9 * level)) & 0x1ff;
		ptr   = &root[index];

		if (level == 0)
			break;

		if (*ptr == NULL) {
			if (!alloc)
				return NULL;

			*ptr = (void *)get_zeroed_page(GFP_ATOMIC);
			if (*ptr == NULL)
				return NULL;
		}

		root   = (struct pasid_state **)*ptr;
		level -= 1;
	}

	return ptr;
}
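
/*
 * Worked example (illustrative): with pasid_levels == 1 the table has
 * two levels of 512 (2^9) pointers each. Looking up pasid 0x345 walks
 * index (0x345 >> 9) & 0x1ff = 0x001 in the root page, then index
 * 0x345 & 0x1ff = 0x145 in the leaf page -- the same shape as a
 * two-level page-table walk keyed by 9-bit chunks of the PASID.
 */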

static int set_pasid_state(struct device_state *dev_state,
			   struct pasid_state *pasid_state,
			   int pasid)
{
	struct pasid_state **ptr;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, true);

	ret = -ENOMEM;
	if (ptr == NULL)
		goto out_unlock;

	ret = -ENOMEM;
	if (*ptr != NULL)
		goto out_unlock;

	*ptr = pasid_state;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);

	return ret;
}

static void clear_pasid_state(struct device_state *dev_state, int pasid)
{
	struct pasid_state **ptr;
	unsigned long flags;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, true);

	if (ptr == NULL)
		goto out_unlock;

	*ptr = NULL;

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);
}

static struct pasid_state *get_pasid_state(struct device_state *dev_state,
					   int pasid)
{
	struct pasid_state **ptr, *ret = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, false);

	if (ptr == NULL)
		goto out_unlock;

	ret = *ptr;
	if (ret)
		atomic_inc(&ret->count);

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);

	return ret;
}

static void free_pasid_state(struct pasid_state *pasid_state)
{
	kfree(pasid_state);
}

static void put_pasid_state(struct pasid_state *pasid_state)
{
	if (atomic_dec_and_test(&pasid_state->count))
		wake_up(&pasid_state->wq);
}

static void put_pasid_state_wait(struct pasid_state *pasid_state)
{
	atomic_dec(&pasid_state->count);
	wait_event(pasid_state->wq, !atomic_read(&pasid_state->count));
	free_pasid_state(pasid_state);
}

static void unbind_pasid(struct pasid_state *pasid_state)
{
	struct iommu_domain *domain;

	domain = pasid_state->device_state->domain;

	/*
	 * Mark pasid_state as invalid, no more faults will be added to the
	 * work queue after this is visible everywhere.
	 */
	pasid_state->invalid = true;

	/* Make sure this is visible */
	smp_wmb();

	/* After this the device/pasid can't access the mm anymore */
	amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);

	/* Make sure no more pending faults are in the queue */
	flush_workqueue(iommu_wq);
}

static void free_pasid_states_level1(struct pasid_state **tbl)
{
	int i;

	for (i = 0; i < 512; ++i) {
		if (tbl[i] == NULL)
			continue;

		free_page((unsigned long)tbl[i]);
	}
}

static void free_pasid_states_level2(struct pasid_state **tbl)
{
	struct pasid_state **ptr;
	int i;

	for (i = 0; i < 512; ++i) {
		if (tbl[i] == NULL)
			continue;

		ptr = (struct pasid_state **)tbl[i];
		free_pasid_states_level1(ptr);
	}
}

static void free_pasid_states(struct device_state *dev_state)
{
	struct pasid_state *pasid_state;
	int i;

	for (i = 0; i < dev_state->max_pasids; ++i) {
		pasid_state = get_pasid_state(dev_state, i);
		if (pasid_state == NULL)
			continue;

		put_pasid_state(pasid_state);

		/*
		 * This will call the mn_release function and
		 * unbind the PASID
		 */
		mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);

		put_pasid_state_wait(pasid_state); /* Reference taken in
						      amd_iommu_bind_pasid */

		/* Drop reference taken in amd_iommu_bind_pasid */
		put_device_state(dev_state);
	}

	if (dev_state->pasid_levels == 2)
		free_pasid_states_level2(dev_state->states);
	else if (dev_state->pasid_levels == 1)
		free_pasid_states_level1(dev_state->states);
	else
		BUG_ON(dev_state->pasid_levels != 0);

	free_page((unsigned long)dev_state->states);
}

static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
{
	return container_of(mn, struct pasid_state, mn);
}

static void __mn_flush_page(struct mmu_notifier *mn,
			    unsigned long address)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;

	pasid_state = mn_to_state(mn);
	dev_state   = pasid_state->device_state;

	amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
}

static int mn_clear_flush_young(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long start,
				unsigned long end)
{
	for (; start < end; start += PAGE_SIZE)
		__mn_flush_page(mn, start);

	return 0;
}

static void mn_invalidate_range(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;

	pasid_state = mn_to_state(mn);
	dev_state   = pasid_state->device_state;

	if ((start ^ (end - 1)) < PAGE_SIZE)
		amd_iommu_flush_page(dev_state->domain, pasid_state->pasid,
				     start);
	else
		amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid);
}

static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;
	bool run_inv_ctx_cb;

	might_sleep();

	pasid_state    = mn_to_state(mn);
	dev_state      = pasid_state->device_state;
	run_inv_ctx_cb = !pasid_state->invalid;

	if (run_inv_ctx_cb && dev_state->inv_ctx_cb)
		dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid);

	unbind_pasid(pasid_state);
}

static const struct mmu_notifier_ops iommu_mn = {
	.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
	.release		= mn_release,
	.clear_flush_young	= mn_clear_flush_young,
	.invalidate_range	= mn_invalidate_range,
};
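
/*
 * Note on the range check in mn_invalidate_range() above: the test
 * (start ^ (end - 1)) < PAGE_SIZE holds only when start and end - 1
 * agree on all bits above the page offset, i.e. the range lies within
 * one page. Example (illustrative, 4 KiB pages): start = 0x1000,
 * end = 0x2000 gives 0x1000 ^ 0x1fff = 0xfff < 0x1000, so a single
 * page is flushed; start = 0x1000, end = 0x3000 gives
 * 0x1000 ^ 0x2fff = 0x3fff, which triggers the full per-PASID TLB
 * flush instead.
 */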

static void set_pri_tag_status(struct pasid_state *pasid_state,
			       u16 tag, int status)
{
	unsigned long flags;

	spin_lock_irqsave(&pasid_state->lock, flags);
	pasid_state->pri[tag].status = status;
	spin_unlock_irqrestore(&pasid_state->lock, flags);
}

static void finish_pri_tag(struct device_state *dev_state,
			   struct pasid_state *pasid_state,
			   u16 tag)
{
	unsigned long flags;

	spin_lock_irqsave(&pasid_state->lock, flags);
	if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
	    pasid_state->pri[tag].finish) {
		amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
				       pasid_state->pri[tag].status, tag);
		pasid_state->pri[tag].finish = false;
		pasid_state->pri[tag].status = PPR_SUCCESS;
	}
	spin_unlock_irqrestore(&pasid_state->lock, flags);
}

static void handle_fault_error(struct fault *fault)
{
	int status;

	if (!fault->dev_state->inv_ppr_cb) {
		set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
		return;
	}

	status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
					      fault->pasid,
					      fault->address,
					      fault->flags);
	switch (status) {
	case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
		set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
		break;
	case AMD_IOMMU_INV_PRI_RSP_INVALID:
		set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
		break;
	case AMD_IOMMU_INV_PRI_RSP_FAIL:
		set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
		break;
	default:
		BUG();
	}
}

static bool access_error(struct vm_area_struct *vma, struct fault *fault)
{
	unsigned long requested = 0;

	if (fault->flags & PPR_FAULT_EXEC)
		requested |= VM_EXEC;

	if (fault->flags & PPR_FAULT_READ)
		requested |= VM_READ;

	if (fault->flags & PPR_FAULT_WRITE)
		requested |= VM_WRITE;

	return (requested & ~vma->vm_flags) != 0;
}

static void do_fault(struct work_struct *work)
{
	struct fault *fault = container_of(work, struct fault, work);
	struct vm_area_struct *vma;
	vm_fault_t ret = VM_FAULT_ERROR;
	unsigned int flags = 0;
	struct mm_struct *mm;
	u64 address;

	mm = fault->state->mm;
	address = fault->address;

	if (fault->flags & PPR_FAULT_USER)
		flags |= FAULT_FLAG_USER;
	if (fault->flags & PPR_FAULT_WRITE)
		flags |= FAULT_FLAG_WRITE;
	flags |= FAULT_FLAG_REMOTE;

	down_read(&mm->mmap_sem);
	vma = find_extend_vma(mm, address);
	if (!vma || address < vma->vm_start)
		/* failed to get a vma in the right range */
		goto out;

	/* Check if we have the right permissions on the vma */
	if (access_error(vma, fault))
		goto out;

	ret = handle_mm_fault(vma, address, flags);
out:
	up_read(&mm->mmap_sem);

	if (ret & VM_FAULT_ERROR)
		/* failed to service fault */
		handle_fault_error(fault);

	finish_pri_tag(fault->dev_state, fault->state, fault->tag);

	put_pasid_state(fault->state);

	kfree(fault);
}
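
/*
 * PRI tag lifecycle (editorial summary of the code around this point):
 * ppr_notifier() below increments pri[tag].inflight and latches the
 * fault's "finish" bit before queueing do_fault(). Both do_fault() and
 * the error paths funnel into finish_pri_tag(), which sends the PPR
 * response via amd_iommu_complete_ppr() only once the last in-flight
 * fault for that tag completes *and* the device asked for a response
 * (finish == true).
 */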

static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
{
	struct amd_iommu_fault *iommu_fault;
	struct pasid_state *pasid_state;
	struct device_state *dev_state;
	unsigned long flags;
	struct fault *fault;
	bool finish;
	u16 tag, devid;
	int ret;
	struct iommu_dev_data *dev_data;
	struct pci_dev *pdev = NULL;

	iommu_fault = data;
	tag         = iommu_fault->tag & 0x1ff;
	finish      = (iommu_fault->tag >> 9) & 1;

	devid = iommu_fault->device_id;
	pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
					   devid & 0xff);
	if (!pdev)
		return -ENODEV;
	dev_data = get_dev_data(&pdev->dev);

	/* In kdump kernel pci dev is not initialized yet -> send INVALID */
	ret = NOTIFY_DONE;
	if (translation_pre_enabled(amd_iommu_rlookup_table[devid])
		&& dev_data->defer_attach) {
		amd_iommu_complete_ppr(pdev, iommu_fault->pasid,
				       PPR_INVALID, tag);
		goto out;
	}

	dev_state = get_device_state(iommu_fault->device_id);
	if (dev_state == NULL)
		goto out;

	pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
	if (pasid_state == NULL || pasid_state->invalid) {
		/* We know the device but not the PASID -> send INVALID */
		amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
				       PPR_INVALID, tag);
		goto out_drop_state;
	}

	spin_lock_irqsave(&pasid_state->lock, flags);
	atomic_inc(&pasid_state->pri[tag].inflight);
	if (finish)
		pasid_state->pri[tag].finish = true;
	spin_unlock_irqrestore(&pasid_state->lock, flags);

	fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
	if (fault == NULL) {
		/* We are OOM - send success and let the device re-fault */
		finish_pri_tag(dev_state, pasid_state, tag);
		goto out_drop_state;
	}

	fault->dev_state = dev_state;
	fault->address   = iommu_fault->address;
	fault->state     = pasid_state;
	fault->tag       = tag;
	fault->finish    = finish;
	fault->pasid     = iommu_fault->pasid;
	fault->flags     = iommu_fault->flags;
	INIT_WORK(&fault->work, do_fault);

	queue_work(iommu_wq, &fault->work);

	ret = NOTIFY_OK;

out_drop_state:

	if (ret != NOTIFY_OK && pasid_state)
		put_pasid_state(pasid_state);

	put_device_state(dev_state);

out:
	return ret;
}

static struct notifier_block ppr_nb = {
	.notifier_call = ppr_notifier,
};

int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
			 struct task_struct *task)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;
	struct mm_struct *mm;
	u16 devid;
	int ret;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid     = device_id(pdev);
	dev_state = get_device_state(devid);

	if (dev_state == NULL)
		return -EINVAL;

	ret = -EINVAL;
	if (pasid < 0 || pasid >= dev_state->max_pasids)
		goto out;

	ret = -ENOMEM;
	pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
	if (pasid_state == NULL)
		goto out;

	atomic_set(&pasid_state->count, 1);
	init_waitqueue_head(&pasid_state->wq);
	spin_lock_init(&pasid_state->lock);

	mm                        = get_task_mm(task);
	pasid_state->mm           = mm;
	pasid_state->device_state = dev_state;
	pasid_state->pasid        = pasid;
	pasid_state->invalid      = true; /* Mark as valid only if we are
					     done with setting up the pasid */
	pasid_state->mn.ops       = &iommu_mn;

	if (pasid_state->mm == NULL)
		goto out_free;

	mmu_notifier_register(&pasid_state->mn, mm);

	ret = set_pasid_state(dev_state, pasid_state, pasid);
	if (ret)
		goto out_unregister;

	ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
					__pa(pasid_state->mm->pgd));
	if (ret)
		goto out_clear_state;

	/* Now we are ready to handle faults */
	pasid_state->invalid = false;

	/*
	 * Drop the reference to the mm_struct here. We rely on the
	 * mmu_notifier release call-back to inform us when the mm
	 * is going away.
	 */
	mmput(mm);

	return 0;

out_clear_state:
	clear_pasid_state(dev_state, pasid);

out_unregister:
	mmu_notifier_unregister(&pasid_state->mn, mm);
	mmput(mm);

out_free:
	free_pasid_state(pasid_state);

out:
	put_device_state(dev_state);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_bind_pasid);

void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;
	u16 devid;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return;

	devid = device_id(pdev);
	dev_state = get_device_state(devid);
	if (dev_state == NULL)
		return;

	if (pasid < 0 || pasid >= dev_state->max_pasids)
		goto out;

	pasid_state = get_pasid_state(dev_state, pasid);
	if (pasid_state == NULL)
		goto out;
	/*
	 * Drop reference taken here. We are safe because we still hold
	 * the reference taken in the amd_iommu_bind_pasid function.
	 */
	put_pasid_state(pasid_state);

	/* Clear the pasid state so that the pasid can be re-used */
	clear_pasid_state(dev_state, pasid_state->pasid);

	/*
	 * Call mmu_notifier_unregister to drop our reference
	 * to pasid_state->mm
	 */
	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);

	put_pasid_state_wait(pasid_state); /* Reference taken in
					      amd_iommu_bind_pasid */
out:
	/* Drop reference taken in this function */
	put_device_state(dev_state);

	/* Drop reference taken in amd_iommu_bind_pasid */
	put_device_state(dev_state);
}
EXPORT_SYMBOL(amd_iommu_unbind_pasid);
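
/*
 * Reference-count choreography (editorial summary): a successful
 * amd_iommu_bind_pasid() leaves in place one reference on the
 * device_state (taken via get_device_state()) and one on the
 * pasid_state (set at allocation). amd_iommu_unbind_pasid() takes and
 * drops its own temporary references, then releases both bind-time
 * references -- hence the two put_device_state() calls on its exit
 * path above.
 */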

int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
{
	struct device_state *dev_state;
	struct iommu_group *group;
	unsigned long flags;
	int ret, tmp;
	u16 devid;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	if (pasids <= 0 || pasids > (PASID_MASK + 1))
		return -EINVAL;

	devid = device_id(pdev);

	dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
	if (dev_state == NULL)
		return -ENOMEM;

	spin_lock_init(&dev_state->lock);
	init_waitqueue_head(&dev_state->wq);
	dev_state->pdev  = pdev;
	dev_state->devid = devid;

	tmp = pasids;
	for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
		dev_state->pasid_levels += 1;

	atomic_set(&dev_state->count, 1);
	dev_state->max_pasids = pasids;

	ret = -ENOMEM;
	dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
	if (dev_state->states == NULL)
		goto out_free_dev_state;

	dev_state->domain = iommu_domain_alloc(&pci_bus_type);
	if (dev_state->domain == NULL)
		goto out_free_states;

	amd_iommu_domain_direct_map(dev_state->domain);

	ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
	if (ret)
		goto out_free_domain;

	group = iommu_group_get(&pdev->dev);
	if (!group) {
		ret = -EINVAL;
		goto out_free_domain;
	}

	ret = iommu_attach_group(dev_state->domain, group);
	if (ret != 0)
		goto out_drop_group;

	iommu_group_put(group);

	spin_lock_irqsave(&state_lock, flags);

	if (__get_device_state(devid) != NULL) {
		spin_unlock_irqrestore(&state_lock, flags);
		ret = -EBUSY;
		goto out_free_domain;
	}

	list_add_tail(&dev_state->list, &state_list);

	spin_unlock_irqrestore(&state_lock, flags);

	return 0;

out_drop_group:
	iommu_group_put(group);

out_free_domain:
	iommu_domain_free(dev_state->domain);

out_free_states:
	free_page((unsigned long)dev_state->states);

out_free_dev_state:
	kfree(dev_state);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_init_device);
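
/*
 * Worked example (illustrative) for the pasid_levels loop in
 * amd_iommu_init_device() above: for pasids = 0x10000 (65536), the
 * first check sees (0x10000 - 1) & ~0x1ff = 0xfe00 != 0, so
 * pasid_levels becomes 1 and tmp shifts down to 0x80; the next check
 * (0x80 - 1) & ~0x1ff == 0 ends the loop. pasid_levels == 1 therefore
 * means a root page plus one leaf level, matching the two-level walk
 * in __get_pasid_state_ptr().
 */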

void amd_iommu_free_device(struct pci_dev *pdev)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;

	if (!amd_iommu_v2_supported())
		return;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	dev_state = __get_device_state(devid);
	if (dev_state == NULL) {
		spin_unlock_irqrestore(&state_lock, flags);
		return;
	}

	list_del(&dev_state->list);

	spin_unlock_irqrestore(&state_lock, flags);

	/* Get rid of any remaining pasid states */
	free_pasid_states(dev_state);

	put_device_state(dev_state);
	/*
	 * Wait until the last reference is dropped before freeing
	 * the device state.
	 */
	wait_event(dev_state->wq, !atomic_read(&dev_state->count));
	free_device_state(dev_state);
}
EXPORT_SYMBOL(amd_iommu_free_device);

int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
				 amd_iommu_invalid_ppr_cb cb)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;
	int ret;

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	ret = -EINVAL;
	dev_state = __get_device_state(devid);
	if (dev_state == NULL)
		goto out_unlock;

	dev_state->inv_ppr_cb = cb;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&state_lock, flags);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);

int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
				    amd_iommu_invalidate_ctx cb)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;
	int ret;

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	ret = -EINVAL;
	dev_state = __get_device_state(devid);
	if (dev_state == NULL)
		goto out_unlock;

	dev_state->inv_ctx_cb = cb;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&state_lock, flags);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);

static int __init amd_iommu_v2_init(void)
{
	int ret;

	pr_info("AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>\n");

	if (!amd_iommu_v2_supported()) {
		pr_info("AMD IOMMUv2 functionality not available on this system\n");
		/*
		 * Load anyway to provide the symbols to other modules
		 * which may use AMD IOMMUv2 optionally.
		 */
		return 0;
	}

	spin_lock_init(&state_lock);

	ret = -ENOMEM;
	iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
	if (iommu_wq == NULL)
		goto out;

	amd_iommu_register_ppr_notifier(&ppr_nb);

	return 0;

out:
	return ret;
}

static void __exit amd_iommu_v2_exit(void)
{
	struct device_state *dev_state;
	int i;

	if (!amd_iommu_v2_supported())
		return;

	amd_iommu_unregister_ppr_notifier(&ppr_nb);

	flush_workqueue(iommu_wq);

	/*
	 * The loop below might call flush_workqueue(), so call
	 * destroy_workqueue() after it
	 */
	for (i = 0; i < MAX_DEVICES; ++i) {
		dev_state = get_device_state(i);

		if (dev_state == NULL)
			continue;

		WARN_ON_ONCE(1);

		put_device_state(dev_state);
		amd_iommu_free_device(dev_state->pdev);
	}

	destroy_workqueue(iommu_wq);
}

module_init(amd_iommu_v2_init);
module_exit(amd_iommu_v2_exit);
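
/*
 * Hedged usage sketch (editorial addition, not part of the original
 * file): how a device driver might consume the API exported above.
 * The example_* names and the PASID limit are hypothetical; only the
 * amd_iommu_* calls come from this file. Kept inside #if 0 so it is
 * illustrative only and never compiled.
 */
#if 0
#include <linux/amd-iommu.h>
#include <linux/pci.h>
#include <linux/sched.h>

#define EXAMPLE_MAX_PASIDS	16	/* hypothetical device limit */

/* Hypothetical callback invoked when a bound context is torn down */
static void example_invalidate_ctx(struct pci_dev *pdev, int pasid)
{
	/* Stop issuing ATS/PRI traffic for this pasid here. */
}

static int example_probe(struct pci_dev *pdev)
{
	int ret;

	/* Set up an IOMMUv2 domain with PASID support for this device */
	ret = amd_iommu_init_device(pdev, EXAMPLE_MAX_PASIDS);
	if (ret)
		return ret;

	amd_iommu_set_invalidate_ctx_cb(pdev, example_invalidate_ctx);

	/* Attach the current process' address space to PASID 0 */
	ret = amd_iommu_bind_pasid(pdev, 0, current);
	if (ret)
		amd_iommu_free_device(pdev);

	return ret;
}

static void example_remove(struct pci_dev *pdev)
{
	amd_iommu_unbind_pasid(pdev, 0);
	amd_iommu_free_device(pdev);
}
#endif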