Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 689 lines 16 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * s390 kvm PCI passthrough support 4 * 5 * Copyright IBM Corp. 2022 6 * 7 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com> 8 */ 9 10#include <linux/kvm_host.h> 11#include <linux/pci.h> 12#include <asm/pci.h> 13#include <asm/pci_insn.h> 14#include <asm/pci_io.h> 15#include <asm/sclp.h> 16#include "pci.h" 17#include "kvm-s390.h" 18 19struct zpci_aift *aift; 20 21static inline int __set_irq_noiib(u16 ctl, u8 isc) 22{ 23 union zpci_sic_iib iib = {{0}}; 24 25 return zpci_set_irq_ctrl(ctl, isc, &iib); 26} 27 28void kvm_s390_pci_aen_exit(void) 29{ 30 unsigned long flags; 31 struct kvm_zdev **gait_kzdev; 32 33 lockdep_assert_held(&aift->aift_lock); 34 35 /* 36 * Contents of the aipb remain registered for the life of the host 37 * kernel, the information preserved in zpci_aipb and zpci_aif_sbv 38 * in case we insert the KVM module again later. Clear the AIFT 39 * information and free anything not registered with underlying 40 * firmware. 41 */ 42 spin_lock_irqsave(&aift->gait_lock, flags); 43 gait_kzdev = aift->kzdev; 44 aift->gait = NULL; 45 aift->sbv = NULL; 46 aift->kzdev = NULL; 47 spin_unlock_irqrestore(&aift->gait_lock, flags); 48 49 kfree(gait_kzdev); 50} 51 52static int zpci_setup_aipb(u8 nisc) 53{ 54 struct page *page; 55 int size, rc; 56 57 zpci_aipb = kzalloc_obj(union zpci_sic_iib); 58 if (!zpci_aipb) 59 return -ENOMEM; 60 61 aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL); 62 if (!aift->sbv) { 63 rc = -ENOMEM; 64 goto free_aipb; 65 } 66 zpci_aif_sbv = aift->sbv; 67 size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES * 68 sizeof(struct zpci_gaite))); 69 page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size); 70 if (!page) { 71 rc = -ENOMEM; 72 goto free_sbv; 73 } 74 aift->gait = (struct zpci_gaite *)page_to_virt(page); 75 76 zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector); 77 zpci_aipb->aipb.gait = virt_to_phys(aift->gait); 78 zpci_aipb->aipb.afi = nisc; 79 zpci_aipb->aipb.faal = ZPCI_NR_DEVICES; 80 81 /* Setup Adapter Event Notification Interpretation */ 82 if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) { 83 rc = -EIO; 84 goto free_gait; 85 } 86 87 return 0; 88 89free_gait: 90 free_pages((unsigned long)aift->gait, size); 91free_sbv: 92 airq_iv_release(aift->sbv); 93 zpci_aif_sbv = NULL; 94free_aipb: 95 kfree(zpci_aipb); 96 zpci_aipb = NULL; 97 98 return rc; 99} 100 101static int zpci_reset_aipb(u8 nisc) 102{ 103 /* 104 * AEN registration can only happen once per system boot. If 105 * an aipb already exists then AEN was already registered and 106 * we can reuse the aipb contents. This can only happen if 107 * the KVM module was removed and re-inserted. However, we must 108 * ensure that the same forwarding ISC is used as this is assigned 109 * during KVM module load. 110 */ 111 if (zpci_aipb->aipb.afi != nisc) 112 return -EINVAL; 113 114 aift->sbv = zpci_aif_sbv; 115 aift->gait = phys_to_virt(zpci_aipb->aipb.gait); 116 117 return 0; 118} 119 120int kvm_s390_pci_aen_init(u8 nisc) 121{ 122 int rc = 0; 123 124 /* If already enabled for AEN, bail out now */ 125 if (aift->gait || aift->sbv) 126 return -EPERM; 127 128 mutex_lock(&aift->aift_lock); 129 aift->kzdev = kzalloc_objs(struct kvm_zdev *, ZPCI_NR_DEVICES); 130 if (!aift->kzdev) { 131 rc = -ENOMEM; 132 goto unlock; 133 } 134 135 if (!zpci_aipb) 136 rc = zpci_setup_aipb(nisc); 137 else 138 rc = zpci_reset_aipb(nisc); 139 if (rc) 140 goto free_zdev; 141 142 /* Enable floating IRQs */ 143 if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) { 144 rc = -EIO; 145 kvm_s390_pci_aen_exit(); 146 } 147 148 goto unlock; 149 150free_zdev: 151 kfree(aift->kzdev); 152unlock: 153 mutex_unlock(&aift->aift_lock); 154 return rc; 155} 156 157/* Modify PCI: Register floating adapter interruption forwarding */ 158static int kvm_zpci_set_airq(struct zpci_dev *zdev) 159{ 160 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); 161 struct zpci_fib fib = {}; 162 u8 status; 163 164 fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc; 165 fib.fmt0.sum = 1; /* enable summary notifications */ 166 fib.fmt0.noi = airq_iv_end(zdev->aibv); 167 fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector); 168 fib.fmt0.aibvo = 0; 169 fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8); 170 fib.fmt0.aisbo = zdev->aisb & 63; 171 fib.gd = zdev->gisa; 172 173 return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; 174} 175 176/* Modify PCI: Unregister floating adapter interruption forwarding */ 177static int kvm_zpci_clear_airq(struct zpci_dev *zdev) 178{ 179 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); 180 struct zpci_fib fib = {}; 181 u8 cc, status; 182 183 fib.gd = zdev->gisa; 184 185 cc = zpci_mod_fc(req, &fib, &status); 186 if (cc == 3 || (cc == 1 && status == 24)) 187 /* Function already gone or IRQs already deregistered. */ 188 cc = 0; 189 190 return cc ? -EIO : 0; 191} 192 193static inline void unaccount_mem(unsigned long nr_pages) 194{ 195 struct user_struct *user = get_uid(current_user()); 196 197 if (user) 198 atomic_long_sub(nr_pages, &user->locked_vm); 199 if (current->mm) 200 atomic64_sub(nr_pages, &current->mm->pinned_vm); 201} 202 203static inline int account_mem(unsigned long nr_pages) 204{ 205 struct user_struct *user = get_uid(current_user()); 206 unsigned long page_limit, cur_pages, new_pages; 207 208 page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 209 210 cur_pages = atomic_long_read(&user->locked_vm); 211 do { 212 new_pages = cur_pages + nr_pages; 213 if (new_pages > page_limit) 214 return -ENOMEM; 215 } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages)); 216 217 atomic64_add(nr_pages, &current->mm->pinned_vm); 218 219 return 0; 220} 221 222static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib, 223 bool assist) 224{ 225 struct page *pages[1], *aibv_page, *aisb_page = NULL; 226 unsigned int msi_vecs, idx; 227 struct zpci_gaite *gaite; 228 unsigned long hva, bit; 229 struct kvm *kvm; 230 phys_addr_t gaddr; 231 int rc = 0, gisc, npages, pcount = 0; 232 233 /* 234 * Interrupt forwarding is only applicable if the device is already 235 * enabled for interpretation 236 */ 237 if (zdev->gisa == 0) 238 return -EINVAL; 239 240 kvm = zdev->kzdev->kvm; 241 msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi); 242 243 /* Get the associated forwarding ISC - if invalid, return the error */ 244 gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc); 245 if (gisc < 0) 246 return gisc; 247 248 /* Replace AIBV address */ 249 idx = srcu_read_lock(&kvm->srcu); 250 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv)); 251 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages); 252 srcu_read_unlock(&kvm->srcu, idx); 253 if (npages < 1) { 254 rc = -EIO; 255 goto out; 256 } 257 aibv_page = pages[0]; 258 pcount++; 259 gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK); 260 fib->fmt0.aibv = gaddr; 261 262 /* Pin the guest AISB if one was specified */ 263 if (fib->fmt0.sum == 1) { 264 idx = srcu_read_lock(&kvm->srcu); 265 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb)); 266 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, 267 pages); 268 srcu_read_unlock(&kvm->srcu, idx); 269 if (npages < 1) { 270 rc = -EIO; 271 goto unpin1; 272 } 273 aisb_page = pages[0]; 274 pcount++; 275 } 276 277 /* Account for pinned pages, roll back on failure */ 278 if (account_mem(pcount)) 279 goto unpin2; 280 281 /* AISB must be allocated before we can fill in GAITE */ 282 mutex_lock(&aift->aift_lock); 283 bit = airq_iv_alloc_bit(aift->sbv); 284 if (bit == -1UL) 285 goto unlock; 286 zdev->aisb = bit; /* store the summary bit number */ 287 zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | 288 AIRQ_IV_BITLOCK | 289 AIRQ_IV_GUESTVEC, 290 phys_to_virt(fib->fmt0.aibv)); 291 292 spin_lock_irq(&aift->gait_lock); 293 gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * 294 sizeof(struct zpci_gaite)); 295 296 /* If assist not requested, host will get all alerts */ 297 if (assist) 298 gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 299 else 300 gaite->gisa = 0; 301 302 gaite->gisc = fib->fmt0.isc; 303 gaite->count++; 304 gaite->aisbo = fib->fmt0.aisbo; 305 gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb & 306 ~PAGE_MASK)); 307 aift->kzdev[zdev->aisb] = zdev->kzdev; 308 spin_unlock_irq(&aift->gait_lock); 309 310 /* Update guest FIB for re-issue */ 311 fib->fmt0.aisbo = zdev->aisb & 63; 312 fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8); 313 fib->fmt0.isc = gisc; 314 315 /* Save some guest fib values in the host for later use */ 316 zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc; 317 zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv; 318 mutex_unlock(&aift->aift_lock); 319 320 /* Issue the clp to setup the irq now */ 321 rc = kvm_zpci_set_airq(zdev); 322 return rc; 323 324unlock: 325 mutex_unlock(&aift->aift_lock); 326unpin2: 327 if (fib->fmt0.sum == 1) 328 unpin_user_page(aisb_page); 329unpin1: 330 unpin_user_page(aibv_page); 331out: 332 return rc; 333} 334 335static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force) 336{ 337 struct kvm_zdev *kzdev = zdev->kzdev; 338 struct zpci_gaite *gaite; 339 struct page *vpage = NULL, *spage = NULL; 340 int rc, pcount = 0; 341 u8 isc; 342 343 if (zdev->gisa == 0) 344 return -EINVAL; 345 346 mutex_lock(&aift->aift_lock); 347 348 /* 349 * If the clear fails due to an error, leave now unless we know this 350 * device is about to go away (force) -- In that case clear the GAITE 351 * regardless. 352 */ 353 rc = kvm_zpci_clear_airq(zdev); 354 if (rc && !force) 355 goto out; 356 357 if (zdev->kzdev->fib.fmt0.aibv == 0) 358 goto out; 359 spin_lock_irq(&aift->gait_lock); 360 gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * 361 sizeof(struct zpci_gaite)); 362 isc = gaite->gisc; 363 gaite->count--; 364 if (gaite->count == 0) { 365 /* Release guest AIBV and AISB */ 366 vpage = phys_to_page(kzdev->fib.fmt0.aibv); 367 if (gaite->aisb != 0) 368 spage = phys_to_page(gaite->aisb); 369 /* Clear the GAIT entry */ 370 gaite->aisb = 0; 371 gaite->gisc = 0; 372 gaite->aisbo = 0; 373 gaite->gisa = 0; 374 aift->kzdev[zdev->aisb] = NULL; 375 /* Clear zdev info */ 376 airq_iv_free_bit(aift->sbv, zdev->aisb); 377 airq_iv_release(zdev->aibv); 378 zdev->aisb = 0; 379 zdev->aibv = NULL; 380 } 381 spin_unlock_irq(&aift->gait_lock); 382 kvm_s390_gisc_unregister(kzdev->kvm, isc); 383 kzdev->fib.fmt0.isc = 0; 384 kzdev->fib.fmt0.aibv = 0; 385 386 if (vpage) { 387 unpin_user_page(vpage); 388 pcount++; 389 } 390 if (spage) { 391 unpin_user_page(spage); 392 pcount++; 393 } 394 if (pcount > 0) 395 unaccount_mem(pcount); 396out: 397 mutex_unlock(&aift->aift_lock); 398 399 return rc; 400} 401 402static int kvm_s390_pci_dev_open(struct zpci_dev *zdev) 403{ 404 struct kvm_zdev *kzdev; 405 406 kzdev = kzalloc_obj(struct kvm_zdev); 407 if (!kzdev) 408 return -ENOMEM; 409 410 kzdev->zdev = zdev; 411 zdev->kzdev = kzdev; 412 413 return 0; 414} 415 416static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) 417{ 418 struct kvm_zdev *kzdev; 419 420 kzdev = zdev->kzdev; 421 WARN_ON(kzdev->zdev != zdev); 422 zdev->kzdev = NULL; 423 kfree(kzdev); 424} 425 426 427/* 428 * Register device with the specified KVM. If interpretation facilities are 429 * available, enable them and let userspace indicate whether or not they will 430 * be used (specify SHM bit to disable). 431 */ 432static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) 433{ 434 struct zpci_dev *zdev = opaque; 435 int rc; 436 437 if (!zdev) 438 return -EINVAL; 439 440 mutex_lock(&zdev->kzdev_lock); 441 442 if (zdev->kzdev || zdev->gisa != 0 || !kvm) { 443 mutex_unlock(&zdev->kzdev_lock); 444 return -EINVAL; 445 } 446 447 kvm_get_kvm(kvm); 448 449 mutex_lock(&kvm->lock); 450 451 rc = kvm_s390_pci_dev_open(zdev); 452 if (rc) 453 goto err; 454 455 /* 456 * If interpretation facilities aren't available, add the device to 457 * the kzdev list but don't enable for interpretation. 458 */ 459 if (!kvm_s390_pci_interp_allowed()) 460 goto out; 461 462 /* 463 * If this is the first request to use an interpreted device, make the 464 * necessary vcpu changes 465 */ 466 if (!kvm->arch.use_zpci_interp) 467 kvm_s390_vcpu_pci_enable_interp(kvm); 468 469 if (zdev_enabled(zdev)) { 470 rc = zpci_disable_device(zdev); 471 if (rc) 472 goto err; 473 } 474 475 /* 476 * Store information about the identity of the kvm guest allowed to 477 * access this device via interpretation to be used by host CLP 478 */ 479 zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 480 481 rc = zpci_reenable_device(zdev); 482 if (rc) 483 goto clear_gisa; 484 485out: 486 zdev->kzdev->kvm = kvm; 487 488 spin_lock(&kvm->arch.kzdev_list_lock); 489 list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list); 490 spin_unlock(&kvm->arch.kzdev_list_lock); 491 492 mutex_unlock(&kvm->lock); 493 mutex_unlock(&zdev->kzdev_lock); 494 return 0; 495 496clear_gisa: 497 zdev->gisa = 0; 498err: 499 if (zdev->kzdev) 500 kvm_s390_pci_dev_release(zdev); 501 mutex_unlock(&kvm->lock); 502 mutex_unlock(&zdev->kzdev_lock); 503 kvm_put_kvm(kvm); 504 return rc; 505} 506 507static void kvm_s390_pci_unregister_kvm(void *opaque) 508{ 509 struct zpci_dev *zdev = opaque; 510 struct kvm *kvm; 511 512 if (!zdev) 513 return; 514 515 mutex_lock(&zdev->kzdev_lock); 516 517 if (WARN_ON(!zdev->kzdev)) { 518 mutex_unlock(&zdev->kzdev_lock); 519 return; 520 } 521 522 kvm = zdev->kzdev->kvm; 523 mutex_lock(&kvm->lock); 524 525 /* 526 * A 0 gisa means interpretation was never enabled, just remove the 527 * device from the list. 528 */ 529 if (zdev->gisa == 0) 530 goto out; 531 532 /* Forwarding must be turned off before interpretation */ 533 if (zdev->kzdev->fib.fmt0.aibv != 0) 534 kvm_s390_pci_aif_disable(zdev, true); 535 536 /* Remove the host CLP guest designation */ 537 zdev->gisa = 0; 538 539 if (zdev_enabled(zdev)) { 540 if (zpci_disable_device(zdev)) 541 goto out; 542 } 543 544 zpci_reenable_device(zdev); 545 546out: 547 spin_lock(&kvm->arch.kzdev_list_lock); 548 list_del(&zdev->kzdev->entry); 549 spin_unlock(&kvm->arch.kzdev_list_lock); 550 kvm_s390_pci_dev_release(zdev); 551 552 mutex_unlock(&kvm->lock); 553 mutex_unlock(&zdev->kzdev_lock); 554 555 kvm_put_kvm(kvm); 556} 557 558void kvm_s390_pci_init_list(struct kvm *kvm) 559{ 560 spin_lock_init(&kvm->arch.kzdev_list_lock); 561 INIT_LIST_HEAD(&kvm->arch.kzdev_list); 562} 563 564void kvm_s390_pci_clear_list(struct kvm *kvm) 565{ 566 /* 567 * This list should already be empty, either via vfio device closures 568 * or kvm fd cleanup. 569 */ 570 spin_lock(&kvm->arch.kzdev_list_lock); 571 WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list)); 572 spin_unlock(&kvm->arch.kzdev_list_lock); 573} 574 575static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh) 576{ 577 struct zpci_dev *zdev = NULL; 578 struct kvm_zdev *kzdev; 579 580 spin_lock(&kvm->arch.kzdev_list_lock); 581 list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) { 582 if (kzdev->zdev->fh == fh) { 583 zdev = kzdev->zdev; 584 break; 585 } 586 } 587 spin_unlock(&kvm->arch.kzdev_list_lock); 588 589 return zdev; 590} 591 592static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev, 593 struct kvm_s390_zpci_op *args) 594{ 595 struct zpci_fib fib = {}; 596 bool hostflag; 597 598 fib.fmt0.aibv = args->u.reg_aen.ibv; 599 fib.fmt0.isc = args->u.reg_aen.isc; 600 fib.fmt0.noi = args->u.reg_aen.noi; 601 if (args->u.reg_aen.sb != 0) { 602 fib.fmt0.aisb = args->u.reg_aen.sb; 603 fib.fmt0.aisbo = args->u.reg_aen.sbo; 604 fib.fmt0.sum = 1; 605 } else { 606 fib.fmt0.aisb = 0; 607 fib.fmt0.aisbo = 0; 608 fib.fmt0.sum = 0; 609 } 610 611 hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST); 612 return kvm_s390_pci_aif_enable(zdev, &fib, hostflag); 613} 614 615int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args) 616{ 617 struct kvm_zdev *kzdev; 618 struct zpci_dev *zdev; 619 int r; 620 621 zdev = get_zdev_from_kvm_by_fh(kvm, args->fh); 622 if (!zdev) 623 return -ENODEV; 624 625 mutex_lock(&zdev->kzdev_lock); 626 mutex_lock(&kvm->lock); 627 628 kzdev = zdev->kzdev; 629 if (!kzdev) { 630 r = -ENODEV; 631 goto out; 632 } 633 if (kzdev->kvm != kvm) { 634 r = -EPERM; 635 goto out; 636 } 637 638 switch (args->op) { 639 case KVM_S390_ZPCIOP_REG_AEN: 640 /* Fail on unknown flags */ 641 if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) { 642 r = -EINVAL; 643 break; 644 } 645 r = kvm_s390_pci_zpci_reg_aen(zdev, args); 646 break; 647 case KVM_S390_ZPCIOP_DEREG_AEN: 648 r = kvm_s390_pci_aif_disable(zdev, false); 649 break; 650 default: 651 r = -EINVAL; 652 } 653 654out: 655 mutex_unlock(&kvm->lock); 656 mutex_unlock(&zdev->kzdev_lock); 657 return r; 658} 659 660int __init kvm_s390_pci_init(void) 661{ 662 zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; 663 zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; 664 665 if (!kvm_s390_pci_interp_allowed()) 666 return 0; 667 668 aift = kzalloc_obj(struct zpci_aift); 669 if (!aift) 670 return -ENOMEM; 671 672 spin_lock_init(&aift->gait_lock); 673 mutex_init(&aift->aift_lock); 674 675 return 0; 676} 677 678void kvm_s390_pci_exit(void) 679{ 680 zpci_kvm_hook.kvm_register = NULL; 681 zpci_kvm_hook.kvm_unregister = NULL; 682 683 if (!kvm_s390_pci_interp_allowed()) 684 return; 685 686 mutex_destroy(&aift->aift_lock); 687 688 kfree(aift); 689}