Merge branches 'v5.15/vfio/spdx-license-cleanups', 'v5.15/vfio/dma-valid-waited-v3', 'v5.15/vfio/vfio-pci-core-v5' and 'v5.15/vfio/vfio-ap' into v5.15/vfio/next
···103103 - subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)104104 - class and classmask fields default to 0105105 - driver_data defaults to 0UL.106106+ - override_only field defaults to 0.106107107108Note that driver_data must match the value used by any of the pci_device_id108109entries defined in the driver. This makes the driver_data field mandatory
···25592559 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;25602560}2561256125622562+/*25632563+ * kvm_arch_crypto_set_masks25642564+ *25652565+ * @kvm: pointer to the target guest's KVM struct containing the crypto masks25662566+ * to be set.25672567+ * @apm: the mask identifying the accessible AP adapters25682568+ * @aqm: the mask identifying the accessible AP domains25692569+ * @adm: the mask identifying the accessible AP control domains25702570+ *25712571+ * Set the masks that identify the adapters, domains and control domains to25722572+ * which the KVM guest is granted access.25732573+ *25742574+ * Note: The kvm->lock mutex must be locked by the caller before invoking this25752575+ * function.25762576+ */25622577void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,25632578 unsigned long *aqm, unsigned long *adm)25642579{25652580 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;2566258125672567- mutex_lock(&kvm->lock);25682582 kvm_s390_vcpu_block_all(kvm);2569258325702584 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {···26092595 /* recreate the shadow crycb for each vcpu */26102596 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);26112597 kvm_s390_vcpu_unblock_all(kvm);26122612- mutex_unlock(&kvm->lock);26132598}26142599EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);2615260026012601+/*26022602+ * kvm_arch_crypto_clear_masks26032603+ *26042604+ * @kvm: pointer to the target guest's KVM struct containing the crypto masks26052605+ * to be cleared.26062606+ *26072607+ * Clear the masks that identify the adapters, domains and control domains to26082608+ * which the KVM guest is granted access.26092609+ *26102610+ * Note: The kvm->lock mutex must be locked by the caller before invoking this26112611+ * function.26122612+ */26162613void kvm_arch_crypto_clear_masks(struct kvm *kvm)26172614{26182618- mutex_lock(&kvm->lock);26192615 kvm_s390_vcpu_block_all(kvm);2620261626212617 memset(&kvm->arch.crypto.crycb->apcb0, 0,···26372613 /* 
recreate the shadow crycb for each vcpu */26382614 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);26392615 kvm_s390_vcpu_unblock_all(kvm);26402640- mutex_unlock(&kvm->lock);26412616}26422617EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);26432618···26532630{26542631 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;26552632 kvm_s390_set_crycb_format(kvm);26332633+ init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);2656263426572635 if (!test_kvm_facility(kvm, 76))26582636 return;
+9-6
arch/s390/kvm/priv.c
···610610static int handle_pqap(struct kvm_vcpu *vcpu)611611{612612 struct ap_queue_status status = {};613613+ crypto_hook pqap_hook;613614 unsigned long reg0;614615 int ret;615616 uint8_t fc;···655654 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);656655657656 /*658658- * Verify that the hook callback is registered, lock the owner659659- * and call the hook.657657+ * If the hook callback is registered, there will be a pointer to the658658+ * hook function pointer in the kvm_s390_crypto structure. Lock the659659+ * owner, retrieve the hook function pointer and call the hook.660660 */661661+ down_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);661662 if (vcpu->kvm->arch.crypto.pqap_hook) {662662- if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))663663- return -EOPNOTSUPP;664664- ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);665665- module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);663663+ pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook;664664+ ret = pqap_hook(vcpu);666665 if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)667666 kvm_s390_set_psw_cc(vcpu, 3);667667+ up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);668668 return ret;669669 }670670+ up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);670671 /*671672 * A vfio_driver must register a hook.672673 * No hook means no driver to enable the SIE CRYCB and no queues.
+21-7
drivers/pci/pci-driver.c
···136136 struct pci_dev *dev)137137{138138 struct pci_dynid *dynid;139139- const struct pci_device_id *found_id = NULL;139139+ const struct pci_device_id *found_id = NULL, *ids;140140141141 /* When driver_override is set, only bind to the matching driver */142142 if (dev->driver_override && strcmp(dev->driver_override, drv->name))···152152 }153153 spin_unlock(&drv->dynids.lock);154154155155- if (!found_id)156156- found_id = pci_match_id(drv->id_table, dev);155155+ if (found_id)156156+ return found_id;157157+158158+ for (ids = drv->id_table; (found_id = pci_match_id(ids, dev));159159+ ids = found_id + 1) {160160+ /*161161+ * The match table is split based on driver_override.162162+ * In case override_only was set, enforce driver_override163163+ * matching.164164+ */165165+ if (found_id->override_only) {166166+ if (dev->driver_override)167167+ return found_id;168168+ } else {169169+ return found_id;170170+ }171171+ }157172158173 /* driver_override will always match, send a dummy id */159159- if (!found_id && dev->driver_override)160160- found_id = &pci_device_id_any;161161-162162- return found_id;174174+ if (dev->driver_override)175175+ return &pci_device_id_any;176176+ return NULL;163177}164178165179/**
+132-150
drivers/s390/crypto/vfio_ap_ops.c
···2424#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"2525#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"26262727-static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);2727+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);2828static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);2929+static const struct vfio_device_ops vfio_ap_matrix_dev_ops;29303031static int match_apqn(struct device *dev, const void *data)3132{···295294 matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,296295 struct ap_matrix_mdev, pqap_hook);297296298298- /*299299- * If the KVM pointer is in the process of being set, wait until the300300- * process has completed.301301- */302302- wait_event_cmd(matrix_mdev->wait_for_kvm,303303- !matrix_mdev->kvm_busy,304304- mutex_unlock(&matrix_dev->lock),305305- mutex_lock(&matrix_dev->lock));306306-307297 /* If the there is no guest using the mdev, there is nothing to do */308298 if (!matrix_mdev->kvm)309299 goto out_unlock;···327335 matrix->adm_max = info->apxa ? 
info->Nd : 15;328336}329337330330-static int vfio_ap_mdev_create(struct mdev_device *mdev)338338+static int vfio_ap_mdev_probe(struct mdev_device *mdev)331339{332340 struct ap_matrix_mdev *matrix_mdev;341341+ int ret;333342334343 if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))335344 return -EPERM;336345337346 matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);338347 if (!matrix_mdev) {339339- atomic_inc(&matrix_dev->available_instances);340340- return -ENOMEM;348348+ ret = -ENOMEM;349349+ goto err_dec_available;341350 }351351+ vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,352352+ &vfio_ap_matrix_dev_ops);342353343354 matrix_mdev->mdev = mdev;344355 vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);345345- init_waitqueue_head(&matrix_mdev->wait_for_kvm);346346- mdev_set_drvdata(mdev, matrix_mdev);347347- matrix_mdev->pqap_hook.hook = handle_pqap;348348- matrix_mdev->pqap_hook.owner = THIS_MODULE;356356+ matrix_mdev->pqap_hook = handle_pqap;349357 mutex_lock(&matrix_dev->lock);350358 list_add(&matrix_mdev->node, &matrix_dev->mdev_list);351359 mutex_unlock(&matrix_dev->lock);352360361361+ ret = vfio_register_group_dev(&matrix_mdev->vdev);362362+ if (ret)363363+ goto err_list;364364+ dev_set_drvdata(&mdev->dev, matrix_mdev);353365 return 0;366366+367367+err_list:368368+ mutex_lock(&matrix_dev->lock);369369+ list_del(&matrix_mdev->node);370370+ mutex_unlock(&matrix_dev->lock);371371+ kfree(matrix_mdev);372372+err_dec_available:373373+ atomic_inc(&matrix_dev->available_instances);374374+ return ret;354375}355376356356-static int vfio_ap_mdev_remove(struct mdev_device *mdev)377377+static void vfio_ap_mdev_remove(struct mdev_device *mdev)357378{358358- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);379379+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);380380+381381+ vfio_unregister_group_dev(&matrix_mdev->vdev);359382360383 mutex_lock(&matrix_dev->lock);361361- vfio_ap_mdev_reset_queues(mdev);384384+ 
vfio_ap_mdev_reset_queues(matrix_mdev);362385 list_del(&matrix_mdev->node);363386 kfree(matrix_mdev);364364- mdev_set_drvdata(mdev, NULL);365387 atomic_inc(&matrix_dev->available_instances);366388 mutex_unlock(&matrix_dev->lock);367367-368368- return 0;369389}370390371391static ssize_t name_show(struct mdev_type *mtype,···619615{620616 int ret;621617 unsigned long apid;622622- struct mdev_device *mdev = mdev_from_dev(dev);623623- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);618618+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);624619625620 mutex_lock(&matrix_dev->lock);626621627627- /*628628- * If the KVM pointer is in flux or the guest is running, disallow629629- * un-assignment of adapter630630- */631631- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {622622+ /* If the KVM guest is running, disallow assignment of adapter */623623+ if (matrix_mdev->kvm) {632624 ret = -EBUSY;633625 goto done;634626 }···688688{689689 int ret;690690 unsigned long apid;691691- struct mdev_device *mdev = mdev_from_dev(dev);692692- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);691691+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);693692694693 mutex_lock(&matrix_dev->lock);695694696696- /*697697- * If the KVM pointer is in flux or the guest is running, disallow698698- * un-assignment of adapter699699- */700700- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {695695+ /* If the KVM guest is running, disallow unassignment of adapter */696696+ if (matrix_mdev->kvm) {701697 ret = -EBUSY;702698 goto done;703699 }···773777{774778 int ret;775779 unsigned long apqi;776776- struct mdev_device *mdev = mdev_from_dev(dev);777777- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);780780+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);778781 unsigned long max_apqi = matrix_mdev->matrix.aqm_max;779782780783 mutex_lock(&matrix_dev->lock);781784782782- /*783783- * If the KVM pointer is in flux or the guest is running, 
disallow784784- * assignment of domain785785- */786786- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {785785+ /* If the KVM guest is running, disallow assignment of domain */786786+ if (matrix_mdev->kvm) {787787 ret = -EBUSY;788788 goto done;789789 }···838846{839847 int ret;840848 unsigned long apqi;841841- struct mdev_device *mdev = mdev_from_dev(dev);842842- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);849849+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);843850844851 mutex_lock(&matrix_dev->lock);845852846846- /*847847- * If the KVM pointer is in flux or the guest is running, disallow848848- * un-assignment of domain849849- */850850- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {853853+ /* If the KVM guest is running, disallow unassignment of domain */854854+ if (matrix_mdev->kvm) {851855 ret = -EBUSY;852856 goto done;853857 }···888900{889901 int ret;890902 unsigned long id;891891- struct mdev_device *mdev = mdev_from_dev(dev);892892- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);903903+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);893904894905 mutex_lock(&matrix_dev->lock);895906896896- /*897897- * If the KVM pointer is in flux or the guest is running, disallow898898- * assignment of control domain.899899- */900900- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {907907+ /* If the KVM guest is running, disallow assignment of control domain */908908+ if (matrix_mdev->kvm) {901909 ret = -EBUSY;902910 goto done;903911 }···942958{943959 int ret;944960 unsigned long domid;945945- struct mdev_device *mdev = mdev_from_dev(dev);946946- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);961961+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);947962 unsigned long max_domid = matrix_mdev->matrix.adm_max;948963949964 mutex_lock(&matrix_dev->lock);950965951951- /*952952- * If the KVM pointer is in flux or the guest is running, disallow953953- * un-assignment of control domain.954954- */955955- if 
(matrix_mdev->kvm_busy || matrix_mdev->kvm) {966966+ /* If a KVM guest is running, disallow unassignment of control domain */967967+ if (matrix_mdev->kvm) {956968 ret = -EBUSY;957969 goto done;958970 }···977997 int nchars = 0;978998 int n;979999 char *bufpos = buf;980980- struct mdev_device *mdev = mdev_from_dev(dev);981981- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);10001000+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);9821001 unsigned long max_domid = matrix_mdev->matrix.adm_max;98310029841003 mutex_lock(&matrix_dev->lock);···9951016static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,9961017 char *buf)9971018{998998- struct mdev_device *mdev = mdev_from_dev(dev);999999- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);10191019+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);10001020 char *bufpos = buf;10011021 unsigned long apid;10021022 unsigned long apqi;···10871109 struct ap_matrix_mdev *m;1088111010891111 if (kvm->arch.crypto.crycbd) {11121112+ down_write(&kvm->arch.crypto.pqap_hook_rwsem);11131113+ kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;11141114+ up_write(&kvm->arch.crypto.pqap_hook_rwsem);11151115+11161116+ mutex_lock(&kvm->lock);11171117+ mutex_lock(&matrix_dev->lock);11181118+10901119 list_for_each_entry(m, &matrix_dev->mdev_list, node) {10911091- if (m != matrix_mdev && m->kvm == kvm)11201120+ if (m != matrix_mdev && m->kvm == kvm) {11211121+ mutex_unlock(&kvm->lock);11221122+ mutex_unlock(&matrix_dev->lock);10921123 return -EPERM;11241124+ }10931125 }1094112610951127 kvm_get_kvm(kvm);10961096- matrix_mdev->kvm_busy = true;10971097- mutex_unlock(&matrix_dev->lock);11281128+ matrix_mdev->kvm = kvm;10981129 kvm_arch_crypto_set_masks(kvm,10991130 matrix_mdev->matrix.apm,11001131 matrix_mdev->matrix.aqm,11011132 matrix_mdev->matrix.adm);11021102- mutex_lock(&matrix_dev->lock);11031103- kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;11041104- matrix_mdev->kvm = 
kvm;11051105- matrix_mdev->kvm_busy = false;11061106- wake_up_all(&matrix_mdev->wait_for_kvm);11331133+11341134+ mutex_unlock(&kvm->lock);11351135+ mutex_unlock(&matrix_dev->lock);11071136 }1108113711091138 return 0;···11601175 * done under the @matrix_mdev->lock.11611176 *11621177 */11631163-static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)11781178+static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev,11791179+ struct kvm *kvm)11641180{11651165- /*11661166- * If the KVM pointer is in the process of being set, wait until the11671167- * process has completed.11681168- */11691169- wait_event_cmd(matrix_mdev->wait_for_kvm,11701170- !matrix_mdev->kvm_busy,11711171- mutex_unlock(&matrix_dev->lock),11721172- mutex_lock(&matrix_dev->lock));11811181+ if (kvm && kvm->arch.crypto.crycbd) {11821182+ down_write(&kvm->arch.crypto.pqap_hook_rwsem);11831183+ kvm->arch.crypto.pqap_hook = NULL;11841184+ up_write(&kvm->arch.crypto.pqap_hook_rwsem);1173118511741174- if (matrix_mdev->kvm) {11751175- matrix_mdev->kvm_busy = true;11761176- mutex_unlock(&matrix_dev->lock);11771177- kvm_arch_crypto_clear_masks(matrix_mdev->kvm);11861186+ mutex_lock(&kvm->lock);11781187 mutex_lock(&matrix_dev->lock);11791179- vfio_ap_mdev_reset_queues(matrix_mdev->mdev);11801180- matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;11811181- kvm_put_kvm(matrix_mdev->kvm);11881188+11891189+ kvm_arch_crypto_clear_masks(kvm);11901190+ vfio_ap_mdev_reset_queues(matrix_mdev);11911191+ kvm_put_kvm(kvm);11821192 matrix_mdev->kvm = NULL;11831183- matrix_mdev->kvm_busy = false;11841184- wake_up_all(&matrix_mdev->wait_for_kvm);11931193+11941194+ mutex_unlock(&kvm->lock);11951195+ mutex_unlock(&matrix_dev->lock);11851196 }11861197}11871198···11901209 if (action != VFIO_GROUP_NOTIFY_SET_KVM)11911210 return NOTIFY_OK;1192121111931193- mutex_lock(&matrix_dev->lock);11941212 matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);1195121311961214 if (!data)11971197- 
vfio_ap_mdev_unset_kvm(matrix_mdev);12151215+ vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);11981216 else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))11991217 notify_rc = NOTIFY_DONE;12001200-12011201- mutex_unlock(&matrix_dev->lock);1202121812031219 return notify_rc;12041220}···12661288 return ret;12671289}1268129012691269-static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)12911291+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)12701292{12711293 int ret;12721294 int rc = 0;12731295 unsigned long apid, apqi;12741296 struct vfio_ap_queue *q;12751275- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);1276129712771298 for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,12781299 matrix_mdev->matrix.apm_max + 1) {···12921315 return rc;12931316}1294131712951295-static int vfio_ap_mdev_open_device(struct mdev_device *mdev)13181318+static int vfio_ap_mdev_open_device(struct vfio_device *vdev)12961319{12971297- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);13201320+ struct ap_matrix_mdev *matrix_mdev =13211321+ container_of(vdev, struct ap_matrix_mdev, vdev);12981322 unsigned long events;12991323 int ret;13001300-13011301-13021302- if (!try_module_get(THIS_MODULE))13031303- return -ENODEV;1304132413051325 matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;13061326 events = VFIO_GROUP_NOTIFY_SET_KVM;1307132713081308- ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,13281328+ ret = vfio_register_notifier(vdev->dev, VFIO_GROUP_NOTIFY,13091329 &events, &matrix_mdev->group_notifier);13101310- if (ret) {13111311- module_put(THIS_MODULE);13301330+ if (ret)13121331 return ret;13131313- }1314133213151333 matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;13161334 events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;13171317- ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,13351335+ ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,13181336 &events, 
&matrix_mdev->iommu_notifier);13191319- if (!ret)13201320- return ret;13371337+ if (ret)13381338+ goto out_unregister_group;13391339+ return 0;1321134013221322- vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,13411341+out_unregister_group:13421342+ vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,13231343 &matrix_mdev->group_notifier);13241324- module_put(THIS_MODULE);13251344 return ret;13261345}1327134613281328-static void vfio_ap_mdev_close_device(struct mdev_device *mdev)13471347+static void vfio_ap_mdev_close_device(struct vfio_device *vdev)13291348{13301330- struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);13491349+ struct ap_matrix_mdev *matrix_mdev =13501350+ container_of(vdev, struct ap_matrix_mdev, vdev);1331135113321332- mutex_lock(&matrix_dev->lock);13331333- vfio_ap_mdev_unset_kvm(matrix_mdev);13341334- mutex_unlock(&matrix_dev->lock);13351335-13361336- vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,13521352+ vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,13371353 &matrix_mdev->iommu_notifier);13381338- vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,13541354+ vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,13391355 &matrix_mdev->group_notifier);13401340- module_put(THIS_MODULE);13561356+ vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);13411357}1342135813431359static int vfio_ap_mdev_get_device_info(unsigned long arg)···13531383 return copy_to_user((void __user *)arg, &info, minsz) ? 
-EFAULT : 0;13541384}1355138513561356-static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,13861386+static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,13571387 unsigned int cmd, unsigned long arg)13581388{13891389+ struct ap_matrix_mdev *matrix_mdev =13901390+ container_of(vdev, struct ap_matrix_mdev, vdev);13591391 int ret;13601360- struct ap_matrix_mdev *matrix_mdev;1361139213621393 mutex_lock(&matrix_dev->lock);13631394 switch (cmd) {···13661395 ret = vfio_ap_mdev_get_device_info(arg);13671396 break;13681397 case VFIO_DEVICE_RESET:13691369- matrix_mdev = mdev_get_drvdata(mdev);13701370- if (WARN(!matrix_mdev, "Driver data missing from mdev!!")) {13711371- ret = -EINVAL;13721372- break;13731373- }13741374-13751375- /*13761376- * If the KVM pointer is in the process of being set, wait until13771377- * the process has completed.13781378- */13791379- wait_event_cmd(matrix_mdev->wait_for_kvm,13801380- !matrix_mdev->kvm_busy,13811381- mutex_unlock(&matrix_dev->lock),13821382- mutex_lock(&matrix_dev->lock));13831383-13841384- ret = vfio_ap_mdev_reset_queues(mdev);13981398+ ret = vfio_ap_mdev_reset_queues(matrix_mdev);13851399 break;13861400 default:13871401 ret = -EOPNOTSUPP;···13771421 return ret;13781422}1379142314241424+static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {14251425+ .open_device = vfio_ap_mdev_open_device,14261426+ .close_device = vfio_ap_mdev_close_device,14271427+ .ioctl = vfio_ap_mdev_ioctl,14281428+};14291429+14301430+static struct mdev_driver vfio_ap_matrix_driver = {14311431+ .driver = {14321432+ .name = "vfio_ap_mdev",14331433+ .owner = THIS_MODULE,14341434+ .mod_name = KBUILD_MODNAME,14351435+ .dev_groups = vfio_ap_mdev_attr_groups,14361436+ },14371437+ .probe = vfio_ap_mdev_probe,14381438+ .remove = vfio_ap_mdev_remove,14391439+};14401440+13801441static const struct mdev_parent_ops vfio_ap_matrix_ops = {13811442 .owner = THIS_MODULE,14431443+ .device_driver = &vfio_ap_matrix_driver,13821444 .supported_type_groups = 
vfio_ap_mdev_type_groups,13831383- .mdev_attr_groups = vfio_ap_mdev_attr_groups,13841384- .create = vfio_ap_mdev_create,13851385- .remove = vfio_ap_mdev_remove,13861386- .open_device = vfio_ap_mdev_open_device,13871387- .close_device = vfio_ap_mdev_close_device,13881388- .ioctl = vfio_ap_mdev_ioctl,13891445};1390144613911447int vfio_ap_mdev_register(void)13921448{14491449+ int ret;14501450+13931451 atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);1394145213951395- return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);14531453+ ret = mdev_register_driver(&vfio_ap_matrix_driver);14541454+ if (ret)14551455+ return ret;14561456+14571457+ ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);14581458+ if (ret)14591459+ goto err_driver;14601460+ return 0;14611461+14621462+err_driver:14631463+ mdev_unregister_driver(&vfio_ap_matrix_driver);14641464+ return ret;13961465}1397146613981467void vfio_ap_mdev_unregister(void)13991468{14001469 mdev_unregister_device(&matrix_dev->device);14701470+ mdev_unregister_driver(&vfio_ap_matrix_driver);14011471}
···11# SPDX-License-Identifier: GPL-2.0-only22-config VFIO_IOMMU_TYPE133- tristate44- depends on VFIO55- default n66-77-config VFIO_IOMMU_SPAPR_TCE88- tristate99- depends on VFIO && SPAPR_TCE_IOMMU1010- default VFIO1111-1212-config VFIO_SPAPR_EEH1313- tristate1414- depends on EEH && VFIO_IOMMU_SPAPR_TCE1515- default VFIO1616-1717-config VFIO_VIRQFD1818- tristate1919- depends on VFIO && EVENTFD2020- default n2121-222menuconfig VFIO233 tristate "VFIO Non-Privileged userspace driver framework"244 select IOMMU_API···9291030 If you don't know what to do here, say N.11313232+if VFIO3333+config VFIO_IOMMU_TYPE13434+ tristate3535+ default n3636+3737+config VFIO_IOMMU_SPAPR_TCE3838+ tristate3939+ depends on SPAPR_TCE_IOMMU4040+ default VFIO4141+4242+config VFIO_SPAPR_EEH4343+ tristate4444+ depends on EEH && VFIO_IOMMU_SPAPR_TCE4545+ default VFIO4646+4747+config VFIO_VIRQFD4848+ tristate4949+ select EVENTFD5050+ default n5151+1252config VFIO_NOIOMMU1353 bool "VFIO No-IOMMU support"1414- depends on VFIO1554 help1655 VFIO is built on the ability to isolate devices using the IOMMU.1756 Only with an IOMMU can userspace access to DMA capable devices be···4748source "drivers/vfio/platform/Kconfig"4849source "drivers/vfio/mdev/Kconfig"4950source "drivers/vfio/fsl-mc/Kconfig"5151+endif5252+5053source "virt/lib/Kconfig"
+2-1
drivers/vfio/fsl-mc/Kconfig
···11config VFIO_FSL_MC22 tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"33- depends on VFIO && FSL_MC_BUS && EVENTFD33+ depends on FSL_MC_BUS44+ select EVENTFD45 help56 Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc67 (Management Complex) devices. This is required to passthrough
-1
drivers/vfio/mdev/Kconfig
···2233config VFIO_MDEV44 tristate "Mediated device driver framework"55- depends on VFIO65 default n76 help87 Provides a framework to virtualize devices.
+22-18
drivers/vfio/pci/Kconfig
···11# SPDX-License-Identifier: GPL-2.0-only22-config VFIO_PCI33- tristate "VFIO support for PCI devices"44- depends on VFIO && PCI && EVENTFD55- depends on MMU22+if PCI && MMU33+config VFIO_PCI_CORE44+ tristate65 select VFIO_VIRQFD76 select IRQ_BYPASS_MANAGER77+88+config VFIO_PCI_MMAP99+ def_bool y if !S3901010+1111+config VFIO_PCI_INTX1212+ def_bool y if !S3901313+1414+config VFIO_PCI1515+ tristate "Generic VFIO support for any PCI device"1616+ select VFIO_PCI_CORE817 help99- Support for the PCI VFIO bus driver. This is required to make1010- use of PCI drivers using the VFIO framework.1818+ Support for the generic PCI VFIO bus driver which can connect any1919+ PCI device to the VFIO framework.11201221 If you don't know what to do here, say N.13222323+if VFIO_PCI1424config VFIO_PCI_VGA1515- bool "VFIO PCI support for VGA devices"1616- depends on VFIO_PCI && X86 && VGA_ARB2525+ bool "Generic VFIO PCI support for VGA devices"2626+ depends on X86 && VGA_ARB1727 help1828 Support for VGA extension to VFIO PCI. This exposes an additional1929 region on VGA devices for accessing legacy VGA addresses used by···31213222 If you don't know what to do here, say N.33233434-config VFIO_PCI_MMAP3535- depends on VFIO_PCI3636- def_bool y if !S3903737-3838-config VFIO_PCI_INTX3939- depends on VFIO_PCI4040- def_bool y if !S3904141-4224config VFIO_PCI_IGD4343- bool "VFIO PCI extensions for Intel graphics (GVT-d)"4444- depends on VFIO_PCI && X862525+ bool "Generic VFIO PCI extensions for Intel graphics (GVT-d)"2626+ depends on X864527 default y4628 help4729 Support for Intel IGD specific extensions to enable direct···4240 and LPC bridge config space.43414442 To enable Intel IGD assignment through vfio-pci, say Y.4343+endif4444+endif
···11// SPDX-License-Identifier: GPL-2.0-only22/*33+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved44+ *35 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.46 * Author: Alex Williamson <alex.williamson@redhat.com>57 *···2018#include <linux/module.h>2119#include <linux/mutex.h>2220#include <linux/notifier.h>2323-#include <linux/pci.h>2421#include <linux/pm_runtime.h>2522#include <linux/slab.h>2623#include <linux/types.h>2724#include <linux/uaccess.h>2828-#include <linux/vfio.h>2929-#include <linux/vgaarb.h>3030-#include <linux/nospec.h>3131-#include <linux/sched/mm.h>32253333-#include "vfio_pci_private.h"2626+#include <linux/vfio_pci_core.h>34273535-#define DRIVER_VERSION "0.2"3628#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"3729#define DRIVER_DESC "VFIO PCI - User Level meta-driver"3830···5963static bool disable_denylist;6064module_param(disable_denylist, bool, 0444);6165MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users.");6262-6363-static inline bool vfio_vga_disabled(void)6464-{6565-#ifdef CONFIG_VFIO_PCI_VGA6666- return disable_vga;6767-#else6868- return true;6969-#endif7070-}71667267static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev)7368{···98111 return true;99112}100113101101-/*102102- * Our VGA arbiter participation is limited since we don't know anything103103- * about the device itself. 
However, if the device is the only VGA device104104- * downstream of a bridge and VFIO VGA support is disabled, then we can105105- * safely return legacy VGA IO and memory as not decoded since the user106106- * has no way to get to it and routing can be disabled externally at the107107- * bridge.108108- */109109-static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)114114+static int vfio_pci_open_device(struct vfio_device *core_vdev)110115{111111- struct vfio_pci_device *vdev = opaque;112112- struct pci_dev *tmp = NULL, *pdev = vdev->pdev;113113- unsigned char max_busnr;114114- unsigned int decodes;115115-116116- if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))117117- return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |118118- VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;119119-120120- max_busnr = pci_bus_max_busnr(pdev->bus);121121- decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;122122-123123- while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {124124- if (tmp == pdev ||125125- pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||126126- pci_is_root_bus(tmp->bus))127127- continue;128128-129129- if (tmp->bus->number >= pdev->bus->number &&130130- tmp->bus->number <= max_busnr) {131131- pci_dev_put(tmp);132132- decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;133133- break;134134- }135135- }136136-137137- return decodes;138138-}139139-140140-static inline bool vfio_pci_is_vga(struct pci_dev *pdev)141141-{142142- return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;143143-}144144-145145-static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)146146-{147147- struct resource *res;148148- int i;149149- struct vfio_pci_dummy_resource *dummy_res;150150-151151- for (i = 0; i < PCI_STD_NUM_BARS; i++) {152152- int bar = i + PCI_STD_RESOURCES;153153-154154- res = &vdev->pdev->resource[bar];155155-156156- if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))157157- goto no_mmap;158158-159159- if (!(res->flags & 
IORESOURCE_MEM))160160- goto no_mmap;161161-162162- /*163163- * The PCI core shouldn't set up a resource with a164164- * type but zero size. But there may be bugs that165165- * cause us to do that.166166- */167167- if (!resource_size(res))168168- goto no_mmap;169169-170170- if (resource_size(res) >= PAGE_SIZE) {171171- vdev->bar_mmap_supported[bar] = true;172172- continue;173173- }174174-175175- if (!(res->start & ~PAGE_MASK)) {176176- /*177177- * Add a dummy resource to reserve the remainder178178- * of the exclusive page in case that hot-add179179- * device's bar is assigned into it.180180- */181181- dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);182182- if (dummy_res == NULL)183183- goto no_mmap;184184-185185- dummy_res->resource.name = "vfio sub-page reserved";186186- dummy_res->resource.start = res->end + 1;187187- dummy_res->resource.end = res->start + PAGE_SIZE - 1;188188- dummy_res->resource.flags = res->flags;189189- if (request_resource(res->parent,190190- &dummy_res->resource)) {191191- kfree(dummy_res);192192- goto no_mmap;193193- }194194- dummy_res->index = bar;195195- list_add(&dummy_res->res_next,196196- &vdev->dummy_resources_list);197197- vdev->bar_mmap_supported[bar] = true;198198- continue;199199- }200200- /*201201- * Here we don't handle the case when the BAR is not page202202- * aligned because we can't expect the BAR will be203203- * assigned into the same location in a page in guest204204- * when we passthrough the BAR. 
And it's hard to access205205- * this BAR in userspace because we have no way to get206206- * the BAR's location in a page.207207- */208208-no_mmap:209209- vdev->bar_mmap_supported[bar] = false;210210- }211211-}212212-213213-struct vfio_pci_group_info;214214-static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);215215-static void vfio_pci_disable(struct vfio_pci_device *vdev);216216-static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,217217- struct vfio_pci_group_info *groups);218218-219219-/*220220- * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND221221- * _and_ the ability detect when the device is asserting INTx via PCI_STATUS.222222- * If a device implements the former but not the latter we would typically223223- * expect broken_intx_masking be set and require an exclusive interrupt.224224- * However since we do have control of the device's ability to assert INTx,225225- * we can instead pretend that the device does not implement INTx, virtualizing226226- * the pin register to report zero and maintaining DisINTx set on the host.227227- */228228-static bool vfio_pci_nointx(struct pci_dev *pdev)229229-{230230- switch (pdev->vendor) {231231- case PCI_VENDOR_ID_INTEL:232232- switch (pdev->device) {233233- /* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */234234- case 0x1572:235235- case 0x1574:236236- case 0x1580 ... 0x1581:237237- case 0x1583 ... 0x158b:238238- case 0x37d0 ... 
0x37d2:239239- /* X550 */240240- case 0x1563:241241- return true;242242- default:243243- return false;244244- }245245- }246246-247247- return false;248248-}249249-250250-static void vfio_pci_probe_power_state(struct vfio_pci_device *vdev)251251-{252252- struct pci_dev *pdev = vdev->pdev;253253- u16 pmcsr;254254-255255- if (!pdev->pm_cap)256256- return;257257-258258- pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);259259-260260- vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);261261-}262262-263263-/*264264- * pci_set_power_state() wrapper handling devices which perform a soft reset on265265- * D3->D0 transition. Save state prior to D0/1/2->D3, stash it on the vdev,266266- * restore when returned to D0. Saved separately from pci_saved_state for use267267- * by PM capability emulation and separately from pci_dev internal saved state268268- * to avoid it being overwritten and consumed around other resets.269269- */270270-int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)271271-{272272- struct pci_dev *pdev = vdev->pdev;273273- bool needs_restore = false, needs_save = false;274274- int ret;275275-276276- if (vdev->needs_pm_restore) {277277- if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {278278- pci_save_state(pdev);279279- needs_save = true;280280- }281281-282282- if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)283283- needs_restore = true;284284- }285285-286286- ret = pci_set_power_state(pdev, state);287287-288288- if (!ret) {289289- /* D3 might be unsupported via quirk, skip unless in D3 */290290- if (needs_save && pdev->current_state >= PCI_D3hot) {291291- vdev->pm_save = pci_store_saved_state(pdev);292292- } else if (needs_restore) {293293- pci_load_and_free_saved_state(pdev, &vdev->pm_save);294294- pci_restore_state(pdev);295295- }296296- }297297-298298- return ret;299299-}300300-301301-static int vfio_pci_enable(struct vfio_pci_device *vdev)302302-{116116+ struct vfio_pci_core_device 
*vdev =117117+ container_of(core_vdev, struct vfio_pci_core_device, vdev);303118 struct pci_dev *pdev = vdev->pdev;304119 int ret;305305- u16 cmd;306306- u8 msix_pos;307120308308- vfio_pci_set_power_state(vdev, PCI_D0);309309-310310- /* Don't allow our initial saved state to include busmaster */311311- pci_clear_master(pdev);312312-313313- ret = pci_enable_device(pdev);121121+ ret = vfio_pci_core_enable(vdev);314122 if (ret)315123 return ret;316316-317317- /* If reset fails because of the device lock, fail this path entirely */318318- ret = pci_try_reset_function(pdev);319319- if (ret == -EAGAIN) {320320- pci_disable_device(pdev);321321- return ret;322322- }323323-324324- vdev->reset_works = !ret;325325- pci_save_state(pdev);326326- vdev->pci_saved_state = pci_store_saved_state(pdev);327327- if (!vdev->pci_saved_state)328328- pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);329329-330330- if (likely(!nointxmask)) {331331- if (vfio_pci_nointx(pdev)) {332332- pci_info(pdev, "Masking broken INTx support\n");333333- vdev->nointx = true;334334- pci_intx(pdev, 0);335335- } else336336- vdev->pci_2_3 = pci_intx_mask_supported(pdev);337337- }338338-339339- pci_read_config_word(pdev, PCI_COMMAND, &cmd);340340- if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {341341- cmd &= ~PCI_COMMAND_INTX_DISABLE;342342- pci_write_config_word(pdev, PCI_COMMAND, cmd);343343- }344344-345345- ret = vfio_config_init(vdev);346346- if (ret) {347347- kfree(vdev->pci_saved_state);348348- vdev->pci_saved_state = NULL;349349- pci_disable_device(pdev);350350- return ret;351351- }352352-353353- msix_pos = pdev->msix_cap;354354- if (msix_pos) {355355- u16 flags;356356- u32 table;357357-358358- pci_read_config_word(pdev, msix_pos + PCI_MSIX_FLAGS, &flags);359359- pci_read_config_dword(pdev, msix_pos + PCI_MSIX_TABLE, &table);360360-361361- vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;362362- vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;363363- vdev->msix_size = ((flags & 
PCI_MSIX_FLAGS_QSIZE) + 1) * 16;364364- } else365365- vdev->msix_bar = 0xFF;366366-367367- if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))368368- vdev->has_vga = true;369124370125 if (vfio_pci_is_vga(pdev) &&371126 pdev->vendor == PCI_VENDOR_ID_INTEL &&···115386 ret = vfio_pci_igd_init(vdev);116387 if (ret && ret != -ENODEV) {117388 pci_warn(pdev, "Failed to setup Intel IGD regions\n");118118- goto disable_exit;119119- }120120- }121121-122122- vfio_pci_probe_mmaps(vdev);123123-124124- return 0;125125-126126-disable_exit:127127- vfio_pci_disable(vdev);128128- return ret;129129-}130130-131131-static void vfio_pci_disable(struct vfio_pci_device *vdev)132132-{133133- struct pci_dev *pdev = vdev->pdev;134134- struct vfio_pci_dummy_resource *dummy_res, *tmp;135135- struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;136136- int i, bar;137137-138138- /* For needs_reset */139139- lockdep_assert_held(&vdev->vdev.dev_set->lock);140140-141141- /* Stop the device from further DMA */142142- pci_clear_master(pdev);143143-144144- vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |145145- VFIO_IRQ_SET_ACTION_TRIGGER,146146- vdev->irq_type, 0, 0, NULL);147147-148148- /* Device closed, don't need mutex here */149149- list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,150150- &vdev->ioeventfds_list, next) {151151- vfio_virqfd_disable(&ioeventfd->virqfd);152152- list_del(&ioeventfd->next);153153- kfree(ioeventfd);154154- }155155- vdev->ioeventfds_nr = 0;156156-157157- vdev->virq_disabled = false;158158-159159- for (i = 0; i < vdev->num_regions; i++)160160- vdev->region[i].ops->release(vdev, &vdev->region[i]);161161-162162- vdev->num_regions = 0;163163- kfree(vdev->region);164164- vdev->region = NULL; /* don't krealloc a freed pointer */165165-166166- vfio_config_free(vdev);167167-168168- for (i = 0; i < PCI_STD_NUM_BARS; i++) {169169- bar = i + PCI_STD_RESOURCES;170170- if (!vdev->barmap[bar])171171- continue;172172- pci_iounmap(pdev, vdev->barmap[bar]);173173- 
pci_release_selected_regions(pdev, 1 << bar);174174- vdev->barmap[bar] = NULL;175175- }176176-177177- list_for_each_entry_safe(dummy_res, tmp,178178- &vdev->dummy_resources_list, res_next) {179179- list_del(&dummy_res->res_next);180180- release_resource(&dummy_res->resource);181181- kfree(dummy_res);182182- }183183-184184- vdev->needs_reset = true;185185-186186- /*187187- * If we have saved state, restore it. If we can reset the device,188188- * even better. Resetting with current state seems better than189189- * nothing, but saving and restoring current state without reset190190- * is just busy work.191191- */192192- if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {193193- pci_info(pdev, "%s: Couldn't reload saved state\n", __func__);194194-195195- if (!vdev->reset_works)196196- goto out;197197-198198- pci_save_state(pdev);199199- }200200-201201- /*202202- * Disable INTx and MSI, presumably to avoid spurious interrupts203203- * during reset. Stolen from pci_reset_function()204204- */205205- pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);206206-207207- /*208208- * Try to get the locks ourselves to prevent a deadlock. 
The209209- * success of this is dependent on being able to lock the device,210210- * which is not always possible.211211- * We can not use the "try" reset interface here, which will212212- * overwrite the previously restored configuration information.213213- */214214- if (vdev->reset_works && pci_dev_trylock(pdev)) {215215- if (!__pci_reset_function_locked(pdev))216216- vdev->needs_reset = false;217217- pci_dev_unlock(pdev);218218- }219219-220220- pci_restore_state(pdev);221221-out:222222- pci_disable_device(pdev);223223-224224- if (!vfio_pci_dev_set_try_reset(vdev->vdev.dev_set) && !disable_idle_d3)225225- vfio_pci_set_power_state(vdev, PCI_D3hot);226226-}227227-228228-static struct pci_driver vfio_pci_driver;229229-230230-static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev)231231-{232232- struct pci_dev *physfn = pci_physfn(vdev->pdev);233233- struct vfio_device *pf_dev;234234-235235- if (!vdev->pdev->is_virtfn)236236- return NULL;237237-238238- pf_dev = vfio_device_get_from_dev(&physfn->dev);239239- if (!pf_dev)240240- return NULL;241241-242242- if (pci_dev_driver(physfn) != &vfio_pci_driver) {243243- vfio_device_put(pf_dev);244244- return NULL;245245- }246246-247247- return container_of(pf_dev, struct vfio_pci_device, vdev);248248-}249249-250250-static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)251251-{252252- struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);253253-254254- if (!pf_vdev)255255- return;256256-257257- mutex_lock(&pf_vdev->vf_token->lock);258258- pf_vdev->vf_token->users += val;259259- WARN_ON(pf_vdev->vf_token->users < 0);260260- mutex_unlock(&pf_vdev->vf_token->lock);261261-262262- vfio_device_put(&pf_vdev->vdev);263263-}264264-265265-static void vfio_pci_close_device(struct vfio_device *core_vdev)266266-{267267- struct vfio_pci_device *vdev =268268- container_of(core_vdev, struct vfio_pci_device, vdev);269269-270270- vfio_pci_vf_token_user_add(vdev, -1);271271- 
vfio_spapr_pci_eeh_release(vdev->pdev);272272- vfio_pci_disable(vdev);273273-274274- mutex_lock(&vdev->igate);275275- if (vdev->err_trigger) {276276- eventfd_ctx_put(vdev->err_trigger);277277- vdev->err_trigger = NULL;278278- }279279- if (vdev->req_trigger) {280280- eventfd_ctx_put(vdev->req_trigger);281281- vdev->req_trigger = NULL;282282- }283283- mutex_unlock(&vdev->igate);284284-}285285-286286-static int vfio_pci_open_device(struct vfio_device *core_vdev)287287-{288288- struct vfio_pci_device *vdev =289289- container_of(core_vdev, struct vfio_pci_device, vdev);290290- int ret = 0;291291-292292- ret = vfio_pci_enable(vdev);293293- if (ret)294294- return ret;295295-296296- vfio_spapr_pci_eeh_open(vdev->pdev);297297- vfio_pci_vf_token_user_add(vdev, 1);298298- return 0;299299-}300300-301301-static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)302302-{303303- if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {304304- u8 pin;305305-306306- if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||307307- vdev->nointx || vdev->pdev->is_virtfn)308308- return 0;309309-310310- pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);311311-312312- return pin ? 
1 : 0;313313- } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {314314- u8 pos;315315- u16 flags;316316-317317- pos = vdev->pdev->msi_cap;318318- if (pos) {319319- pci_read_config_word(vdev->pdev,320320- pos + PCI_MSI_FLAGS, &flags);321321- return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);322322- }323323- } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {324324- u8 pos;325325- u16 flags;326326-327327- pos = vdev->pdev->msix_cap;328328- if (pos) {329329- pci_read_config_word(vdev->pdev,330330- pos + PCI_MSIX_FLAGS, &flags);331331-332332- return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;333333- }334334- } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {335335- if (pci_is_pcie(vdev->pdev))336336- return 1;337337- } else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {338338- return 1;339339- }340340-341341- return 0;342342-}343343-344344-static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)345345-{346346- (*(int *)data)++;347347- return 0;348348-}349349-350350-struct vfio_pci_fill_info {351351- int max;352352- int cur;353353- struct vfio_pci_dependent_device *devices;354354-};355355-356356-static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)357357-{358358- struct vfio_pci_fill_info *fill = data;359359- struct iommu_group *iommu_group;360360-361361- if (fill->cur == fill->max)362362- return -EAGAIN; /* Something changed, try again */363363-364364- iommu_group = iommu_group_get(&pdev->dev);365365- if (!iommu_group)366366- return -EPERM; /* Cannot reset non-isolated devices */367367-368368- fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);369369- fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);370370- fill->devices[fill->cur].bus = pdev->bus->number;371371- fill->devices[fill->cur].devfn = pdev->devfn;372372- fill->cur++;373373- iommu_group_put(iommu_group);374374- return 0;375375-}376376-377377-struct vfio_pci_group_info {378378- int count;379379- struct vfio_group **groups;380380-};381381-382382-static bool vfio_pci_dev_below_slot(struct 
pci_dev *pdev, struct pci_slot *slot)383383-{384384- for (; pdev; pdev = pdev->bus->self)385385- if (pdev->bus == slot->bus)386386- return (pdev->slot == slot);387387- return false;388388-}389389-390390-struct vfio_pci_walk_info {391391- int (*fn)(struct pci_dev *, void *data);392392- void *data;393393- struct pci_dev *pdev;394394- bool slot;395395- int ret;396396-};397397-398398-static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)399399-{400400- struct vfio_pci_walk_info *walk = data;401401-402402- if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))403403- walk->ret = walk->fn(pdev, walk->data);404404-405405- return walk->ret;406406-}407407-408408-static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,409409- int (*fn)(struct pci_dev *,410410- void *data), void *data,411411- bool slot)412412-{413413- struct vfio_pci_walk_info walk = {414414- .fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,415415- };416416-417417- pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);418418-419419- return walk.ret;420420-}421421-422422-static int msix_mmappable_cap(struct vfio_pci_device *vdev,423423- struct vfio_info_cap *caps)424424-{425425- struct vfio_info_cap_header header = {426426- .id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,427427- .version = 1428428- };429429-430430- return vfio_info_add_capability(caps, &header, sizeof(header));431431-}432432-433433-int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,434434- unsigned int type, unsigned int subtype,435435- const struct vfio_pci_regops *ops,436436- size_t size, u32 flags, void *data)437437-{438438- struct vfio_pci_region *region;439439-440440- region = krealloc(vdev->region,441441- (vdev->num_regions + 1) * sizeof(*region),442442- GFP_KERNEL);443443- if (!region)444444- return -ENOMEM;445445-446446- vdev->region = region;447447- vdev->region[vdev->num_regions].type = type;448448- vdev->region[vdev->num_regions].subtype = subtype;449449- 
vdev->region[vdev->num_regions].ops = ops;450450- vdev->region[vdev->num_regions].size = size;451451- vdev->region[vdev->num_regions].flags = flags;452452- vdev->region[vdev->num_regions].data = data;453453-454454- vdev->num_regions++;455455-456456- return 0;457457-}458458-459459-static long vfio_pci_ioctl(struct vfio_device *core_vdev,460460- unsigned int cmd, unsigned long arg)461461-{462462- struct vfio_pci_device *vdev =463463- container_of(core_vdev, struct vfio_pci_device, vdev);464464- unsigned long minsz;465465-466466- if (cmd == VFIO_DEVICE_GET_INFO) {467467- struct vfio_device_info info;468468- struct vfio_info_cap caps = { .buf = NULL, .size = 0 };469469- unsigned long capsz;470470- int ret;471471-472472- minsz = offsetofend(struct vfio_device_info, num_irqs);473473-474474- /* For backward compatibility, cannot require this */475475- capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);476476-477477- if (copy_from_user(&info, (void __user *)arg, minsz))478478- return -EFAULT;479479-480480- if (info.argsz < minsz)481481- return -EINVAL;482482-483483- if (info.argsz >= capsz) {484484- minsz = capsz;485485- info.cap_offset = 0;486486- }487487-488488- info.flags = VFIO_DEVICE_FLAGS_PCI;489489-490490- if (vdev->reset_works)491491- info.flags |= VFIO_DEVICE_FLAGS_RESET;492492-493493- info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;494494- info.num_irqs = VFIO_PCI_NUM_IRQS;495495-496496- ret = vfio_pci_info_zdev_add_caps(vdev, &caps);497497- if (ret && ret != -ENODEV) {498498- pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");389389+ vfio_pci_core_disable(vdev);499390 return ret;500391 }501501-502502- if (caps.size) {503503- info.flags |= VFIO_DEVICE_FLAGS_CAPS;504504- if (info.argsz < sizeof(info) + caps.size) {505505- info.argsz = sizeof(info) + caps.size;506506- } else {507507- vfio_info_cap_shift(&caps, sizeof(info));508508- if (copy_to_user((void __user *)arg +509509- sizeof(info), caps.buf,510510- caps.size)) {511511- 
kfree(caps.buf);512512- return -EFAULT;513513- }514514- info.cap_offset = sizeof(info);515515- }516516-517517- kfree(caps.buf);518518- }519519-520520- return copy_to_user((void __user *)arg, &info, minsz) ?521521- -EFAULT : 0;522522-523523- } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {524524- struct pci_dev *pdev = vdev->pdev;525525- struct vfio_region_info info;526526- struct vfio_info_cap caps = { .buf = NULL, .size = 0 };527527- int i, ret;528528-529529- minsz = offsetofend(struct vfio_region_info, offset);530530-531531- if (copy_from_user(&info, (void __user *)arg, minsz))532532- return -EFAULT;533533-534534- if (info.argsz < minsz)535535- return -EINVAL;536536-537537- switch (info.index) {538538- case VFIO_PCI_CONFIG_REGION_INDEX:539539- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);540540- info.size = pdev->cfg_size;541541- info.flags = VFIO_REGION_INFO_FLAG_READ |542542- VFIO_REGION_INFO_FLAG_WRITE;543543- break;544544- case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:545545- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);546546- info.size = pci_resource_len(pdev, info.index);547547- if (!info.size) {548548- info.flags = 0;549549- break;550550- }551551-552552- info.flags = VFIO_REGION_INFO_FLAG_READ |553553- VFIO_REGION_INFO_FLAG_WRITE;554554- if (vdev->bar_mmap_supported[info.index]) {555555- info.flags |= VFIO_REGION_INFO_FLAG_MMAP;556556- if (info.index == vdev->msix_bar) {557557- ret = msix_mmappable_cap(vdev, &caps);558558- if (ret)559559- return ret;560560- }561561- }562562-563563- break;564564- case VFIO_PCI_ROM_REGION_INDEX:565565- {566566- void __iomem *io;567567- size_t size;568568- u16 cmd;569569-570570- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);571571- info.flags = 0;572572-573573- /* Report the BAR size, not the ROM size */574574- info.size = pci_resource_len(pdev, info.index);575575- if (!info.size) {576576- /* Shadow ROMs appear as PCI option ROMs */577577- if (pdev->resource[PCI_ROM_RESOURCE].flags &578578- 
IORESOURCE_ROM_SHADOW)579579- info.size = 0x20000;580580- else581581- break;582582- }583583-584584- /*585585- * Is it really there? Enable memory decode for586586- * implicit access in pci_map_rom().587587- */588588- cmd = vfio_pci_memory_lock_and_enable(vdev);589589- io = pci_map_rom(pdev, &size);590590- if (io) {591591- info.flags = VFIO_REGION_INFO_FLAG_READ;592592- pci_unmap_rom(pdev, io);593593- } else {594594- info.size = 0;595595- }596596- vfio_pci_memory_unlock_and_restore(vdev, cmd);597597-598598- break;599599- }600600- case VFIO_PCI_VGA_REGION_INDEX:601601- if (!vdev->has_vga)602602- return -EINVAL;603603-604604- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);605605- info.size = 0xc0000;606606- info.flags = VFIO_REGION_INFO_FLAG_READ |607607- VFIO_REGION_INFO_FLAG_WRITE;608608-609609- break;610610- default:611611- {612612- struct vfio_region_info_cap_type cap_type = {613613- .header.id = VFIO_REGION_INFO_CAP_TYPE,614614- .header.version = 1 };615615-616616- if (info.index >=617617- VFIO_PCI_NUM_REGIONS + vdev->num_regions)618618- return -EINVAL;619619- info.index = array_index_nospec(info.index,620620- VFIO_PCI_NUM_REGIONS +621621- vdev->num_regions);622622-623623- i = info.index - VFIO_PCI_NUM_REGIONS;624624-625625- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);626626- info.size = vdev->region[i].size;627627- info.flags = vdev->region[i].flags;628628-629629- cap_type.type = vdev->region[i].type;630630- cap_type.subtype = vdev->region[i].subtype;631631-632632- ret = vfio_info_add_capability(&caps, &cap_type.header,633633- sizeof(cap_type));634634- if (ret)635635- return ret;636636-637637- if (vdev->region[i].ops->add_capability) {638638- ret = vdev->region[i].ops->add_capability(vdev,639639- &vdev->region[i], &caps);640640- if (ret)641641- return ret;642642- }643643- }644644- }645645-646646- if (caps.size) {647647- info.flags |= VFIO_REGION_INFO_FLAG_CAPS;648648- if (info.argsz < sizeof(info) + caps.size) {649649- info.argsz = sizeof(info) + 
caps.size;650650- info.cap_offset = 0;651651- } else {652652- vfio_info_cap_shift(&caps, sizeof(info));653653- if (copy_to_user((void __user *)arg +654654- sizeof(info), caps.buf,655655- caps.size)) {656656- kfree(caps.buf);657657- return -EFAULT;658658- }659659- info.cap_offset = sizeof(info);660660- }661661-662662- kfree(caps.buf);663663- }664664-665665- return copy_to_user((void __user *)arg, &info, minsz) ?666666- -EFAULT : 0;667667-668668- } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {669669- struct vfio_irq_info info;670670-671671- minsz = offsetofend(struct vfio_irq_info, count);672672-673673- if (copy_from_user(&info, (void __user *)arg, minsz))674674- return -EFAULT;675675-676676- if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)677677- return -EINVAL;678678-679679- switch (info.index) {680680- case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:681681- case VFIO_PCI_REQ_IRQ_INDEX:682682- break;683683- case VFIO_PCI_ERR_IRQ_INDEX:684684- if (pci_is_pcie(vdev->pdev))685685- break;686686- fallthrough;687687- default:688688- return -EINVAL;689689- }690690-691691- info.flags = VFIO_IRQ_INFO_EVENTFD;692692-693693- info.count = vfio_pci_get_irq_count(vdev, info.index);694694-695695- if (info.index == VFIO_PCI_INTX_IRQ_INDEX)696696- info.flags |= (VFIO_IRQ_INFO_MASKABLE |697697- VFIO_IRQ_INFO_AUTOMASKED);698698- else699699- info.flags |= VFIO_IRQ_INFO_NORESIZE;700700-701701- return copy_to_user((void __user *)arg, &info, minsz) ?702702- -EFAULT : 0;703703-704704- } else if (cmd == VFIO_DEVICE_SET_IRQS) {705705- struct vfio_irq_set hdr;706706- u8 *data = NULL;707707- int max, ret = 0;708708- size_t data_size = 0;709709-710710- minsz = offsetofend(struct vfio_irq_set, count);711711-712712- if (copy_from_user(&hdr, (void __user *)arg, minsz))713713- return -EFAULT;714714-715715- max = vfio_pci_get_irq_count(vdev, hdr.index);716716-717717- ret = vfio_set_irqs_validate_and_prepare(&hdr, max,718718- VFIO_PCI_NUM_IRQS, &data_size);719719- if (ret)720720- 
return ret;721721-722722- if (data_size) {723723- data = memdup_user((void __user *)(arg + minsz),724724- data_size);725725- if (IS_ERR(data))726726- return PTR_ERR(data);727727- }728728-729729- mutex_lock(&vdev->igate);730730-731731- ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,732732- hdr.start, hdr.count, data);733733-734734- mutex_unlock(&vdev->igate);735735- kfree(data);736736-737737- return ret;738738-739739- } else if (cmd == VFIO_DEVICE_RESET) {740740- int ret;741741-742742- if (!vdev->reset_works)743743- return -EINVAL;744744-745745- vfio_pci_zap_and_down_write_memory_lock(vdev);746746- ret = pci_try_reset_function(vdev->pdev);747747- up_write(&vdev->memory_lock);748748-749749- return ret;750750-751751- } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {752752- struct vfio_pci_hot_reset_info hdr;753753- struct vfio_pci_fill_info fill = { 0 };754754- struct vfio_pci_dependent_device *devices = NULL;755755- bool slot = false;756756- int ret = 0;757757-758758- minsz = offsetofend(struct vfio_pci_hot_reset_info, count);759759-760760- if (copy_from_user(&hdr, (void __user *)arg, minsz))761761- return -EFAULT;762762-763763- if (hdr.argsz < minsz)764764- return -EINVAL;765765-766766- hdr.flags = 0;767767-768768- /* Can we do a slot or bus reset or neither? */769769- if (!pci_probe_reset_slot(vdev->pdev->slot))770770- slot = true;771771- else if (pci_probe_reset_bus(vdev->pdev->bus))772772- return -ENODEV;773773-774774- /* How many devices are affected? 
*/775775- ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,776776- vfio_pci_count_devs,777777- &fill.max, slot);778778- if (ret)779779- return ret;780780-781781- WARN_ON(!fill.max); /* Should always be at least one */782782-783783- /*784784- * If there's enough space, fill it now, otherwise return785785- * -ENOSPC and the number of devices affected.786786- */787787- if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {788788- ret = -ENOSPC;789789- hdr.count = fill.max;790790- goto reset_info_exit;791791- }792792-793793- devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);794794- if (!devices)795795- return -ENOMEM;796796-797797- fill.devices = devices;798798-799799- ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,800800- vfio_pci_fill_devs,801801- &fill, slot);802802-803803- /*804804- * If a device was removed between counting and filling,805805- * we may come up short of fill.max. If a device was806806- * added, we'll have a return of -EAGAIN above.807807- */808808- if (!ret)809809- hdr.count = fill.cur;810810-811811-reset_info_exit:812812- if (copy_to_user((void __user *)arg, &hdr, minsz))813813- ret = -EFAULT;814814-815815- if (!ret) {816816- if (copy_to_user((void __user *)(arg + minsz), devices,817817- hdr.count * sizeof(*devices)))818818- ret = -EFAULT;819819- }820820-821821- kfree(devices);822822- return ret;823823-824824- } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {825825- struct vfio_pci_hot_reset hdr;826826- int32_t *group_fds;827827- struct vfio_group **groups;828828- struct vfio_pci_group_info info;829829- bool slot = false;830830- int group_idx, count = 0, ret = 0;831831-832832- minsz = offsetofend(struct vfio_pci_hot_reset, count);833833-834834- if (copy_from_user(&hdr, (void __user *)arg, minsz))835835- return -EFAULT;836836-837837- if (hdr.argsz < minsz || hdr.flags)838838- return -EINVAL;839839-840840- /* Can we do a slot or bus reset or neither? 
*/841841- if (!pci_probe_reset_slot(vdev->pdev->slot))842842- slot = true;843843- else if (pci_probe_reset_bus(vdev->pdev->bus))844844- return -ENODEV;845845-846846- /*847847- * We can't let userspace give us an arbitrarily large848848- * buffer to copy, so verify how many we think there849849- * could be. Note groups can have multiple devices so850850- * one group per device is the max.851851- */852852- ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,853853- vfio_pci_count_devs,854854- &count, slot);855855- if (ret)856856- return ret;857857-858858- /* Somewhere between 1 and count is OK */859859- if (!hdr.count || hdr.count > count)860860- return -EINVAL;861861-862862- group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);863863- groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);864864- if (!group_fds || !groups) {865865- kfree(group_fds);866866- kfree(groups);867867- return -ENOMEM;868868- }869869-870870- if (copy_from_user(group_fds, (void __user *)(arg + minsz),871871- hdr.count * sizeof(*group_fds))) {872872- kfree(group_fds);873873- kfree(groups);874874- return -EFAULT;875875- }876876-877877- /*878878- * For each group_fd, get the group through the vfio external879879- * user interface and store the group and iommu ID. 
This880880- * ensures the group is held across the reset.881881- */882882- for (group_idx = 0; group_idx < hdr.count; group_idx++) {883883- struct vfio_group *group;884884- struct fd f = fdget(group_fds[group_idx]);885885- if (!f.file) {886886- ret = -EBADF;887887- break;888888- }889889-890890- group = vfio_group_get_external_user(f.file);891891- fdput(f);892892- if (IS_ERR(group)) {893893- ret = PTR_ERR(group);894894- break;895895- }896896-897897- groups[group_idx] = group;898898- }899899-900900- kfree(group_fds);901901-902902- /* release reference to groups on error */903903- if (ret)904904- goto hot_reset_release;905905-906906- info.count = hdr.count;907907- info.groups = groups;908908-909909- ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);910910-911911-hot_reset_release:912912- for (group_idx--; group_idx >= 0; group_idx--)913913- vfio_group_put_external_user(groups[group_idx]);914914-915915- kfree(groups);916916- return ret;917917- } else if (cmd == VFIO_DEVICE_IOEVENTFD) {918918- struct vfio_device_ioeventfd ioeventfd;919919- int count;920920-921921- minsz = offsetofend(struct vfio_device_ioeventfd, fd);922922-923923- if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))924924- return -EFAULT;925925-926926- if (ioeventfd.argsz < minsz)927927- return -EINVAL;928928-929929- if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)930930- return -EINVAL;931931-932932- count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;933933-934934- if (hweight8(count) != 1 || ioeventfd.fd < -1)935935- return -EINVAL;936936-937937- return vfio_pci_ioeventfd(vdev, ioeventfd.offset,938938- ioeventfd.data, count, ioeventfd.fd);939939- } else if (cmd == VFIO_DEVICE_FEATURE) {940940- struct vfio_device_feature feature;941941- uuid_t uuid;942942-943943- minsz = offsetofend(struct vfio_device_feature, flags);944944-945945- if (copy_from_user(&feature, (void __user *)arg, minsz))946946- return -EFAULT;947947-948948- if (feature.argsz < minsz)949949- return 
-EINVAL;950950-951951- /* Check unknown flags */952952- if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |953953- VFIO_DEVICE_FEATURE_SET |954954- VFIO_DEVICE_FEATURE_GET |955955- VFIO_DEVICE_FEATURE_PROBE))956956- return -EINVAL;957957-958958- /* GET & SET are mutually exclusive except with PROBE */959959- if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&960960- (feature.flags & VFIO_DEVICE_FEATURE_SET) &&961961- (feature.flags & VFIO_DEVICE_FEATURE_GET))962962- return -EINVAL;963963-964964- switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {965965- case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:966966- if (!vdev->vf_token)967967- return -ENOTTY;968968-969969- /*970970- * We do not support GET of the VF Token UUID as this971971- * could expose the token of the previous device user.972972- */973973- if (feature.flags & VFIO_DEVICE_FEATURE_GET)974974- return -EINVAL;975975-976976- if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)977977- return 0;978978-979979- /* Don't SET unless told to do so */980980- if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))981981- return -EINVAL;982982-983983- if (feature.argsz < minsz + sizeof(uuid))984984- return -EINVAL;985985-986986- if (copy_from_user(&uuid, (void __user *)(arg + minsz),987987- sizeof(uuid)))988988- return -EFAULT;989989-990990- mutex_lock(&vdev->vf_token->lock);991991- uuid_copy(&vdev->vf_token->uuid, &uuid);992992- mutex_unlock(&vdev->vf_token->lock);993993-994994- return 0;995995- default:996996- return -ENOTTY;997997- }998392 }99939310001000- return -ENOTTY;10011001-}10021002-10031003-static ssize_t vfio_pci_rw(struct vfio_pci_device *vdev, char __user *buf,10041004- size_t count, loff_t *ppos, bool iswrite)10051005-{10061006- unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);10071007-10081008- if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)10091009- return -EINVAL;10101010-10111011- switch (index) {10121012- case VFIO_PCI_CONFIG_REGION_INDEX:10131013- return vfio_pci_config_rw(vdev, buf, count, ppos, 
iswrite);10141014-10151015- case VFIO_PCI_ROM_REGION_INDEX:10161016- if (iswrite)10171017- return -EINVAL;10181018- return vfio_pci_bar_rw(vdev, buf, count, ppos, false);10191019-10201020- case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:10211021- return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);10221022-10231023- case VFIO_PCI_VGA_REGION_INDEX:10241024- return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);10251025- default:10261026- index -= VFIO_PCI_NUM_REGIONS;10271027- return vdev->region[index].ops->rw(vdev, buf,10281028- count, ppos, iswrite);10291029- }10301030-10311031- return -EINVAL;10321032-}10331033-10341034-static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf,10351035- size_t count, loff_t *ppos)10361036-{10371037- struct vfio_pci_device *vdev =10381038- container_of(core_vdev, struct vfio_pci_device, vdev);10391039-10401040- if (!count)10411041- return 0;10421042-10431043- return vfio_pci_rw(vdev, buf, count, ppos, false);10441044-}10451045-10461046-static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user *buf,10471047- size_t count, loff_t *ppos)10481048-{10491049- struct vfio_pci_device *vdev =10501050- container_of(core_vdev, struct vfio_pci_device, vdev);10511051-10521052- if (!count)10531053- return 0;10541054-10551055- return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);10561056-}10571057-10581058-/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */10591059-static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)10601060-{10611061- struct vfio_pci_mmap_vma *mmap_vma, *tmp;10621062-10631063- /*10641064- * Lock ordering:10651065- * vma_lock is nested under mmap_lock for vm_ops callback paths.10661066- * The memory_lock semaphore is used by both code paths calling10671067- * into this function to zap vmas and the vm_ops.fault callback10681068- * to protect the memory enable state of the device.10691069- *10701070- * When 
zapping vmas we need to maintain the mmap_lock => vma_lock10711071- * ordering, which requires using vma_lock to walk vma_list to10721072- * acquire an mm, then dropping vma_lock to get the mmap_lock and10731073- * reacquiring vma_lock. This logic is derived from similar10741074- * requirements in uverbs_user_mmap_disassociate().10751075- *10761076- * mmap_lock must always be the top-level lock when it is taken.10771077- * Therefore we can only hold the memory_lock write lock when10781078- * vma_list is empty, as we'd need to take mmap_lock to clear10791079- * entries. vma_list can only be guaranteed empty when holding10801080- * vma_lock, thus memory_lock is nested under vma_lock.10811081- *10821082- * This enables the vm_ops.fault callback to acquire vma_lock,10831083- * followed by memory_lock read lock, while already holding10841084- * mmap_lock without risk of deadlock.10851085- */10861086- while (1) {10871087- struct mm_struct *mm = NULL;10881088-10891089- if (try) {10901090- if (!mutex_trylock(&vdev->vma_lock))10911091- return 0;10921092- } else {10931093- mutex_lock(&vdev->vma_lock);10941094- }10951095- while (!list_empty(&vdev->vma_list)) {10961096- mmap_vma = list_first_entry(&vdev->vma_list,10971097- struct vfio_pci_mmap_vma,10981098- vma_next);10991099- mm = mmap_vma->vma->vm_mm;11001100- if (mmget_not_zero(mm))11011101- break;11021102-11031103- list_del(&mmap_vma->vma_next);11041104- kfree(mmap_vma);11051105- mm = NULL;11061106- }11071107- if (!mm)11081108- return 1;11091109- mutex_unlock(&vdev->vma_lock);11101110-11111111- if (try) {11121112- if (!mmap_read_trylock(mm)) {11131113- mmput(mm);11141114- return 0;11151115- }11161116- } else {11171117- mmap_read_lock(mm);11181118- }11191119- if (try) {11201120- if (!mutex_trylock(&vdev->vma_lock)) {11211121- mmap_read_unlock(mm);11221122- mmput(mm);11231123- return 0;11241124- }11251125- } else {11261126- mutex_lock(&vdev->vma_lock);11271127- }11281128- list_for_each_entry_safe(mmap_vma, tmp,11291129- 
&vdev->vma_list, vma_next) {11301130- struct vm_area_struct *vma = mmap_vma->vma;11311131-11321132- if (vma->vm_mm != mm)11331133- continue;11341134-11351135- list_del(&mmap_vma->vma_next);11361136- kfree(mmap_vma);11371137-11381138- zap_vma_ptes(vma, vma->vm_start,11391139- vma->vm_end - vma->vm_start);11401140- }11411141- mutex_unlock(&vdev->vma_lock);11421142- mmap_read_unlock(mm);11431143- mmput(mm);11441144- }11451145-}11461146-11471147-void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)11481148-{11491149- vfio_pci_zap_and_vma_lock(vdev, false);11501150- down_write(&vdev->memory_lock);11511151- mutex_unlock(&vdev->vma_lock);11521152-}11531153-11541154-u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev)11551155-{11561156- u16 cmd;11571157-11581158- down_write(&vdev->memory_lock);11591159- pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);11601160- if (!(cmd & PCI_COMMAND_MEMORY))11611161- pci_write_config_word(vdev->pdev, PCI_COMMAND,11621162- cmd | PCI_COMMAND_MEMORY);11631163-11641164- return cmd;11651165-}11661166-11671167-void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd)11681168-{11691169- pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);11701170- up_write(&vdev->memory_lock);11711171-}11721172-11731173-/* Caller holds vma_lock */11741174-static int __vfio_pci_add_vma(struct vfio_pci_device *vdev,11751175- struct vm_area_struct *vma)11761176-{11771177- struct vfio_pci_mmap_vma *mmap_vma;11781178-11791179- mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);11801180- if (!mmap_vma)11811181- return -ENOMEM;11821182-11831183- mmap_vma->vma = vma;11841184- list_add(&mmap_vma->vma_next, &vdev->vma_list);394394+ vfio_pci_core_finish_enable(vdev);11853951186396 return 0;11871187-}11881188-11891189-/*11901190- * Zap mmaps on open so that we can fault them in on access and therefore11911191- * our vma_list only tracks mappings accessed since last zap.11921192- */11931193-static void 
vfio_pci_mmap_open(struct vm_area_struct *vma)11941194-{11951195- zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);11961196-}11971197-11981198-static void vfio_pci_mmap_close(struct vm_area_struct *vma)11991199-{12001200- struct vfio_pci_device *vdev = vma->vm_private_data;12011201- struct vfio_pci_mmap_vma *mmap_vma;12021202-12031203- mutex_lock(&vdev->vma_lock);12041204- list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {12051205- if (mmap_vma->vma == vma) {12061206- list_del(&mmap_vma->vma_next);12071207- kfree(mmap_vma);12081208- break;12091209- }12101210- }12111211- mutex_unlock(&vdev->vma_lock);12121212-}12131213-12141214-static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)12151215-{12161216- struct vm_area_struct *vma = vmf->vma;12171217- struct vfio_pci_device *vdev = vma->vm_private_data;12181218- struct vfio_pci_mmap_vma *mmap_vma;12191219- vm_fault_t ret = VM_FAULT_NOPAGE;12201220-12211221- mutex_lock(&vdev->vma_lock);12221222- down_read(&vdev->memory_lock);12231223-12241224- if (!__vfio_pci_memory_enabled(vdev)) {12251225- ret = VM_FAULT_SIGBUS;12261226- goto up_out;12271227- }12281228-12291229- /*12301230- * We populate the whole vma on fault, so we need to test whether12311231- * the vma has already been mapped, such as for concurrent faults12321232- * to the same vma. 
io_remap_pfn_range() will trigger a BUG_ON if12331233- * we ask it to fill the same range again.12341234- */12351235- list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {12361236- if (mmap_vma->vma == vma)12371237- goto up_out;12381238- }12391239-12401240- if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,12411241- vma->vm_end - vma->vm_start,12421242- vma->vm_page_prot)) {12431243- ret = VM_FAULT_SIGBUS;12441244- zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);12451245- goto up_out;12461246- }12471247-12481248- if (__vfio_pci_add_vma(vdev, vma)) {12491249- ret = VM_FAULT_OOM;12501250- zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);12511251- }12521252-12531253-up_out:12541254- up_read(&vdev->memory_lock);12551255- mutex_unlock(&vdev->vma_lock);12561256- return ret;12571257-}12581258-12591259-static const struct vm_operations_struct vfio_pci_mmap_ops = {12601260- .open = vfio_pci_mmap_open,12611261- .close = vfio_pci_mmap_close,12621262- .fault = vfio_pci_mmap_fault,12631263-};12641264-12651265-static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)12661266-{12671267- struct vfio_pci_device *vdev =12681268- container_of(core_vdev, struct vfio_pci_device, vdev);12691269- struct pci_dev *pdev = vdev->pdev;12701270- unsigned int index;12711271- u64 phys_len, req_len, pgoff, req_start;12721272- int ret;12731273-12741274- index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);12751275-12761276- if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)12771277- return -EINVAL;12781278- if (vma->vm_end < vma->vm_start)12791279- return -EINVAL;12801280- if ((vma->vm_flags & VM_SHARED) == 0)12811281- return -EINVAL;12821282- if (index >= VFIO_PCI_NUM_REGIONS) {12831283- int regnum = index - VFIO_PCI_NUM_REGIONS;12841284- struct vfio_pci_region *region = vdev->region + regnum;12851285-12861286- if (region->ops && region->ops->mmap &&12871287- (region->flags & VFIO_REGION_INFO_FLAG_MMAP))12881288- return 
region->ops->mmap(vdev, region, vma);12891289- return -EINVAL;12901290- }12911291- if (index >= VFIO_PCI_ROM_REGION_INDEX)12921292- return -EINVAL;12931293- if (!vdev->bar_mmap_supported[index])12941294- return -EINVAL;12951295-12961296- phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));12971297- req_len = vma->vm_end - vma->vm_start;12981298- pgoff = vma->vm_pgoff &12991299- ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);13001300- req_start = pgoff << PAGE_SHIFT;13011301-13021302- if (req_start + req_len > phys_len)13031303- return -EINVAL;13041304-13051305- /*13061306- * Even though we don't make use of the barmap for the mmap,13071307- * we need to request the region and the barmap tracks that.13081308- */13091309- if (!vdev->barmap[index]) {13101310- ret = pci_request_selected_regions(pdev,13111311- 1 << index, "vfio-pci");13121312- if (ret)13131313- return ret;13141314-13151315- vdev->barmap[index] = pci_iomap(pdev, index, 0);13161316- if (!vdev->barmap[index]) {13171317- pci_release_selected_regions(pdev, 1 << index);13181318- return -ENOMEM;13191319- }13201320- }13211321-13221322- vma->vm_private_data = vdev;13231323- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);13241324- vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;13251325-13261326- /*13271327- * See remap_pfn_range(), called from vfio_pci_fault() but we can't13281328- * change vm_flags within the fault handler. 
Set them now.13291329- */13301330- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;13311331- vma->vm_ops = &vfio_pci_mmap_ops;13321332-13331333- return 0;13341334-}13351335-13361336-static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count)13371337-{13381338- struct vfio_pci_device *vdev =13391339- container_of(core_vdev, struct vfio_pci_device, vdev);13401340- struct pci_dev *pdev = vdev->pdev;13411341-13421342- mutex_lock(&vdev->igate);13431343-13441344- if (vdev->req_trigger) {13451345- if (!(count % 10))13461346- pci_notice_ratelimited(pdev,13471347- "Relaying device request to user (#%u)\n",13481348- count);13491349- eventfd_signal(vdev->req_trigger, 1);13501350- } else if (count == 0) {13511351- pci_warn(pdev,13521352- "No device request channel registered, blocked until released by user\n");13531353- }13541354-13551355- mutex_unlock(&vdev->igate);13561356-}13571357-13581358-static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,13591359- bool vf_token, uuid_t *uuid)13601360-{13611361- /*13621362- * There's always some degree of trust or collaboration between SR-IOV13631363- * PF and VFs, even if just that the PF hosts the SR-IOV capability and13641364- * can disrupt VFs with a reset, but often the PF has more explicit13651365- * access to deny service to the VF or access data passed through the13661366- * VF. We therefore require an opt-in via a shared VF token (UUID) to13671367- * represent this trust. 
This both prevents that a VF driver might13681368- * assume the PF driver is a trusted, in-kernel driver, and also that13691369- * a PF driver might be replaced with a rogue driver, unknown to in-use13701370- * VF drivers.13711371- *13721372- * Therefore when presented with a VF, if the PF is a vfio device and13731373- * it is bound to the vfio-pci driver, the user needs to provide a VF13741374- * token to access the device, in the form of appending a vf_token to13751375- * the device name, for example:13761376- *13771377- * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"13781378- *13791379- * When presented with a PF which has VFs in use, the user must also13801380- * provide the current VF token to prove collaboration with existing13811381- * VF users. If VFs are not in use, the VF token provided for the PF13821382- * device will act to set the VF token.13831383- *13841384- * If the VF token is provided but unused, an error is generated.13851385- */13861386- if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)13871387- return 0; /* No VF token provided or required */13881388-13891389- if (vdev->pdev->is_virtfn) {13901390- struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);13911391- bool match;13921392-13931393- if (!pf_vdev) {13941394- if (!vf_token)13951395- return 0; /* PF is not vfio-pci, no VF token */13961396-13971397- pci_info_ratelimited(vdev->pdev,13981398- "VF token incorrectly provided, PF not bound to vfio-pci\n");13991399- return -EINVAL;14001400- }14011401-14021402- if (!vf_token) {14031403- vfio_device_put(&pf_vdev->vdev);14041404- pci_info_ratelimited(vdev->pdev,14051405- "VF token required to access device\n");14061406- return -EACCES;14071407- }14081408-14091409- mutex_lock(&pf_vdev->vf_token->lock);14101410- match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);14111411- mutex_unlock(&pf_vdev->vf_token->lock);14121412-14131413- vfio_device_put(&pf_vdev->vdev);14141414-14151415- if (!match) {14161416- 
pci_info_ratelimited(vdev->pdev,14171417- "Incorrect VF token provided for device\n");14181418- return -EACCES;14191419- }14201420- } else if (vdev->vf_token) {14211421- mutex_lock(&vdev->vf_token->lock);14221422- if (vdev->vf_token->users) {14231423- if (!vf_token) {14241424- mutex_unlock(&vdev->vf_token->lock);14251425- pci_info_ratelimited(vdev->pdev,14261426- "VF token required to access device\n");14271427- return -EACCES;14281428- }14291429-14301430- if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {14311431- mutex_unlock(&vdev->vf_token->lock);14321432- pci_info_ratelimited(vdev->pdev,14331433- "Incorrect VF token provided for device\n");14341434- return -EACCES;14351435- }14361436- } else if (vf_token) {14371437- uuid_copy(&vdev->vf_token->uuid, uuid);14381438- }14391439-14401440- mutex_unlock(&vdev->vf_token->lock);14411441- } else if (vf_token) {14421442- pci_info_ratelimited(vdev->pdev,14431443- "VF token incorrectly provided, not a PF or VF\n");14441444- return -EINVAL;14451445- }14461446-14471447- return 0;14481448-}14491449-14501450-#define VF_TOKEN_ARG "vf_token="14511451-14521452-static int vfio_pci_match(struct vfio_device *core_vdev, char *buf)14531453-{14541454- struct vfio_pci_device *vdev =14551455- container_of(core_vdev, struct vfio_pci_device, vdev);14561456- bool vf_token = false;14571457- uuid_t uuid;14581458- int ret;14591459-14601460- if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))14611461- return 0; /* No match */14621462-14631463- if (strlen(buf) > strlen(pci_name(vdev->pdev))) {14641464- buf += strlen(pci_name(vdev->pdev));14651465-14661466- if (*buf != ' ')14671467- return 0; /* No match: non-whitespace after name */14681468-14691469- while (*buf) {14701470- if (*buf == ' ') {14711471- buf++;14721472- continue;14731473- }14741474-14751475- if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,14761476- strlen(VF_TOKEN_ARG))) {14771477- buf += strlen(VF_TOKEN_ARG);14781478-14791479- if (strlen(buf) < UUID_STRING_LEN)14801480- 
return -EINVAL;14811481-14821482- ret = uuid_parse(buf, &uuid);14831483- if (ret)14841484- return ret;14851485-14861486- vf_token = true;14871487- buf += UUID_STRING_LEN;14881488- } else {14891489- /* Unknown/duplicate option */14901490- return -EINVAL;14911491- }14921492- }14931493- }14941494-14951495- ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);14961496- if (ret)14971497- return ret;14981498-14991499- return 1; /* Match */1500397}15013981502399static const struct vfio_device_ops vfio_pci_ops = {1503400 .name = "vfio-pci",1504401 .open_device = vfio_pci_open_device,15051505- .close_device = vfio_pci_close_device,15061506- .ioctl = vfio_pci_ioctl,15071507- .read = vfio_pci_read,15081508- .write = vfio_pci_write,15091509- .mmap = vfio_pci_mmap,15101510- .request = vfio_pci_request,15111511- .match = vfio_pci_match,402402+ .close_device = vfio_pci_core_close_device,403403+ .ioctl = vfio_pci_core_ioctl,404404+ .read = vfio_pci_core_read,405405+ .write = vfio_pci_core_write,406406+ .mmap = vfio_pci_core_mmap,407407+ .request = vfio_pci_core_request,408408+ .match = vfio_pci_core_match,1512409};15131513-15141514-static int vfio_pci_bus_notifier(struct notifier_block *nb,15151515- unsigned long action, void *data)15161516-{15171517- struct vfio_pci_device *vdev = container_of(nb,15181518- struct vfio_pci_device, nb);15191519- struct device *dev = data;15201520- struct pci_dev *pdev = to_pci_dev(dev);15211521- struct pci_dev *physfn = pci_physfn(pdev);15221522-15231523- if (action == BUS_NOTIFY_ADD_DEVICE &&15241524- pdev->is_virtfn && physfn == vdev->pdev) {15251525- pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",15261526- pci_name(pdev));15271527- pdev->driver_override = kasprintf(GFP_KERNEL, "%s",15281528- vfio_pci_ops.name);15291529- } else if (action == BUS_NOTIFY_BOUND_DRIVER &&15301530- pdev->is_virtfn && physfn == vdev->pdev) {15311531- struct pci_driver *drv = pci_dev_driver(pdev);15321532-15331533- if (drv && drv != 
&vfio_pci_driver)15341534- pci_warn(vdev->pdev,15351535- "VF %s bound to driver %s while PF bound to vfio-pci\n",15361536- pci_name(pdev), drv->name);15371537- }15381538-15391539- return 0;15401540-}15411541-15421542-static int vfio_pci_vf_init(struct vfio_pci_device *vdev)15431543-{15441544- struct pci_dev *pdev = vdev->pdev;15451545- int ret;15461546-15471547- if (!pdev->is_physfn)15481548- return 0;15491549-15501550- vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);15511551- if (!vdev->vf_token)15521552- return -ENOMEM;15531553-15541554- mutex_init(&vdev->vf_token->lock);15551555- uuid_gen(&vdev->vf_token->uuid);15561556-15571557- vdev->nb.notifier_call = vfio_pci_bus_notifier;15581558- ret = bus_register_notifier(&pci_bus_type, &vdev->nb);15591559- if (ret) {15601560- kfree(vdev->vf_token);15611561- return ret;15621562- }15631563- return 0;15641564-}15651565-15661566-static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev)15671567-{15681568- if (!vdev->vf_token)15691569- return;15701570-15711571- bus_unregister_notifier(&pci_bus_type, &vdev->nb);15721572- WARN_ON(vdev->vf_token->users);15731573- mutex_destroy(&vdev->vf_token->lock);15741574- kfree(vdev->vf_token);15751575-}15761576-15771577-static int vfio_pci_vga_init(struct vfio_pci_device *vdev)15781578-{15791579- struct pci_dev *pdev = vdev->pdev;15801580- int ret;15811581-15821582- if (!vfio_pci_is_vga(pdev))15831583- return 0;15841584-15851585- ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);15861586- if (ret)15871587- return ret;15881588- vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false));15891589- return 0;15901590-}15911591-15921592-static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev)15931593-{15941594- struct pci_dev *pdev = vdev->pdev;15951595-15961596- if (!vfio_pci_is_vga(pdev))15971597- return;15981598- vga_client_register(pdev, NULL, NULL, NULL);15991599- vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM 
|16001600- VGA_RSRC_LEGACY_IO |16011601- VGA_RSRC_LEGACY_MEM);16021602-}16034101604411static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)1605412{16061606- struct vfio_pci_device *vdev;16071607- struct iommu_group *group;413413+ struct vfio_pci_core_device *vdev;1608414 int ret;16094151610416 if (vfio_pci_is_denylisted(pdev))1611417 return -EINVAL;161241816131613- if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)16141614- return -EINVAL;16151615-16161616- /*16171617- * Prevent binding to PFs with VFs enabled, the VFs might be in use16181618- * by the host or other users. We cannot capture the VFs if they16191619- * already exist, nor can we track VF users. Disabling SR-IOV here16201620- * would initiate removing the VFs, which would unbind the driver,16211621- * which is prone to blocking if that VF is also in use by vfio-pci.16221622- * Just reject these PFs and let the user sort it out.16231623- */16241624- if (pci_num_vf(pdev)) {16251625- pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");16261626- return -EBUSY;16271627- }16281628-16291629- group = vfio_iommu_group_get(&pdev->dev);16301630- if (!group)16311631- return -EINVAL;16321632-1633419 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);16341634- if (!vdev) {16351635- ret = -ENOMEM;16361636- goto out_group_put;16371637- }420420+ if (!vdev)421421+ return -ENOMEM;422422+ vfio_pci_core_init_device(vdev, pdev, &vfio_pci_ops);163842316391639- vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops);16401640- vdev->pdev = pdev;16411641- vdev->irq_type = VFIO_PCI_NUM_IRQS;16421642- mutex_init(&vdev->igate);16431643- spin_lock_init(&vdev->irqlock);16441644- mutex_init(&vdev->ioeventfds_lock);16451645- INIT_LIST_HEAD(&vdev->dummy_resources_list);16461646- INIT_LIST_HEAD(&vdev->ioeventfds_list);16471647- mutex_init(&vdev->vma_lock);16481648- INIT_LIST_HEAD(&vdev->vma_list);16491649- init_rwsem(&vdev->memory_lock);16501650-16511651- if (pci_is_root_bus(pdev->bus)) {16521652- ret = 
vfio_assign_device_set(&vdev->vdev, vdev);16531653- } else if (!pci_probe_reset_slot(pdev->slot)) {16541654- ret = vfio_assign_device_set(&vdev->vdev, pdev->slot);16551655- } else {16561656- /*16571657- * If there is no slot reset support for this device, the whole16581658- * bus needs to be grouped together to support bus-wide resets.16591659- */16601660- ret = vfio_assign_device_set(&vdev->vdev, pdev->bus);16611661- }16621662-424424+ ret = vfio_pci_core_register_device(vdev);1663425 if (ret)16641664- goto out_uninit;16651665- ret = vfio_pci_vf_init(vdev);16661666- if (ret)16671667- goto out_uninit;16681668- ret = vfio_pci_vga_init(vdev);16691669- if (ret)16701670- goto out_vf;16711671-16721672- vfio_pci_probe_power_state(vdev);16731673-16741674- if (!disable_idle_d3) {16751675- /*16761676- * pci-core sets the device power state to an unknown value at16771677- * bootup and after being removed from a driver. The only16781678- * transition it allows from this unknown state is to D0, which16791679- * typically happens when a driver calls pci_enable_device().16801680- * We're not ready to enable the device yet, but we do want to16811681- * be able to get to D3. 
Therefore first do a D0 transition16821682- * before going to D3.16831683- */16841684- vfio_pci_set_power_state(vdev, PCI_D0);16851685- vfio_pci_set_power_state(vdev, PCI_D3hot);16861686- }16871687-16881688- ret = vfio_register_group_dev(&vdev->vdev);16891689- if (ret)16901690- goto out_power;426426+ goto out_free;1691427 dev_set_drvdata(&pdev->dev, vdev);1692428 return 0;169342916941694-out_power:16951695- if (!disable_idle_d3)16961696- vfio_pci_set_power_state(vdev, PCI_D0);16971697-out_vf:16981698- vfio_pci_vf_uninit(vdev);16991699-out_uninit:17001700- vfio_uninit_group_dev(&vdev->vdev);17011701- kfree(vdev->pm_save);430430+out_free:431431+ vfio_pci_core_uninit_device(vdev);1702432 kfree(vdev);17031703-out_group_put:17041704- vfio_iommu_group_put(group, &pdev->dev);1705433 return ret;1706434}17074351708436static void vfio_pci_remove(struct pci_dev *pdev)1709437{17101710- struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev);438438+ struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);171143917121712- pci_disable_sriov(pdev);17131713-17141714- vfio_unregister_group_dev(&vdev->vdev);17151715-17161716- vfio_pci_vf_uninit(vdev);17171717- vfio_uninit_group_dev(&vdev->vdev);17181718- vfio_pci_vga_uninit(vdev);17191719-17201720- vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);17211721-17221722- if (!disable_idle_d3)17231723- vfio_pci_set_power_state(vdev, PCI_D0);17241724-17251725- mutex_destroy(&vdev->ioeventfds_lock);17261726- kfree(vdev->region);17271727- kfree(vdev->pm_save);440440+ vfio_pci_core_unregister_device(vdev);441441+ vfio_pci_core_uninit_device(vdev);1728442 kfree(vdev);17291729-}17301730-17311731-static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,17321732- pci_channel_state_t state)17331733-{17341734- struct vfio_pci_device *vdev;17351735- struct vfio_device *device;17361736-17371737- device = vfio_device_get_from_dev(&pdev->dev);17381738- if (device == NULL)17391739- return 
PCI_ERS_RESULT_DISCONNECT;17401740-17411741- vdev = container_of(device, struct vfio_pci_device, vdev);17421742-17431743- mutex_lock(&vdev->igate);17441744-17451745- if (vdev->err_trigger)17461746- eventfd_signal(vdev->err_trigger, 1);17471747-17481748- mutex_unlock(&vdev->igate);17491749-17501750- vfio_device_put(device);17511751-17521752- return PCI_ERS_RESULT_CAN_RECOVER;1753443}17544441755445static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)1756446{17571757- struct vfio_device *device;17581758- int ret = 0;17591759-17601760- might_sleep();17611761-1762447 if (!enable_sriov)1763448 return -ENOENT;176444917651765- device = vfio_device_get_from_dev(&pdev->dev);17661766- if (!device)17671767- return -ENODEV;17681768-17691769- if (nr_virtfn == 0)17701770- pci_disable_sriov(pdev);17711771- else17721772- ret = pci_enable_sriov(pdev, nr_virtfn);17731773-17741774- vfio_device_put(device);17751775-17761776- return ret < 0 ? ret : nr_virtfn;450450+ return vfio_pci_core_sriov_configure(pdev, nr_virtfn);1777451}177845217791779-static const struct pci_error_handlers vfio_err_handlers = {17801780- .error_detected = vfio_pci_aer_err_detected,453453+static const struct pci_device_id vfio_pci_table[] = {454454+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_ANY_ID, PCI_ANY_ID) }, /* match all by default */455455+ {}1781456};457457+458458+MODULE_DEVICE_TABLE(pci, vfio_pci_table);17824591783460static struct pci_driver vfio_pci_driver = {1784461 .name = "vfio-pci",17851785- .id_table = NULL, /* only dynamic ids */462462+ .id_table = vfio_pci_table,1786463 .probe = vfio_pci_probe,1787464 .remove = vfio_pci_remove,1788465 .sriov_configure = vfio_pci_sriov_configure,17891789- .err_handler = &vfio_err_handlers,466466+ .err_handler = &vfio_pci_core_err_handlers,1790467};17911791-17921792-static bool vfio_dev_in_groups(struct vfio_pci_device *vdev,17931793- struct vfio_pci_group_info *groups)17941794-{17951795- unsigned int i;17961796-17971797- for (i = 0; i < groups->count; 
i++)17981798- if (groups->groups[i] == vdev->vdev.group)17991799- return true;18001800- return false;18011801-}18021802-18031803-static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)18041804-{18051805- struct vfio_device_set *dev_set = data;18061806- struct vfio_device *cur;18071807-18081808- list_for_each_entry(cur, &dev_set->device_list, dev_set_list)18091809- if (cur->dev == &pdev->dev)18101810- return 0;18111811- return -EBUSY;18121812-}18131813-18141814-/*18151815- * vfio-core considers a group to be viable and will create a vfio_device even18161816- * if some devices are bound to drivers like pci-stub or pcieport. Here we18171817- * require all PCI devices to be inside our dev_set since that ensures they stay18181818- * put and that every driver controlling the device can co-ordinate with the18191819- * device reset.18201820- *18211821- * Returns the pci_dev to pass to pci_reset_bus() if every PCI device to be18221822- * reset is inside the dev_set, and pci_reset_bus() can succeed. 
NULL otherwise.18231823- */18241824-static struct pci_dev *18251825-vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set)18261826-{18271827- struct pci_dev *pdev;18281828-18291829- lockdep_assert_held(&dev_set->lock);18301830-18311831- /*18321832- * By definition all PCI devices in the dev_set share the same PCI18331833- * reset, so any pci_dev will have the same outcomes for18341834- * pci_probe_reset_*() and pci_reset_bus().18351835- */18361836- pdev = list_first_entry(&dev_set->device_list, struct vfio_pci_device,18371837- vdev.dev_set_list)->pdev;18381838-18391839- /* pci_reset_bus() is supported */18401840- if (pci_probe_reset_slot(pdev->slot) && pci_probe_reset_bus(pdev->bus))18411841- return NULL;18421842-18431843- if (vfio_pci_for_each_slot_or_bus(pdev, vfio_pci_is_device_in_set,18441844- dev_set,18451845- !pci_probe_reset_slot(pdev->slot)))18461846- return NULL;18471847- return pdev;18481848-}18491849-18501850-/*18511851- * We need to get memory_lock for each device, but devices can share mmap_lock,18521852- * therefore we need to zap and hold the vma_lock for each device, and only then18531853- * get each memory_lock.18541854- */18551855-static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,18561856- struct vfio_pci_group_info *groups)18571857-{18581858- struct vfio_pci_device *cur_mem;18591859- struct vfio_pci_device *cur_vma;18601860- struct vfio_pci_device *cur;18611861- struct pci_dev *pdev;18621862- bool is_mem = true;18631863- int ret;18641864-18651865- mutex_lock(&dev_set->lock);18661866- cur_mem = list_first_entry(&dev_set->device_list,18671867- struct vfio_pci_device, vdev.dev_set_list);18681868-18691869- pdev = vfio_pci_dev_set_resettable(dev_set);18701870- if (!pdev) {18711871- ret = -EINVAL;18721872- goto err_unlock;18731873- }18741874-18751875- list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {18761876- /*18771877- * Test whether all the affected devices are contained by the18781878- * set of 
groups provided by the user.18791879- */18801880- if (!vfio_dev_in_groups(cur_vma, groups)) {18811881- ret = -EINVAL;18821882- goto err_undo;18831883- }18841884-18851885- /*18861886- * Locking multiple devices is prone to deadlock, runaway and18871887- * unwind if we hit contention.18881888- */18891889- if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) {18901890- ret = -EBUSY;18911891- goto err_undo;18921892- }18931893- }18941894- cur_vma = NULL;18951895-18961896- list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) {18971897- if (!down_write_trylock(&cur_mem->memory_lock)) {18981898- ret = -EBUSY;18991899- goto err_undo;19001900- }19011901- mutex_unlock(&cur_mem->vma_lock);19021902- }19031903- cur_mem = NULL;19041904-19051905- ret = pci_reset_bus(pdev);19061906-19071907-err_undo:19081908- list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {19091909- if (cur == cur_mem)19101910- is_mem = false;19111911- if (cur == cur_vma)19121912- break;19131913- if (is_mem)19141914- up_write(&cur->memory_lock);19151915- else19161916- mutex_unlock(&cur->vma_lock);19171917- }19181918-err_unlock:19191919- mutex_unlock(&dev_set->lock);19201920- return ret;19211921-}19221922-19231923-static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)19241924-{19251925- struct vfio_pci_device *cur;19261926- bool needs_reset = false;19271927-19281928- list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {19291929- /* No VFIO device in the set can have an open device FD */19301930- if (cur->vdev.open_count)19311931- return false;19321932- needs_reset |= cur->needs_reset;19331933- }19341934- return needs_reset;19351935-}19361936-19371937-/*19381938- * If a bus or slot reset is available for the provided dev_set and:19391939- * - All of the devices affected by that bus or slot reset are unused19401940- * - At least one of the affected devices is marked dirty via19411941- * needs_reset (such as by lack of FLR support)19421942- * Then 
attempt to perform that bus or slot reset.19431943- * Returns true if the dev_set was reset.19441944- */19451945-static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)19461946-{19471947- struct vfio_pci_device *cur;19481948- struct pci_dev *pdev;19491949- int ret;19501950-19511951- if (!vfio_pci_dev_set_needs_reset(dev_set))19521952- return false;19531953-19541954- pdev = vfio_pci_dev_set_resettable(dev_set);19551955- if (!pdev)19561956- return false;19571957-19581958- ret = pci_reset_bus(pdev);19591959- if (ret)19601960- return false;19611961-19621962- list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {19631963- cur->needs_reset = false;19641964- if (!disable_idle_d3)19651965- vfio_pci_set_power_state(cur, PCI_D3hot);19661966- }19671967- return true;19681968-}19691969-19701970-static void __exit vfio_pci_cleanup(void)19711971-{19721972- pci_unregister_driver(&vfio_pci_driver);19731973- vfio_pci_uninit_perm_bits();19741974-}19754681976469static void __init vfio_pci_fill_ids(void)1977470{···2392288static int __init vfio_pci_init(void)2402289{2412290 int ret;22912291+ bool is_disable_vga = true;2422292243243- /* Allocate shared config space permission data used by all devices */244244- ret = vfio_pci_init_perm_bits();245245- if (ret)246246- return ret;22932293+#ifdef CONFIG_VFIO_PCI_VGA22942294+ is_disable_vga = disable_vga;22952295+#endif22962296+22972297+ vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3);24722982482299 /* Register and scan for devices */2492300 ret = pci_register_driver(&vfio_pci_driver);2502301 if (ret)251251- goto out_driver;23022302+ return ret;25223032532304 vfio_pci_fill_ids();2542305···2582305 pr_warn("device denylist disabled.\n");25923062602307 return 0;261261-262262-out_driver:263263- vfio_pci_uninit_perm_bits();264264- return ret;2652308}266266-2672309module_init(vfio_pci_init);23102310+23112311+static void __exit vfio_pci_cleanup(void)23122312+{23132313+ 
pci_unregister_driver(&vfio_pci_driver);23142314+}2682315module_exit(vfio_pci_cleanup);2692316270270-MODULE_VERSION(DRIVER_VERSION);2712317MODULE_LICENSE("GPL v2");2722318MODULE_AUTHOR(DRIVER_AUTHOR);2732319MODULE_DESCRIPTION(DRIVER_DESC);
+35-35
drivers/vfio/pci/vfio_pci_config.c
···2626#include <linux/vfio.h>2727#include <linux/slab.h>28282929-#include "vfio_pci_private.h"2929+#include <linux/vfio_pci_core.h>30303131/* Fake capability ID for standard config space */3232#define PCI_CAP_ID_BASIC 0···108108struct perm_bits {109109 u8 *virt; /* read/write virtual data, not hw */110110 u8 *write; /* writeable bits */111111- int (*readfn)(struct vfio_pci_device *vdev, int pos, int count,111111+ int (*readfn)(struct vfio_pci_core_device *vdev, int pos, int count,112112 struct perm_bits *perm, int offset, __le32 *val);113113- int (*writefn)(struct vfio_pci_device *vdev, int pos, int count,113113+ int (*writefn)(struct vfio_pci_core_device *vdev, int pos, int count,114114 struct perm_bits *perm, int offset, __le32 val);115115};116116···171171 return ret;172172}173173174174-static int vfio_default_config_read(struct vfio_pci_device *vdev, int pos,174174+static int vfio_default_config_read(struct vfio_pci_core_device *vdev, int pos,175175 int count, struct perm_bits *perm,176176 int offset, __le32 *val)177177{···197197 return count;198198}199199200200-static int vfio_default_config_write(struct vfio_pci_device *vdev, int pos,200200+static int vfio_default_config_write(struct vfio_pci_core_device *vdev, int pos,201201 int count, struct perm_bits *perm,202202 int offset, __le32 val)203203{···244244}245245246246/* Allow direct read from hardware, except for capability next pointer */247247-static int vfio_direct_config_read(struct vfio_pci_device *vdev, int pos,247247+static int vfio_direct_config_read(struct vfio_pci_core_device *vdev, int pos,248248 int count, struct perm_bits *perm,249249 int offset, __le32 *val)250250{···269269}270270271271/* Raw access skips any kind of virtualization */272272-static int vfio_raw_config_write(struct vfio_pci_device *vdev, int pos,272272+static int vfio_raw_config_write(struct vfio_pci_core_device *vdev, int pos,273273 int count, struct perm_bits *perm,274274 int offset, __le32 val)275275{···282282 return 
count;283283}284284285285-static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos,285285+static int vfio_raw_config_read(struct vfio_pci_core_device *vdev, int pos,286286 int count, struct perm_bits *perm,287287 int offset, __le32 *val)288288{···296296}297297298298/* Virt access uses only virtualization */299299-static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos,299299+static int vfio_virt_config_write(struct vfio_pci_core_device *vdev, int pos,300300 int count, struct perm_bits *perm,301301 int offset, __le32 val)302302{···304304 return count;305305}306306307307-static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos,307307+static int vfio_virt_config_read(struct vfio_pci_core_device *vdev, int pos,308308 int count, struct perm_bits *perm,309309 int offset, __le32 *val)310310{···396396}397397398398/* Caller should hold memory_lock semaphore */399399-bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)399399+bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev)400400{401401 struct pci_dev *pdev = vdev->pdev;402402 u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);···413413 * Restore the *real* BARs after we detect a FLR or backdoor reset.414414 * (backdoor = some device specific technique that we didn't catch)415415 */416416-static void vfio_bar_restore(struct vfio_pci_device *vdev)416416+static void vfio_bar_restore(struct vfio_pci_core_device *vdev)417417{418418 struct pci_dev *pdev = vdev->pdev;419419 u32 *rbar = vdev->rbar;···460460 * Pretend we're hardware and tweak the values of the *virtual* PCI BARs461461 * to reflect the hardware capabilities. 
This implements BAR sizing.462462 */463463-static void vfio_bar_fixup(struct vfio_pci_device *vdev)463463+static void vfio_bar_fixup(struct vfio_pci_core_device *vdev)464464{465465 struct pci_dev *pdev = vdev->pdev;466466 int i;···514514 vdev->bardirty = false;515515}516516517517-static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos,517517+static int vfio_basic_config_read(struct vfio_pci_core_device *vdev, int pos,518518 int count, struct perm_bits *perm,519519 int offset, __le32 *val)520520{···536536}537537538538/* Test whether BARs match the value we think they should contain */539539-static bool vfio_need_bar_restore(struct vfio_pci_device *vdev)539539+static bool vfio_need_bar_restore(struct vfio_pci_core_device *vdev)540540{541541 int i = 0, pos = PCI_BASE_ADDRESS_0, ret;542542 u32 bar;···552552 return false;553553}554554555555-static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,555555+static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,556556 int count, struct perm_bits *perm,557557 int offset, __le32 val)558558{···692692 return 0;693693}694694695695-static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos,695695+static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos,696696 int count, struct perm_bits *perm,697697 int offset, __le32 val)698698{···747747 return 0;748748}749749750750-static int vfio_vpd_config_write(struct vfio_pci_device *vdev, int pos,750750+static int vfio_vpd_config_write(struct vfio_pci_core_device *vdev, int pos,751751 int count, struct perm_bits *perm,752752 int offset, __le32 val)753753{···829829 return 0;830830}831831832832-static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,832832+static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos,833833 int count, struct perm_bits *perm,834834 int offset, __le32 val)835835{···913913 return 0;914914}915915916916-static int vfio_af_config_write(struct 
vfio_pci_device *vdev, int pos,916916+static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos,917917 int count, struct perm_bits *perm,918918 int offset, __le32 val)919919{···10721072 return ret;10731073}1074107410751075-static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos)10751075+static int vfio_find_cap_start(struct vfio_pci_core_device *vdev, int pos)10761076{10771077 u8 cap;10781078 int base = (pos >= PCI_CFG_SPACE_SIZE) ? PCI_CFG_SPACE_SIZE :···10891089 return pos;10901090}1091109110921092-static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos,10921092+static int vfio_msi_config_read(struct vfio_pci_core_device *vdev, int pos,10931093 int count, struct perm_bits *perm,10941094 int offset, __le32 *val)10951095{···11091109 return vfio_default_config_read(vdev, pos, count, perm, offset, val);11101110}1111111111121112-static int vfio_msi_config_write(struct vfio_pci_device *vdev, int pos,11121112+static int vfio_msi_config_write(struct vfio_pci_core_device *vdev, int pos,11131113 int count, struct perm_bits *perm,11141114 int offset, __le32 val)11151115{···11891189}1190119011911191/* Determine MSI CAP field length; initialize msi_perms on 1st call per vdev */11921192-static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos)11921192+static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos)11931193{11941194 struct pci_dev *pdev = vdev->pdev;11951195 int len, ret;···12221222}1223122312241224/* Determine extended capability length for VC (2 & 9) and MFVC */12251225-static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos)12251225+static int vfio_vc_cap_len(struct vfio_pci_core_device *vdev, u16 pos)12261226{12271227 struct pci_dev *pdev = vdev->pdev;12281228 u32 tmp;···12631263 return len;12641264}1265126512661266-static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)12661266+static int vfio_cap_len(struct vfio_pci_core_device *vdev, u8 cap, u8 pos)12671267{12681268 struct 
pci_dev *pdev = vdev->pdev;12691269 u32 dword;···13381338 return 0;13391339}1340134013411341-static int vfio_ext_cap_len(struct vfio_pci_device *vdev, u16 ecap, u16 epos)13411341+static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epos)13421342{13431343 struct pci_dev *pdev = vdev->pdev;13441344 u8 byte;···14121412 return 0;14131413}1414141414151415-static int vfio_fill_vconfig_bytes(struct vfio_pci_device *vdev,14151415+static int vfio_fill_vconfig_bytes(struct vfio_pci_core_device *vdev,14161416 int offset, int size)14171417{14181418 struct pci_dev *pdev = vdev->pdev;···14591459 return ret;14601460}1461146114621462-static int vfio_cap_init(struct vfio_pci_device *vdev)14621462+static int vfio_cap_init(struct vfio_pci_core_device *vdev)14631463{14641464 struct pci_dev *pdev = vdev->pdev;14651465 u8 *map = vdev->pci_config_map;···15491549 return 0;15501550}1551155115521552-static int vfio_ecap_init(struct vfio_pci_device *vdev)15521552+static int vfio_ecap_init(struct vfio_pci_core_device *vdev)15531553{15541554 struct pci_dev *pdev = vdev->pdev;15551555 u8 *map = vdev->pci_config_map;···16691669 * for each area requiring emulated bits, but the array of pointers16701670 * would be comparable in size (at least for standard config space).16711671 */16721672-int vfio_config_init(struct vfio_pci_device *vdev)16721672+int vfio_config_init(struct vfio_pci_core_device *vdev)16731673{16741674 struct pci_dev *pdev = vdev->pdev;16751675 u8 *map, *vconfig;···17731773 return pcibios_err_to_errno(ret);17741774}1775177517761776-void vfio_config_free(struct vfio_pci_device *vdev)17761776+void vfio_config_free(struct vfio_pci_core_device *vdev)17771777{17781778 kfree(vdev->vconfig);17791779 vdev->vconfig = NULL;···17901790 * Find the remaining number of bytes in a dword that match the given17911791 * position. 
Stop at either the end of the capability or the dword boundary.17921792 */17931793-static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_device *vdev,17931793+static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_core_device *vdev,17941794 loff_t pos)17951795{17961796 u8 cap = vdev->pci_config_map[pos];···18021802 return i;18031803}1804180418051805-static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,18051805+static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user *buf,18061806 size_t count, loff_t *ppos, bool iswrite)18071807{18081808 struct pci_dev *pdev = vdev->pdev;···18851885 return ret;18861886}1887188718881888-ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf,18881888+ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, char __user *buf,18891889 size_t count, loff_t *ppos, bool iswrite)18901890{18911891 size_t done = 0;
+2158
drivers/vfio/pci/vfio_pci_core.c
···11+// SPDX-License-Identifier: GPL-2.0-only22+/*33+ * Copyright (C) 2012 Red Hat, Inc. All rights reserved.44+ * Author: Alex Williamson <alex.williamson@redhat.com>55+ *66+ * Derived from original vfio:77+ * Copyright 2010 Cisco Systems, Inc. All rights reserved.88+ * Author: Tom Lyon, pugs@cisco.com99+ */1010+1111+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt1212+1313+#include <linux/device.h>1414+#include <linux/eventfd.h>1515+#include <linux/file.h>1616+#include <linux/interrupt.h>1717+#include <linux/iommu.h>1818+#include <linux/module.h>1919+#include <linux/mutex.h>2020+#include <linux/notifier.h>2121+#include <linux/pci.h>2222+#include <linux/pm_runtime.h>2323+#include <linux/slab.h>2424+#include <linux/types.h>2525+#include <linux/uaccess.h>2626+#include <linux/vgaarb.h>2727+#include <linux/nospec.h>2828+#include <linux/sched/mm.h>2929+3030+#include <linux/vfio_pci_core.h>3131+3232+#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"3333+#define DRIVER_DESC "core driver for VFIO based PCI devices"3434+3535+static bool nointxmask;3636+static bool disable_vga;3737+static bool disable_idle_d3;3838+3939+static inline bool vfio_vga_disabled(void)4040+{4141+#ifdef CONFIG_VFIO_PCI_VGA4242+ return disable_vga;4343+#else4444+ return true;4545+#endif4646+}4747+4848+/*4949+ * Our VGA arbiter participation is limited since we don't know anything5050+ * about the device itself. 
However, if the device is the only VGA device5151+ * downstream of a bridge and VFIO VGA support is disabled, then we can5252+ * safely return legacy VGA IO and memory as not decoded since the user5353+ * has no way to get to it and routing can be disabled externally at the5454+ * bridge.5555+ */5656+static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)5757+{5858+ struct vfio_pci_core_device *vdev = opaque;5959+ struct pci_dev *tmp = NULL, *pdev = vdev->pdev;6060+ unsigned char max_busnr;6161+ unsigned int decodes;6262+6363+ if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))6464+ return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |6565+ VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;6666+6767+ max_busnr = pci_bus_max_busnr(pdev->bus);6868+ decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;6969+7070+ while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {7171+ if (tmp == pdev ||7272+ pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||7373+ pci_is_root_bus(tmp->bus))7474+ continue;7575+7676+ if (tmp->bus->number >= pdev->bus->number &&7777+ tmp->bus->number <= max_busnr) {7878+ pci_dev_put(tmp);7979+ decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;8080+ break;8181+ }8282+ }8383+8484+ return decodes;8585+}8686+8787+static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)8888+{8989+ struct resource *res;9090+ int i;9191+ struct vfio_pci_dummy_resource *dummy_res;9292+9393+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {9494+ int bar = i + PCI_STD_RESOURCES;9595+9696+ res = &vdev->pdev->resource[bar];9797+9898+ if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))9999+ goto no_mmap;100100+101101+ if (!(res->flags & IORESOURCE_MEM))102102+ goto no_mmap;103103+104104+ /*105105+ * The PCI core shouldn't set up a resource with a106106+ * type but zero size. 
But there may be bugs that107107+ * cause us to do that.108108+ */109109+ if (!resource_size(res))110110+ goto no_mmap;111111+112112+ if (resource_size(res) >= PAGE_SIZE) {113113+ vdev->bar_mmap_supported[bar] = true;114114+ continue;115115+ }116116+117117+ if (!(res->start & ~PAGE_MASK)) {118118+ /*119119+ * Add a dummy resource to reserve the remainder120120+ * of the exclusive page in case that hot-add121121+ * device's bar is assigned into it.122122+ */123123+ dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);124124+ if (dummy_res == NULL)125125+ goto no_mmap;126126+127127+ dummy_res->resource.name = "vfio sub-page reserved";128128+ dummy_res->resource.start = res->end + 1;129129+ dummy_res->resource.end = res->start + PAGE_SIZE - 1;130130+ dummy_res->resource.flags = res->flags;131131+ if (request_resource(res->parent,132132+ &dummy_res->resource)) {133133+ kfree(dummy_res);134134+ goto no_mmap;135135+ }136136+ dummy_res->index = bar;137137+ list_add(&dummy_res->res_next,138138+ &vdev->dummy_resources_list);139139+ vdev->bar_mmap_supported[bar] = true;140140+ continue;141141+ }142142+ /*143143+ * Here we don't handle the case when the BAR is not page144144+ * aligned because we can't expect the BAR will be145145+ * assigned into the same location in a page in guest146146+ * when we passthrough the BAR. 
And it's hard to access147147+ * this BAR in userspace because we have no way to get148148+ * the BAR's location in a page.149149+ */150150+no_mmap:151151+ vdev->bar_mmap_supported[bar] = false;152152+ }153153+}154154+155155+struct vfio_pci_group_info;156156+static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);157157+static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,158158+ struct vfio_pci_group_info *groups);159159+160160+/*161161+ * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND162162+ * _and_ the ability detect when the device is asserting INTx via PCI_STATUS.163163+ * If a device implements the former but not the latter we would typically164164+ * expect broken_intx_masking be set and require an exclusive interrupt.165165+ * However since we do have control of the device's ability to assert INTx,166166+ * we can instead pretend that the device does not implement INTx, virtualizing167167+ * the pin register to report zero and maintaining DisINTx set on the host.168168+ */169169+static bool vfio_pci_nointx(struct pci_dev *pdev)170170+{171171+ switch (pdev->vendor) {172172+ case PCI_VENDOR_ID_INTEL:173173+ switch (pdev->device) {174174+ /* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */175175+ case 0x1572:176176+ case 0x1574:177177+ case 0x1580 ... 0x1581:178178+ case 0x1583 ... 0x158b:179179+ case 0x37d0 ... 
0x37d2:180180+ /* X550 */181181+ case 0x1563:182182+ return true;183183+ default:184184+ return false;185185+ }186186+ }187187+188188+ return false;189189+}190190+191191+static void vfio_pci_probe_power_state(struct vfio_pci_core_device *vdev)192192+{193193+ struct pci_dev *pdev = vdev->pdev;194194+ u16 pmcsr;195195+196196+ if (!pdev->pm_cap)197197+ return;198198+199199+ pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);200200+201201+ vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);202202+}203203+204204+/*205205+ * pci_set_power_state() wrapper handling devices which perform a soft reset on206206+ * D3->D0 transition. Save state prior to D0/1/2->D3, stash it on the vdev,207207+ * restore when returned to D0. Saved separately from pci_saved_state for use208208+ * by PM capability emulation and separately from pci_dev internal saved state209209+ * to avoid it being overwritten and consumed around other resets.210210+ */211211+int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t state)212212+{213213+ struct pci_dev *pdev = vdev->pdev;214214+ bool needs_restore = false, needs_save = false;215215+ int ret;216216+217217+ if (vdev->needs_pm_restore) {218218+ if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {219219+ pci_save_state(pdev);220220+ needs_save = true;221221+ }222222+223223+ if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)224224+ needs_restore = true;225225+ }226226+227227+ ret = pci_set_power_state(pdev, state);228228+229229+ if (!ret) {230230+ /* D3 might be unsupported via quirk, skip unless in D3 */231231+ if (needs_save && pdev->current_state >= PCI_D3hot) {232232+ vdev->pm_save = pci_store_saved_state(pdev);233233+ } else if (needs_restore) {234234+ pci_load_and_free_saved_state(pdev, &vdev->pm_save);235235+ pci_restore_state(pdev);236236+ }237237+ }238238+239239+ return ret;240240+}241241+242242+int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)243243+{244244+ struct pci_dev 
*pdev = vdev->pdev;245245+ int ret;246246+ u16 cmd;247247+ u8 msix_pos;248248+249249+ vfio_pci_set_power_state(vdev, PCI_D0);250250+251251+ /* Don't allow our initial saved state to include busmaster */252252+ pci_clear_master(pdev);253253+254254+ ret = pci_enable_device(pdev);255255+ if (ret)256256+ return ret;257257+258258+ /* If reset fails because of the device lock, fail this path entirely */259259+ ret = pci_try_reset_function(pdev);260260+ if (ret == -EAGAIN) {261261+ pci_disable_device(pdev);262262+ return ret;263263+ }264264+265265+ vdev->reset_works = !ret;266266+ pci_save_state(pdev);267267+ vdev->pci_saved_state = pci_store_saved_state(pdev);268268+ if (!vdev->pci_saved_state)269269+ pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);270270+271271+ if (likely(!nointxmask)) {272272+ if (vfio_pci_nointx(pdev)) {273273+ pci_info(pdev, "Masking broken INTx support\n");274274+ vdev->nointx = true;275275+ pci_intx(pdev, 0);276276+ } else277277+ vdev->pci_2_3 = pci_intx_mask_supported(pdev);278278+ }279279+280280+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);281281+ if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {282282+ cmd &= ~PCI_COMMAND_INTX_DISABLE;283283+ pci_write_config_word(pdev, PCI_COMMAND, cmd);284284+ }285285+286286+ ret = vfio_config_init(vdev);287287+ if (ret) {288288+ kfree(vdev->pci_saved_state);289289+ vdev->pci_saved_state = NULL;290290+ pci_disable_device(pdev);291291+ return ret;292292+ }293293+294294+ msix_pos = pdev->msix_cap;295295+ if (msix_pos) {296296+ u16 flags;297297+ u32 table;298298+299299+ pci_read_config_word(pdev, msix_pos + PCI_MSIX_FLAGS, &flags);300300+ pci_read_config_dword(pdev, msix_pos + PCI_MSIX_TABLE, &table);301301+302302+ vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;303303+ vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;304304+ vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;305305+ } else306306+ vdev->msix_bar = 0xFF;307307+308308+ if (!vfio_vga_disabled() && 
vfio_pci_is_vga(pdev))309309+ vdev->has_vga = true;310310+311311+312312+ return 0;313313+}314314+EXPORT_SYMBOL_GPL(vfio_pci_core_enable);315315+316316+void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)317317+{318318+ struct pci_dev *pdev = vdev->pdev;319319+ struct vfio_pci_dummy_resource *dummy_res, *tmp;320320+ struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;321321+ int i, bar;322322+323323+ /* For needs_reset */324324+ lockdep_assert_held(&vdev->vdev.dev_set->lock);325325+326326+ /* Stop the device from further DMA */327327+ pci_clear_master(pdev);328328+329329+ vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |330330+ VFIO_IRQ_SET_ACTION_TRIGGER,331331+ vdev->irq_type, 0, 0, NULL);332332+333333+ /* Device closed, don't need mutex here */334334+ list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,335335+ &vdev->ioeventfds_list, next) {336336+ vfio_virqfd_disable(&ioeventfd->virqfd);337337+ list_del(&ioeventfd->next);338338+ kfree(ioeventfd);339339+ }340340+ vdev->ioeventfds_nr = 0;341341+342342+ vdev->virq_disabled = false;343343+344344+ for (i = 0; i < vdev->num_regions; i++)345345+ vdev->region[i].ops->release(vdev, &vdev->region[i]);346346+347347+ vdev->num_regions = 0;348348+ kfree(vdev->region);349349+ vdev->region = NULL; /* don't krealloc a freed pointer */350350+351351+ vfio_config_free(vdev);352352+353353+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {354354+ bar = i + PCI_STD_RESOURCES;355355+ if (!vdev->barmap[bar])356356+ continue;357357+ pci_iounmap(pdev, vdev->barmap[bar]);358358+ pci_release_selected_regions(pdev, 1 << bar);359359+ vdev->barmap[bar] = NULL;360360+ }361361+362362+ list_for_each_entry_safe(dummy_res, tmp,363363+ &vdev->dummy_resources_list, res_next) {364364+ list_del(&dummy_res->res_next);365365+ release_resource(&dummy_res->resource);366366+ kfree(dummy_res);367367+ }368368+369369+ vdev->needs_reset = true;370370+371371+ /*372372+ * If we have saved state, restore it. If we can reset the device,373373+ * even better. 
Resetting with current state seems better than374374+ * nothing, but saving and restoring current state without reset375375+ * is just busy work.376376+ */377377+ if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {378378+ pci_info(pdev, "%s: Couldn't reload saved state\n", __func__);379379+380380+ if (!vdev->reset_works)381381+ goto out;382382+383383+ pci_save_state(pdev);384384+ }385385+386386+ /*387387+ * Disable INTx and MSI, presumably to avoid spurious interrupts388388+ * during reset. Stolen from pci_reset_function()389389+ */390390+ pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);391391+392392+ /*393393+ * Try to get the locks ourselves to prevent a deadlock. The394394+ * success of this is dependent on being able to lock the device,395395+ * which is not always possible.396396+ * We can not use the "try" reset interface here, which will397397+ * overwrite the previously restored configuration information.398398+ */399399+ if (vdev->reset_works && pci_dev_trylock(pdev)) {400400+ if (!__pci_reset_function_locked(pdev))401401+ vdev->needs_reset = false;402402+ pci_dev_unlock(pdev);403403+ }404404+405405+ pci_restore_state(pdev);406406+out:407407+ pci_disable_device(pdev);408408+409409+ if (!vfio_pci_dev_set_try_reset(vdev->vdev.dev_set) && !disable_idle_d3)410410+ vfio_pci_set_power_state(vdev, PCI_D3hot);411411+}412412+EXPORT_SYMBOL_GPL(vfio_pci_core_disable);413413+414414+static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev)415415+{416416+ struct pci_dev *physfn = pci_physfn(vdev->pdev);417417+ struct vfio_device *pf_dev;418418+419419+ if (!vdev->pdev->is_virtfn)420420+ return NULL;421421+422422+ pf_dev = vfio_device_get_from_dev(&physfn->dev);423423+ if (!pf_dev)424424+ return NULL;425425+426426+ if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {427427+ vfio_device_put(pf_dev);428428+ return NULL;429429+ }430430+431431+ return container_of(pf_dev, struct vfio_pci_core_device, 
vdev);432432+}433433+434434+static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val)435435+{436436+ struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);437437+438438+ if (!pf_vdev)439439+ return;440440+441441+ mutex_lock(&pf_vdev->vf_token->lock);442442+ pf_vdev->vf_token->users += val;443443+ WARN_ON(pf_vdev->vf_token->users < 0);444444+ mutex_unlock(&pf_vdev->vf_token->lock);445445+446446+ vfio_device_put(&pf_vdev->vdev);447447+}448448+449449+void vfio_pci_core_close_device(struct vfio_device *core_vdev)450450+{451451+ struct vfio_pci_core_device *vdev =452452+ container_of(core_vdev, struct vfio_pci_core_device, vdev);453453+454454+ vfio_pci_vf_token_user_add(vdev, -1);455455+ vfio_spapr_pci_eeh_release(vdev->pdev);456456+ vfio_pci_core_disable(vdev);457457+458458+ mutex_lock(&vdev->igate);459459+ if (vdev->err_trigger) {460460+ eventfd_ctx_put(vdev->err_trigger);461461+ vdev->err_trigger = NULL;462462+ }463463+ if (vdev->req_trigger) {464464+ eventfd_ctx_put(vdev->req_trigger);465465+ vdev->req_trigger = NULL;466466+ }467467+ mutex_unlock(&vdev->igate);468468+}469469+EXPORT_SYMBOL_GPL(vfio_pci_core_close_device);470470+471471+void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)472472+{473473+ vfio_pci_probe_mmaps(vdev);474474+ vfio_spapr_pci_eeh_open(vdev->pdev);475475+ vfio_pci_vf_token_user_add(vdev, 1);476476+}477477+EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);478478+479479+static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type)480480+{481481+ if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {482482+ u8 pin;483483+484484+ if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||485485+ vdev->nointx || vdev->pdev->is_virtfn)486486+ return 0;487487+488488+ pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);489489+490490+ return pin ? 
1 : 0;491491+ } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {492492+ u8 pos;493493+ u16 flags;494494+495495+ pos = vdev->pdev->msi_cap;496496+ if (pos) {497497+ pci_read_config_word(vdev->pdev,498498+ pos + PCI_MSI_FLAGS, &flags);499499+ return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);500500+ }501501+ } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {502502+ u8 pos;503503+ u16 flags;504504+505505+ pos = vdev->pdev->msix_cap;506506+ if (pos) {507507+ pci_read_config_word(vdev->pdev,508508+ pos + PCI_MSIX_FLAGS, &flags);509509+510510+ return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;511511+ }512512+ } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {513513+ if (pci_is_pcie(vdev->pdev))514514+ return 1;515515+ } else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {516516+ return 1;517517+ }518518+519519+ return 0;520520+}521521+522522+static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)523523+{524524+ (*(int *)data)++;525525+ return 0;526526+}527527+528528+struct vfio_pci_fill_info {529529+ int max;530530+ int cur;531531+ struct vfio_pci_dependent_device *devices;532532+};533533+534534+static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)535535+{536536+ struct vfio_pci_fill_info *fill = data;537537+ struct iommu_group *iommu_group;538538+539539+ if (fill->cur == fill->max)540540+ return -EAGAIN; /* Something changed, try again */541541+542542+ iommu_group = iommu_group_get(&pdev->dev);543543+ if (!iommu_group)544544+ return -EPERM; /* Cannot reset non-isolated devices */545545+546546+ fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);547547+ fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);548548+ fill->devices[fill->cur].bus = pdev->bus->number;549549+ fill->devices[fill->cur].devfn = pdev->devfn;550550+ fill->cur++;551551+ iommu_group_put(iommu_group);552552+ return 0;553553+}554554+555555+struct vfio_pci_group_info {556556+ int count;557557+ struct vfio_group **groups;558558+};559559+560560+static bool vfio_pci_dev_below_slot(struct 
pci_dev *pdev, struct pci_slot *slot)
{
	for (; pdev; pdev = pdev->bus->self)
		if (pdev->bus == slot->bus)
			return (pdev->slot == slot);
	return false;
}

/*
 * State shared between vfio_pci_for_each_slot_or_bus() and its
 * pci_walk_bus() callback: the user callback, its cookie, the anchor
 * device, the slot-vs-bus restriction, and the accumulated return code.
 */
struct vfio_pci_walk_info {
	int (*fn)(struct pci_dev *, void *data);
	void *data;
	struct pci_dev *pdev;
	bool slot;
	int ret;
};

/*
 * pci_walk_bus() callback: invoke the user's function on each device,
 * skipping devices not below the anchor's slot when walk->slot is set.
 * Returning walk->ret non-zero causes pci_walk_bus() to abort the walk.
 */
static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
{
	struct vfio_pci_walk_info *walk = data;

	if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
		walk->ret = walk->fn(pdev, walk->data);

	return walk->ret;
}

/*
 * Apply @fn to every device on @pdev's bus (and sub-buses), or only to
 * devices within @pdev's slot when @slot is true.  Stops at the first
 * non-zero return from @fn and propagates that value.
 */
static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
					 int (*fn)(struct pci_dev *,
						   void *data), void *data,
					 bool slot)
{
	struct vfio_pci_walk_info walk = {
		.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
	};

	pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);

	return walk.ret;
}

/*
 * Append the VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability, advertising
 * to userspace that the BAR containing the MSI-X table may be mmapped.
 */
static int msix_mmappable_cap(struct vfio_pci_core_device *vdev,
			      struct vfio_info_cap *caps)
{
	struct vfio_info_cap_header header = {
		.id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,
		.version = 1
	};

	return vfio_info_add_capability(caps, &header, sizeof(header));
}

/*
 * vfio_pci_register_dev_region() - register a device-specific region
 * @vdev:	vfio-pci device
 * @type:	vendor/type reported via VFIO_REGION_INFO_CAP_TYPE
 * @subtype:	subtype reported via VFIO_REGION_INFO_CAP_TYPE
 * @ops:	access callbacks (rw/mmap/add_capability) for the region
 * @size:	region size in bytes
 * @flags:	VFIO_REGION_INFO_FLAG_* advertised to userspace
 * @data:	opaque cookie stored alongside the region
 *
 * Grows the vdev->region array by one entry via krealloc(); on
 * allocation failure the existing array is left untouched and -ENOMEM
 * is returned.  Returns 0 on success.
 */
int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev,
				 unsigned int type, unsigned int subtype,
				 const struct vfio_pci_regops *ops,
				 size_t size, u32 flags, void *data)
{
	struct vfio_pci_region *region;

	region = krealloc(vdev->region,
			  (vdev->num_regions + 1) * sizeof(*region),
			  GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	vdev->region = region;
	vdev->region[vdev->num_regions].type = type;
	vdev->region[vdev->num_regions].subtype = subtype;
	vdev->region[vdev->num_regions].ops = ops;
	vdev->region[vdev->num_regions].size = size;
	vdev->region[vdev->num_regions].flags = flags;
	vdev->region[vdev->num_regions].data = data;

	vdev->num_regions++;

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_register_dev_region);

/*
 * vfio_pci_core_ioctl() - vfio_device_ops ioctl handler for vfio-pci
 *
 * Dispatches the VFIO device ioctls: DEVICE_GET_INFO, GET_REGION_INFO,
 * GET_IRQ_INFO, SET_IRQS, RESET, GET_PCI_HOT_RESET_INFO, PCI_HOT_RESET,
 * IOEVENTFD and DEVICE_FEATURE.  Each branch follows the standard VFIO
 * argsz protocol: copy the fixed header from userspace, validate argsz
 * against the minimum, then copy results (and optional capability
 * chains) back.  Returns 0 or a negative errno; -ENOTTY for unknown
 * commands.
 */
long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
			 unsigned long arg)
{
	struct vfio_pci_core_device *vdev =
		container_of(core_vdev, struct vfio_pci_core_device, vdev);
	unsigned long minsz;

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		unsigned long capsz;
		int ret;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		/* For backward compatibility, cannot require this */
		capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		if (info.argsz >= capsz) {
			minsz = capsz;
			info.cap_offset = 0;
		}

		info.flags = VFIO_DEVICE_FLAGS_PCI;

		if (vdev->reset_works)
			info.flags |= VFIO_DEVICE_FLAGS_RESET;

		info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		/* s390 zPCI capabilities; -ENODEV simply means "not a zdev" */
		ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
		if (ret && ret != -ENODEV) {
			pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
			return ret;
		}

		if (caps.size) {
			info.flags |= VFIO_DEVICE_FLAGS_CAPS;
			if (info.argsz < sizeof(info) + caps.size) {
				/* Too small: report required size only */
				info.argsz = sizeof(info) + caps.size;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						 sizeof(info), caps.buf,
						 caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct pci_dev *pdev = vdev->pdev;
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		int i, ret;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = pdev->cfg_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = pci_resource_len(pdev, info.index);
			if (!info.size) {
				/* Unimplemented BAR: zero size, no access */
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			if (vdev->bar_mmap_supported[info.index]) {
				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
				if (info.index == vdev->msix_bar) {
					ret = msix_mmappable_cap(vdev, &caps);
					if (ret)
						return ret;
				}
			}

			break;
		case VFIO_PCI_ROM_REGION_INDEX:
		{
			void __iomem *io;
			size_t size;
			u16 cmd;

			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = 0;

			/* Report the BAR size, not the ROM size */
			info.size = pci_resource_len(pdev, info.index);
			if (!info.size) {
				/* Shadow ROMs appear as PCI option ROMs */
				if (pdev->resource[PCI_ROM_RESOURCE].flags &
							IORESOURCE_ROM_SHADOW)
					info.size = 0x20000;
				else
					break;
			}

			/*
			 * Is it really there?  Enable memory decode for
			 * implicit access in pci_map_rom().
			 */
			cmd = vfio_pci_memory_lock_and_enable(vdev);
			io = pci_map_rom(pdev, &size);
			if (io) {
				info.flags = VFIO_REGION_INFO_FLAG_READ;
				pci_unmap_rom(pdev, io);
			} else {
				info.size = 0;
			}
			vfio_pci_memory_unlock_and_restore(vdev, cmd);

			break;
		}
		case VFIO_PCI_VGA_REGION_INDEX:
			if (!vdev->has_vga)
				return -EINVAL;

			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0xc0000;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;

			break;
		default:
		{
			/* Device-specific region: report via TYPE capability */
			struct vfio_region_info_cap_type cap_type = {
					.header.id = VFIO_REGION_INFO_CAP_TYPE,
					.header.version = 1 };

			if (info.index >=
			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
				return -EINVAL;
			/* Clamp under speculation; index is user controlled */
			info.index = array_index_nospec(info.index,
							VFIO_PCI_NUM_REGIONS +
							vdev->num_regions);

			i = info.index - VFIO_PCI_NUM_REGIONS;

			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vdev->region[i].size;
			info.flags = vdev->region[i].flags;

			cap_type.type = vdev->region[i].type;
			cap_type.subtype = vdev->region[i].subtype;

			ret = vfio_info_add_capability(&caps, &cap_type.header,
						       sizeof(cap_type));
			if (ret)
				return ret;

			if (vdev->region[i].ops->add_capability) {
				ret = vdev->region[i].ops->add_capability(vdev,
						&vdev->region[i], &caps);
				if (ret)
					return ret;
			}
		}
		}

		if (caps.size) {
			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
		case VFIO_PCI_REQ_IRQ_INDEX:
			break;
		case VFIO_PCI_ERR_IRQ_INDEX:
			/* ERR IRQ only exists for PCIe devices (AER) */
			if (pci_is_pcie(vdev->pdev))
				break;
			fallthrough;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = vfio_pci_get_irq_count(vdev, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int max, ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		max = vfio_pci_get_irq_count(vdev, hdr.index);

		/* Validates hdr and computes trailing data size, if any */
		ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						 VFIO_PCI_NUM_IRQS, &data_size);
		if (ret)
			return ret;

		if (data_size) {
			data = memdup_user((void __user *)(arg + minsz),
					    data_size);
			if (IS_ERR(data))
				return PTR_ERR(data);
		}

		/* igate serializes interrupt configuration changes */
		mutex_lock(&vdev->igate);

		ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
					      hdr.start, hdr.count, data);

		mutex_unlock(&vdev->igate);
		kfree(data);

		return ret;

	} else if (cmd == VFIO_DEVICE_RESET) {
		int ret;

		if (!vdev->reset_works)
			return -EINVAL;

		/* Zap user mappings and block memory access across the reset */
		vfio_pci_zap_and_down_write_memory_lock(vdev);
		ret = pci_try_reset_function(vdev->pdev);
		up_write(&vdev->memory_lock);

		return ret;

	} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
		struct vfio_pci_hot_reset_info hdr;
		struct vfio_pci_fill_info fill = { 0 };
		struct vfio_pci_dependent_device *devices = NULL;
		bool slot = false;
		int ret = 0;

		minsz = offsetofend(struct vfio_pci_hot_reset_info, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (hdr.argsz < minsz)
			return -EINVAL;

		hdr.flags = 0;

		/* Can we do a slot or bus reset or neither? */
		if (!pci_probe_reset_slot(vdev->pdev->slot))
			slot = true;
		else if (pci_probe_reset_bus(vdev->pdev->bus))
			return -ENODEV;

		/* How many devices are affected? */
		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
						    vfio_pci_count_devs,
						    &fill.max, slot);
		if (ret)
			return ret;

		WARN_ON(!fill.max); /* Should always be at least one */

		/*
		 * If there's enough space, fill it now, otherwise return
		 * -ENOSPC and the number of devices affected.
		 */
		if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
			ret = -ENOSPC;
			hdr.count = fill.max;
			goto reset_info_exit;
		}

		devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
		if (!devices)
			return -ENOMEM;

		fill.devices = devices;

		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
						    vfio_pci_fill_devs,
						    &fill, slot);

		/*
		 * If a device was removed between counting and filling,
		 * we may come up short of fill.max.  If a device was
		 * added, we'll have a return of -EAGAIN above.
		 */
		if (!ret)
			hdr.count = fill.cur;

reset_info_exit:
		if (copy_to_user((void __user *)arg, &hdr, minsz))
			ret = -EFAULT;

		if (!ret) {
			if (copy_to_user((void __user *)(arg + minsz), devices,
					 hdr.count * sizeof(*devices)))
				ret = -EFAULT;
		}

		kfree(devices);
		return ret;

	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
		struct vfio_pci_hot_reset hdr;
		int32_t *group_fds;
		struct vfio_group **groups;
		struct vfio_pci_group_info info;
		bool slot = false;
		int group_idx, count = 0, ret = 0;

		minsz = offsetofend(struct vfio_pci_hot_reset, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (hdr.argsz < minsz || hdr.flags)
			return -EINVAL;

		/* Can we do a slot or bus reset or neither? */
		if (!pci_probe_reset_slot(vdev->pdev->slot))
			slot = true;
		else if (pci_probe_reset_bus(vdev->pdev->bus))
			return -ENODEV;

		/*
		 * We can't let userspace give us an arbitrarily large
		 * buffer to copy, so verify how many we think there
		 * could be.  Note groups can have multiple devices so
		 * one group per device is the max.
		 */
		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
						    vfio_pci_count_devs,
						    &count, slot);
		if (ret)
			return ret;

		/* Somewhere between 1 and count is OK */
		if (!hdr.count || hdr.count > count)
			return -EINVAL;

		group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
		groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
		if (!group_fds || !groups) {
			kfree(group_fds);
			kfree(groups);
			return -ENOMEM;
		}

		if (copy_from_user(group_fds, (void __user *)(arg + minsz),
				   hdr.count * sizeof(*group_fds))) {
			kfree(group_fds);
			kfree(groups);
			return -EFAULT;
		}

		/*
		 * For each group_fd, get the group through the vfio external
		 * user interface and store the group and iommu ID.  This
		 * ensures the group is held across the reset.
		 */
		for (group_idx = 0; group_idx < hdr.count; group_idx++) {
			struct vfio_group *group;
			struct fd f = fdget(group_fds[group_idx]);
			if (!f.file) {
				ret = -EBADF;
				break;
			}

			group = vfio_group_get_external_user(f.file);
			fdput(f);
			if (IS_ERR(group)) {
				ret = PTR_ERR(group);
				break;
			}

			groups[group_idx] = group;
		}

		kfree(group_fds);

		/* release reference to groups on error */
		if (ret)
			goto hot_reset_release;

		info.count = hdr.count;
		info.groups = groups;

		ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);

hot_reset_release:
		/* group_idx indexes the first unfilled slot; walk backwards */
		for (group_idx--; group_idx >= 0; group_idx--)
			vfio_group_put_external_user(groups[group_idx]);

		kfree(groups);
		return ret;
	} else if (cmd == VFIO_DEVICE_IOEVENTFD) {
		struct vfio_device_ioeventfd ioeventfd;
		int count;

		minsz = offsetofend(struct vfio_device_ioeventfd, fd);

		if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))
			return -EFAULT;

		if (ioeventfd.argsz < minsz)
			return -EINVAL;

		if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
			return -EINVAL;

		count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;

		/* Exactly one size bit set; fd == -1 means de-assign */
		if (hweight8(count) != 1 || ioeventfd.fd < -1)
			return -EINVAL;

		return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
					  ioeventfd.data, count, ioeventfd.fd);
	} else if (cmd == VFIO_DEVICE_FEATURE) {
		struct vfio_device_feature feature;
		uuid_t uuid;

		minsz = offsetofend(struct vfio_device_feature, flags);

		if (copy_from_user(&feature, (void __user *)arg, minsz))
			return -EFAULT;

		if (feature.argsz < minsz)
			return -EINVAL;

		/* Check unknown flags */
		if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
				      VFIO_DEVICE_FEATURE_SET |
				      VFIO_DEVICE_FEATURE_GET |
				      VFIO_DEVICE_FEATURE_PROBE))
			return -EINVAL;

		/* GET & SET are mutually exclusive except with PROBE */
		if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
		    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
		    (feature.flags & VFIO_DEVICE_FEATURE_GET))
			return -EINVAL;

		switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
		case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
			if (!vdev->vf_token)
				return -ENOTTY;

			/*
			 * We do not support GET of the VF Token UUID as this
			 * could expose the token of the previous device user.
			 */
			if (feature.flags & VFIO_DEVICE_FEATURE_GET)
				return -EINVAL;

			if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
				return 0;

			/* Don't SET unless told to do so */
			if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
				return -EINVAL;

			if (feature.argsz < minsz + sizeof(uuid))
				return -EINVAL;

			if (copy_from_user(&uuid, (void __user *)(arg + minsz),
					   sizeof(uuid)))
				return -EFAULT;

			mutex_lock(&vdev->vf_token->lock);
			uuid_copy(&vdev->vf_token->uuid, &uuid);
			mutex_unlock(&vdev->vf_token->lock);

			return 0;
		default:
			return -ENOTTY;
		}
	}

	return -ENOTTY;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);

/*
 * Dispatch a read or write to the handler for the region selected by
 * the offset's index bits: config space, BARs, ROM (read-only), VGA,
 * or a device-specific region's ops->rw.
 */
static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			   size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);

	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
		return -EINVAL;

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);

	case VFIO_PCI_ROM_REGION_INDEX:
		if (iswrite)
			return -EINVAL;
		return vfio_pci_bar_rw(vdev, buf, count, ppos, false);

	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
		return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);

	case VFIO_PCI_VGA_REGION_INDEX:
		return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
	default:
		index -= VFIO_PCI_NUM_REGIONS;
		return vdev->region[index].ops->rw(vdev, buf,
						   count, ppos, iswrite);
	}

	return -EINVAL;
}

/* vfio_device_ops read entry point: thin wrapper over vfio_pci_rw() */
ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
			   size_t count, loff_t *ppos)
{
	struct vfio_pci_core_device *vdev =
		container_of(core_vdev, struct vfio_pci_core_device, vdev);

	if (!count)
		return 0;

	return vfio_pci_rw(vdev, buf, count, ppos, false);
}
EXPORT_SYMBOL_GPL(vfio_pci_core_read);

/* vfio_device_ops write entry point: thin wrapper over vfio_pci_rw() */
ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct vfio_pci_core_device *vdev =
		container_of(core_vdev, struct vfio_pci_core_device, vdev);

	if (!count)
		return 0;

	return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
}
EXPORT_SYMBOL_GPL(vfio_pci_core_write);

/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try)
{
	struct vfio_pci_mmap_vma *mmap_vma, *tmp;

	/*
	 * Lock ordering:
	 * vma_lock is nested under mmap_lock for vm_ops callback paths.
	 * The memory_lock semaphore is used by both code paths calling
	 * into this function to zap vmas and the vm_ops.fault callback
	 * to protect the memory enable state of the device.
	 *
	 * When zapping vmas we need to maintain the mmap_lock => vma_lock
	 * ordering, which requires using vma_lock to walk vma_list to
	 * acquire an mm, then dropping vma_lock to get the mmap_lock and
	 * reacquiring vma_lock.  This logic is derived from similar
	 * requirements in uverbs_user_mmap_disassociate().
	 *
	 * mmap_lock must always be the top-level lock when it is taken.
	 * Therefore we can only hold the memory_lock write lock when
	 * vma_list is empty, as we'd need to take mmap_lock to clear
	 * entries.  vma_list can only be guaranteed empty when holding
	 * vma_lock, thus memory_lock is nested under vma_lock.
	 *
	 * This enables the vm_ops.fault callback to acquire vma_lock,
	 * followed by memory_lock read lock, while already holding
	 * mmap_lock without risk of deadlock.
	 */
	while (1) {
		struct mm_struct *mm = NULL;

		if (try) {
			if (!mutex_trylock(&vdev->vma_lock))
				return 0;
		} else {
			mutex_lock(&vdev->vma_lock);
		}
		/* Find a live mm to zap; prune entries whose mm is gone */
		while (!list_empty(&vdev->vma_list)) {
			mmap_vma = list_first_entry(&vdev->vma_list,
						    struct vfio_pci_mmap_vma,
						    vma_next);
			mm = mmap_vma->vma->vm_mm;
			if (mmget_not_zero(mm))
				break;

			list_del(&mmap_vma->vma_next);
			kfree(mmap_vma);
			mm = NULL;
		}
		if (!mm)
			return 1;
		mutex_unlock(&vdev->vma_lock);

		if (try) {
			if (!mmap_read_trylock(mm)) {
				mmput(mm);
				return 0;
			}
		} else {
			mmap_read_lock(mm);
		}
		if (try) {
			if (!mutex_trylock(&vdev->vma_lock)) {
				mmap_read_unlock(mm);
				mmput(mm);
				return 0;
			}
		} else {
			mutex_lock(&vdev->vma_lock);
		}
		/* Zap every tracked vma that belongs to this mm */
		list_for_each_entry_safe(mmap_vma, tmp,
					 &vdev->vma_list, vma_next) {
			struct vm_area_struct *vma = mmap_vma->vma;

			if (vma->vm_mm != mm)
				continue;

			list_del(&mmap_vma->vma_next);
			kfree(mmap_vma);

			zap_vma_ptes(vma, vma->vm_start,
				     vma->vm_end - vma->vm_start);
		}
		mutex_unlock(&vdev->vma_lock);
		mmap_read_unlock(mm);
		mmput(mm);
	}
}

/*
 * Zap all user mappings, then take memory_lock for write so device
 * memory stays blocked until the caller releases it.  vma_lock is held
 * across the lock acquisition (per the ordering rules above) and
 * dropped before returning.
 */
void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
{
	vfio_pci_zap_and_vma_lock(vdev, false);
	down_write(&vdev->memory_lock);
	mutex_unlock(&vdev->vma_lock);
}

/*
 * Take memory_lock for write and make sure PCI memory decode is enabled
 * (needed for implicit accesses such as pci_map_rom()).  Returns the
 * original PCI_COMMAND value for vfio_pci_memory_unlock_and_restore().
 */
u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
{
	u16 cmd;

	down_write(&vdev->memory_lock);
	pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
	if (!(cmd & PCI_COMMAND_MEMORY))
		pci_write_config_word(vdev->pdev, PCI_COMMAND,
				      cmd | PCI_COMMAND_MEMORY);

	return cmd;
}

/* Restore the saved PCI_COMMAND value and release memory_lock */
void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 cmd)
{
	pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
	up_write(&vdev->memory_lock);
}

/* Caller holds vma_lock */
static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev,
			      struct vm_area_struct *vma)
{
	struct vfio_pci_mmap_vma *mmap_vma;

	mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
	if (!mmap_vma)
		return -ENOMEM;

	mmap_vma->vma = vma;
	list_add(&mmap_vma->vma_next, &vdev->vma_list);

	return 0;
}

/*
 * Zap mmaps on open so that we can fault them in on access and therefore
 * our vma_list only tracks mappings accessed since last zap.
 */
static void vfio_pci_mmap_open(struct vm_area_struct *vma)
{
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}

/* Drop the vma from the tracking list when the mapping goes away */
static void vfio_pci_mmap_close(struct vm_area_struct *vma)
{
	struct vfio_pci_core_device *vdev = vma->vm_private_data;
	struct vfio_pci_mmap_vma *mmap_vma;

	mutex_lock(&vdev->vma_lock);
	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
		if (mmap_vma->vma == vma) {
			list_del(&mmap_vma->vma_next);
			kfree(mmap_vma);
			break;
		}
	}
	mutex_unlock(&vdev->vma_lock);
}

/*
 * Fault handler: populate the entire vma on first access, but only
 * while device memory is enabled (SIGBUS otherwise), and track the vma
 * so it can be zapped again later.
 */
static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct vfio_pci_core_device *vdev = vma->vm_private_data;
	struct vfio_pci_mmap_vma *mmap_vma;
	vm_fault_t ret = VM_FAULT_NOPAGE;

	mutex_lock(&vdev->vma_lock);
	down_read(&vdev->memory_lock);

	if (!__vfio_pci_memory_enabled(vdev)) {
		ret = VM_FAULT_SIGBUS;
		goto up_out;
	}

	/*
	 * We populate the whole vma on fault, so we need to test whether
	 * the vma has already been mapped, such as for concurrent faults
	 * to the same vma.  io_remap_pfn_range() will trigger a BUG_ON if
	 * we ask it to fill the same range again.
	 */
	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
		if (mmap_vma->vma == vma)
			goto up_out;
	}

	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot)) {
		ret = VM_FAULT_SIGBUS;
		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
		goto up_out;
	}

	if (__vfio_pci_add_vma(vdev, vma)) {
		ret = VM_FAULT_OOM;
		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
	}

up_out:
	up_read(&vdev->memory_lock);
	mutex_unlock(&vdev->vma_lock);
	return ret;
}

static const struct vm_operations_struct vfio_pci_mmap_ops = {
	.open = vfio_pci_mmap_open,
	.close = vfio_pci_mmap_close,
	.fault = vfio_pci_mmap_fault,
};

/*
 * vfio_device_ops mmap entry point: validate the requested region and
 * range, delegate device-specific regions to their ops->mmap, claim the
 * BAR resource on first use, and set up a faulted-in PFN mapping.
 */
int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
{
	struct vfio_pci_core_device *vdev =
		container_of(core_vdev, struct vfio_pci_core_device, vdev);
	struct pci_dev *pdev = vdev->pdev;
	unsigned int index;
	u64 phys_len, req_len, pgoff, req_start;
	int ret;

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);

	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
		return -EINVAL;
	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index >= VFIO_PCI_NUM_REGIONS) {
		/* Device-specific region: only mmappable via its own ops */
		int regnum = index - VFIO_PCI_NUM_REGIONS;
		struct vfio_pci_region *region = vdev->region + regnum;

		if (region->ops && region->ops->mmap &&
		    (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
			return region->ops->mmap(vdev, region, vma);
		return -EINVAL;
	}
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;
	if (!vdev->bar_mmap_supported[index])
		return -EINVAL;

	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
	req_len = vma->vm_end - vma->vm_start;
	pgoff = vma->vm_pgoff &
		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	req_start = pgoff << PAGE_SHIFT;

	if (req_start + req_len > phys_len)
		return -EINVAL;

	/*
	 * Even though we don't make use of the barmap for the mmap,
	 * we need to request the region and the barmap tracks that.
	 */
	if (!vdev->barmap[index]) {
		ret = pci_request_selected_regions(pdev,
						   1 << index, "vfio-pci");
		if (ret)
			return ret;

		vdev->barmap[index] = pci_iomap(pdev, index, 0);
		if (!vdev->barmap[index]) {
			pci_release_selected_regions(pdev, 1 << index);
			return -ENOMEM;
		}
	}

	vma->vm_private_data = vdev;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;

	/*
	 * See remap_pfn_range(), called from vfio_pci_fault() but we can't
	 * change vm_flags within the fault handler.  Set them now.
	 */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &vfio_pci_mmap_ops;

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_mmap);

/*
 * vfio_device_ops request entry point: ask the user (via the req_trigger
 * eventfd, if registered) to release the device; warn once if there is
 * no channel to deliver the request.
 */
void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count)
{
	struct vfio_pci_core_device *vdev =
		container_of(core_vdev, struct vfio_pci_core_device, vdev);
	struct pci_dev *pdev = vdev->pdev;

	mutex_lock(&vdev->igate);

	if (vdev->req_trigger) {
		/* rate-limit the nag to every 10th request */
		if (!(count % 10))
			pci_notice_ratelimited(pdev,
				"Relaying device request to user (#%u)\n",
				count);
		eventfd_signal(vdev->req_trigger, 1);
	} else if (count == 0) {
		pci_warn(pdev,
			"No device request channel registered, blocked until released by user\n");
	}

	mutex_unlock(&vdev->igate);
}
EXPORT_SYMBOL_GPL(vfio_pci_core_request);

static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
				      bool vf_token, uuid_t *uuid)
{
	/*
	 * There's always some degree of trust or collaboration between SR-IOV
	 * PF and VFs, even if just that the PF hosts the SR-IOV capability and
	 * can disrupt VFs with a reset, but often the PF has more explicit
	 * access to deny service to the VF or access data passed through the
	 * VF.  We therefore require an opt-in via a shared VF token (UUID) to
	 * represent this trust.  This both prevents that a VF driver might
	 * assume the PF driver is a trusted, in-kernel driver, and also that
	 * a PF driver might be replaced with a rogue driver, unknown to in-use
	 * VF drivers.
	 *
	 * Therefore when presented with a VF, if the PF is a vfio device and
	 * it is bound to the vfio-pci driver, the user needs to provide a VF
	 * token to access the device, in the form of appending a vf_token to
	 * the device name, for example:
	 *
	 * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
	 *
	 * When presented with a PF which has VFs in use, the user must also
	 * provide the current VF token to prove collaboration with existing
	 * VF users.  If VFs are not in use, the VF token provided for the PF
	 * device will act to set the VF token.
	 *
	 * If the VF token is provided but unused, an error is generated.
	 */
	if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
		return 0; /* No VF token provided or required */

	if (vdev->pdev->is_virtfn) {
		struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
		bool match;

		if (!pf_vdev) {
			if (!vf_token)
				return 0; /* PF is not vfio-pci, no VF token */

			pci_info_ratelimited(vdev->pdev,
				"VF token incorrectly provided, PF not bound to vfio-pci\n");
			return -EINVAL;
		}

		if (!vf_token) {
			vfio_device_put(&pf_vdev->vdev);
			pci_info_ratelimited(vdev->pdev,
				"VF token required to access device\n");
			return -EACCES;
		}

		mutex_lock(&pf_vdev->vf_token->lock);
		match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
		mutex_unlock(&pf_vdev->vf_token->lock);

		vfio_device_put(&pf_vdev->vdev);

		if (!match) {
			pci_info_ratelimited(vdev->pdev,
				"Incorrect VF token provided for device\n");
			return -EACCES;
		}
	} else if (vdev->vf_token) {
		mutex_lock(&vdev->vf_token->lock);
		if (vdev->vf_token->users) {
			/* VFs in use: PF user must prove it knows the token */
			if (!vf_token) {
				mutex_unlock(&vdev->vf_token->lock);
				pci_info_ratelimited(vdev->pdev,
					"VF token required to access device\n");
				return -EACCES;
			}

			if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
				mutex_unlock(&vdev->vf_token->lock);
				pci_info_ratelimited(vdev->pdev,
					"Incorrect VF token provided for device\n");
				return -EACCES;
			}
		} else if (vf_token) {
			/* No VFs in use: PF user sets a new token */
			uuid_copy(&vdev->vf_token->uuid, uuid);
		}

		mutex_unlock(&vdev->vf_token->lock);
	} else if (vf_token) {
		pci_info_ratelimited(vdev->pdev,
			"VF token incorrectly provided, not a PF or VF\n");
		return -EINVAL;
	}

	return 0;
}

#define VF_TOKEN_ARG "vf_token="

/*
 * vfio_device_ops match entry point: compare @buf against the PCI device
 * name, parse an optional "vf_token=<uuid>" suffix, and validate the
 * token.  Returns 1 on match, 0 on no match, negative errno on bad
 * input or token failure.
 */
int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf)
{
	struct vfio_pci_core_device *vdev =
		container_of(core_vdev, struct vfio_pci_core_device, vdev);
	bool vf_token = false;
	uuid_t uuid;
	int ret;

	if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))
		return 0; /* No match */

	if (strlen(buf) > strlen(pci_name(vdev->pdev))) {
		buf += strlen(pci_name(vdev->pdev));

		if (*buf != ' ')
			return 0; /* No match: non-whitespace after name */

		while (*buf) {
			if (*buf == ' ') {
				buf++;
				continue;
			}

			if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,
						  strlen(VF_TOKEN_ARG))) {
				buf += strlen(VF_TOKEN_ARG);

				if (strlen(buf) < UUID_STRING_LEN)
					return -EINVAL;

				ret = uuid_parse(buf, &uuid);
				if (ret)
					return ret;

				vf_token = true;
				buf += UUID_STRING_LEN;
			} else {
				/* Unknown/duplicate option */
				return -EINVAL;
			}
		}
	}

	ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);
	if (ret)
		return ret;

	return 1; /* Match */
}
EXPORT_SYMBOL_GPL(vfio_pci_core_match);

/*
 * PCI bus notifier for a PF: pre-set driver_override on newly added VFs
 * of this PF so they bind to the same vfio driver, and warn if a VF
 * nevertheless binds to a different driver than the PF.
 */
static int vfio_pci_bus_notifier(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct vfio_pci_core_device *vdev = container_of(nb,
						    struct vfio_pci_core_device, nb);
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev *physfn = pci_physfn(pdev);

	if (action == BUS_NOTIFY_ADD_DEVICE &&
	    pdev->is_virtfn && physfn == vdev->pdev) {
		pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
			 pci_name(pdev));
		pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
						  vdev->vdev.ops->name);
	} else if (action == BUS_NOTIFY_BOUND_DRIVER &&
		   pdev->is_virtfn && physfn == vdev->pdev) {
		struct pci_driver *drv = pci_dev_driver(pdev);

		if (drv && drv != pci_dev_driver(vdev->pdev))
			pci_warn(vdev->pdev,
				 "VF %s bound to driver %s while PF bound to driver %s\n",
				 pci_name(pdev), drv->name,
				 pci_dev_driver(vdev->pdev)->name);
	}

	return 0;
}

/*
 * For a PF device: allocate the VF token (with a freshly generated
 * UUID) and register the bus notifier above.  No-op for non-PFs.
 */
static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;

	if (!pdev->is_physfn)
		return 0;

	vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
	if (!vdev->vf_token)
		return -ENOMEM;

	mutex_init(&vdev->vf_token->lock);
	uuid_gen(&vdev->vf_token->uuid);

	vdev->nb.notifier_call = vfio_pci_bus_notifier;
	ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
	if (ret) {
		kfree(vdev->vf_token);
		return ret;
	}
	return 0;
}

/* Tear down what vfio_pci_vf_init() set up; no-op if it did nothing */
static void vfio_pci_vf_uninit(struct vfio_pci_core_device *vdev)
{
	if (!vdev->vf_token)
		return;

	bus_unregister_notifier(&pci_bus_type, &vdev->nb);
	WARN_ON(vdev->vf_token->users);
	mutex_destroy(&vdev->vf_token->lock);
	kfree(vdev->vf_token);
}

/*
 * Register as a VGA arbiter client and set the initial legacy VGA
 * decode routing for this device.  No-op for non-VGA devices.
 */
static int vfio_pci_vga_init(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;

	if (!vfio_pci_is_vga(pdev))
		return 0;

	ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
	if (ret)
		return ret;
	vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false));
	return 0;
}

/* Unregister the VGA arbiter client and restore full legacy decoding */
static void vfio_pci_vga_uninit(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;

	if (!vfio_pci_is_vga(pdev))
		return;
	vga_client_register(pdev, NULL, NULL, NULL);
	vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
					      VGA_RSRC_LEGACY_IO |
					      VGA_RSRC_LEGACY_MEM);
}

/*
 * Initialize an embedded vfio_pci_core_device: hook up the vfio_device
 * core, record the pci_dev, and initialize all locks and lists used by
 * the interrupt, ioeventfd, and mmap-tracking machinery.
 */
void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
			       struct pci_dev *pdev,
			       const struct vfio_device_ops *vfio_pci_ops)
{
	vfio_init_group_dev(&vdev->vdev, &pdev->dev, vfio_pci_ops);
	vdev->pdev = pdev;
	vdev->irq_type = VFIO_PCI_NUM_IRQS;
	mutex_init(&vdev->igate);
	spin_lock_init(&vdev->irqlock);
	mutex_init(&vdev->ioeventfds_lock);
	INIT_LIST_HEAD(&vdev->dummy_resources_list);
	INIT_LIST_HEAD(&vdev->ioeventfds_list);
	mutex_init(&vdev->vma_lock);
	INIT_LIST_HEAD(&vdev->vma_list);
	init_rwsem(&vdev->memory_lock);
}
EXPORT_SYMBOL_GPL(vfio_pci_core_init_device);

/* Inverse of vfio_pci_core_init_device(); also frees regions/pm state */
void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev)
{
	mutex_destroy(&vdev->igate);
	mutex_destroy(&vdev->ioeventfds_lock);
	mutex_destroy(&vdev->vma_lock);
	vfio_uninit_group_dev(&vdev->vdev);
	kfree(vdev->region);
	kfree(vdev->pm_save);
}
EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device);

int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	struct iommu_group *group;
	int ret;

	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return -EINVAL;

	/*
	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
	 * by the host or other users.  We cannot capture the VFs if they
	 * already exist, nor can we track VF users.
Disabling SR-IOV here18201820+ * would initiate removing the VFs, which would unbind the driver,18211821+ * which is prone to blocking if that VF is also in use by vfio-pci.18221822+ * Just reject these PFs and let the user sort it out.18231823+ */18241824+ if (pci_num_vf(pdev)) {18251825+ pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");18261826+ return -EBUSY;18271827+ }18281828+18291829+ group = vfio_iommu_group_get(&pdev->dev);18301830+ if (!group)18311831+ return -EINVAL;18321832+18331833+ if (pci_is_root_bus(pdev->bus)) {18341834+ ret = vfio_assign_device_set(&vdev->vdev, vdev);18351835+ } else if (!pci_probe_reset_slot(pdev->slot)) {18361836+ ret = vfio_assign_device_set(&vdev->vdev, pdev->slot);18371837+ } else {18381838+ /*18391839+ * If there is no slot reset support for this device, the whole18401840+ * bus needs to be grouped together to support bus-wide resets.18411841+ */18421842+ ret = vfio_assign_device_set(&vdev->vdev, pdev->bus);18431843+ }18441844+18451845+ if (ret)18461846+ goto out_group_put;18471847+ ret = vfio_pci_vf_init(vdev);18481848+ if (ret)18491849+ goto out_group_put;18501850+ ret = vfio_pci_vga_init(vdev);18511851+ if (ret)18521852+ goto out_vf;18531853+18541854+ vfio_pci_probe_power_state(vdev);18551855+18561856+ if (!disable_idle_d3) {18571857+ /*18581858+ * pci-core sets the device power state to an unknown value at18591859+ * bootup and after being removed from a driver. The only18601860+ * transition it allows from this unknown state is to D0, which18611861+ * typically happens when a driver calls pci_enable_device().18621862+ * We're not ready to enable the device yet, but we do want to18631863+ * be able to get to D3. 
Therefore first do a D0 transition18641864+ * before going to D3.18651865+ */18661866+ vfio_pci_set_power_state(vdev, PCI_D0);18671867+ vfio_pci_set_power_state(vdev, PCI_D3hot);18681868+ }18691869+18701870+ ret = vfio_register_group_dev(&vdev->vdev);18711871+ if (ret)18721872+ goto out_power;18731873+ return 0;18741874+18751875+out_power:18761876+ if (!disable_idle_d3)18771877+ vfio_pci_set_power_state(vdev, PCI_D0);18781878+out_vf:18791879+ vfio_pci_vf_uninit(vdev);18801880+out_group_put:18811881+ vfio_iommu_group_put(group, &pdev->dev);18821882+ return ret;18831883+}18841884+EXPORT_SYMBOL_GPL(vfio_pci_core_register_device);18851885+18861886+void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)18871887+{18881888+ struct pci_dev *pdev = vdev->pdev;18891889+18901890+ pci_disable_sriov(pdev);18911891+18921892+ vfio_unregister_group_dev(&vdev->vdev);18931893+18941894+ vfio_pci_vf_uninit(vdev);18951895+ vfio_pci_vga_uninit(vdev);18961896+18971897+ vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);18981898+18991899+ if (!disable_idle_d3)19001900+ vfio_pci_set_power_state(vdev, PCI_D0);19011901+}19021902+EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device);19031903+19041904+static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,19051905+ pci_channel_state_t state)19061906+{19071907+ struct vfio_pci_core_device *vdev;19081908+ struct vfio_device *device;19091909+19101910+ device = vfio_device_get_from_dev(&pdev->dev);19111911+ if (device == NULL)19121912+ return PCI_ERS_RESULT_DISCONNECT;19131913+19141914+ vdev = container_of(device, struct vfio_pci_core_device, vdev);19151915+19161916+ mutex_lock(&vdev->igate);19171917+19181918+ if (vdev->err_trigger)19191919+ eventfd_signal(vdev->err_trigger, 1);19201920+19211921+ mutex_unlock(&vdev->igate);19221922+19231923+ vfio_device_put(device);19241924+19251925+ return PCI_ERS_RESULT_CAN_RECOVER;19261926+}19271927+19281928+int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int 
nr_virtfn)19291929+{19301930+ struct vfio_device *device;19311931+ int ret = 0;19321932+19331933+ device = vfio_device_get_from_dev(&pdev->dev);19341934+ if (!device)19351935+ return -ENODEV;19361936+19371937+ if (nr_virtfn == 0)19381938+ pci_disable_sriov(pdev);19391939+ else19401940+ ret = pci_enable_sriov(pdev, nr_virtfn);19411941+19421942+ vfio_device_put(device);19431943+19441944+ return ret < 0 ? ret : nr_virtfn;19451945+}19461946+EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);19471947+19481948+const struct pci_error_handlers vfio_pci_core_err_handlers = {19491949+ .error_detected = vfio_pci_aer_err_detected,19501950+};19511951+EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);19521952+19531953+static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,19541954+ struct vfio_pci_group_info *groups)19551955+{19561956+ unsigned int i;19571957+19581958+ for (i = 0; i < groups->count; i++)19591959+ if (groups->groups[i] == vdev->vdev.group)19601960+ return true;19611961+ return false;19621962+}19631963+19641964+static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)19651965+{19661966+ struct vfio_device_set *dev_set = data;19671967+ struct vfio_device *cur;19681968+19691969+ list_for_each_entry(cur, &dev_set->device_list, dev_set_list)19701970+ if (cur->dev == &pdev->dev)19711971+ return 0;19721972+ return -EBUSY;19731973+}19741974+19751975+/*19761976+ * vfio-core considers a group to be viable and will create a vfio_device even19771977+ * if some devices are bound to drivers like pci-stub or pcieport. Here we19781978+ * require all PCI devices to be inside our dev_set since that ensures they stay19791979+ * put and that every driver controlling the device can co-ordinate with the19801980+ * device reset.19811981+ *19821982+ * Returns the pci_dev to pass to pci_reset_bus() if every PCI device to be19831983+ * reset is inside the dev_set, and pci_reset_bus() can succeed. 
NULL otherwise.19841984+ */19851985+static struct pci_dev *19861986+vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set)19871987+{19881988+ struct pci_dev *pdev;19891989+19901990+ lockdep_assert_held(&dev_set->lock);19911991+19921992+ /*19931993+ * By definition all PCI devices in the dev_set share the same PCI19941994+ * reset, so any pci_dev will have the same outcomes for19951995+ * pci_probe_reset_*() and pci_reset_bus().19961996+ */19971997+ pdev = list_first_entry(&dev_set->device_list,19981998+ struct vfio_pci_core_device,19991999+ vdev.dev_set_list)->pdev;20002000+20012001+ /* pci_reset_bus() is supported */20022002+ if (pci_probe_reset_slot(pdev->slot) && pci_probe_reset_bus(pdev->bus))20032003+ return NULL;20042004+20052005+ if (vfio_pci_for_each_slot_or_bus(pdev, vfio_pci_is_device_in_set,20062006+ dev_set,20072007+ !pci_probe_reset_slot(pdev->slot)))20082008+ return NULL;20092009+ return pdev;20102010+}20112011+20122012+/*20132013+ * We need to get memory_lock for each device, but devices can share mmap_lock,20142014+ * therefore we need to zap and hold the vma_lock for each device, and only then20152015+ * get each memory_lock.20162016+ */20172017+static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,20182018+ struct vfio_pci_group_info *groups)20192019+{20202020+ struct vfio_pci_core_device *cur_mem;20212021+ struct vfio_pci_core_device *cur_vma;20222022+ struct vfio_pci_core_device *cur;20232023+ struct pci_dev *pdev;20242024+ bool is_mem = true;20252025+ int ret;20262026+20272027+ mutex_lock(&dev_set->lock);20282028+ cur_mem = list_first_entry(&dev_set->device_list,20292029+ struct vfio_pci_core_device,20302030+ vdev.dev_set_list);20312031+20322032+ pdev = vfio_pci_dev_set_resettable(dev_set);20332033+ if (!pdev) {20342034+ ret = -EINVAL;20352035+ goto err_unlock;20362036+ }20372037+20382038+ list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {20392039+ /*20402040+ * Test whether all the affected devices 
are contained by the20412041+ * set of groups provided by the user.20422042+ */20432043+ if (!vfio_dev_in_groups(cur_vma, groups)) {20442044+ ret = -EINVAL;20452045+ goto err_undo;20462046+ }20472047+20482048+ /*20492049+ * Locking multiple devices is prone to deadlock, runaway and20502050+ * unwind if we hit contention.20512051+ */20522052+ if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) {20532053+ ret = -EBUSY;20542054+ goto err_undo;20552055+ }20562056+ }20572057+ cur_vma = NULL;20582058+20592059+ list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) {20602060+ if (!down_write_trylock(&cur_mem->memory_lock)) {20612061+ ret = -EBUSY;20622062+ goto err_undo;20632063+ }20642064+ mutex_unlock(&cur_mem->vma_lock);20652065+ }20662066+ cur_mem = NULL;20672067+20682068+ ret = pci_reset_bus(pdev);20692069+20702070+err_undo:20712071+ list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {20722072+ if (cur == cur_mem)20732073+ is_mem = false;20742074+ if (cur == cur_vma)20752075+ break;20762076+ if (is_mem)20772077+ up_write(&cur->memory_lock);20782078+ else20792079+ mutex_unlock(&cur->vma_lock);20802080+ }20812081+err_unlock:20822082+ mutex_unlock(&dev_set->lock);20832083+ return ret;20842084+}20852085+20862086+static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)20872087+{20882088+ struct vfio_pci_core_device *cur;20892089+ bool needs_reset = false;20902090+20912091+ list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {20922092+ /* No VFIO device in the set can have an open device FD */20932093+ if (cur->vdev.open_count)20942094+ return false;20952095+ needs_reset |= cur->needs_reset;20962096+ }20972097+ return needs_reset;20982098+}20992099+21002100+/*21012101+ * If a bus or slot reset is available for the provided dev_set and:21022102+ * - All of the devices affected by that bus or slot reset are unused21032103+ * - At least one of the affected devices is marked dirty via21042104+ * needs_reset (such as by 
lack of FLR support)21052105+ * Then attempt to perform that bus or slot reset.21062106+ * Returns true if the dev_set was reset.21072107+ */21082108+static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)21092109+{21102110+ struct vfio_pci_core_device *cur;21112111+ struct pci_dev *pdev;21122112+ int ret;21132113+21142114+ if (!vfio_pci_dev_set_needs_reset(dev_set))21152115+ return false;21162116+21172117+ pdev = vfio_pci_dev_set_resettable(dev_set);21182118+ if (!pdev)21192119+ return false;21202120+21212121+ ret = pci_reset_bus(pdev);21222122+ if (ret)21232123+ return false;21242124+21252125+ list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {21262126+ cur->needs_reset = false;21272127+ if (!disable_idle_d3)21282128+ vfio_pci_set_power_state(cur, PCI_D3hot);21292129+ }21302130+ return true;21312131+}21322132+21332133+void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga,21342134+ bool is_disable_idle_d3)21352135+{21362136+ nointxmask = is_nointxmask;21372137+ disable_vga = is_disable_vga;21382138+ disable_idle_d3 = is_disable_idle_d3;21392139+}21402140+EXPORT_SYMBOL_GPL(vfio_pci_core_set_params);21412141+21422142+static void vfio_pci_core_cleanup(void)21432143+{21442144+ vfio_pci_uninit_perm_bits();21452145+}21462146+21472147+static int __init vfio_pci_core_init(void)21482148+{21492149+ /* Allocate shared config space permission data used by all devices */21502150+ return vfio_pci_init_perm_bits();21512151+}21522152+21532153+module_init(vfio_pci_core_init);21542154+module_exit(vfio_pci_core_cleanup);21552155+21562156+MODULE_LICENSE("GPL v2");21572157+MODULE_AUTHOR(DRIVER_AUTHOR);21582158+MODULE_DESCRIPTION(DRIVER_DESC);
···1717#include <linux/vfio.h>1818#include <linux/vgaarb.h>19192020-#include "vfio_pci_private.h"2020+#include <linux/vfio_pci_core.h>21212222#ifdef __LITTLE_ENDIAN2323#define vfio_ioread64 ioread64···3838#define vfio_iowrite8 iowrite839394040#define VFIO_IOWRITE(size) \4141-static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev, \4141+static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \4242 bool test_mem, u##size val, void __iomem *io) \4343{ \4444 if (test_mem) { \···6565#endif66666767#define VFIO_IOREAD(size) \6868-static int vfio_pci_ioread##size(struct vfio_pci_device *vdev, \6868+static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \6969 bool test_mem, u##size *val, void __iomem *io) \7070{ \7171 if (test_mem) { \···9494 * reads with -1. This is intended for handling MSI-X vector tables and9595 * leftover space for ROM BARs.9696 */9797-static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem,9797+static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,9898 void __iomem *io, char __user *buf,9999 loff_t off, size_t count, size_t x_start,100100 size_t x_end, bool iswrite)···200200 return done;201201}202202203203-static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)203203+static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar)204204{205205 struct pci_dev *pdev = vdev->pdev;206206 int ret;···224224 return 0;225225}226226227227-ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,227227+ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,228228 size_t count, loff_t *ppos, bool iswrite)229229{230230 struct pci_dev *pdev = vdev->pdev;···288288 return done;289289}290290291291-ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,291291+ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,292292 size_t count, loff_t *ppos, bool iswrite)293293{294294 int ret;···384384static int 
vfio_pci_ioeventfd_handler(void *opaque, void *unused)385385{386386 struct vfio_pci_ioeventfd *ioeventfd = opaque;387387- struct vfio_pci_device *vdev = ioeventfd->vdev;387387+ struct vfio_pci_core_device *vdev = ioeventfd->vdev;388388389389 if (ioeventfd->test_mem) {390390 if (!down_read_trylock(&vdev->memory_lock))···410410 vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);411411}412412413413-long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,413413+long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,414414 uint64_t data, int count, int fd)415415{416416 struct pci_dev *pdev = vdev->pdev;
+2-2
drivers/vfio/pci/vfio_pci_zdev.c
···1414#include <asm/pci_clp.h>1515#include <asm/pci_io.h>16161717-#include "vfio_pci_private.h"1717+#include <linux/vfio_pci_core.h>18181919/*2020 * Add the Base PCI Function information to the device info region.···109109/*110110 * Add all supported capabilities to the VFIO_DEVICE_GET_INFO capability chain.111111 */112112-int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,112112+int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,113113 struct vfio_info_cap *caps)114114{115115 struct zpci_dev *zdev = to_zpci(vdev->pdev);
+4-2
drivers/vfio/platform/Kconfig
···11# SPDX-License-Identifier: GPL-2.0-only22config VFIO_PLATFORM33 tristate "VFIO support for platform devices"44- depends on VFIO && EVENTFD && (ARM || ARM64 || COMPILE_TEST)44+ depends on ARM || ARM64 || COMPILE_TEST55 select VFIO_VIRQFD66 help77 Support for platform devices with VFIO. This is required to make···10101111 If you don't know what to do here, say N.12121313+if VFIO_PLATFORM1314config VFIO_AMBA1415 tristate "VFIO support for AMBA devices"1515- depends on VFIO_PLATFORM && (ARM_AMBA || COMPILE_TEST)1616+ depends on ARM_AMBA || COMPILE_TEST1617 help1718 Support for ARM AMBA devices with VFIO. This is required to make1819 use of ARM AMBA devices present on the system using the VFIO···2221 If you don't know what to do here, say N.23222423source "drivers/vfio/platform/reset/Kconfig"2424+endif
+1-3
drivers/vfio/platform/reset/Kconfig
···11# SPDX-License-Identifier: GPL-2.0-only22config VFIO_PLATFORM_CALXEDAXGMAC_RESET33 tristate "VFIO support for calxeda xgmac reset"44- depends on VFIO_PLATFORM54 help65 Enables the VFIO platform driver to handle reset for Calxeda xgmac76···89910config VFIO_PLATFORM_AMDXGBE_RESET1011 tristate "VFIO support for AMD XGBE reset"1111- depends on VFIO_PLATFORM1212 help1313 Enables the VFIO platform driver to handle reset for AMD XGBE1414···15171618config VFIO_PLATFORM_BCMFLEXRM_RESET1719 tristate "VFIO support for Broadcom FlexRM reset"1818- depends on VFIO_PLATFORM && (ARCH_BCM_IPROC || COMPILE_TEST)2020+ depends on ARCH_BCM_IPROC || COMPILE_TEST1921 default ARCH_BCM_IPROC2022 help2123 Enables the VFIO platform driver to handle reset for Broadcom FlexRM
+4-4
drivers/vfio/vfio_iommu_type1.c
···612612static int vfio_find_dma_valid(struct vfio_iommu *iommu, dma_addr_t start,613613 size_t size, struct vfio_dma **dma_p)614614{615615- int ret;615615+ int ret = 0;616616617617 do {618618 *dma_p = vfio_find_dma(iommu, start, size);619619 if (!*dma_p)620620- ret = -EINVAL;620620+ return -EINVAL;621621 else if (!(*dma_p)->vaddr_invalid)622622- ret = 0;622622+ return ret;623623 else624624 ret = vfio_wait(iommu);625625- } while (ret > 0);625625+ } while (ret == WAITED);626626627627 return ret;628628}
+6
include/linux/mod_devicetable.h
···16161717#define PCI_ANY_ID (~0)18181919+enum {2020+ PCI_ID_F_VFIO_DRIVER_OVERRIDE = 1,2121+};2222+1923/**2024 * struct pci_device_id - PCI device ID structure2125 * @vendor: Vendor ID to match (or PCI_ANY_ID)···3834 * Best practice is to use driver_data as an index3935 * into a static list of equivalent device types,4036 * instead of using it as a pointer.3737+ * @override_only: Match only when dev->driver_override is this driver.4138 */4239struct pci_device_id {4340 __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/4441 __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */4542 __u32 class, class_mask; /* (class,subclass,prog-if) triplet */4643 kernel_ulong_t driver_data; /* Data private to the driver */4444+ __u32 override_only;4745};48464947
+29
include/linux/pci.h
···902902 .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID903903904904/**905905+ * PCI_DEVICE_DRIVER_OVERRIDE - macro used to describe a PCI device with906906+ * override_only flags.907907+ * @vend: the 16 bit PCI Vendor ID908908+ * @dev: the 16 bit PCI Device ID909909+ * @driver_override: the 32 bit PCI Device override_only910910+ *911911+ * This macro is used to create a struct pci_device_id that matches only a912912+ * driver_override device. The subvendor and subdevice fields will be set to913913+ * PCI_ANY_ID.914914+ */915915+#define PCI_DEVICE_DRIVER_OVERRIDE(vend, dev, driver_override) \916916+ .vendor = (vend), .device = (dev), .subvendor = PCI_ANY_ID, \917917+ .subdevice = PCI_ANY_ID, .override_only = (driver_override)918918+919919+/**920920+ * PCI_DRIVER_OVERRIDE_DEVICE_VFIO - macro used to describe a VFIO921921+ * "driver_override" PCI device.922922+ * @vend: the 16 bit PCI Vendor ID923923+ * @dev: the 16 bit PCI Device ID924924+ *925925+ * This macro is used to create a struct pci_device_id that matches a926926+ * specific device. The subvendor and subdevice fields will be set to927927+ * PCI_ANY_ID and the driver_override will be set to928928+ * PCI_ID_F_VFIO_DRIVER_OVERRIDE.929929+ */930930+#define PCI_DRIVER_OVERRIDE_DEVICE_VFIO(vend, dev) \931931+ PCI_DEVICE_DRIVER_OVERRIDE(vend, dev, PCI_ID_F_VFIO_DRIVER_OVERRIDE)932932+933933+/**905934 * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem906935 * @vend: the 16 bit PCI Vendor ID907936 * @dev: the 16 bit PCI Device ID