Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

- New fsl-mc vfio bus driver supporting userspace drivers of objects
within NXP's DPAA2 architecture (Diana Craciun)

- Support for exposing zPCI information on s390 (Matthew Rosato)

- Fixes for "detached" VFs on s390 (Matthew Rosato)

- Fixes for pin-pages and dma-rw accesses (Yan Zhao)

- Clean up and optimize vconfig regeneration (Zenghui Yu)

- Fix duplicate irq-bypass token registration (Alex Williamson)

* tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio: (30 commits)
vfio iommu type1: Fix memory leak in vfio_iommu_type1_pin_pages
vfio/pci: Clear token on bypass registration failure
vfio/fsl-mc: fix the return of the uninitialized variable ret
vfio/fsl-mc: Fix the dead code in vfio_fsl_mc_set_irq_trigger
vfio/fsl-mc: Fixed vfio-fsl-mc driver compilation on 32 bit
MAINTAINERS: Add entry for s390 vfio-pci
vfio-pci/zdev: Add zPCI capabilities to VFIO_DEVICE_GET_INFO
vfio/fsl-mc: Add support for device reset
vfio/fsl-mc: Add read/write support for fsl-mc devices
vfio/fsl-mc: trigger an interrupt via eventfd
vfio/fsl-mc: Add irq infrastructure for fsl-mc devices
vfio/fsl-mc: Added lock support in preparation for interrupt handling
vfio/fsl-mc: Allow userspace to MMAP fsl-mc device MMIO regions
vfio/fsl-mc: Implement VFIO_DEVICE_GET_REGION_INFO ioctl call
vfio/fsl-mc: Implement VFIO_DEVICE_GET_INFO ioctl
vfio/fsl-mc: Scan DPRC objects on vfio-fsl-mc driver bind
vfio: Introduce capability definitions for VFIO_DEVICE_GET_INFO
s390/pci: track whether util_str is valid in the zpci_dev
s390/pci: stash version in the zpci_dev
vfio/fsl-mc: Add VFIO framework skeleton for fsl-mc devices
...

+1330 -20
+14
MAINTAINERS
··· 15287 15287 F: drivers/s390/cio/vfio_ccw* 15288 15288 F: include/uapi/linux/vfio_ccw.h 15289 15289 15290 + S390 VFIO-PCI DRIVER 15291 + M: Matthew Rosato <mjrosato@linux.ibm.com> 15292 + L: linux-s390@vger.kernel.org 15293 + L: kvm@vger.kernel.org 15294 + S: Supported 15295 + F: drivers/vfio/pci/vfio_pci_zdev.c 15296 + F: include/uapi/linux/vfio_zdev.h 15297 + 15290 15298 S390 ZCRYPT DRIVER 15291 15299 M: Harald Freudenberger <freude@linux.ibm.com> 15292 15300 L: linux-s390@vger.kernel.org ··· 18392 18384 F: drivers/vfio/ 18393 18385 F: include/linux/vfio.h 18394 18386 F: include/uapi/linux/vfio.h 18387 + 18388 + VFIO FSL-MC DRIVER 18389 + M: Diana Craciun <diana.craciun@oss.nxp.com> 18390 + L: kvm@vger.kernel.org 18391 + S: Maintained 18392 + F: drivers/vfio/fsl-mc/ 18395 18393 18396 18394 VFIO MEDIATED DEVICE DRIVERS 18397 18395 M: Kirti Wankhede <kwankhede@nvidia.com>
+3 -1
arch/s390/include/asm/pci.h
··· 132 132 u8 rid_available : 1; 133 133 u8 has_hp_slot : 1; 134 134 u8 is_physfn : 1; 135 - u8 reserved : 5; 135 + u8 util_str_avail : 1; 136 + u8 reserved : 4; 136 137 unsigned int devfn; /* DEVFN part of the RID*/ 137 138 138 139 struct mutex lock; ··· 180 179 atomic64_t mapped_pages; 181 180 atomic64_t unmapped_pages; 182 181 182 + u8 version; 183 183 enum pci_bus_speed max_bus_speed; 184 184 185 185 struct dentry *debugfs_dev;
+3 -2
arch/s390/pci/pci_bus.c
··· 135 135 * With pdev->no_vf_scan the common PCI probing code does not 136 136 * perform PF/VF linking. 137 137 */ 138 - if (zdev->vfn) 138 + if (zdev->vfn) { 139 139 zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn); 140 - 140 + pdev->no_command_memory = 1; 141 + } 141 142 } 142 143 143 144 static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
+2
arch/s390/pci/pci_clp.c
··· 102 102 zdev->msi_addr = response->msia; 103 103 zdev->max_msi = response->noi; 104 104 zdev->fmb_update = response->mui; 105 + zdev->version = response->version; 105 106 106 107 switch (response->version) { 107 108 case 1: ··· 168 167 if (response->util_str_avail) { 169 168 memcpy(zdev->util_str, response->util_str, 170 169 sizeof(zdev->util_str)); 170 + zdev->util_str_avail = 1; 171 171 } 172 172 zdev->mio_capable = response->mio_addr_avail; 173 173 for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+1
drivers/pci/iov.c
··· 180 180 virtfn->device = iov->vf_device; 181 181 virtfn->is_virtfn = 1; 182 182 virtfn->physfn = pci_dev_get(dev); 183 + virtfn->no_command_memory = 1; 183 184 184 185 if (id == 0) 185 186 pci_read_vf_config_common(virtfn);
+1
drivers/vfio/Kconfig
··· 47 47 source "drivers/vfio/pci/Kconfig" 48 48 source "drivers/vfio/platform/Kconfig" 49 49 source "drivers/vfio/mdev/Kconfig" 50 + source "drivers/vfio/fsl-mc/Kconfig" 50 51 source "virt/lib/Kconfig"
+1
drivers/vfio/Makefile
··· 9 9 obj-$(CONFIG_VFIO_PCI) += pci/ 10 10 obj-$(CONFIG_VFIO_PLATFORM) += platform/ 11 11 obj-$(CONFIG_VFIO_MDEV) += mdev/ 12 + obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
+9
drivers/vfio/fsl-mc/Kconfig
··· 1 + config VFIO_FSL_MC 2 + tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" 3 + depends on VFIO && FSL_MC_BUS && EVENTFD 4 + help 5 + Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc 6 + (Management Complex) devices. This is required to passthrough 7 + fsl-mc bus devices using the VFIO framework. 8 + 9 + If you don't know what to do here, say N.
+4
drivers/vfio/fsl-mc/Makefile
··· 1 + # SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) 2 + 3 + vfio-fsl-mc-y := vfio_fsl_mc.o vfio_fsl_mc_intr.o 4 + obj-$(CONFIG_VFIO_FSL_MC) += vfio-fsl-mc.o
+683
drivers/vfio/fsl-mc/vfio_fsl_mc.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) 2 + /* 3 + * Copyright 2013-2016 Freescale Semiconductor Inc. 4 + * Copyright 2016-2017,2019-2020 NXP 5 + */ 6 + 7 + #include <linux/device.h> 8 + #include <linux/iommu.h> 9 + #include <linux/module.h> 10 + #include <linux/mutex.h> 11 + #include <linux/slab.h> 12 + #include <linux/types.h> 13 + #include <linux/vfio.h> 14 + #include <linux/fsl/mc.h> 15 + #include <linux/delay.h> 16 + #include <linux/io-64-nonatomic-hi-lo.h> 17 + 18 + #include "vfio_fsl_mc_private.h" 19 + 20 + static struct fsl_mc_driver vfio_fsl_mc_driver; 21 + 22 + static DEFINE_MUTEX(reflck_lock); 23 + 24 + static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck) 25 + { 26 + kref_get(&reflck->kref); 27 + } 28 + 29 + static void vfio_fsl_mc_reflck_release(struct kref *kref) 30 + { 31 + struct vfio_fsl_mc_reflck *reflck = container_of(kref, 32 + struct vfio_fsl_mc_reflck, 33 + kref); 34 + 35 + mutex_destroy(&reflck->lock); 36 + kfree(reflck); 37 + mutex_unlock(&reflck_lock); 38 + } 39 + 40 + static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck) 41 + { 42 + kref_put_mutex(&reflck->kref, vfio_fsl_mc_reflck_release, &reflck_lock); 43 + } 44 + 45 + static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void) 46 + { 47 + struct vfio_fsl_mc_reflck *reflck; 48 + 49 + reflck = kzalloc(sizeof(*reflck), GFP_KERNEL); 50 + if (!reflck) 51 + return ERR_PTR(-ENOMEM); 52 + 53 + kref_init(&reflck->kref); 54 + mutex_init(&reflck->lock); 55 + 56 + return reflck; 57 + } 58 + 59 + static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev) 60 + { 61 + int ret = 0; 62 + 63 + mutex_lock(&reflck_lock); 64 + if (is_fsl_mc_bus_dprc(vdev->mc_dev)) { 65 + vdev->reflck = vfio_fsl_mc_reflck_alloc(); 66 + ret = PTR_ERR_OR_ZERO(vdev->reflck); 67 + } else { 68 + struct device *mc_cont_dev = vdev->mc_dev->dev.parent; 69 + struct vfio_device *device; 70 + struct vfio_fsl_mc_device *cont_vdev; 71 + 72 + device = 
vfio_device_get_from_dev(mc_cont_dev); 73 + if (!device) { 74 + ret = -ENODEV; 75 + goto unlock; 76 + } 77 + 78 + cont_vdev = vfio_device_data(device); 79 + if (!cont_vdev || !cont_vdev->reflck) { 80 + vfio_device_put(device); 81 + ret = -ENODEV; 82 + goto unlock; 83 + } 84 + vfio_fsl_mc_reflck_get(cont_vdev->reflck); 85 + vdev->reflck = cont_vdev->reflck; 86 + vfio_device_put(device); 87 + } 88 + 89 + unlock: 90 + mutex_unlock(&reflck_lock); 91 + return ret; 92 + } 93 + 94 + static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev) 95 + { 96 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 97 + int count = mc_dev->obj_desc.region_count; 98 + int i; 99 + 100 + vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region), 101 + GFP_KERNEL); 102 + if (!vdev->regions) 103 + return -ENOMEM; 104 + 105 + for (i = 0; i < count; i++) { 106 + struct resource *res = &mc_dev->regions[i]; 107 + int no_mmap = is_fsl_mc_bus_dprc(mc_dev); 108 + 109 + vdev->regions[i].addr = res->start; 110 + vdev->regions[i].size = resource_size(res); 111 + vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS; 112 + /* 113 + * Only regions addressed with PAGE granularity may be 114 + * MMAPed securely. 
115 + */ 116 + if (!no_mmap && !(vdev->regions[i].addr & ~PAGE_MASK) && 117 + !(vdev->regions[i].size & ~PAGE_MASK)) 118 + vdev->regions[i].flags |= 119 + VFIO_REGION_INFO_FLAG_MMAP; 120 + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ; 121 + if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY)) 122 + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE; 123 + } 124 + 125 + return 0; 126 + } 127 + 128 + static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev) 129 + { 130 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 131 + int i; 132 + 133 + for (i = 0; i < mc_dev->obj_desc.region_count; i++) 134 + iounmap(vdev->regions[i].ioaddr); 135 + kfree(vdev->regions); 136 + } 137 + 138 + static int vfio_fsl_mc_open(void *device_data) 139 + { 140 + struct vfio_fsl_mc_device *vdev = device_data; 141 + int ret; 142 + 143 + if (!try_module_get(THIS_MODULE)) 144 + return -ENODEV; 145 + 146 + mutex_lock(&vdev->reflck->lock); 147 + if (!vdev->refcnt) { 148 + ret = vfio_fsl_mc_regions_init(vdev); 149 + if (ret) 150 + goto err_reg_init; 151 + } 152 + vdev->refcnt++; 153 + 154 + mutex_unlock(&vdev->reflck->lock); 155 + 156 + return 0; 157 + 158 + err_reg_init: 159 + mutex_unlock(&vdev->reflck->lock); 160 + module_put(THIS_MODULE); 161 + return ret; 162 + } 163 + 164 + static void vfio_fsl_mc_release(void *device_data) 165 + { 166 + struct vfio_fsl_mc_device *vdev = device_data; 167 + int ret; 168 + 169 + mutex_lock(&vdev->reflck->lock); 170 + 171 + if (!(--vdev->refcnt)) { 172 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 173 + struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev); 174 + struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev); 175 + 176 + vfio_fsl_mc_regions_cleanup(vdev); 177 + 178 + /* reset the device before cleaning up the interrupts */ 179 + ret = dprc_reset_container(mc_cont->mc_io, 0, 180 + mc_cont->mc_handle, 181 + mc_cont->obj_desc.id, 182 + DPRC_RESET_OPTION_NON_RECURSIVE); 183 + 184 + if (ret) { 185 + dev_warn(&mc_cont->dev, 
"VFIO_FLS_MC: reset device has failed (%d)\n", 186 + ret); 187 + WARN_ON(1); 188 + } 189 + 190 + vfio_fsl_mc_irqs_cleanup(vdev); 191 + 192 + fsl_mc_cleanup_irq_pool(mc_cont); 193 + } 194 + 195 + mutex_unlock(&vdev->reflck->lock); 196 + 197 + module_put(THIS_MODULE); 198 + } 199 + 200 + static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd, 201 + unsigned long arg) 202 + { 203 + unsigned long minsz; 204 + struct vfio_fsl_mc_device *vdev = device_data; 205 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 206 + 207 + switch (cmd) { 208 + case VFIO_DEVICE_GET_INFO: 209 + { 210 + struct vfio_device_info info; 211 + 212 + minsz = offsetofend(struct vfio_device_info, num_irqs); 213 + 214 + if (copy_from_user(&info, (void __user *)arg, minsz)) 215 + return -EFAULT; 216 + 217 + if (info.argsz < minsz) 218 + return -EINVAL; 219 + 220 + info.flags = VFIO_DEVICE_FLAGS_FSL_MC; 221 + 222 + if (is_fsl_mc_bus_dprc(mc_dev)) 223 + info.flags |= VFIO_DEVICE_FLAGS_RESET; 224 + 225 + info.num_regions = mc_dev->obj_desc.region_count; 226 + info.num_irqs = mc_dev->obj_desc.irq_count; 227 + 228 + return copy_to_user((void __user *)arg, &info, minsz) ? 
229 + -EFAULT : 0; 230 + } 231 + case VFIO_DEVICE_GET_REGION_INFO: 232 + { 233 + struct vfio_region_info info; 234 + 235 + minsz = offsetofend(struct vfio_region_info, offset); 236 + 237 + if (copy_from_user(&info, (void __user *)arg, minsz)) 238 + return -EFAULT; 239 + 240 + if (info.argsz < minsz) 241 + return -EINVAL; 242 + 243 + if (info.index >= mc_dev->obj_desc.region_count) 244 + return -EINVAL; 245 + 246 + /* map offset to the physical address */ 247 + info.offset = VFIO_FSL_MC_INDEX_TO_OFFSET(info.index); 248 + info.size = vdev->regions[info.index].size; 249 + info.flags = vdev->regions[info.index].flags; 250 + 251 + return copy_to_user((void __user *)arg, &info, minsz); 252 + } 253 + case VFIO_DEVICE_GET_IRQ_INFO: 254 + { 255 + struct vfio_irq_info info; 256 + 257 + minsz = offsetofend(struct vfio_irq_info, count); 258 + if (copy_from_user(&info, (void __user *)arg, minsz)) 259 + return -EFAULT; 260 + 261 + if (info.argsz < minsz) 262 + return -EINVAL; 263 + 264 + if (info.index >= mc_dev->obj_desc.irq_count) 265 + return -EINVAL; 266 + 267 + info.flags = VFIO_IRQ_INFO_EVENTFD; 268 + info.count = 1; 269 + 270 + return copy_to_user((void __user *)arg, &info, minsz); 271 + } 272 + case VFIO_DEVICE_SET_IRQS: 273 + { 274 + struct vfio_irq_set hdr; 275 + u8 *data = NULL; 276 + int ret = 0; 277 + size_t data_size = 0; 278 + 279 + minsz = offsetofend(struct vfio_irq_set, count); 280 + 281 + if (copy_from_user(&hdr, (void __user *)arg, minsz)) 282 + return -EFAULT; 283 + 284 + ret = vfio_set_irqs_validate_and_prepare(&hdr, mc_dev->obj_desc.irq_count, 285 + mc_dev->obj_desc.irq_count, &data_size); 286 + if (ret) 287 + return ret; 288 + 289 + if (data_size) { 290 + data = memdup_user((void __user *)(arg + minsz), 291 + data_size); 292 + if (IS_ERR(data)) 293 + return PTR_ERR(data); 294 + } 295 + 296 + mutex_lock(&vdev->igate); 297 + ret = vfio_fsl_mc_set_irqs_ioctl(vdev, hdr.flags, 298 + hdr.index, hdr.start, 299 + hdr.count, data); 300 + 
mutex_unlock(&vdev->igate); 301 + kfree(data); 302 + 303 + return ret; 304 + } 305 + case VFIO_DEVICE_RESET: 306 + { 307 + int ret; 308 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 309 + 310 + /* reset is supported only for the DPRC */ 311 + if (!is_fsl_mc_bus_dprc(mc_dev)) 312 + return -ENOTTY; 313 + 314 + ret = dprc_reset_container(mc_dev->mc_io, 0, 315 + mc_dev->mc_handle, 316 + mc_dev->obj_desc.id, 317 + DPRC_RESET_OPTION_NON_RECURSIVE); 318 + return ret; 319 + 320 + } 321 + default: 322 + return -ENOTTY; 323 + } 324 + } 325 + 326 + static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf, 327 + size_t count, loff_t *ppos) 328 + { 329 + struct vfio_fsl_mc_device *vdev = device_data; 330 + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos); 331 + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK; 332 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 333 + struct vfio_fsl_mc_region *region; 334 + u64 data[8]; 335 + int i; 336 + 337 + if (index >= mc_dev->obj_desc.region_count) 338 + return -EINVAL; 339 + 340 + region = &vdev->regions[index]; 341 + 342 + if (!(region->flags & VFIO_REGION_INFO_FLAG_READ)) 343 + return -EINVAL; 344 + 345 + if (!region->ioaddr) { 346 + region->ioaddr = ioremap(region->addr, region->size); 347 + if (!region->ioaddr) 348 + return -ENOMEM; 349 + } 350 + 351 + if (count != 64 || off != 0) 352 + return -EINVAL; 353 + 354 + for (i = 7; i >= 0; i--) 355 + data[i] = readq(region->ioaddr + i * sizeof(uint64_t)); 356 + 357 + if (copy_to_user(buf, data, 64)) 358 + return -EFAULT; 359 + 360 + return count; 361 + } 362 + 363 + #define MC_CMD_COMPLETION_TIMEOUT_MS 5000 364 + #define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500 365 + 366 + static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data) 367 + { 368 + int i; 369 + enum mc_cmd_status status; 370 + unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000; 371 + 372 + /* Write at command parameter into portal */ 373 + for (i = 7; i >= 1; i--) 374 + 
writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t)); 375 + 376 + /* Write command header in the end */ 377 + writeq(cmd_data[0], ioaddr); 378 + 379 + /* Wait for response before returning to user-space 380 + * This can be optimized in future to even prepare response 381 + * before returning to user-space and avoid read ioctl. 382 + */ 383 + for (;;) { 384 + u64 header; 385 + struct mc_cmd_header *resp_hdr; 386 + 387 + header = cpu_to_le64(readq_relaxed(ioaddr)); 388 + 389 + resp_hdr = (struct mc_cmd_header *)&header; 390 + status = (enum mc_cmd_status)resp_hdr->status; 391 + if (status != MC_CMD_STATUS_READY) 392 + break; 393 + 394 + udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS); 395 + timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS; 396 + if (timeout_usecs == 0) 397 + return -ETIMEDOUT; 398 + } 399 + 400 + return 0; 401 + } 402 + 403 + static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf, 404 + size_t count, loff_t *ppos) 405 + { 406 + struct vfio_fsl_mc_device *vdev = device_data; 407 + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos); 408 + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK; 409 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 410 + struct vfio_fsl_mc_region *region; 411 + u64 data[8]; 412 + int ret; 413 + 414 + if (index >= mc_dev->obj_desc.region_count) 415 + return -EINVAL; 416 + 417 + region = &vdev->regions[index]; 418 + 419 + if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE)) 420 + return -EINVAL; 421 + 422 + if (!region->ioaddr) { 423 + region->ioaddr = ioremap(region->addr, region->size); 424 + if (!region->ioaddr) 425 + return -ENOMEM; 426 + } 427 + 428 + if (count != 64 || off != 0) 429 + return -EINVAL; 430 + 431 + if (copy_from_user(&data, buf, 64)) 432 + return -EFAULT; 433 + 434 + ret = vfio_fsl_mc_send_command(region->ioaddr, data); 435 + if (ret) 436 + return ret; 437 + 438 + return count; 439 + 440 + } 441 + 442 + static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region, 443 + 
struct vm_area_struct *vma) 444 + { 445 + u64 size = vma->vm_end - vma->vm_start; 446 + u64 pgoff, base; 447 + u8 region_cacheable; 448 + 449 + pgoff = vma->vm_pgoff & 450 + ((1U << (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT)) - 1); 451 + base = pgoff << PAGE_SHIFT; 452 + 453 + if (region.size < PAGE_SIZE || base + size > region.size) 454 + return -EINVAL; 455 + 456 + region_cacheable = (region.type & FSL_MC_REGION_CACHEABLE) && 457 + (region.type & FSL_MC_REGION_SHAREABLE); 458 + if (!region_cacheable) 459 + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 460 + 461 + vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff; 462 + 463 + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, 464 + size, vma->vm_page_prot); 465 + } 466 + 467 + static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma) 468 + { 469 + struct vfio_fsl_mc_device *vdev = device_data; 470 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 471 + int index; 472 + 473 + index = vma->vm_pgoff >> (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT); 474 + 475 + if (vma->vm_end < vma->vm_start) 476 + return -EINVAL; 477 + if (vma->vm_start & ~PAGE_MASK) 478 + return -EINVAL; 479 + if (vma->vm_end & ~PAGE_MASK) 480 + return -EINVAL; 481 + if (!(vma->vm_flags & VM_SHARED)) 482 + return -EINVAL; 483 + if (index >= mc_dev->obj_desc.region_count) 484 + return -EINVAL; 485 + 486 + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP)) 487 + return -EINVAL; 488 + 489 + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) 490 + && (vma->vm_flags & VM_READ)) 491 + return -EINVAL; 492 + 493 + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) 494 + && (vma->vm_flags & VM_WRITE)) 495 + return -EINVAL; 496 + 497 + vma->vm_private_data = mc_dev; 498 + 499 + return vfio_fsl_mc_mmap_mmio(vdev->regions[index], vma); 500 + } 501 + 502 + static const struct vfio_device_ops vfio_fsl_mc_ops = { 503 + .name = "vfio-fsl-mc", 504 + .open = vfio_fsl_mc_open, 505 + .release = 
vfio_fsl_mc_release, 506 + .ioctl = vfio_fsl_mc_ioctl, 507 + .read = vfio_fsl_mc_read, 508 + .write = vfio_fsl_mc_write, 509 + .mmap = vfio_fsl_mc_mmap, 510 + }; 511 + 512 + static int vfio_fsl_mc_bus_notifier(struct notifier_block *nb, 513 + unsigned long action, void *data) 514 + { 515 + struct vfio_fsl_mc_device *vdev = container_of(nb, 516 + struct vfio_fsl_mc_device, nb); 517 + struct device *dev = data; 518 + struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); 519 + struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent); 520 + 521 + if (action == BUS_NOTIFY_ADD_DEVICE && 522 + vdev->mc_dev == mc_cont) { 523 + mc_dev->driver_override = kasprintf(GFP_KERNEL, "%s", 524 + vfio_fsl_mc_ops.name); 525 + if (!mc_dev->driver_override) 526 + dev_warn(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s failed\n", 527 + dev_name(&mc_cont->dev)); 528 + else 529 + dev_info(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s\n", 530 + dev_name(&mc_cont->dev)); 531 + } else if (action == BUS_NOTIFY_BOUND_DRIVER && 532 + vdev->mc_dev == mc_cont) { 533 + struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); 534 + 535 + if (mc_drv && mc_drv != &vfio_fsl_mc_driver) 536 + dev_warn(dev, "VFIO_FSL_MC: Object %s bound to driver %s while DPRC bound to vfio-fsl-mc\n", 537 + dev_name(dev), mc_drv->driver.name); 538 + } 539 + 540 + return 0; 541 + } 542 + 543 + static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev) 544 + { 545 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 546 + int ret; 547 + 548 + /* Non-dprc devices share mc_io from parent */ 549 + if (!is_fsl_mc_bus_dprc(mc_dev)) { 550 + struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent); 551 + 552 + mc_dev->mc_io = mc_cont->mc_io; 553 + return 0; 554 + } 555 + 556 + vdev->nb.notifier_call = vfio_fsl_mc_bus_notifier; 557 + ret = bus_register_notifier(&fsl_mc_bus_type, &vdev->nb); 558 + if (ret) 559 + return ret; 560 + 561 + /* open DPRC, allocate a MC 
portal */ 562 + ret = dprc_setup(mc_dev); 563 + if (ret) { 564 + dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", ret); 565 + goto out_nc_unreg; 566 + } 567 + 568 + ret = dprc_scan_container(mc_dev, false); 569 + if (ret) { 570 + dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", ret); 571 + goto out_dprc_cleanup; 572 + } 573 + 574 + return 0; 575 + 576 + out_dprc_cleanup: 577 + dprc_remove_devices(mc_dev, NULL, 0); 578 + dprc_cleanup(mc_dev); 579 + out_nc_unreg: 580 + bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb); 581 + vdev->nb.notifier_call = NULL; 582 + 583 + return ret; 584 + } 585 + 586 + static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) 587 + { 588 + struct iommu_group *group; 589 + struct vfio_fsl_mc_device *vdev; 590 + struct device *dev = &mc_dev->dev; 591 + int ret; 592 + 593 + group = vfio_iommu_group_get(dev); 594 + if (!group) { 595 + dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n"); 596 + return -EINVAL; 597 + } 598 + 599 + vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL); 600 + if (!vdev) { 601 + ret = -ENOMEM; 602 + goto out_group_put; 603 + } 604 + 605 + vdev->mc_dev = mc_dev; 606 + 607 + ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev); 608 + if (ret) { 609 + dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n"); 610 + goto out_group_put; 611 + } 612 + 613 + ret = vfio_fsl_mc_reflck_attach(vdev); 614 + if (ret) 615 + goto out_group_dev; 616 + 617 + ret = vfio_fsl_mc_init_device(vdev); 618 + if (ret) 619 + goto out_reflck; 620 + 621 + mutex_init(&vdev->igate); 622 + 623 + return 0; 624 + 625 + out_reflck: 626 + vfio_fsl_mc_reflck_put(vdev->reflck); 627 + out_group_dev: 628 + vfio_del_group_dev(dev); 629 + out_group_put: 630 + vfio_iommu_group_put(group, dev); 631 + return ret; 632 + } 633 + 634 + static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) 635 + { 636 + struct vfio_fsl_mc_device *vdev; 637 + struct device *dev = &mc_dev->dev; 638 + 639 + vdev = vfio_del_group_dev(dev); 
640 + if (!vdev) 641 + return -EINVAL; 642 + 643 + mutex_destroy(&vdev->igate); 644 + 645 + vfio_fsl_mc_reflck_put(vdev->reflck); 646 + 647 + if (is_fsl_mc_bus_dprc(mc_dev)) { 648 + dprc_remove_devices(mc_dev, NULL, 0); 649 + dprc_cleanup(mc_dev); 650 + } 651 + 652 + if (vdev->nb.notifier_call) 653 + bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb); 654 + 655 + vfio_iommu_group_put(mc_dev->dev.iommu_group, dev); 656 + 657 + return 0; 658 + } 659 + 660 + static struct fsl_mc_driver vfio_fsl_mc_driver = { 661 + .probe = vfio_fsl_mc_probe, 662 + .remove = vfio_fsl_mc_remove, 663 + .driver = { 664 + .name = "vfio-fsl-mc", 665 + .owner = THIS_MODULE, 666 + }, 667 + }; 668 + 669 + static int __init vfio_fsl_mc_driver_init(void) 670 + { 671 + return fsl_mc_driver_register(&vfio_fsl_mc_driver); 672 + } 673 + 674 + static void __exit vfio_fsl_mc_driver_exit(void) 675 + { 676 + fsl_mc_driver_unregister(&vfio_fsl_mc_driver); 677 + } 678 + 679 + module_init(vfio_fsl_mc_driver_init); 680 + module_exit(vfio_fsl_mc_driver_exit); 681 + 682 + MODULE_LICENSE("Dual BSD/GPL"); 683 + MODULE_DESCRIPTION("VFIO for FSL-MC devices - User Level meta-driver");
+194
drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) 2 + /* 3 + * Copyright 2013-2016 Freescale Semiconductor Inc. 4 + * Copyright 2019 NXP 5 + */ 6 + 7 + #include <linux/vfio.h> 8 + #include <linux/slab.h> 9 + #include <linux/types.h> 10 + #include <linux/eventfd.h> 11 + #include <linux/msi.h> 12 + 13 + #include "linux/fsl/mc.h" 14 + #include "vfio_fsl_mc_private.h" 15 + 16 + int vfio_fsl_mc_irqs_allocate(struct vfio_fsl_mc_device *vdev) 17 + { 18 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 19 + struct vfio_fsl_mc_irq *mc_irq; 20 + int irq_count; 21 + int ret, i; 22 + 23 + /* Device does not support any interrupt */ 24 + if (mc_dev->obj_desc.irq_count == 0) 25 + return 0; 26 + 27 + /* interrupts were already allocated for this device */ 28 + if (vdev->mc_irqs) 29 + return 0; 30 + 31 + irq_count = mc_dev->obj_desc.irq_count; 32 + 33 + mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL); 34 + if (!mc_irq) 35 + return -ENOMEM; 36 + 37 + /* Allocate IRQs */ 38 + ret = fsl_mc_allocate_irqs(mc_dev); 39 + if (ret) { 40 + kfree(mc_irq); 41 + return ret; 42 + } 43 + 44 + for (i = 0; i < irq_count; i++) { 45 + mc_irq[i].count = 1; 46 + mc_irq[i].flags = VFIO_IRQ_INFO_EVENTFD; 47 + } 48 + 49 + vdev->mc_irqs = mc_irq; 50 + 51 + return 0; 52 + } 53 + 54 + static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg) 55 + { 56 + struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg; 57 + 58 + eventfd_signal(mc_irq->trigger, 1); 59 + return IRQ_HANDLED; 60 + } 61 + 62 + static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev, 63 + int index, int fd) 64 + { 65 + struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index]; 66 + struct eventfd_ctx *trigger; 67 + int hwirq; 68 + int ret; 69 + 70 + hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq; 71 + if (irq->trigger) { 72 + free_irq(hwirq, irq); 73 + kfree(irq->name); 74 + eventfd_ctx_put(irq->trigger); 75 + irq->trigger = NULL; 76 + } 77 + 78 + if (fd < 0) /* Disable only */ 79 + return 0; 80 + 81 + 
irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", 82 + hwirq, dev_name(&vdev->mc_dev->dev)); 83 + if (!irq->name) 84 + return -ENOMEM; 85 + 86 + trigger = eventfd_ctx_fdget(fd); 87 + if (IS_ERR(trigger)) { 88 + kfree(irq->name); 89 + return PTR_ERR(trigger); 90 + } 91 + 92 + irq->trigger = trigger; 93 + 94 + ret = request_irq(hwirq, vfio_fsl_mc_irq_handler, 0, 95 + irq->name, irq); 96 + if (ret) { 97 + kfree(irq->name); 98 + eventfd_ctx_put(trigger); 99 + irq->trigger = NULL; 100 + return ret; 101 + } 102 + 103 + return 0; 104 + } 105 + 106 + static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, 107 + unsigned int index, unsigned int start, 108 + unsigned int count, u32 flags, 109 + void *data) 110 + { 111 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 112 + int ret, hwirq; 113 + struct vfio_fsl_mc_irq *irq; 114 + struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev); 115 + struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev); 116 + 117 + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) 118 + return vfio_set_trigger(vdev, index, -1); 119 + 120 + if (start != 0 || count != 1) 121 + return -EINVAL; 122 + 123 + mutex_lock(&vdev->reflck->lock); 124 + ret = fsl_mc_populate_irq_pool(mc_cont, 125 + FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS); 126 + if (ret) 127 + goto unlock; 128 + 129 + ret = vfio_fsl_mc_irqs_allocate(vdev); 130 + if (ret) 131 + goto unlock; 132 + mutex_unlock(&vdev->reflck->lock); 133 + 134 + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { 135 + s32 fd = *(s32 *)data; 136 + 137 + return vfio_set_trigger(vdev, index, fd); 138 + } 139 + 140 + hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq; 141 + 142 + irq = &vdev->mc_irqs[index]; 143 + 144 + if (flags & VFIO_IRQ_SET_DATA_NONE) { 145 + vfio_fsl_mc_irq_handler(hwirq, irq); 146 + 147 + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { 148 + u8 trigger = *(u8 *)data; 149 + 150 + if (trigger) 151 + vfio_fsl_mc_irq_handler(hwirq, irq); 152 + } 153 + 154 + return 0; 155 + 156 + unlock: 157 + 
mutex_unlock(&vdev->reflck->lock); 158 + return ret; 159 + 160 + } 161 + 162 + int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev, 163 + u32 flags, unsigned int index, 164 + unsigned int start, unsigned int count, 165 + void *data) 166 + { 167 + if (flags & VFIO_IRQ_SET_ACTION_TRIGGER) 168 + return vfio_fsl_mc_set_irq_trigger(vdev, index, start, 169 + count, flags, data); 170 + else 171 + return -EINVAL; 172 + } 173 + 174 + /* Free All IRQs for the given MC object */ 175 + void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev) 176 + { 177 + struct fsl_mc_device *mc_dev = vdev->mc_dev; 178 + int irq_count = mc_dev->obj_desc.irq_count; 179 + int i; 180 + 181 + /* 182 + * Device does not support any interrupt or the interrupts 183 + * were not configured 184 + */ 185 + if (!vdev->mc_irqs) 186 + return; 187 + 188 + for (i = 0; i < irq_count; i++) 189 + vfio_set_trigger(vdev, i, -1); 190 + 191 + fsl_mc_free_irqs(mc_dev); 192 + kfree(vdev->mc_irqs); 193 + vdev->mc_irqs = NULL; 194 + }
+55
drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
··· 1 + /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ 2 + /* 3 + * Copyright 2013-2016 Freescale Semiconductor Inc. 4 + * Copyright 2016,2019-2020 NXP 5 + */ 6 + 7 + #ifndef VFIO_FSL_MC_PRIVATE_H 8 + #define VFIO_FSL_MC_PRIVATE_H 9 + 10 + #define VFIO_FSL_MC_OFFSET_SHIFT 40 11 + #define VFIO_FSL_MC_OFFSET_MASK (((u64)(1) << VFIO_FSL_MC_OFFSET_SHIFT) - 1) 12 + 13 + #define VFIO_FSL_MC_OFFSET_TO_INDEX(off) ((off) >> VFIO_FSL_MC_OFFSET_SHIFT) 14 + 15 + #define VFIO_FSL_MC_INDEX_TO_OFFSET(index) \ 16 + ((u64)(index) << VFIO_FSL_MC_OFFSET_SHIFT) 17 + 18 + struct vfio_fsl_mc_irq { 19 + u32 flags; 20 + u32 count; 21 + struct eventfd_ctx *trigger; 22 + char *name; 23 + }; 24 + 25 + struct vfio_fsl_mc_reflck { 26 + struct kref kref; 27 + struct mutex lock; 28 + }; 29 + 30 + struct vfio_fsl_mc_region { 31 + u32 flags; 32 + u32 type; 33 + u64 addr; 34 + resource_size_t size; 35 + void __iomem *ioaddr; 36 + }; 37 + 38 + struct vfio_fsl_mc_device { 39 + struct fsl_mc_device *mc_dev; 40 + struct notifier_block nb; 41 + int refcnt; 42 + struct vfio_fsl_mc_region *regions; 43 + struct vfio_fsl_mc_reflck *reflck; 44 + struct mutex igate; 45 + struct vfio_fsl_mc_irq *mc_irqs; 46 + }; 47 + 48 + extern int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev, 49 + u32 flags, unsigned int index, 50 + unsigned int start, unsigned int count, 51 + void *data); 52 + 53 + void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev); 54 + 55 + #endif /* VFIO_FSL_MC_PRIVATE_H */
+12
drivers/vfio/pci/Kconfig
··· 45 45 depends on VFIO_PCI && PPC_POWERNV 46 46 help 47 47 VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs 48 + 49 + config VFIO_PCI_ZDEV 50 + bool "VFIO PCI ZPCI device CLP support" 51 + depends on VFIO_PCI && S390 52 + default y 53 + help 54 + Enabling this option exposes VFIO capabilities containing hardware 55 + configuration for zPCI devices. This enables userspace (e.g. QEMU) 56 + to supply proper configuration values instead of hard-coded defaults 57 + for zPCI devices passed through via VFIO on s390. 58 + 59 + Say Y here.
+1
drivers/vfio/pci/Makefile
··· 3 3 vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o 4 4 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o 5 5 vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o 6 + vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o 6 7 7 8 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
+37 -1
drivers/vfio/pci/vfio_pci.c
··· 807 807 808 808 if (cmd == VFIO_DEVICE_GET_INFO) { 809 809 struct vfio_device_info info; 810 + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; 811 + unsigned long capsz; 810 812 811 813 minsz = offsetofend(struct vfio_device_info, num_irqs); 814 + 815 + /* For backward compatibility, cannot require this */ 816 + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); 812 817 813 818 if (copy_from_user(&info, (void __user *)arg, minsz)) 814 819 return -EFAULT; 815 820 816 821 if (info.argsz < minsz) 817 822 return -EINVAL; 823 + 824 + if (info.argsz >= capsz) { 825 + minsz = capsz; 826 + info.cap_offset = 0; 827 + } 818 828 819 829 info.flags = VFIO_DEVICE_FLAGS_PCI; 820 830 ··· 833 823 834 824 info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions; 835 825 info.num_irqs = VFIO_PCI_NUM_IRQS; 826 + 827 + if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) { 828 + int ret = vfio_pci_info_zdev_add_caps(vdev, &caps); 829 + 830 + if (ret && ret != -ENODEV) { 831 + pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n"); 832 + return ret; 833 + } 834 + } 835 + 836 + if (caps.size) { 837 + info.flags |= VFIO_DEVICE_FLAGS_CAPS; 838 + if (info.argsz < sizeof(info) + caps.size) { 839 + info.argsz = sizeof(info) + caps.size; 840 + } else { 841 + vfio_info_cap_shift(&caps, sizeof(info)); 842 + if (copy_to_user((void __user *)arg + 843 + sizeof(info), caps.buf, 844 + caps.size)) { 845 + kfree(caps.buf); 846 + return -EFAULT; 847 + } 848 + info.cap_offset = sizeof(info); 849 + } 850 + 851 + kfree(caps.buf); 852 + } 836 853 837 854 return copy_to_user((void __user *)arg, &info, minsz) ? 838 855 -EFAULT : 0; ··· 1897 1860 1898 1861 static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev); 1899 1862 static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck); 1900 - static struct pci_driver vfio_pci_driver; 1901 1863 1902 1864 static int vfio_pci_bus_notifier(struct notifier_block *nb, 1903 1865 unsigned long action, void *data)
+17 -10
drivers/vfio/pci/vfio_pci_config.c
··· 406 406 * PF SR-IOV capability, there's therefore no need to trigger 407 407 * faults based on the virtual value. 408 408 */ 409 - return pdev->is_virtfn || (cmd & PCI_COMMAND_MEMORY); 409 + return pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY); 410 410 } 411 411 412 412 /* ··· 467 467 __le32 *vbar; 468 468 u64 mask; 469 469 470 + if (!vdev->bardirty) 471 + return; 472 + 470 473 vbar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0]; 471 474 472 475 for (i = 0; i < PCI_STD_NUM_BARS; i++, vbar++) { ··· 523 520 524 521 count = vfio_default_config_read(vdev, pos, count, perm, offset, val); 525 522 526 - /* Mask in virtual memory enable for SR-IOV devices */ 527 - if (offset == PCI_COMMAND && vdev->pdev->is_virtfn) { 523 + /* Mask in virtual memory enable */ 524 + if (offset == PCI_COMMAND && vdev->pdev->no_command_memory) { 528 525 u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]); 529 526 u32 tmp_val = le32_to_cpu(*val); 530 527 ··· 592 589 * shows it disabled (phys_mem/io, then the device has 593 590 * undergone some kind of backdoor reset and needs to be 594 591 * restored before we allow it to enable the bars. 595 - * SR-IOV devices will trigger this, but we catch them later 592 + * SR-IOV devices will trigger this - for mem enable let's 593 + * catch this now and for io enable it will be caught later 596 594 */ 597 - if ((new_mem && virt_mem && !phys_mem) || 595 + if ((new_mem && virt_mem && !phys_mem && 596 + !pdev->no_command_memory) || 598 597 (new_io && virt_io && !phys_io) || 599 598 vfio_need_bar_restore(vdev)) 600 599 vfio_bar_restore(vdev); ··· 1739 1734 vconfig[PCI_INTERRUPT_PIN]); 1740 1735 1741 1736 vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */ 1742 - 1737 + } 1738 + if (pdev->no_command_memory) { 1743 1739 /* 1744 - * VFs do no implement the memory enable bit of the COMMAND 1745 - * register therefore we'll not have it set in our initial 1746 - * copy of config space after pci_enable_device(). 
For 1747 - * consistency with PFs, set the virtual enable bit here. 1740 + * VFs and devices that set pdev->no_command_memory do not 1741 + * implement the memory enable bit of the COMMAND register 1742 + * therefore we'll not have it set in our initial copy of 1743 + * config space after pci_enable_device(). For consistency 1744 + * with PFs, set the virtual enable bit here. 1748 1745 */ 1749 1746 *(__le16 *)&vconfig[PCI_COMMAND] |= 1750 1747 cpu_to_le16(PCI_COMMAND_MEMORY);
+3 -1
drivers/vfio/pci/vfio_pci_intrs.c
··· 352 352 vdev->ctx[vector].producer.token = trigger; 353 353 vdev->ctx[vector].producer.irq = irq; 354 354 ret = irq_bypass_register_producer(&vdev->ctx[vector].producer); 355 - if (unlikely(ret)) 355 + if (unlikely(ret)) { 356 356 dev_info(&pdev->dev, 357 357 "irq bypass producer (token %p) registration fails: %d\n", 358 358 vdev->ctx[vector].producer.token, ret); 359 359 360 + vdev->ctx[vector].producer.token = NULL; 361 + } 360 362 vdev->ctx[vector].trigger = trigger; 361 363 362 364 return 0;
+12
drivers/vfio/pci/vfio_pci_private.h
··· 213 213 return -ENODEV; 214 214 } 215 215 #endif 216 + 217 + #ifdef CONFIG_VFIO_PCI_ZDEV 218 + extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, 219 + struct vfio_info_cap *caps); 220 + #else 221 + static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, 222 + struct vfio_info_cap *caps) 223 + { 224 + return -ENODEV; 225 + } 226 + #endif 227 + 216 228 #endif /* VFIO_PCI_PRIVATE_H */
+143
drivers/vfio/pci/vfio_pci_zdev.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* 3 + * VFIO ZPCI devices support 4 + * 5 + * Copyright (C) IBM Corp. 2020. All rights reserved. 6 + * Author(s): Pierre Morel <pmorel@linux.ibm.com> 7 + * Matthew Rosato <mjrosato@linux.ibm.com> 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License version 2 as 11 + * published by the Free Software Foundation. 12 + * 13 + */ 14 + #include <linux/io.h> 15 + #include <linux/pci.h> 16 + #include <linux/uaccess.h> 17 + #include <linux/vfio.h> 18 + #include <linux/vfio_zdev.h> 19 + #include <asm/pci_clp.h> 20 + #include <asm/pci_io.h> 21 + 22 + #include "vfio_pci_private.h" 23 + 24 + /* 25 + * Add the Base PCI Function information to the device info region. 26 + */ 27 + static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, 28 + struct vfio_info_cap *caps) 29 + { 30 + struct vfio_device_info_cap_zpci_base cap = { 31 + .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE, 32 + .header.version = 1, 33 + .start_dma = zdev->start_dma, 34 + .end_dma = zdev->end_dma, 35 + .pchid = zdev->pchid, 36 + .vfn = zdev->vfn, 37 + .fmb_length = zdev->fmb_length, 38 + .pft = zdev->pft, 39 + .gid = zdev->pfgid 40 + }; 41 + 42 + return vfio_info_add_capability(caps, &cap.header, sizeof(cap)); 43 + } 44 + 45 + /* 46 + * Add the Base PCI Function Group information to the device info region. 
47 + */ 48 + static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, 49 + struct vfio_info_cap *caps) 50 + { 51 + struct vfio_device_info_cap_zpci_group cap = { 52 + .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP, 53 + .header.version = 1, 54 + .dasm = zdev->dma_mask, 55 + .msi_addr = zdev->msi_addr, 56 + .flags = VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH, 57 + .mui = zdev->fmb_update, 58 + .noi = zdev->max_msi, 59 + .maxstbl = ZPCI_MAX_WRITE_SIZE, 60 + .version = zdev->version 61 + }; 62 + 63 + return vfio_info_add_capability(caps, &cap.header, sizeof(cap)); 64 + } 65 + 66 + /* 67 + * Add the device utility string to the device info region. 68 + */ 69 + static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, 70 + struct vfio_info_cap *caps) 71 + { 72 + struct vfio_device_info_cap_zpci_util *cap; 73 + int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN; 74 + int ret; 75 + 76 + cap = kmalloc(cap_size, GFP_KERNEL); 77 + 78 + cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL; 79 + cap->header.version = 1; 80 + cap->size = CLP_UTIL_STR_LEN; 81 + memcpy(cap->util_str, zdev->util_str, cap->size); 82 + 83 + ret = vfio_info_add_capability(caps, &cap->header, cap_size); 84 + 85 + kfree(cap); 86 + 87 + return ret; 88 + } 89 + 90 + /* 91 + * Add the function path string to the device info region. 
92 + */ 93 + static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, 94 + struct vfio_info_cap *caps) 95 + { 96 + struct vfio_device_info_cap_zpci_pfip *cap; 97 + int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS; 98 + int ret; 99 + 100 + cap = kmalloc(cap_size, GFP_KERNEL); 101 + 102 + cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP; 103 + cap->header.version = 1; 104 + cap->size = CLP_PFIP_NR_SEGMENTS; 105 + memcpy(cap->pfip, zdev->pfip, cap->size); 106 + 107 + ret = vfio_info_add_capability(caps, &cap->header, cap_size); 108 + 109 + kfree(cap); 110 + 111 + return ret; 112 + } 113 + 114 + /* 115 + * Add all supported capabilities to the VFIO_DEVICE_GET_INFO capability chain. 116 + */ 117 + int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, 118 + struct vfio_info_cap *caps) 119 + { 120 + struct zpci_dev *zdev = to_zpci(vdev->pdev); 121 + int ret; 122 + 123 + if (!zdev) 124 + return -ENODEV; 125 + 126 + ret = zpci_base_cap(zdev, vdev, caps); 127 + if (ret) 128 + return ret; 129 + 130 + ret = zpci_group_cap(zdev, vdev, caps); 131 + if (ret) 132 + return ret; 133 + 134 + if (zdev->util_str_avail) { 135 + ret = zpci_util_cap(zdev, vdev, caps); 136 + if (ret) 137 + return ret; 138 + } 139 + 140 + ret = zpci_pfip_cap(zdev, vdev, caps); 141 + 142 + return ret; 143 + }
+7 -2
drivers/vfio/vfio.c
··· 1949 1949 if (!group) 1950 1950 return -ENODEV; 1951 1951 1952 - if (group->dev_counter > 1) 1953 - return -EINVAL; 1952 + if (group->dev_counter > 1) { 1953 + ret = -EINVAL; 1954 + goto err_pin_pages; 1955 + } 1954 1956 1955 1957 ret = vfio_group_add_container_user(group); 1956 1958 if (ret) ··· 2051 2049 int ret; 2052 2050 2053 2051 if (!group || !user_iova_pfn || !phys_pfn || !npage) 2052 + return -EINVAL; 2053 + 2054 + if (group->dev_counter > 1) 2054 2055 return -EINVAL; 2055 2056 2056 2057 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
+21 -2
drivers/vfio/vfio_iommu_type1.c
··· 693 693 694 694 ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); 695 695 if (ret) { 696 - vfio_unpin_page_external(dma, iova, do_accounting); 696 + if (put_pfn(phys_pfn[i], dma->prot) && do_accounting) 697 + vfio_lock_acct(dma, -1, true); 697 698 goto pin_unwind; 698 699 } 699 700 ··· 2610 2609 return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig)); 2611 2610 } 2612 2611 2612 + static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu, 2613 + struct vfio_info_cap *caps) 2614 + { 2615 + struct vfio_iommu_type1_info_dma_avail cap_dma_avail; 2616 + 2617 + cap_dma_avail.header.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL; 2618 + cap_dma_avail.header.version = 1; 2619 + 2620 + cap_dma_avail.avail = iommu->dma_avail; 2621 + 2622 + return vfio_info_add_capability(caps, &cap_dma_avail.header, 2623 + sizeof(cap_dma_avail)); 2624 + } 2625 + 2613 2626 static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, 2614 2627 unsigned long arg) 2615 2628 { ··· 2655 2640 info.iova_pgsizes = iommu->pgsize_bitmap; 2656 2641 2657 2642 ret = vfio_iommu_migration_build_caps(iommu, &caps); 2643 + 2644 + if (!ret) 2645 + ret = vfio_iommu_dma_avail_build_caps(iommu, &caps); 2658 2646 2659 2647 if (!ret) 2660 2648 ret = vfio_iommu_iova_build_caps(iommu, &caps); ··· 2951 2933 * size 2952 2934 */ 2953 2935 bitmap_set(dma->bitmap, offset >> pgshift, 2954 - *copied >> pgshift); 2936 + ((offset + *copied - 1) >> pgshift) - 2937 + (offset >> pgshift) + 1); 2955 2938 } 2956 2939 } else 2957 2940 *copied = copy_from_user(data, (void __user *)vaddr,
+1
include/linux/pci.h
··· 446 446 unsigned int is_probed:1; /* Device probing in progress */ 447 447 unsigned int link_active_reporting:1;/* Device capable of reporting link active */ 448 448 unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ 449 + unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ 449 450 pci_dev_flags_t dev_flags; 450 451 atomic_t enable_cnt; /* pci_enable_device has been called */ 451 452
+28 -1
include/uapi/linux/vfio.h
··· 201 201 #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ 202 202 #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ 203 203 #define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ 204 + #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ 205 + #define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ 204 206 __u32 num_regions; /* Max region index + 1 */ 205 207 __u32 num_irqs; /* Max IRQ index + 1 */ 208 + __u32 cap_offset; /* Offset within info struct of first cap */ 206 209 }; 207 210 #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) 208 211 ··· 220 217 #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" 221 218 #define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" 222 219 #define VFIO_DEVICE_API_AP_STRING "vfio-ap" 220 + 221 + /* 222 + * The following capabilities are unique to s390 zPCI devices. Their contents 223 + * are further-defined in vfio_zdev.h 224 + */ 225 + #define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 226 + #define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 227 + #define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 228 + #define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 223 229 224 230 /** 225 231 * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, ··· 474 462 * 5. Resumed 475 463 * |--------->| 476 464 * 477 - * 0. Default state of VFIO device is _RUNNNG when the user application starts. 465 + * 0. Default state of VFIO device is _RUNNING when the user application starts. 478 466 * 1. During normal shutdown of the user application, the user application may 479 467 * optionally change the VFIO device state from _RUNNING to _STOP. This 480 468 * transition is optional. The vendor driver must support this transition but ··· 1049 1037 __u32 flags; 1050 1038 __u64 pgsize_bitmap; 1051 1039 __u64 max_dirty_bitmap_size; /* in bytes */ 1040 + }; 1041 + 1042 + /* 1043 + * The DMA available capability allows to report the current number of 1044 + * simultaneously outstanding DMA mappings that are allowed. 
1045 + * 1046 + * The structure below defines version 1 of this capability. 1047 + * 1048 + * avail: specifies the current number of outstanding DMA mappings allowed. 1049 + */ 1050 + #define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 1051 + 1052 + struct vfio_iommu_type1_info_dma_avail { 1053 + struct vfio_info_cap_header header; 1054 + __u32 avail; 1052 1055 }; 1053 1056 1054 1057 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+78
include/uapi/linux/vfio_zdev.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + /* 3 + * VFIO Region definitions for ZPCI devices 4 + * 5 + * Copyright IBM Corp. 2020 6 + * 7 + * Author(s): Pierre Morel <pmorel@linux.ibm.com> 8 + * Matthew Rosato <mjrosato@linux.ibm.com> 9 + */ 10 + 11 + #ifndef _VFIO_ZDEV_H_ 12 + #define _VFIO_ZDEV_H_ 13 + 14 + #include <linux/types.h> 15 + #include <linux/vfio.h> 16 + 17 + /** 18 + * VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information 19 + * 20 + * This capability provides a set of descriptive information about the 21 + * associated PCI function. 22 + */ 23 + struct vfio_device_info_cap_zpci_base { 24 + struct vfio_info_cap_header header; 25 + __u64 start_dma; /* Start of available DMA addresses */ 26 + __u64 end_dma; /* End of available DMA addresses */ 27 + __u16 pchid; /* Physical Channel ID */ 28 + __u16 vfn; /* Virtual function number */ 29 + __u16 fmb_length; /* Measurement Block Length (in bytes) */ 30 + __u8 pft; /* PCI Function Type */ 31 + __u8 gid; /* PCI function group ID */ 32 + }; 33 + 34 + /** 35 + * VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information 36 + * 37 + * This capability provides a set of descriptive information about the group of 38 + * PCI functions that the associated device belongs to. 
39 + */ 40 + struct vfio_device_info_cap_zpci_group { 41 + struct vfio_info_cap_header header; 42 + __u64 dasm; /* DMA Address space mask */ 43 + __u64 msi_addr; /* MSI address */ 44 + __u64 flags; 45 + #define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */ 46 + __u16 mui; /* Measurement Block Update Interval */ 47 + __u16 noi; /* Maximum number of MSIs */ 48 + __u16 maxstbl; /* Maximum Store Block Length */ 49 + __u8 version; /* Supported PCI Version */ 50 + }; 51 + 52 + /** 53 + * VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String 54 + * 55 + * This capability provides the utility string for the associated device, which 56 + * is a device identifier string made up of EBCDIC characters. 'size' specifies 57 + * the length of 'util_str'. 58 + */ 59 + struct vfio_device_info_cap_zpci_util { 60 + struct vfio_info_cap_header header; 61 + __u32 size; 62 + __u8 util_str[]; 63 + }; 64 + 65 + /** 66 + * VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path 67 + * 68 + * This capability provides the PCI function path string, which is an identifier 69 + * that describes the internal hardware path of the device. 'size' specifies 70 + * the length of 'pfip'. 71 + */ 72 + struct vfio_device_info_cap_zpci_pfip { 73 + struct vfio_info_cap_header header; 74 + __u32 size; 75 + __u8 pfip[]; 76 + }; 77 + 78 + #endif