Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: add PCI over virtio emulation driver

To support testing of PCI/PCIe drivers in UML, add a PCI bus
support driver. This driver uses virtio, which in UML is really
just vhost-user, to talk to devices, and adds the devices to
the virtual PCI bus in the system.

Since virtio already allows DMA/bus mastering this really isn't
all that hard; of course, we need the logic_iomem infrastructure
that was added by a previous patch.

The protocol to talk to the device has a few fairly simple
messages for reading from/writing to config and IO spaces, and
messages for the device to send the various interrupts (INT#,
MSI/MSI-X and while suspended PME#).

Note that currently no official virtio device ID is assigned for
this protocol, as a consequence this patch requires defining it
in the Kconfig, with a default that makes the driver refuse to
work at all.

Finally, in order to add support for MSI/MSI-X interrupts, some
small changes are needed in the UML IRQ code, it needs to have
more interrupts, changing NR_IRQS from 64 to 128 if this driver
is enabled, but not actually use them for anything so that the
generic IRQ domain/MSI infrastructure can allocate IRQ numbers.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

authored by

Johannes Berg and committed by
Richard Weinberger
68f5d3f3 a5ab7c84

+1054 -6
+12 -1
arch/um/Kconfig
··· 15 15 select HAVE_FUTEX_CMPXCHG if FUTEX 16 16 select HAVE_DEBUG_KMEMLEAK 17 17 select HAVE_DEBUG_BUGVERBOSE 18 - select NO_DMA 18 + select NO_DMA if !UML_DMA_EMULATION 19 19 select GENERIC_IRQ_SHOW 20 20 select GENERIC_CPU_DEVICES 21 21 select HAVE_GCC_PLUGINS ··· 26 26 bool 27 27 default y 28 28 29 + config UML_DMA_EMULATION 30 + bool 31 + 29 32 config NO_IOMEM 30 33 bool "disable IOMEM" if EXPERT 34 + depends on !INDIRECT_IOMEM 31 35 default y 36 + 37 + config UML_IOMEM_EMULATION 38 + bool 39 + select INDIRECT_IOMEM 40 + select GENERIC_PCI_IOMAP 41 + select GENERIC_IOMAP 42 + select NO_GENERIC_PCI_IOPORT_MAP 32 43 33 44 config NO_IOPORT_MAP 34 45 def_bool y
+20
arch/um/drivers/Kconfig
··· 357 357 rtcwake, especially in time-travel mode. This driver enables that 358 358 by providing a fake RTC clock that causes a wakeup at the right 359 359 time. 360 + 361 + config UML_PCI_OVER_VIRTIO 362 + bool "Enable PCI over VIRTIO device simulation" 363 + # in theory, just VIRTIO is enough, but that causes recursion 364 + depends on VIRTIO_UML 365 + select FORCE_PCI 366 + select UML_IOMEM_EMULATION 367 + select UML_DMA_EMULATION 368 + select PCI_MSI 369 + select PCI_MSI_IRQ_DOMAIN 370 + select PCI_LOCKLESS_CONFIG 371 + 372 + config UML_PCI_OVER_VIRTIO_DEVICE_ID 373 + int "set the virtio device ID for PCI emulation" 374 + default -1 375 + depends on UML_PCI_OVER_VIRTIO 376 + help 377 + There's no official device ID assigned (yet), set the one you 378 + wish to use for experimentation here. The default of -1 is 379 + not valid and will cause the driver to fail at probe.
+1
arch/um/drivers/Makefile
··· 64 64 obj-$(CONFIG_UML_RANDOM) += random.o 65 65 obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o 66 66 obj-$(CONFIG_UML_RTC) += rtc.o 67 + obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o 67 68 68 69 # pcap_user.o must be added explicitly. 69 70 USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
+885
arch/um/drivers/virt-pci.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2020 Intel Corporation 4 + * Author: Johannes Berg <johannes@sipsolutions.net> 5 + */ 6 + #include <linux/module.h> 7 + #include <linux/pci.h> 8 + #include <linux/virtio.h> 9 + #include <linux/virtio_config.h> 10 + #include <linux/logic_iomem.h> 11 + #include <linux/irqdomain.h> 12 + #include <linux/virtio_pcidev.h> 13 + #include <linux/delay.h> 14 + #include <linux/msi.h> 15 + #include <asm/unaligned.h> 16 + #include <irq_kern.h> 17 + 18 + #define MAX_DEVICES 8 19 + #define MAX_MSI_VECTORS 32 20 + #define CFG_SPACE_SIZE 4096 21 + 22 + /* for MSI-X we have a 32-bit payload */ 23 + #define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) 24 + #define NUM_IRQ_MSGS 10 25 + 26 + #define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1)) 27 + #define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1) 28 + 29 + struct um_pci_device { 30 + struct virtio_device *vdev; 31 + 32 + /* for now just standard BARs */ 33 + u8 resptr[PCI_STD_NUM_BARS]; 34 + 35 + struct virtqueue *cmd_vq, *irq_vq; 36 + 37 + #define UM_PCI_STAT_WAITING 0 38 + unsigned long status; 39 + 40 + int irq; 41 + }; 42 + 43 + struct um_pci_device_reg { 44 + struct um_pci_device *dev; 45 + void __iomem *iomem; 46 + }; 47 + 48 + static struct pci_host_bridge *bridge; 49 + static DEFINE_MUTEX(um_pci_mtx); 50 + static struct um_pci_device_reg um_pci_devices[MAX_DEVICES]; 51 + static struct fwnode_handle *um_pci_fwnode; 52 + static struct irq_domain *um_pci_inner_domain; 53 + static struct irq_domain *um_pci_msi_domain; 54 + static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; 55 + 56 + #define UM_VIRT_PCI_MAXDELAY 40000 57 + 58 + static int um_pci_send_cmd(struct um_pci_device *dev, 59 + struct virtio_pcidev_msg *cmd, 60 + unsigned int cmd_size, 61 + const void *extra, unsigned int extra_size, 62 + void *out, unsigned int out_size) 63 + { 64 + struct scatterlist out_sg, extra_sg, in_sg; 65 + struct 
scatterlist *sgs_list[] = { 66 + [0] = &out_sg, 67 + [1] = extra ? &extra_sg : &in_sg, 68 + [2] = extra ? &in_sg : NULL, 69 + }; 70 + int delay_count = 0; 71 + int ret, len; 72 + bool posted; 73 + 74 + if (WARN_ON(cmd_size < sizeof(*cmd))) 75 + return -EINVAL; 76 + 77 + switch (cmd->op) { 78 + case VIRTIO_PCIDEV_OP_CFG_WRITE: 79 + case VIRTIO_PCIDEV_OP_MMIO_WRITE: 80 + case VIRTIO_PCIDEV_OP_MMIO_MEMSET: 81 + /* in PCI, writes are posted, so don't wait */ 82 + posted = !out; 83 + WARN_ON(!posted); 84 + break; 85 + default: 86 + posted = false; 87 + break; 88 + } 89 + 90 + if (posted) { 91 + u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); 92 + 93 + if (ncmd) { 94 + memcpy(ncmd, cmd, cmd_size); 95 + if (extra) 96 + memcpy(ncmd + cmd_size, extra, extra_size); 97 + cmd = (void *)ncmd; 98 + cmd_size += extra_size; 99 + extra = NULL; 100 + extra_size = 0; 101 + } else { 102 + /* try without allocating memory */ 103 + posted = false; 104 + } 105 + } 106 + 107 + sg_init_one(&out_sg, cmd, cmd_size); 108 + if (extra) 109 + sg_init_one(&extra_sg, extra, extra_size); 110 + if (out) 111 + sg_init_one(&in_sg, out, out_size); 112 + 113 + /* add to internal virtio queue */ 114 + ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, 115 + extra ? 2 : 1, 116 + out ? 1 : 0, 117 + posted ? 
cmd : HANDLE_NO_FREE(cmd), 118 + GFP_ATOMIC); 119 + if (ret) 120 + return ret; 121 + 122 + if (posted) { 123 + virtqueue_kick(dev->cmd_vq); 124 + return 0; 125 + } 126 + 127 + /* kick and poll for getting a response on the queue */ 128 + set_bit(UM_PCI_STAT_WAITING, &dev->status); 129 + virtqueue_kick(dev->cmd_vq); 130 + 131 + while (1) { 132 + void *completed = virtqueue_get_buf(dev->cmd_vq, &len); 133 + 134 + if (completed == HANDLE_NO_FREE(cmd)) 135 + break; 136 + 137 + if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || 138 + ++delay_count > UM_VIRT_PCI_MAXDELAY, 139 + "um virt-pci delay: %d", delay_count)) { 140 + ret = -EIO; 141 + break; 142 + } 143 + udelay(1); 144 + } 145 + clear_bit(UM_PCI_STAT_WAITING, &dev->status); 146 + 147 + return ret; 148 + } 149 + 150 + static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, 151 + int size) 152 + { 153 + struct um_pci_device_reg *reg = priv; 154 + struct um_pci_device *dev = reg->dev; 155 + struct virtio_pcidev_msg hdr = { 156 + .op = VIRTIO_PCIDEV_OP_CFG_READ, 157 + .size = size, 158 + .addr = offset, 159 + }; 160 + /* maximum size - we may only use parts of it */ 161 + u8 data[8]; 162 + 163 + if (!dev) 164 + return ~0ULL; 165 + 166 + memset(data, 0xff, sizeof(data)); 167 + 168 + switch (size) { 169 + case 1: 170 + case 2: 171 + case 4: 172 + #ifdef CONFIG_64BIT 173 + case 8: 174 + #endif 175 + break; 176 + default: 177 + WARN(1, "invalid config space read size %d\n", size); 178 + return ~0ULL; 179 + } 180 + 181 + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, 182 + data, sizeof(data))) 183 + return ~0ULL; 184 + 185 + switch (size) { 186 + case 1: 187 + return data[0]; 188 + case 2: 189 + return le16_to_cpup((void *)data); 190 + case 4: 191 + return le32_to_cpup((void *)data); 192 + #ifdef CONFIG_64BIT 193 + case 8: 194 + return le64_to_cpup((void *)data); 195 + #endif 196 + default: 197 + return ~0ULL; 198 + } 199 + } 200 + 201 + static void um_pci_cfgspace_write(void *priv, unsigned int 
offset, int size, 202 + unsigned long val) 203 + { 204 + struct um_pci_device_reg *reg = priv; 205 + struct um_pci_device *dev = reg->dev; 206 + struct { 207 + struct virtio_pcidev_msg hdr; 208 + /* maximum size - we may only use parts of it */ 209 + u8 data[8]; 210 + } msg = { 211 + .hdr = { 212 + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, 213 + .size = size, 214 + .addr = offset, 215 + }, 216 + }; 217 + 218 + if (!dev) 219 + return; 220 + 221 + switch (size) { 222 + case 1: 223 + msg.data[0] = (u8)val; 224 + break; 225 + case 2: 226 + put_unaligned_le16(val, (void *)msg.data); 227 + break; 228 + case 4: 229 + put_unaligned_le32(val, (void *)msg.data); 230 + break; 231 + #ifdef CONFIG_64BIT 232 + case 8: 233 + put_unaligned_le64(val, (void *)msg.data); 234 + break; 235 + #endif 236 + default: 237 + WARN(1, "invalid config space write size %d\n", size); 238 + return; 239 + } 240 + 241 + WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); 242 + } 243 + 244 + static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { 245 + .read = um_pci_cfgspace_read, 246 + .write = um_pci_cfgspace_write, 247 + }; 248 + 249 + static void um_pci_bar_copy_from(void *priv, void *buffer, 250 + unsigned int offset, int size) 251 + { 252 + u8 *resptr = priv; 253 + struct um_pci_device *dev = container_of(resptr - *resptr, 254 + struct um_pci_device, 255 + resptr[0]); 256 + struct virtio_pcidev_msg hdr = { 257 + .op = VIRTIO_PCIDEV_OP_MMIO_READ, 258 + .bar = *resptr, 259 + .size = size, 260 + .addr = offset, 261 + }; 262 + 263 + memset(buffer, 0xff, size); 264 + 265 + um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); 266 + } 267 + 268 + static unsigned long um_pci_bar_read(void *priv, unsigned int offset, 269 + int size) 270 + { 271 + /* maximum size - we may only use parts of it */ 272 + u8 data[8]; 273 + 274 + switch (size) { 275 + case 1: 276 + case 2: 277 + case 4: 278 + #ifdef CONFIG_64BIT 279 + case 8: 280 + #endif 281 + break; 282 + default: 283 + 
WARN(1, "invalid config space read size %d\n", size); 284 + return ~0ULL; 285 + } 286 + 287 + um_pci_bar_copy_from(priv, data, offset, size); 288 + 289 + switch (size) { 290 + case 1: 291 + return data[0]; 292 + case 2: 293 + return le16_to_cpup((void *)data); 294 + case 4: 295 + return le32_to_cpup((void *)data); 296 + #ifdef CONFIG_64BIT 297 + case 8: 298 + return le64_to_cpup((void *)data); 299 + #endif 300 + default: 301 + return ~0ULL; 302 + } 303 + } 304 + 305 + static void um_pci_bar_copy_to(void *priv, unsigned int offset, 306 + const void *buffer, int size) 307 + { 308 + u8 *resptr = priv; 309 + struct um_pci_device *dev = container_of(resptr - *resptr, 310 + struct um_pci_device, 311 + resptr[0]); 312 + struct virtio_pcidev_msg hdr = { 313 + .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, 314 + .bar = *resptr, 315 + .size = size, 316 + .addr = offset, 317 + }; 318 + 319 + um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); 320 + } 321 + 322 + static void um_pci_bar_write(void *priv, unsigned int offset, int size, 323 + unsigned long val) 324 + { 325 + /* maximum size - we may only use parts of it */ 326 + u8 data[8]; 327 + 328 + switch (size) { 329 + case 1: 330 + data[0] = (u8)val; 331 + break; 332 + case 2: 333 + put_unaligned_le16(val, (void *)data); 334 + break; 335 + case 4: 336 + put_unaligned_le32(val, (void *)data); 337 + break; 338 + #ifdef CONFIG_64BIT 339 + case 8: 340 + put_unaligned_le64(val, (void *)data); 341 + break; 342 + #endif 343 + default: 344 + WARN(1, "invalid config space write size %d\n", size); 345 + return; 346 + } 347 + 348 + um_pci_bar_copy_to(priv, offset, data, size); 349 + } 350 + 351 + static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) 352 + { 353 + u8 *resptr = priv; 354 + struct um_pci_device *dev = container_of(resptr - *resptr, 355 + struct um_pci_device, 356 + resptr[0]); 357 + struct { 358 + struct virtio_pcidev_msg hdr; 359 + u8 data; 360 + } msg = { 361 + .hdr = { 362 + .op = 
VIRTIO_PCIDEV_OP_CFG_WRITE, 363 + .bar = *resptr, 364 + .size = size, 365 + .addr = offset, 366 + }, 367 + .data = value, 368 + }; 369 + 370 + um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); 371 + } 372 + 373 + static const struct logic_iomem_ops um_pci_device_bar_ops = { 374 + .read = um_pci_bar_read, 375 + .write = um_pci_bar_write, 376 + .set = um_pci_bar_set, 377 + .copy_from = um_pci_bar_copy_from, 378 + .copy_to = um_pci_bar_copy_to, 379 + }; 380 + 381 + static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn, 382 + int where) 383 + { 384 + struct um_pci_device_reg *dev; 385 + unsigned int busn = bus->number; 386 + 387 + if (busn > 0) 388 + return NULL; 389 + 390 + /* not allowing functions for now ... */ 391 + if (devfn % 8) 392 + return NULL; 393 + 394 + if (devfn / 8 >= ARRAY_SIZE(um_pci_devices)) 395 + return NULL; 396 + 397 + dev = &um_pci_devices[devfn / 8]; 398 + if (!dev) 399 + return NULL; 400 + 401 + return (void __iomem *)((unsigned long)dev->iomem + where); 402 + } 403 + 404 + static struct pci_ops um_pci_ops = { 405 + .map_bus = um_pci_map_bus, 406 + .read = pci_generic_config_read, 407 + .write = pci_generic_config_write, 408 + }; 409 + 410 + static void um_pci_rescan(void) 411 + { 412 + pci_lock_rescan_remove(); 413 + pci_rescan_bus(bridge->bus); 414 + pci_unlock_rescan_remove(); 415 + } 416 + 417 + static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) 418 + { 419 + struct scatterlist sg[1]; 420 + 421 + sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); 422 + if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) 423 + kfree(buf); 424 + else if (kick) 425 + virtqueue_kick(vq); 426 + } 427 + 428 + static void um_pci_handle_irq_message(struct virtqueue *vq, 429 + struct virtio_pcidev_msg *msg) 430 + { 431 + struct virtio_device *vdev = vq->vdev; 432 + struct um_pci_device *dev = vdev->priv; 433 + 434 + /* we should properly chain interrupts, but on ARCH=um we don't care */ 435 + 436 + switch (msg->op) 
{ 437 + case VIRTIO_PCIDEV_OP_INT: 438 + generic_handle_irq(dev->irq); 439 + break; 440 + case VIRTIO_PCIDEV_OP_MSI: 441 + /* our MSI message is just the interrupt number */ 442 + if (msg->size == sizeof(u32)) 443 + generic_handle_irq(le32_to_cpup((void *)msg->data)); 444 + else 445 + generic_handle_irq(le16_to_cpup((void *)msg->data)); 446 + break; 447 + case VIRTIO_PCIDEV_OP_PME: 448 + /* nothing to do - we already woke up due to the message */ 449 + break; 450 + default: 451 + dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); 452 + break; 453 + } 454 + } 455 + 456 + static void um_pci_cmd_vq_cb(struct virtqueue *vq) 457 + { 458 + struct virtio_device *vdev = vq->vdev; 459 + struct um_pci_device *dev = vdev->priv; 460 + void *cmd; 461 + int len; 462 + 463 + if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) 464 + return; 465 + 466 + while ((cmd = virtqueue_get_buf(vq, &len))) { 467 + if (WARN_ON(HANDLE_IS_NO_FREE(cmd))) 468 + continue; 469 + kfree(cmd); 470 + } 471 + } 472 + 473 + static void um_pci_irq_vq_cb(struct virtqueue *vq) 474 + { 475 + struct virtio_pcidev_msg *msg; 476 + int len; 477 + 478 + while ((msg = virtqueue_get_buf(vq, &len))) { 479 + if (len >= sizeof(*msg)) 480 + um_pci_handle_irq_message(vq, msg); 481 + 482 + /* recycle the message buffer */ 483 + um_pci_irq_vq_addbuf(vq, msg, true); 484 + } 485 + } 486 + 487 + static int um_pci_init_vqs(struct um_pci_device *dev) 488 + { 489 + struct virtqueue *vqs[2]; 490 + static const char *const names[2] = { "cmd", "irq" }; 491 + vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb }; 492 + int err, i; 493 + 494 + err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL); 495 + if (err) 496 + return err; 497 + 498 + dev->cmd_vq = vqs[0]; 499 + dev->irq_vq = vqs[1]; 500 + 501 + for (i = 0; i < NUM_IRQ_MSGS; i++) { 502 + void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); 503 + 504 + if (msg) 505 + um_pci_irq_vq_addbuf(dev->irq_vq, msg, false); 506 + } 507 + 508 + 
virtqueue_kick(dev->irq_vq); 509 + 510 + return 0; 511 + } 512 + 513 + static int um_pci_virtio_probe(struct virtio_device *vdev) 514 + { 515 + struct um_pci_device *dev; 516 + int i, free = -1; 517 + int err = -ENOSPC; 518 + 519 + dev = kzalloc(sizeof(*dev), GFP_KERNEL); 520 + if (!dev) 521 + return -ENOMEM; 522 + 523 + dev->vdev = vdev; 524 + vdev->priv = dev; 525 + 526 + mutex_lock(&um_pci_mtx); 527 + for (i = 0; i < MAX_DEVICES; i++) { 528 + if (um_pci_devices[i].dev) 529 + continue; 530 + free = i; 531 + break; 532 + } 533 + 534 + if (free < 0) 535 + goto error; 536 + 537 + err = um_pci_init_vqs(dev); 538 + if (err) 539 + goto error; 540 + 541 + dev->irq = irq_alloc_desc(numa_node_id()); 542 + if (dev->irq < 0) { 543 + err = dev->irq; 544 + goto error; 545 + } 546 + um_pci_devices[free].dev = dev; 547 + vdev->priv = dev; 548 + 549 + mutex_unlock(&um_pci_mtx); 550 + 551 + device_set_wakeup_enable(&vdev->dev, true); 552 + 553 + um_pci_rescan(); 554 + return 0; 555 + error: 556 + mutex_unlock(&um_pci_mtx); 557 + kfree(dev); 558 + return err; 559 + } 560 + 561 + static void um_pci_virtio_remove(struct virtio_device *vdev) 562 + { 563 + struct um_pci_device *dev = vdev->priv; 564 + int i; 565 + 566 + /* Stop all virtqueues */ 567 + vdev->config->reset(vdev); 568 + vdev->config->del_vqs(vdev); 569 + 570 + device_set_wakeup_enable(&vdev->dev, false); 571 + 572 + mutex_lock(&um_pci_mtx); 573 + for (i = 0; i < MAX_DEVICES; i++) { 574 + if (um_pci_devices[i].dev != dev) 575 + continue; 576 + um_pci_devices[i].dev = NULL; 577 + irq_free_desc(dev->irq); 578 + } 579 + mutex_unlock(&um_pci_mtx); 580 + 581 + um_pci_rescan(); 582 + 583 + kfree(dev); 584 + } 585 + 586 + static struct virtio_device_id id_table[] = { 587 + { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, 588 + { 0 }, 589 + }; 590 + MODULE_DEVICE_TABLE(virtio, id_table); 591 + 592 + static struct virtio_driver um_pci_virtio_driver = { 593 + .driver.name = "virtio-pci", 594 + .driver.owner = 
THIS_MODULE, 595 + .id_table = id_table, 596 + .probe = um_pci_virtio_probe, 597 + .remove = um_pci_virtio_remove, 598 + }; 599 + 600 + static struct resource virt_cfgspace_resource = { 601 + .name = "PCI config space", 602 + .start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE, 603 + .end = 0xf0000000 - 1, 604 + .flags = IORESOURCE_MEM, 605 + }; 606 + 607 + static long um_pci_map_cfgspace(unsigned long offset, size_t size, 608 + const struct logic_iomem_ops **ops, 609 + void **priv) 610 + { 611 + if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE)) 612 + return -EINVAL; 613 + 614 + if (offset / CFG_SPACE_SIZE < MAX_DEVICES) { 615 + *ops = &um_pci_device_cfgspace_ops; 616 + *priv = &um_pci_devices[offset / CFG_SPACE_SIZE]; 617 + return 0; 618 + } 619 + 620 + WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size); 621 + return -ENOENT; 622 + } 623 + 624 + static const struct logic_iomem_region_ops um_pci_cfgspace_ops = { 625 + .map = um_pci_map_cfgspace, 626 + }; 627 + 628 + static struct resource virt_iomem_resource = { 629 + .name = "PCI iomem", 630 + .start = 0xf0000000, 631 + .end = 0xffffffff, 632 + .flags = IORESOURCE_MEM, 633 + }; 634 + 635 + struct um_pci_map_iomem_data { 636 + unsigned long offset; 637 + size_t size; 638 + const struct logic_iomem_ops **ops; 639 + void **priv; 640 + long ret; 641 + }; 642 + 643 + static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data) 644 + { 645 + struct um_pci_map_iomem_data *data = _data; 646 + struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8]; 647 + struct um_pci_device *dev; 648 + int i; 649 + 650 + if (!reg->dev) 651 + return 0; 652 + 653 + for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) { 654 + struct resource *r = &pdev->resource[i]; 655 + 656 + if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM) 657 + continue; 658 + 659 + /* 660 + * must be the whole or part of the resource, 661 + * not allowed to only overlap 662 + */ 663 + if (data->offset < r->start || data->offset > 
r->end) 664 + continue; 665 + if (data->offset + data->size - 1 > r->end) 666 + continue; 667 + 668 + dev = reg->dev; 669 + *data->ops = &um_pci_device_bar_ops; 670 + dev->resptr[i] = i; 671 + *data->priv = &dev->resptr[i]; 672 + data->ret = data->offset - r->start; 673 + 674 + /* no need to continue */ 675 + return 1; 676 + } 677 + 678 + return 0; 679 + } 680 + 681 + static long um_pci_map_iomem(unsigned long offset, size_t size, 682 + const struct logic_iomem_ops **ops, 683 + void **priv) 684 + { 685 + struct um_pci_map_iomem_data data = { 686 + /* we want the full address here */ 687 + .offset = offset + virt_iomem_resource.start, 688 + .size = size, 689 + .ops = ops, 690 + .priv = priv, 691 + .ret = -ENOENT, 692 + }; 693 + 694 + pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data); 695 + return data.ret; 696 + } 697 + 698 + static const struct logic_iomem_region_ops um_pci_iomem_ops = { 699 + .map = um_pci_map_iomem, 700 + }; 701 + 702 + static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) 703 + { 704 + /* 705 + * This is a very low address and not actually valid 'physical' memory 706 + * in UML, so we can simply map MSI(-X) vectors to there, it cannot be 707 + * legitimately written to by the device in any other way. 708 + * We use the (virtual) IRQ number here as the message to simplify the 709 + * code that receives the message, where for now we simply trust the 710 + * device to send the correct message. 
711 + */ 712 + msg->address_hi = 0; 713 + msg->address_lo = 0xa0000; 714 + msg->data = data->irq; 715 + } 716 + 717 + static struct irq_chip um_pci_msi_bottom_irq_chip = { 718 + .name = "UM virtio MSI", 719 + .irq_compose_msi_msg = um_pci_compose_msi_msg, 720 + }; 721 + 722 + static int um_pci_inner_domain_alloc(struct irq_domain *domain, 723 + unsigned int virq, unsigned int nr_irqs, 724 + void *args) 725 + { 726 + unsigned long bit; 727 + 728 + WARN_ON(nr_irqs != 1); 729 + 730 + mutex_lock(&um_pci_mtx); 731 + bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS); 732 + if (bit >= MAX_MSI_VECTORS) { 733 + mutex_unlock(&um_pci_mtx); 734 + return -ENOSPC; 735 + } 736 + 737 + set_bit(bit, um_pci_msi_used); 738 + mutex_unlock(&um_pci_mtx); 739 + 740 + irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip, 741 + domain->host_data, handle_simple_irq, 742 + NULL, NULL); 743 + 744 + return 0; 745 + } 746 + 747 + static void um_pci_inner_domain_free(struct irq_domain *domain, 748 + unsigned int virq, unsigned int nr_irqs) 749 + { 750 + struct irq_data *d = irq_domain_get_irq_data(domain, virq); 751 + 752 + mutex_lock(&um_pci_mtx); 753 + 754 + if (!test_bit(d->hwirq, um_pci_msi_used)) 755 + pr_err("trying to free unused MSI#%lu\n", d->hwirq); 756 + else 757 + __clear_bit(d->hwirq, um_pci_msi_used); 758 + 759 + mutex_unlock(&um_pci_mtx); 760 + } 761 + 762 + static const struct irq_domain_ops um_pci_inner_domain_ops = { 763 + .alloc = um_pci_inner_domain_alloc, 764 + .free = um_pci_inner_domain_free, 765 + }; 766 + 767 + static struct irq_chip um_pci_msi_irq_chip = { 768 + .name = "UM virtio PCIe MSI", 769 + .irq_mask = pci_msi_mask_irq, 770 + .irq_unmask = pci_msi_unmask_irq, 771 + }; 772 + 773 + static struct msi_domain_info um_pci_msi_domain_info = { 774 + .flags = MSI_FLAG_USE_DEF_DOM_OPS | 775 + MSI_FLAG_USE_DEF_CHIP_OPS | 776 + MSI_FLAG_PCI_MSIX, 777 + .chip = &um_pci_msi_irq_chip, 778 + }; 779 + 780 + static struct resource busn_resource = { 781 + 
.name = "PCI busn", 782 + .start = 0, 783 + .end = 0, 784 + .flags = IORESOURCE_BUS, 785 + }; 786 + 787 + static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin) 788 + { 789 + struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8]; 790 + 791 + if (WARN_ON(!reg->dev)) 792 + return -EINVAL; 793 + 794 + /* Yes, we map all pins to the same IRQ ... doesn't matter for now. */ 795 + return reg->dev->irq; 796 + } 797 + 798 + void *pci_root_bus_fwnode(struct pci_bus *bus) 799 + { 800 + return um_pci_fwnode; 801 + } 802 + 803 + int um_pci_init(void) 804 + { 805 + int err, i; 806 + 807 + WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource, 808 + &um_pci_cfgspace_ops)); 809 + WARN_ON(logic_iomem_add_region(&virt_iomem_resource, 810 + &um_pci_iomem_ops)); 811 + 812 + if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, 813 + "No virtio device ID configured for PCI - no PCI support\n")) 814 + return 0; 815 + 816 + bridge = pci_alloc_host_bridge(0); 817 + if (!bridge) 818 + return -ENOMEM; 819 + 820 + um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci"); 821 + if (!um_pci_fwnode) { 822 + err = -ENOMEM; 823 + goto free; 824 + } 825 + 826 + um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS, 827 + MAX_MSI_VECTORS, 0, 828 + &um_pci_inner_domain_ops, NULL); 829 + if (!um_pci_inner_domain) { 830 + err = -ENOMEM; 831 + goto free; 832 + } 833 + 834 + um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode, 835 + &um_pci_msi_domain_info, 836 + um_pci_inner_domain); 837 + if (!um_pci_msi_domain) { 838 + err = -ENOMEM; 839 + goto free; 840 + } 841 + 842 + pci_add_resource(&bridge->windows, &virt_iomem_resource); 843 + pci_add_resource(&bridge->windows, &busn_resource); 844 + bridge->ops = &um_pci_ops; 845 + bridge->map_irq = um_pci_map_irq; 846 + 847 + for (i = 0; i < MAX_DEVICES; i++) { 848 + resource_size_t start; 849 + 850 + start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE; 851 + um_pci_devices[i].iomem = ioremap(start, 
CFG_SPACE_SIZE); 852 + if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) { 853 + err = -ENOMEM; 854 + goto free; 855 + } 856 + } 857 + 858 + err = pci_host_probe(bridge); 859 + if (err) 860 + goto free; 861 + 862 + err = register_virtio_driver(&um_pci_virtio_driver); 863 + if (err) 864 + goto free; 865 + return 0; 866 + free: 867 + if (um_pci_inner_domain) 868 + irq_domain_remove(um_pci_inner_domain); 869 + if (um_pci_fwnode) 870 + irq_domain_free_fwnode(um_pci_fwnode); 871 + pci_free_resource_list(&bridge->windows); 872 + pci_free_host_bridge(bridge); 873 + return err; 874 + } 875 + module_init(um_pci_init); 876 + 877 + void um_pci_exit(void) 878 + { 879 + unregister_virtio_driver(&um_pci_virtio_driver); 880 + irq_domain_remove(um_pci_msi_domain); 881 + irq_domain_remove(um_pci_inner_domain); 882 + pci_free_resource_list(&bridge->windows); 883 + pci_free_host_bridge(bridge); 884 + } 885 + module_exit(um_pci_exit);
-1
arch/um/include/asm/Kbuild
··· 18 18 generic-y += mmiowb.h 19 19 generic-y += module.lds.h 20 20 generic-y += param.h 21 - generic-y += pci.h 22 21 generic-y += percpu.h 23 22 generic-y += preempt.h 24 23 generic-y += softirq_stack.h
+7
arch/um/include/asm/io.h
··· 3 3 #define _ASM_UM_IO_H 4 4 #include <linux/types.h> 5 5 6 + /* get emulated iomem (if desired) */ 7 + #include <asm-generic/logic_io.h> 8 + 9 + #ifndef ioremap 6 10 #define ioremap ioremap 7 11 static inline void __iomem *ioremap(phys_addr_t offset, size_t size) 8 12 { 9 13 return NULL; 10 14 } 15 + #endif /* ioremap */ 11 16 17 + #ifndef iounmap 12 18 #define iounmap iounmap 13 19 static inline void iounmap(void __iomem *addr) 14 20 { 15 21 } 22 + #endif /* iounmap */ 16 23 17 24 #include <asm-generic/io.h> 18 25
+7 -1
arch/um/include/asm/irq.h
··· 31 31 32 32 #endif 33 33 34 - #define NR_IRQS 64 34 + #define UM_LAST_SIGNAL_IRQ 64 35 + /* If we have (simulated) PCI MSI, allow 64 more interrupt numbers for it */ 36 + #ifdef CONFIG_PCI_MSI 37 + #define NR_IRQS (UM_LAST_SIGNAL_IRQ + 64) 38 + #else 39 + #define NR_IRQS UM_LAST_SIGNAL_IRQ 40 + #endif /* CONFIG_PCI_MSI */ 35 41 36 42 #include <asm-generic/irq.h> 37 43 #endif
+1
arch/um/include/asm/msi.h
··· 1 + #include <asm-generic/msi.h>
+39
arch/um/include/asm/pci.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + #ifndef __ASM_UM_PCI_H 3 + #define __ASM_UM_PCI_H 4 + #include <linux/types.h> 5 + #include <asm/io.h> 6 + 7 + #define PCIBIOS_MIN_IO 0 8 + #define PCIBIOS_MIN_MEM 0 9 + 10 + #define pcibios_assign_all_busses() 1 11 + 12 + extern int isa_dma_bridge_buggy; 13 + 14 + #ifdef CONFIG_PCI 15 + static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) 16 + { 17 + /* no legacy IRQs */ 18 + return -ENODEV; 19 + } 20 + #endif 21 + 22 + #ifdef CONFIG_PCI_DOMAINS 23 + static inline int pci_proc_domain(struct pci_bus *bus) 24 + { 25 + /* always show the domain in /proc */ 26 + return 1; 27 + } 28 + #endif /* CONFIG_PCI */ 29 + 30 + #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN 31 + /* 32 + * This is a bit of an annoying hack, and it assumes we only have 33 + * the virt-pci (if anything). Which is true, but still. 34 + */ 35 + void *pci_root_bus_fwnode(struct pci_bus *bus); 36 + #define pci_root_bus_fwnode pci_root_bus_fwnode 37 + #endif 38 + 39 + #endif /* __ASM_UM_PCI_H */
+1
arch/um/kernel/Makefile
··· 23 23 obj-$(CONFIG_GPROF) += gprof_syms.o 24 24 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 25 25 obj-$(CONFIG_STACKTRACE) += stacktrace.o 26 + obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o 26 27 27 28 USER_OBJS := config.o 28 29
+13
arch/um/kernel/ioport.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2021 Intel Corporation 4 + * Author: Johannes Berg <johannes@sipsolutions.net> 5 + */ 6 + #include <asm/iomap.h> 7 + #include <asm-generic/pci_iomap.h> 8 + 9 + void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port, 10 + unsigned int nr) 11 + { 12 + return NULL; 13 + }
+4 -3
arch/um/kernel/irq.c
··· 56 56 57 57 static DEFINE_SPINLOCK(irq_lock); 58 58 static LIST_HEAD(active_fds); 59 - static DECLARE_BITMAP(irqs_allocated, NR_IRQS); 59 + static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); 60 60 static bool irqs_suspended; 61 61 62 62 static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) ··· 419 419 420 420 void um_free_irq(int irq, void *dev) 421 421 { 422 - if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq)) 422 + if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ, 423 + "freeing invalid irq %d", irq)) 423 424 return; 424 425 425 426 free_irq_by_irq_and_dev(irq, dev); ··· 649 648 650 649 irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq); 651 650 652 - for (i = 1; i < NR_IRQS; i++) 651 + for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++) 653 652 irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); 654 653 /* Initialize EPOLL Loop */ 655 654 os_setup_epoll();
+64
include/uapi/linux/virtio_pcidev.h
··· 1 + /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ 2 + /* 3 + * Copyright (C) 2021 Intel Corporation 4 + * Author: Johannes Berg <johannes@sipsolutions.net> 5 + */ 6 + #ifndef _UAPI_LINUX_VIRTIO_PCIDEV_H 7 + #define _UAPI_LINUX_VIRTIO_PCIDEV_H 8 + #include <linux/types.h> 9 + 10 + /** 11 + * enum virtio_pcidev_ops - virtual PCI device operations 12 + * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8; 13 + * the @data field should be filled in by the device (in little endian). 14 + * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8; 15 + * the @data field contains the data to write (in little endian). 16 + * @VIRTIO_PCIDEV_OP_BAR_READ: read BAR mem/pio, size can be variable; 17 + * the @data field should be filled in by the device (in little endian). 18 + * @VIRTIO_PCIDEV_OP_BAR_WRITE: write BAR mem/pio, size can be variable; 19 + * the @data field contains the data to write (in little endian). 20 + * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but 21 + * the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE) 22 + * @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for 23 + * the number 24 + * @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports 25 + * the 16- or 32-bit write that would otherwise be done into memory, 26 + * analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above 27 + * @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be 28 + * all zeroes) to signal the PME# pin. 
29 + */ 30 + enum virtio_pcidev_ops { 31 + VIRTIO_PCIDEV_OP_RESERVED = 0, 32 + VIRTIO_PCIDEV_OP_CFG_READ, 33 + VIRTIO_PCIDEV_OP_CFG_WRITE, 34 + VIRTIO_PCIDEV_OP_MMIO_READ, 35 + VIRTIO_PCIDEV_OP_MMIO_WRITE, 36 + VIRTIO_PCIDEV_OP_MMIO_MEMSET, 37 + VIRTIO_PCIDEV_OP_INT, 38 + VIRTIO_PCIDEV_OP_MSI, 39 + VIRTIO_PCIDEV_OP_PME, 40 + }; 41 + 42 + /** 43 + * struct virtio_pcidev_msg - virtio PCI device operation 44 + * @op: the operation to do 45 + * @bar: the bar (only with BAR read/write messages) 46 + * @reserved: reserved 47 + * @size: the size of the read/write (in bytes) 48 + * @addr: the address to read/write 49 + * @data: the data, normally @size long, but just one byte for 50 + * %VIRTIO_PCIDEV_OP_MMIO_MEMSET 51 + * 52 + * Note: the fields are all in native (CPU) endian, however, the 53 + * @data values will often be in little endian (see the ops above.) 54 + */ 55 + struct virtio_pcidev_msg { 56 + __u8 op; 57 + __u8 bar; 58 + __u16 reserved; 59 + __u32 size; 60 + __u64 addr; 61 + __u8 data[]; 62 + }; 63 + 64 + #endif /* _UAPI_LINUX_VIRTIO_PCIDEV_H */