Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

virtio_pci: optional MSI-X support

This implements optional MSI-X support in virtio_pci.
MSI-X is used whenever the host supports at least 2 MSI-X
vectors: 1 for configuration changes and 1 for virtqueues.
Per-virtqueue vectors are allocated if enough vectors
are available.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (+ whitespace, style)

authored by

Michael S. Tsirkin and committed by
Rusty Russell
82af8ce8 77cf5246

+218 -20
+209 -19
drivers/virtio/virtio_pci.c
··· 42 42 /* a list of queues so we can dispatch IRQs */ 43 43 spinlock_t lock; 44 44 struct list_head virtqueues; 45 + 46 + /* MSI-X support */ 47 + int msix_enabled; 48 + int intx_enabled; 49 + struct msix_entry *msix_entries; 50 + /* Name strings for interrupts. This size should be enough, 51 + * and I'm too lazy to allocate each name separately. */ 52 + char (*msix_names)[256]; 53 + /* Number of available vectors */ 54 + unsigned msix_vectors; 55 + /* Vectors allocated */ 56 + unsigned msix_used_vectors; 57 + }; 58 + 59 + /* Constants for MSI-X */ 60 + /* Use first vector for configuration changes, second and the rest for 61 + * virtqueues Thus, we need at least 2 vectors for MSI. */ 62 + enum { 63 + VP_MSIX_CONFIG_VECTOR = 0, 64 + VP_MSIX_VQ_VECTOR = 1, 45 65 }; 46 66 47 67 struct virtio_pci_vq_info ··· 80 60 81 61 /* the list node for the virtqueues list */ 82 62 struct list_head node; 63 + 64 + /* MSI-X vector (or none) */ 65 + unsigned vector; 83 66 }; 84 67 85 68 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. 
*/ ··· 132 109 void *buf, unsigned len) 133 110 { 134 111 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 135 - void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; 112 + void __iomem *ioaddr = vp_dev->ioaddr + 113 + VIRTIO_PCI_CONFIG(vp_dev) + offset; 136 114 u8 *ptr = buf; 137 115 int i; 138 116 ··· 147 123 const void *buf, unsigned len) 148 124 { 149 125 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 150 - void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; 126 + void __iomem *ioaddr = vp_dev->ioaddr + 127 + VIRTIO_PCI_CONFIG(vp_dev) + offset; 151 128 const u8 *ptr = buf; 152 129 int i; 153 130 ··· 246 221 return vp_vring_interrupt(irq, opaque); 247 222 } 248 223 249 - /* the config->find_vq() implementation */ 224 + static void vp_free_vectors(struct virtio_device *vdev) 225 + { 226 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 227 + int i; 228 + 229 + if (vp_dev->intx_enabled) { 230 + free_irq(vp_dev->pci_dev->irq, vp_dev); 231 + vp_dev->intx_enabled = 0; 232 + } 233 + 234 + for (i = 0; i < vp_dev->msix_used_vectors; ++i) 235 + free_irq(vp_dev->msix_entries[i].vector, vp_dev); 236 + vp_dev->msix_used_vectors = 0; 237 + 238 + if (vp_dev->msix_enabled) { 239 + /* Disable the vector used for configuration */ 240 + iowrite16(VIRTIO_MSI_NO_VECTOR, 241 + vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); 242 + /* Flush the write out to device */ 243 + ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); 244 + 245 + vp_dev->msix_enabled = 0; 246 + pci_disable_msix(vp_dev->pci_dev); 247 + } 248 + } 249 + 250 + static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, 251 + int *options, int noptions) 252 + { 253 + int i; 254 + for (i = 0; i < noptions; ++i) 255 + if (!pci_enable_msix(dev, entries, options[i])) 256 + return options[i]; 257 + return -EBUSY; 258 + } 259 + 260 + static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) 261 + { 262 + struct virtio_pci_device *vp_dev = 
to_vp_device(vdev); 263 + const char *name = dev_name(&vp_dev->vdev.dev); 264 + unsigned i, v; 265 + int err = -ENOMEM; 266 + /* We want at most one vector per queue and one for config changes. 267 + * Fallback to separate vectors for config and a shared for queues. 268 + * Finally fall back to regular interrupts. */ 269 + int options[] = { max_vqs + 1, 2 }; 270 + int nvectors = max(options[0], options[1]); 271 + 272 + vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, 273 + GFP_KERNEL); 274 + if (!vp_dev->msix_entries) 275 + goto error_entries; 276 + vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, 277 + GFP_KERNEL); 278 + if (!vp_dev->msix_names) 279 + goto error_names; 280 + 281 + for (i = 0; i < nvectors; ++i) 282 + vp_dev->msix_entries[i].entry = i; 283 + 284 + err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, 285 + options, ARRAY_SIZE(options)); 286 + if (err < 0) { 287 + /* Can't allocate enough MSI-X vectors, use regular interrupt */ 288 + vp_dev->msix_vectors = 0; 289 + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, 290 + IRQF_SHARED, name, vp_dev); 291 + if (err) 292 + goto error_irq; 293 + vp_dev->intx_enabled = 1; 294 + } else { 295 + vp_dev->msix_vectors = err; 296 + vp_dev->msix_enabled = 1; 297 + 298 + /* Set the vector used for configuration */ 299 + v = vp_dev->msix_used_vectors; 300 + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, 301 + "%s-config", name); 302 + err = request_irq(vp_dev->msix_entries[v].vector, 303 + vp_config_changed, 0, vp_dev->msix_names[v], 304 + vp_dev); 305 + if (err) 306 + goto error_irq; 307 + ++vp_dev->msix_used_vectors; 308 + 309 + iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); 310 + /* Verify we had enough resources to assign the vector */ 311 + v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); 312 + if (v == VIRTIO_MSI_NO_VECTOR) { 313 + err = -EBUSY; 314 + goto error_irq; 315 + } 316 + } 317 + 318 + if (vp_dev->msix_vectors && 
vp_dev->msix_vectors != max_vqs + 1) { 319 + /* Shared vector for all VQs */ 320 + v = vp_dev->msix_used_vectors; 321 + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, 322 + "%s-virtqueues", name); 323 + err = request_irq(vp_dev->msix_entries[v].vector, 324 + vp_vring_interrupt, 0, vp_dev->msix_names[v], 325 + vp_dev); 326 + if (err) 327 + goto error_irq; 328 + ++vp_dev->msix_used_vectors; 329 + } 330 + return 0; 331 + error_irq: 332 + vp_free_vectors(vdev); 333 + kfree(vp_dev->msix_names); 334 + error_names: 335 + kfree(vp_dev->msix_entries); 336 + error_entries: 337 + return err; 338 + } 339 + 250 340 static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, 251 341 void (*callback)(struct virtqueue *vq), 252 342 const char *name) ··· 370 230 struct virtio_pci_vq_info *info; 371 231 struct virtqueue *vq; 372 232 unsigned long flags, size; 373 - u16 num; 233 + u16 num, vector; 374 234 int err; 375 235 376 236 /* Select the queue we're interested in */ ··· 389 249 390 250 info->queue_index = index; 391 251 info->num = num; 252 + info->vector = VIRTIO_MSI_NO_VECTOR; 392 253 393 254 size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); 394 255 info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); ··· 413 272 vq->priv = info; 414 273 info->vq = vq; 415 274 275 + /* allocate per-vq vector if available and necessary */ 276 + if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) { 277 + vector = vp_dev->msix_used_vectors; 278 + snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, 279 + "%s-%s", dev_name(&vp_dev->vdev.dev), name); 280 + err = request_irq(vp_dev->msix_entries[vector].vector, 281 + vring_interrupt, 0, 282 + vp_dev->msix_names[vector], vq); 283 + if (err) 284 + goto out_request_irq; 285 + info->vector = vector; 286 + ++vp_dev->msix_used_vectors; 287 + } else 288 + vector = VP_MSIX_VQ_VECTOR; 289 + 290 + if (callback && vp_dev->msix_enabled) { 291 + iowrite16(vector, vp_dev->ioaddr + 
VIRTIO_MSI_QUEUE_VECTOR); 292 + vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); 293 + if (vector == VIRTIO_MSI_NO_VECTOR) { 294 + err = -EBUSY; 295 + goto out_assign; 296 + } 297 + } 298 + 416 299 spin_lock_irqsave(&vp_dev->lock, flags); 417 300 list_add(&info->node, &vp_dev->virtqueues); 418 301 spin_unlock_irqrestore(&vp_dev->lock, flags); 419 302 420 303 return vq; 421 304 305 + out_assign: 306 + if (info->vector != VIRTIO_MSI_NO_VECTOR) { 307 + free_irq(vp_dev->msix_entries[info->vector].vector, vq); 308 + --vp_dev->msix_used_vectors; 309 + } 310 + out_request_irq: 311 + vring_del_virtqueue(vq); 422 312 out_activate_queue: 423 313 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 424 314 free_pages_exact(info->queue, size); ··· 458 286 return ERR_PTR(err); 459 287 } 460 288 461 - /* the config->del_vq() implementation */ 462 289 static void vp_del_vq(struct virtqueue *vq) 463 290 { 464 291 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 465 292 struct virtio_pci_vq_info *info = vq->priv; 466 293 unsigned long size; 467 294 295 + iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 296 + 297 + if (info->vector != VIRTIO_MSI_NO_VECTOR) 298 + free_irq(vp_dev->msix_entries[info->vector].vector, vq); 299 + 300 + if (vp_dev->msix_enabled) { 301 + iowrite16(VIRTIO_MSI_NO_VECTOR, 302 + vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); 303 + /* Flush the write out to device */ 304 + ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); 305 + } 306 + 468 307 vring_del_virtqueue(vq); 469 308 470 309 /* Select and deactivate the queue */ 471 - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 472 310 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 473 311 474 312 size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); ··· 486 304 kfree(info); 487 305 } 488 306 307 + /* the config->del_vqs() implementation */ 489 308 static void vp_del_vqs(struct virtio_device *vdev) 490 309 { 491 310 struct virtqueue *vq, *n; 492 
311 493 312 list_for_each_entry_safe(vq, n, &vdev->vqs, list) 494 313 vp_del_vq(vq); 314 + 315 + vp_free_vectors(vdev); 495 316 } 496 317 318 + /* the config->find_vqs() implementation */ 497 319 static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, 498 320 struct virtqueue *vqs[], 499 321 vq_callback_t *callbacks[], 500 322 const char *names[]) 501 323 { 502 - int i; 324 + int vectors = 0; 325 + int i, err; 326 + 327 + /* How many vectors would we like? */ 328 + for (i = 0; i < nvqs; ++i) 329 + if (callbacks[i]) 330 + ++vectors; 331 + 332 + err = vp_request_vectors(vdev, vectors); 333 + if (err) 334 + goto error_request; 503 335 504 336 for (i = 0; i < nvqs; ++i) { 505 337 vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]); 506 338 if (IS_ERR(vqs[i])) 507 - goto error; 339 + goto error_find; 508 340 } 509 341 return 0; 510 342 511 - error: 343 + error_find: 512 344 vp_del_vqs(vdev); 345 + 346 + error_request: 513 347 return PTR_ERR(vqs[i]); 514 348 } 515 349 ··· 547 349 struct virtio_pci_device *vp_dev = to_vp_device(dev); 548 350 struct pci_dev *pci_dev = vp_dev->pci_dev; 549 351 550 - free_irq(pci_dev->irq, vp_dev); 352 + vp_del_vqs(dev); 551 353 pci_set_drvdata(pci_dev, NULL); 552 354 pci_iounmap(pci_dev, vp_dev->ioaddr); 553 355 pci_release_regions(pci_dev); ··· 606 408 vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; 607 409 vp_dev->vdev.id.device = pci_dev->subsystem_device; 608 410 609 - /* register a handler for the queue with the PCI device's interrupt */ 610 - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, 611 - dev_name(&vp_dev->vdev.dev), vp_dev); 612 - if (err) 613 - goto out_set_drvdata; 614 - 615 411 /* finally register the virtio device */ 616 412 err = register_virtio_device(&vp_dev->vdev); 617 413 if (err) 618 - goto out_req_irq; 414 + goto out_set_drvdata; 619 415 620 416 return 0; 621 417 622 - out_req_irq: 623 - free_irq(pci_dev->irq, vp_dev); 624 418 out_set_drvdata: 625 419 pci_set_drvdata(pci_dev, NULL); 
626 420 pci_iounmap(pci_dev, vp_dev->ioaddr);
+9 -1
include/linux/virtio_pci.h
··· 47 47 /* The bit of the ISR which indicates a device configuration change. */ 48 48 #define VIRTIO_PCI_ISR_CONFIG 0x2 49 49 50 + /* MSI-X registers: only enabled if MSI-X is enabled. */ 51 + /* A 16-bit vector for configuration changes. */ 52 + #define VIRTIO_MSI_CONFIG_VECTOR 20 53 + /* A 16-bit vector for selected queue notifications. */ 54 + #define VIRTIO_MSI_QUEUE_VECTOR 22 55 + /* Vector value used to disable MSI for queue */ 56 + #define VIRTIO_MSI_NO_VECTOR 0xffff 57 + 50 58 /* The remaining space is defined by each driver as the per-driver 51 59 * configuration space */ 52 - #define VIRTIO_PCI_CONFIG 20 60 + #define VIRTIO_PCI_CONFIG(dev) ((dev)->msix_enabled ? 24 : 20) 53 61 54 62 /* Virtio ABI version, this must match exactly */ 55 63 #define VIRTIO_PCI_ABI_VERSION 0