Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

virtio-ring: store DMA metadata in desc_extra for split virtqueue

For split virtqueue, we used to depend on the address, length and
flags stored in the descriptor ring for DMA unmapping. This is unsafe
because the device can modify those values and thereby manipulate the
behavior of the virtio driver, IOMMU drivers and swiotlb.

For safety, maintain the DMA address, DMA length, descriptor flags and
next field of the non-indirect descriptors in vring_desc_state_extra
when the DMA API is used for virtio, as we did for the packed virtqueue,
and use that metadata for performing DMA operations. Indirect
descriptors should be safe since they use streaming mappings.

With this, the descriptor ring is write-only from the view of the
driver.

This slightly increases the footprint of the driver, but no difference
was noticed in pktgen (64B) and netperf tests in the case of virtio-net.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20210604055350.58753-8-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

authored by

Jason Wang and committed by
Michael S. Tsirkin
72b5e895 5bc72234

+87 -25
+87 -25
drivers/virtio/virtio_ring.c
··· 133 133 134 134 /* Per-descriptor state. */ 135 135 struct vring_desc_state_split *desc_state; 136 + struct vring_desc_extra *desc_extra; 136 137 137 138 /* DMA address and size information */ 138 139 dma_addr_t queue_dma_addr; ··· 368 367 * Split ring specific functions - *_split(). 369 368 */ 370 369 371 - static void vring_unmap_one_split(const struct vring_virtqueue *vq, 372 - struct vring_desc *desc) 370 + static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 371 + struct vring_desc *desc) 373 372 { 374 373 u16 flags; 375 374 ··· 391 390 (flags & VRING_DESC_F_WRITE) ? 392 391 DMA_FROM_DEVICE : DMA_TO_DEVICE); 393 392 } 393 + } 394 + 395 + static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 396 + unsigned int i) 397 + { 398 + struct vring_desc_extra *extra = vq->split.desc_extra; 399 + u16 flags; 400 + 401 + if (!vq->use_dma_api) 402 + goto out; 403 + 404 + flags = extra[i].flags; 405 + 406 + if (flags & VRING_DESC_F_INDIRECT) { 407 + dma_unmap_single(vring_dma_dev(vq), 408 + extra[i].addr, 409 + extra[i].len, 410 + (flags & VRING_DESC_F_WRITE) ? 411 + DMA_FROM_DEVICE : DMA_TO_DEVICE); 412 + } else { 413 + dma_unmap_page(vring_dma_dev(vq), 414 + extra[i].addr, 415 + extra[i].len, 416 + (flags & VRING_DESC_F_WRITE) ? 
417 + DMA_FROM_DEVICE : DMA_TO_DEVICE); 418 + } 419 + 420 + out: 421 + return extra[i].next; 394 422 } 395 423 396 424 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, ··· 450 420 unsigned int i, 451 421 dma_addr_t addr, 452 422 unsigned int len, 453 - u16 flags) 423 + u16 flags, 424 + bool indirect) 454 425 { 426 + struct vring_virtqueue *vring = to_vvq(vq); 427 + struct vring_desc_extra *extra = vring->split.desc_extra; 428 + u16 next; 429 + 455 430 desc[i].flags = cpu_to_virtio16(vq->vdev, flags); 456 431 desc[i].addr = cpu_to_virtio64(vq->vdev, addr); 457 432 desc[i].len = cpu_to_virtio32(vq->vdev, len); 458 433 459 - return virtio16_to_cpu(vq->vdev, desc[i].next); 434 + if (!indirect) { 435 + next = extra[i].next; 436 + desc[i].next = cpu_to_virtio16(vq->vdev, next); 437 + 438 + extra[i].addr = addr; 439 + extra[i].len = len; 440 + extra[i].flags = flags; 441 + } else 442 + next = virtio16_to_cpu(vq->vdev, desc[i].next); 443 + 444 + return next; 460 445 } 461 446 462 447 static inline int virtqueue_add_split(struct virtqueue *_vq, ··· 547 502 goto unmap_release; 548 503 549 504 prev = i; 505 + /* Note that we trust indirect descriptor 506 + * table since it use stream DMA mapping. 507 + */ 550 508 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, 551 - VRING_DESC_F_NEXT); 509 + VRING_DESC_F_NEXT, 510 + indirect); 552 511 } 553 512 } 554 513 for (; n < (out_sgs + in_sgs); n++) { ··· 562 513 goto unmap_release; 563 514 564 515 prev = i; 516 + /* Note that we trust indirect descriptor 517 + * table since it use stream DMA mapping. 518 + */ 565 519 i = virtqueue_add_desc_split(_vq, desc, i, addr, 566 520 sg->length, 567 521 VRING_DESC_F_NEXT | 568 - VRING_DESC_F_WRITE); 522 + VRING_DESC_F_WRITE, 523 + indirect); 569 524 } 570 525 } 571 526 /* Last one doesn't continue. 
*/ 572 527 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 528 + if (!indirect && vq->use_dma_api) 529 + vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags = 530 + ~VRING_DESC_F_NEXT; 573 531 574 532 if (indirect) { 575 533 /* Now that the indirect table is filled in, map it. */ ··· 589 533 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 590 534 head, addr, 591 535 total_sg * sizeof(struct vring_desc), 592 - VRING_DESC_F_INDIRECT); 536 + VRING_DESC_F_INDIRECT, 537 + false); 593 538 } 594 539 595 540 /* We're using some buffers from the free list. */ ··· 598 541 599 542 /* Update free pointer */ 600 543 if (indirect) 601 - vq->free_head = virtio16_to_cpu(_vq->vdev, 602 - vq->split.vring.desc[head].next); 544 + vq->free_head = vq->split.desc_extra[head].next; 603 545 else 604 546 vq->free_head = i; 605 547 ··· 643 587 for (n = 0; n < total_sg; n++) { 644 588 if (i == err_idx) 645 589 break; 646 - vring_unmap_one_split(vq, &desc[i]); 647 - i = virtio16_to_cpu(_vq->vdev, desc[i].next); 590 + if (indirect) { 591 + vring_unmap_one_split_indirect(vq, &desc[i]); 592 + i = virtio16_to_cpu(_vq->vdev, desc[i].next); 593 + } else 594 + i = vring_unmap_one_split(vq, i); 648 595 } 649 596 650 597 if (indirect) ··· 701 642 i = head; 702 643 703 644 while (vq->split.vring.desc[i].flags & nextflag) { 704 - vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 705 - i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 645 + vring_unmap_one_split(vq, i); 646 + i = vq->split.desc_extra[i].next; 706 647 vq->vq.num_free++; 707 648 } 708 649 709 - vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 710 - vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 711 - vq->free_head); 650 + vring_unmap_one_split(vq, i); 651 + vq->split.desc_extra[i].next = vq->free_head; 712 652 vq->free_head = head; 713 653 714 654 /* Plus final descriptor */ ··· 722 664 if (!indir_desc) 723 665 return; 724 666 725 - len = virtio32_to_cpu(vq->vq.vdev, 726 - 
vq->split.vring.desc[head].len); 667 + len = vq->split.desc_extra[head].len; 727 668 728 - BUG_ON(!(vq->split.vring.desc[head].flags & 729 - cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 669 + BUG_ON(!(vq->split.desc_extra[head].flags & 670 + VRING_DESC_F_INDIRECT)); 730 671 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 731 672 732 673 for (j = 0; j < len / sizeof(struct vring_desc); j++) 733 - vring_unmap_one_split(vq, &indir_desc[j]); 674 + vring_unmap_one_split_indirect(vq, &indir_desc[j]); 734 675 735 676 kfree(indir_desc); 736 677 vq->split.desc_state[head].indir_desc = NULL; ··· 2165 2108 void (*callback)(struct virtqueue *), 2166 2109 const char *name) 2167 2110 { 2168 - unsigned int i; 2169 2111 struct vring_virtqueue *vq; 2170 2112 2171 2113 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) ··· 2220 2164 if (!vq->split.desc_state) 2221 2165 goto err_state; 2222 2166 2167 + vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num); 2168 + if (!vq->split.desc_extra) 2169 + goto err_extra; 2170 + 2223 2171 /* Put everything in free lists. */ 2224 2172 vq->free_head = 0; 2225 - for (i = 0; i < vring.num-1; i++) 2226 - vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); 2227 2173 memset(vq->split.desc_state, 0, vring.num * 2228 2174 sizeof(struct vring_desc_state_split)); 2229 2175 2230 2176 list_add_tail(&vq->vq.list, &vdev->vqs); 2231 2177 return &vq->vq; 2232 2178 2179 + err_extra: 2180 + kfree(vq->split.desc_state); 2233 2181 err_state: 2234 2182 kfree(vq); 2235 2183 return NULL; ··· 2317 2257 vq->split.queue_dma_addr); 2318 2258 } 2319 2259 } 2320 - if (!vq->packed_ring) 2260 + if (!vq->packed_ring) { 2321 2261 kfree(vq->split.desc_state); 2262 + kfree(vq->split.desc_extra); 2263 + } 2322 2264 list_del(&_vq->list); 2323 2265 kfree(vq); 2324 2266 }