Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: virtio-blk: support multi virt queues per virtio-blk device

Firstly, this patch supports more than one virtual queue per virtio-blk
device.

Secondly, this patch maps each virtual queue to a blk-mq hardware queue.

With this approach, both scalability and performance can be improved.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>

Authored by Ming Lei, committed by Jens Axboe
6a27b656 cb553215

+84 -20
drivers/block/virtio_blk.c
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -15,17 +15,22 @@
 #include <linux/numa.h>
 
 #define PART_BITS 4
+#define VQ_NAME_LEN 16
 
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
 static struct workqueue_struct *virtblk_wq;
 
+struct virtio_blk_vq {
+	struct virtqueue *vq;
+	spinlock_t lock;
+	char name[VQ_NAME_LEN];
+} ____cacheline_aligned_in_smp;
+
 struct virtio_blk
 {
 	struct virtio_device *vdev;
-	struct virtqueue *vq;
-	spinlock_t vq_lock;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
@@ -52,6 +47,10 @@
 
 	/* Ida index - used to track minor number allocations. */
 	int index;
+
+	/* num of vqs */
+	int num_vqs;
+	struct virtio_blk_vq *vqs;
 };
 
 struct virtblk_req
@@ -142,14 +133,15 @@
 {
 	struct virtio_blk *vblk = vq->vdev->priv;
 	bool req_done = false;
+	int qid = vq->index;
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
 
-	spin_lock_irqsave(&vblk->vq_lock, flags);
+	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
 	do {
 		virtqueue_disable_cb(vq);
-		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
 			blk_mq_complete_request(vbr->req);
 			req_done = true;
 		}
@@ -161,7 +151,7 @@
 	/* In case queue is stopped waiting for more buffers. */
 	if (req_done)
 		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
-	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
 static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
@@ -170,6 +160,7 @@
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
 	unsigned int num;
+	int qid = hctx->queue_num;
 	const bool last = (req->cmd_flags & REQ_END) != 0;
 	int err;
 	bool notify = false;
@@ -213,12 +202,12 @@
 		vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
 	}
 
-	spin_lock_irqsave(&vblk->vq_lock, flags);
-	err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num);
+	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
+	err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
 	if (err) {
-		virtqueue_kick(vblk->vq);
+		virtqueue_kick(vblk->vqs[qid].vq);
 		blk_mq_stop_hw_queue(hctx);
-		spin_unlock_irqrestore(&vblk->vq_lock, flags);
+		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 		/* Out of mem doesn't actually happen, since we fall back
 		 * to direct descriptors */
 		if (err == -ENOMEM || err == -ENOSPC)
@@ -226,12 +215,12 @@
 		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
 
-	if (last && virtqueue_kick_prepare(vblk->vq))
+	if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
 		notify = true;
-	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 
 	if (notify)
-		virtqueue_notify(vblk->vq);
+		virtqueue_notify(vblk->vqs[qid].vq);
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
@@ -388,12 +377,64 @@
 static int init_vq(struct virtio_blk *vblk)
 {
 	int err = 0;
+	int i;
+	vq_callback_t **callbacks;
+	const char **names;
+	struct virtqueue **vqs;
+	unsigned short num_vqs;
+	struct virtio_device *vdev = vblk->vdev;
 
-	/* We expect one virtqueue, for output. */
-	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
-	if (IS_ERR(vblk->vq))
-		err = PTR_ERR(vblk->vq);
+	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
+				   struct virtio_blk_config, num_queues,
+				   &num_vqs);
+	if (err)
+		num_vqs = 1;
 
+	vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
+	if (!vblk->vqs) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
+	if (!names)
+		goto err_names;
+
+	callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
+	if (!callbacks)
+		goto err_callbacks;
+
+	vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
+	if (!vqs)
+		goto err_vqs;
+
+	for (i = 0; i < num_vqs; i++) {
+		callbacks[i] = virtblk_done;
+		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
+		names[i] = vblk->vqs[i].name;
+	}
+
+	/* Discover virtqueues and write information to configuration. */
+	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
+	if (err)
+		goto err_find_vqs;
+
+	for (i = 0; i < num_vqs; i++) {
+		spin_lock_init(&vblk->vqs[i].lock);
+		vblk->vqs[i].vq = vqs[i];
+	}
+	vblk->num_vqs = num_vqs;
+
+err_find_vqs:
+	kfree(vqs);
+err_vqs:
+	kfree(callbacks);
+err_callbacks:
+	kfree(names);
+err_names:
+	if (err)
+		kfree(vblk->vqs);
+out:
 	return err;
 }
 
@@ -614,7 +551,6 @@
 	err = init_vq(vblk);
 	if (err)
 		goto out_free_vblk;
-	spin_lock_init(&vblk->vq_lock);
 
 	/* FIXME: How many partitions? How long is a piece of string? */
 	vblk->disk = alloc_disk(1 << PART_BITS);
@@ -624,7 +562,7 @@
 
 	/* Default queue sizing is to fill the ring. */
 	if (!virtblk_queue_depth) {
-		virtblk_queue_depth = vblk->vq->num_free;
+		virtblk_queue_depth = vblk->vqs[0].vq->num_free;
 		/* ... but without indirect descs, we use 2 descs per req */
 		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
 			virtblk_queue_depth /= 2;
@@ -632,7 +570,6 @@
 
 	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
 	vblk->tag_set.ops = &virtio_mq_ops;
-	vblk->tag_set.nr_hw_queues = 1;
 	vblk->tag_set.queue_depth = virtblk_queue_depth;
 	vblk->tag_set.numa_node = NUMA_NO_NODE;
 	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -639,6 +578,7 @@
 			sizeof(struct virtblk_req) +
 			sizeof(struct scatterlist) * sg_elems;
 	vblk->tag_set.driver_data = vblk;
+	vblk->tag_set.nr_hw_queues = vblk->num_vqs;
 
 	err = blk_mq_alloc_tag_set(&vblk->tag_set);
 	if (err)
@@ -789,6 +727,7 @@
 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
 	put_disk(vblk->disk);
 	vdev->config->del_vqs(vdev);
+	kfree(vblk->vqs);
 	kfree(vblk);
 
 	/* Only free device id if we don't have any users */
@@ -840,7 +777,8 @@
 static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
-	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
+	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
+	VIRTIO_BLK_F_MQ,
 };
 
 static struct virtio_driver virtio_blk = {