Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull vhost updates from Michael Tsirkin:
"virtio, vhost: optimizations, fixes

Looks like a quiet cycle for vhost/virtio, just a couple of minor
tweaks. Most notable is automatic interrupt affinity for blk and scsi.
Hopefully other devices are not far behind"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
virtio-console: avoid DMA from stack
vhost: introduce O(1) vq metadata cache
virtio_scsi: use virtio IRQ affinity
virtio_blk: use virtio IRQ affinity
blk-mq: provide a default queue mapping for virtio device
virtio: provide a method to get the IRQ affinity mask for a virtqueue
virtio: allow drivers to request IRQ affinity when creating VQs
virtio_pci: simplify MSI-X setup
virtio_pci: don't duplicate the msix_enable flag in struct pci_dev
virtio_pci: use shared interrupts for virtqueues
virtio_pci: remove struct virtio_pci_vq_info
vhost: try avoiding avail index access when getting descriptor
virtio_mmio: expose header to userspace

+460 -458
+5
block/Kconfig
··· 189 189 depends on BLOCK && PCI 190 190 default y 191 191 192 + config BLK_MQ_VIRTIO 193 + bool 194 + depends on BLOCK && VIRTIO 195 + default y 196 + 192 197 source block/Kconfig.iosched
+1
block/Makefile
··· 25 25 obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o 26 26 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o 27 27 obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o 28 + obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o 28 29 obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o 29 30 obj-$(CONFIG_BLK_WBT) += blk-wbt.o 30 31 obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
+54
block/blk-mq-virtio.c
··· 1 + /* 2 + * Copyright (c) 2016 Christoph Hellwig. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + */ 13 + #include <linux/device.h> 14 + #include <linux/blk-mq.h> 15 + #include <linux/blk-mq-virtio.h> 16 + #include <linux/virtio_config.h> 17 + #include <linux/module.h> 18 + #include "blk-mq.h" 19 + 20 + /** 21 + * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device 22 + * @set: tagset to provide the mapping for 23 + * @vdev: virtio device associated with @set. 24 + * @first_vec: first interrupt vector to use for queues (usually 0) 25 + * 26 + * This function assumes the virtio device @vdev has at least as many available 27 + * interrupt vectors as @set has queues. It will then query the vector 28 + * corresponding to each queue for its affinity mask and build a queue mapping 29 + * that maps a queue to the CPUs that have irq affinity for the corresponding 30 + * vector. 31 + */ 32 + int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, 33 + struct virtio_device *vdev, int first_vec) 34 + { 35 + const struct cpumask *mask; 36 + unsigned int queue, cpu; 37 + 38 + if (!vdev->config->get_vq_affinity) 39 + goto fallback; 40 + 41 + for (queue = 0; queue < set->nr_hw_queues; queue++) { 42 + mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); 43 + if (!mask) 44 + goto fallback; 45 + 46 + for_each_cpu(cpu, mask) 47 + set->mq_map[cpu] = queue; 48 + } 49 + 50 + return 0; 51 + fallback: 52 + return blk_mq_map_queues(set); 53 + } 54 + EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues);
+13 -1
drivers/block/virtio_blk.c
··· 5 5 #include <linux/hdreg.h> 6 6 #include <linux/module.h> 7 7 #include <linux/mutex.h> 8 + #include <linux/interrupt.h> 8 9 #include <linux/virtio.h> 9 10 #include <linux/virtio_blk.h> 10 11 #include <linux/scatterlist.h> ··· 13 12 #include <scsi/scsi_cmnd.h> 14 13 #include <linux/idr.h> 15 14 #include <linux/blk-mq.h> 15 + #include <linux/blk-mq-virtio.h> 16 16 #include <linux/numa.h> 17 17 18 18 #define PART_BITS 4 ··· 428 426 struct virtqueue **vqs; 429 427 unsigned short num_vqs; 430 428 struct virtio_device *vdev = vblk->vdev; 429 + struct irq_affinity desc = { 0, }; 431 430 432 431 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, 433 432 struct virtio_blk_config, num_queues, ··· 455 452 } 456 453 457 454 /* Discover virtqueues and write information to configuration. */ 458 - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); 455 + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, 456 + &desc); 459 457 if (err) 460 458 goto out; 461 459 ··· 590 586 return 0; 591 587 } 592 588 589 + static int virtblk_map_queues(struct blk_mq_tag_set *set) 590 + { 591 + struct virtio_blk *vblk = set->driver_data; 592 + 593 + return blk_mq_virtio_map_queues(set, vblk->vdev, 0); 594 + } 595 + 593 596 static struct blk_mq_ops virtio_mq_ops = { 594 597 .queue_rq = virtio_queue_rq, 595 598 .complete = virtblk_request_done, 596 599 .init_request = virtblk_init_request, 600 + .map_queues = virtblk_map_queues, 597 601 }; 598 602 599 603 static unsigned int virtblk_queue_depth;
+11 -3
drivers/char/virtio_console.c
··· 1136 1136 { 1137 1137 struct port *port; 1138 1138 struct scatterlist sg[1]; 1139 + void *data; 1140 + int ret; 1139 1141 1140 1142 if (unlikely(early_put_chars)) 1141 1143 return early_put_chars(vtermno, buf, count); ··· 1146 1144 if (!port) 1147 1145 return -EPIPE; 1148 1146 1149 - sg_init_one(sg, buf, count); 1150 - return __send_to_port(port, sg, 1, count, (void *)buf, false); 1147 + data = kmemdup(buf, count, GFP_ATOMIC); 1148 + if (!data) 1149 + return -ENOMEM; 1150 + 1151 + sg_init_one(sg, data, count); 1152 + ret = __send_to_port(port, sg, 1, count, data, false); 1153 + kfree(data); 1154 + return ret; 1151 1155 } 1152 1156 1153 1157 /* ··· 1947 1939 /* Find the queues. */ 1948 1940 err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs, 1949 1941 io_callbacks, 1950 - (const char **)io_names); 1942 + (const char **)io_names, NULL); 1951 1943 if (err) 1952 1944 goto free; 1953 1945
+1 -1
drivers/crypto/virtio/virtio_crypto_core.c
··· 120 120 } 121 121 122 122 ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, 123 - names); 123 + names, NULL); 124 124 if (ret) 125 125 goto err_find; 126 126
+1 -1
drivers/gpu/drm/virtio/virtgpu_kms.c
··· 176 176 #endif 177 177 178 178 ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs, 179 - callbacks, names); 179 + callbacks, names, NULL); 180 180 if (ret) { 181 181 DRM_ERROR("failed to find virt queues\n"); 182 182 goto err_vqs;
+1 -1
drivers/misc/mic/vop/vop_main.c
··· 374 374 static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, 375 375 struct virtqueue *vqs[], 376 376 vq_callback_t *callbacks[], 377 - const char * const names[]) 377 + const char * const names[], struct irq_affinity *desc) 378 378 { 379 379 struct _vop_vdev *vdev = to_vopvdev(dev); 380 380 struct vop_device *vpdev = vdev->vpdev;
+2 -1
drivers/net/caif/caif_virtio.c
··· 679 679 goto err; 680 680 681 681 /* Get the TX virtio ring. This is a "guest side vring". */ 682 - err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names); 682 + err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, 683 + NULL); 683 684 if (err) 684 685 goto err; 685 686
+1 -1
drivers/net/virtio_net.c
··· 2080 2080 } 2081 2081 2082 2082 ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, 2083 - names); 2083 + names, NULL); 2084 2084 if (ret) 2085 2085 goto err_find; 2086 2086
+2 -1
drivers/remoteproc/remoteproc_virtio.c
··· 137 137 static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, 138 138 struct virtqueue *vqs[], 139 139 vq_callback_t *callbacks[], 140 - const char * const names[]) 140 + const char * const names[], 141 + struct irq_affinity *desc) 141 142 { 142 143 int i, ret; 143 144
+1 -1
drivers/rpmsg/virtio_rpmsg_bus.c
··· 869 869 init_waitqueue_head(&vrp->sendq); 870 870 871 871 /* We expect two virtqueues, rx and tx (and in this order) */ 872 - err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names); 872 + err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL); 873 873 if (err) 874 874 goto free_vrp; 875 875
+2 -1
drivers/s390/virtio/kvm_virtio.c
··· 255 255 static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, 256 256 struct virtqueue *vqs[], 257 257 vq_callback_t *callbacks[], 258 - const char * const names[]) 258 + const char * const names[], 259 + struct irq_affinity *desc) 259 260 { 260 261 struct kvm_device *kdev = to_kvmdev(vdev); 261 262 int i;
+2 -1
drivers/s390/virtio/virtio_ccw.c
··· 628 628 static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, 629 629 struct virtqueue *vqs[], 630 630 vq_callback_t *callbacks[], 631 - const char * const names[]) 631 + const char * const names[], 632 + struct irq_affinity *desc) 632 633 { 633 634 struct virtio_ccw_device *vcdev = to_vc_device(vdev); 634 635 unsigned long *indicatorp = NULL;
+13 -114
drivers/scsi/virtio_scsi.c
··· 18 18 #include <linux/module.h> 19 19 #include <linux/slab.h> 20 20 #include <linux/mempool.h> 21 + #include <linux/interrupt.h> 21 22 #include <linux/virtio.h> 22 23 #include <linux/virtio_ids.h> 23 24 #include <linux/virtio_config.h> ··· 30 29 #include <scsi/scsi_cmnd.h> 31 30 #include <scsi/scsi_tcq.h> 32 31 #include <linux/seqlock.h> 32 + #include <linux/blk-mq-virtio.h> 33 33 34 34 #define VIRTIO_SCSI_MEMPOOL_SZ 64 35 35 #define VIRTIO_SCSI_EVENT_LEN 8 ··· 110 108 bool affinity_hint_set; 111 109 112 110 struct hlist_node node; 113 - struct hlist_node node_dead; 114 111 115 112 /* Protected by event_vq lock */ 116 113 bool stop_events; ··· 119 118 struct virtio_scsi_vq req_vqs[]; 120 119 }; 121 120 122 - static enum cpuhp_state virtioscsi_online; 123 121 static struct kmem_cache *virtscsi_cmd_cache; 124 122 static mempool_t *virtscsi_cmd_pool; 125 123 ··· 766 766 kfree(tgt); 767 767 } 768 768 769 + static int virtscsi_map_queues(struct Scsi_Host *shost) 770 + { 771 + struct virtio_scsi *vscsi = shost_priv(shost); 772 + 773 + return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2); 774 + } 775 + 769 776 static struct scsi_host_template virtscsi_host_template_single = { 770 777 .module = THIS_MODULE, 771 778 .name = "Virtio SCSI HBA", ··· 808 801 .use_clustering = ENABLE_CLUSTERING, 809 802 .target_alloc = virtscsi_target_alloc, 810 803 .target_destroy = virtscsi_target_destroy, 804 + .map_queues = virtscsi_map_queues, 811 805 .track_queue_depth = 1, 812 806 }; 813 807 ··· 825 817 virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \ 826 818 } while(0) 827 819 828 - static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) 829 - { 830 - int i; 831 - int cpu; 832 - 833 - /* In multiqueue mode, when the number of cpu is equal 834 - * to the number of request queues, we let the qeueues 835 - * to be private to one cpu by setting the affinity hint 836 - * to eliminate the contention. 
837 - */ 838 - if ((vscsi->num_queues == 1 || 839 - vscsi->num_queues != num_online_cpus()) && affinity) { 840 - if (vscsi->affinity_hint_set) 841 - affinity = false; 842 - else 843 - return; 844 - } 845 - 846 - if (affinity) { 847 - i = 0; 848 - for_each_online_cpu(cpu) { 849 - virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu); 850 - i++; 851 - } 852 - 853 - vscsi->affinity_hint_set = true; 854 - } else { 855 - for (i = 0; i < vscsi->num_queues; i++) { 856 - if (!vscsi->req_vqs[i].vq) 857 - continue; 858 - 859 - virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1); 860 - } 861 - 862 - vscsi->affinity_hint_set = false; 863 - } 864 - } 865 - 866 - static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) 867 - { 868 - get_online_cpus(); 869 - __virtscsi_set_affinity(vscsi, affinity); 870 - put_online_cpus(); 871 - } 872 - 873 - static int virtscsi_cpu_online(unsigned int cpu, struct hlist_node *node) 874 - { 875 - struct virtio_scsi *vscsi = hlist_entry_safe(node, struct virtio_scsi, 876 - node); 877 - __virtscsi_set_affinity(vscsi, true); 878 - return 0; 879 - } 880 - 881 - static int virtscsi_cpu_notif_add(struct virtio_scsi *vi) 882 - { 883 - int ret; 884 - 885 - ret = cpuhp_state_add_instance(virtioscsi_online, &vi->node); 886 - if (ret) 887 - return ret; 888 - 889 - ret = cpuhp_state_add_instance(CPUHP_VIRT_SCSI_DEAD, &vi->node_dead); 890 - if (ret) 891 - cpuhp_state_remove_instance(virtioscsi_online, &vi->node); 892 - return ret; 893 - } 894 - 895 - static void virtscsi_cpu_notif_remove(struct virtio_scsi *vi) 896 - { 897 - cpuhp_state_remove_instance_nocalls(virtioscsi_online, &vi->node); 898 - cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_SCSI_DEAD, 899 - &vi->node_dead); 900 - } 901 - 902 820 static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, 903 821 struct virtqueue *vq) 904 822 { ··· 834 900 835 901 static void virtscsi_remove_vqs(struct virtio_device *vdev) 836 902 { 837 - struct Scsi_Host *sh = virtio_scsi_host(vdev); 838 - 
struct virtio_scsi *vscsi = shost_priv(sh); 839 - 840 - virtscsi_set_affinity(vscsi, false); 841 - 842 903 /* Stop all the virtqueues. */ 843 904 vdev->config->reset(vdev); 844 - 845 905 vdev->config->del_vqs(vdev); 846 906 } 847 907 ··· 848 920 vq_callback_t **callbacks; 849 921 const char **names; 850 922 struct virtqueue **vqs; 923 + struct irq_affinity desc = { .pre_vectors = 2 }; 851 924 852 925 num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE; 853 926 vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL); ··· 870 941 } 871 942 872 943 /* Discover virtqueues and write information to configuration. */ 873 - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); 944 + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, 945 + &desc); 874 946 if (err) 875 947 goto out; 876 948 ··· 937 1007 if (err) 938 1008 goto virtscsi_init_failed; 939 1009 940 - err = virtscsi_cpu_notif_add(vscsi); 941 - if (err) 942 - goto scsi_add_host_failed; 943 - 944 1010 cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; 945 1011 shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); 946 1012 shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF; ··· 991 1065 virtscsi_cancel_event_work(vscsi); 992 1066 993 1067 scsi_remove_host(shost); 994 - 995 - virtscsi_cpu_notif_remove(vscsi); 996 - 997 1068 virtscsi_remove_vqs(vdev); 998 1069 scsi_host_put(shost); 999 1070 } ··· 998 1075 #ifdef CONFIG_PM_SLEEP 999 1076 static int virtscsi_freeze(struct virtio_device *vdev) 1000 1077 { 1001 - struct Scsi_Host *sh = virtio_scsi_host(vdev); 1002 - struct virtio_scsi *vscsi = shost_priv(sh); 1003 - 1004 - virtscsi_cpu_notif_remove(vscsi); 1005 1078 virtscsi_remove_vqs(vdev); 1006 1079 return 0; 1007 1080 } ··· 1012 1093 if (err) 1013 1094 return err; 1014 1095 1015 - err = virtscsi_cpu_notif_add(vscsi); 1016 - if (err) { 1017 - vdev->config->del_vqs(vdev); 1018 - return err; 1019 - } 1020 1096 virtio_device_ready(vdev); 1021 1097 
1022 1098 if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) ··· 1066 1152 pr_err("mempool_create() for virtscsi_cmd_pool failed\n"); 1067 1153 goto error; 1068 1154 } 1069 - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, 1070 - "scsi/virtio:online", 1071 - virtscsi_cpu_online, NULL); 1072 - if (ret < 0) 1073 - goto error; 1074 - virtioscsi_online = ret; 1075 - ret = cpuhp_setup_state_multi(CPUHP_VIRT_SCSI_DEAD, "scsi/virtio:dead", 1076 - NULL, virtscsi_cpu_online); 1077 - if (ret) 1078 - goto error; 1079 1155 ret = register_virtio_driver(&virtio_scsi_driver); 1080 1156 if (ret < 0) 1081 1157 goto error; ··· 1081 1177 kmem_cache_destroy(virtscsi_cmd_cache); 1082 1178 virtscsi_cmd_cache = NULL; 1083 1179 } 1084 - if (virtioscsi_online) 1085 - cpuhp_remove_multi_state(virtioscsi_online); 1086 - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); 1087 1180 return ret; 1088 1181 } 1089 1182 1090 1183 static void __exit fini(void) 1091 1184 { 1092 1185 unregister_virtio_driver(&virtio_scsi_driver); 1093 - cpuhp_remove_multi_state(virtioscsi_online); 1094 - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); 1095 1186 mempool_destroy(virtscsi_cmd_pool); 1096 1187 kmem_cache_destroy(virtscsi_cmd_cache); 1097 1188 }
+134 -43
drivers/vhost/vhost.c
··· 282 282 } 283 283 EXPORT_SYMBOL_GPL(vhost_poll_queue); 284 284 285 + static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) 286 + { 287 + int j; 288 + 289 + for (j = 0; j < VHOST_NUM_ADDRS; j++) 290 + vq->meta_iotlb[j] = NULL; 291 + } 292 + 293 + static void vhost_vq_meta_reset(struct vhost_dev *d) 294 + { 295 + int i; 296 + 297 + for (i = 0; i < d->nvqs; ++i) 298 + __vhost_vq_meta_reset(d->vqs[i]); 299 + } 300 + 285 301 static void vhost_vq_reset(struct vhost_dev *dev, 286 302 struct vhost_virtqueue *vq) 287 303 { ··· 328 312 vq->busyloop_timeout = 0; 329 313 vq->umem = NULL; 330 314 vq->iotlb = NULL; 315 + __vhost_vq_meta_reset(vq); 331 316 } 332 317 333 318 static int vhost_worker(void *data) ··· 708 691 return 1; 709 692 } 710 693 694 + static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, 695 + u64 addr, unsigned int size, 696 + int type) 697 + { 698 + const struct vhost_umem_node *node = vq->meta_iotlb[type]; 699 + 700 + if (!node) 701 + return NULL; 702 + 703 + return (void *)(uintptr_t)(node->userspace_addr + addr - node->start); 704 + } 705 + 711 706 /* Can we switch to this memory table? */ 712 707 /* Caller should have device mutex but not vq mutex */ 713 708 static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, ··· 762 733 * could be access through iotlb. So -EAGAIN should 763 734 * not happen in this case. 764 735 */ 765 - /* TODO: more fast path */ 766 736 struct iov_iter t; 737 + void __user *uaddr = vhost_vq_meta_fetch(vq, 738 + (u64)(uintptr_t)to, size, 739 + VHOST_ADDR_DESC); 740 + 741 + if (uaddr) 742 + return __copy_to_user(uaddr, from, size); 743 + 767 744 ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, 768 745 ARRAY_SIZE(vq->iotlb_iov), 769 746 VHOST_ACCESS_WO); ··· 797 762 * could be access through iotlb. So -EAGAIN should 798 763 * not happen in this case. 
799 764 */ 800 - /* TODO: more fast path */ 765 + void __user *uaddr = vhost_vq_meta_fetch(vq, 766 + (u64)(uintptr_t)from, size, 767 + VHOST_ADDR_DESC); 801 768 struct iov_iter f; 769 + 770 + if (uaddr) 771 + return __copy_from_user(to, uaddr, size); 772 + 802 773 ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, 803 774 ARRAY_SIZE(vq->iotlb_iov), 804 775 VHOST_ACCESS_RO); ··· 824 783 return ret; 825 784 } 826 785 827 - static void __user *__vhost_get_user(struct vhost_virtqueue *vq, 828 - void __user *addr, unsigned size) 786 + static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, 787 + void __user *addr, unsigned int size, 788 + int type) 829 789 { 830 790 int ret; 831 791 832 - /* This function should be called after iotlb 833 - * prefetch, which means we're sure that vq 834 - * could be access through iotlb. So -EAGAIN should 835 - * not happen in this case. 836 - */ 837 - /* TODO: more fast path */ 838 792 ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, 839 793 ARRAY_SIZE(vq->iotlb_iov), 840 794 VHOST_ACCESS_RO); ··· 850 814 return vq->iotlb_iov[0].iov_base; 851 815 } 852 816 853 - #define vhost_put_user(vq, x, ptr) \ 817 + /* This function should be called after iotlb 818 + * prefetch, which means we're sure that vq 819 + * could be access through iotlb. So -EAGAIN should 820 + * not happen in this case. 
821 + */ 822 + static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, 823 + void *addr, unsigned int size, 824 + int type) 825 + { 826 + void __user *uaddr = vhost_vq_meta_fetch(vq, 827 + (u64)(uintptr_t)addr, size, type); 828 + if (uaddr) 829 + return uaddr; 830 + 831 + return __vhost_get_user_slow(vq, addr, size, type); 832 + } 833 + 834 + #define vhost_put_user(vq, x, ptr) \ 854 835 ({ \ 855 836 int ret = -EFAULT; \ 856 837 if (!vq->iotlb) { \ 857 838 ret = __put_user(x, ptr); \ 858 839 } else { \ 859 840 __typeof__(ptr) to = \ 860 - (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ 841 + (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ 842 + sizeof(*ptr), VHOST_ADDR_USED); \ 861 843 if (to != NULL) \ 862 844 ret = __put_user(x, to); \ 863 845 else \ ··· 884 830 ret; \ 885 831 }) 886 832 887 - #define vhost_get_user(vq, x, ptr) \ 833 + #define vhost_get_user(vq, x, ptr, type) \ 888 834 ({ \ 889 835 int ret; \ 890 836 if (!vq->iotlb) { \ 891 837 ret = __get_user(x, ptr); \ 892 838 } else { \ 893 839 __typeof__(ptr) from = \ 894 - (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ 840 + (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ 841 + sizeof(*ptr), \ 842 + type); \ 895 843 if (from != NULL) \ 896 844 ret = __get_user(x, from); \ 897 845 else \ ··· 901 845 } \ 902 846 ret; \ 903 847 }) 848 + 849 + #define vhost_get_avail(vq, x, ptr) \ 850 + vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL) 851 + 852 + #define vhost_get_used(vq, x, ptr) \ 853 + vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) 904 854 905 855 static void vhost_dev_lock_vqs(struct vhost_dev *d) 906 856 { ··· 1013 951 ret = -EFAULT; 1014 952 break; 1015 953 } 954 + vhost_vq_meta_reset(dev); 1016 955 if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, 1017 956 msg->iova + msg->size - 1, 1018 957 msg->uaddr, msg->perm)) { ··· 1023 960 vhost_iotlb_notify_vq(dev, msg); 1024 961 break; 1025 962 case VHOST_IOTLB_INVALIDATE: 963 + vhost_vq_meta_reset(dev); 1026 964 
vhost_del_umem_range(dev->iotlb, msg->iova, 1027 965 msg->iova + msg->size - 1); 1028 966 break; ··· 1167 1103 sizeof *used + num * sizeof *used->ring + s); 1168 1104 } 1169 1105 1106 + static void vhost_vq_meta_update(struct vhost_virtqueue *vq, 1107 + const struct vhost_umem_node *node, 1108 + int type) 1109 + { 1110 + int access = (type == VHOST_ADDR_USED) ? 1111 + VHOST_ACCESS_WO : VHOST_ACCESS_RO; 1112 + 1113 + if (likely(node->perm & access)) 1114 + vq->meta_iotlb[type] = node; 1115 + } 1116 + 1170 1117 static int iotlb_access_ok(struct vhost_virtqueue *vq, 1171 - int access, u64 addr, u64 len) 1118 + int access, u64 addr, u64 len, int type) 1172 1119 { 1173 1120 const struct vhost_umem_node *node; 1174 1121 struct vhost_umem *umem = vq->iotlb; 1175 - u64 s = 0, size; 1122 + u64 s = 0, size, orig_addr = addr; 1123 + 1124 + if (vhost_vq_meta_fetch(vq, addr, len, type)) 1125 + return true; 1176 1126 1177 1127 while (len > s) { 1178 1128 node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, ··· 1203 1125 } 1204 1126 1205 1127 size = node->size - addr + node->start; 1128 + 1129 + if (orig_addr == addr && size >= len) 1130 + vhost_vq_meta_update(vq, node, type); 1131 + 1206 1132 s += size; 1207 1133 addr += size; 1208 1134 } ··· 1223 1141 return 1; 1224 1142 1225 1143 return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, 1226 - num * sizeof *vq->desc) && 1144 + num * sizeof(*vq->desc), VHOST_ADDR_DESC) && 1227 1145 iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, 1228 1146 sizeof *vq->avail + 1229 - num * sizeof *vq->avail->ring + s) && 1147 + num * sizeof(*vq->avail->ring) + s, 1148 + VHOST_ADDR_AVAIL) && 1230 1149 iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, 1231 1150 sizeof *vq->used + 1232 - num * sizeof *vq->used->ring + s); 1151 + num * sizeof(*vq->used->ring) + s, 1152 + VHOST_ADDR_USED); 1233 1153 } 1234 1154 EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); 1235 1155 ··· 1812 1728 r = -EFAULT; 1813 1729 goto 
err; 1814 1730 } 1815 - r = vhost_get_user(vq, last_used_idx, &vq->used->idx); 1731 + r = vhost_get_used(vq, last_used_idx, &vq->used->idx); 1816 1732 if (r) { 1817 1733 vq_err(vq, "Can't access used idx at %p\n", 1818 1734 &vq->used->idx); ··· 2014 1930 2015 1931 /* Check it isn't doing very strange things with descriptor numbers. */ 2016 1932 last_avail_idx = vq->last_avail_idx; 2017 - if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { 2018 - vq_err(vq, "Failed to access avail idx at %p\n", 2019 - &vq->avail->idx); 2020 - return -EFAULT; 1933 + 1934 + if (vq->avail_idx == vq->last_avail_idx) { 1935 + if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { 1936 + vq_err(vq, "Failed to access avail idx at %p\n", 1937 + &vq->avail->idx); 1938 + return -EFAULT; 1939 + } 1940 + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); 1941 + 1942 + if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { 1943 + vq_err(vq, "Guest moved used index from %u to %u", 1944 + last_avail_idx, vq->avail_idx); 1945 + return -EFAULT; 1946 + } 1947 + 1948 + /* If there's nothing new since last we looked, return 1949 + * invalid. 1950 + */ 1951 + if (vq->avail_idx == last_avail_idx) 1952 + return vq->num; 1953 + 1954 + /* Only get avail ring entries after they have been 1955 + * exposed by guest. 1956 + */ 1957 + smp_rmb(); 2021 1958 } 2022 - vq->avail_idx = vhost16_to_cpu(vq, avail_idx); 2023 - 2024 - if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { 2025 - vq_err(vq, "Guest moved used index from %u to %u", 2026 - last_avail_idx, vq->avail_idx); 2027 - return -EFAULT; 2028 - } 2029 - 2030 - /* If there's nothing new since last we looked, return invalid. */ 2031 - if (vq->avail_idx == last_avail_idx) 2032 - return vq->num; 2033 - 2034 - /* Only get avail ring entries after they have been exposed by guest. */ 2035 - smp_rmb(); 2036 1959 2037 1960 /* Grab the next descriptor number they're advertising, and increment 2038 1961 * the index we've seen. 
*/ 2039 - if (unlikely(vhost_get_user(vq, ring_head, 1962 + if (unlikely(vhost_get_avail(vq, ring_head, 2040 1963 &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { 2041 1964 vq_err(vq, "Failed to read head: idx %d address %p\n", 2042 1965 last_avail_idx, ··· 2259 2168 * with the barrier that the Guest executes when enabling 2260 2169 * interrupts. */ 2261 2170 smp_mb(); 2262 - if (vhost_get_user(vq, flags, &vq->avail->flags)) { 2171 + if (vhost_get_avail(vq, flags, &vq->avail->flags)) { 2263 2172 vq_err(vq, "Failed to get flags"); 2264 2173 return true; 2265 2174 } ··· 2286 2195 * interrupts. */ 2287 2196 smp_mb(); 2288 2197 2289 - if (vhost_get_user(vq, event, vhost_used_event(vq))) { 2198 + if (vhost_get_avail(vq, event, vhost_used_event(vq))) { 2290 2199 vq_err(vq, "Failed to get used event idx"); 2291 2200 return true; 2292 2201 } ··· 2333 2242 if (vq->avail_idx != vq->last_avail_idx) 2334 2243 return false; 2335 2244 2336 - r = vhost_get_user(vq, avail_idx, &vq->avail->idx); 2245 + r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); 2337 2246 if (unlikely(r)) 2338 2247 return false; 2339 2248 vq->avail_idx = vhost16_to_cpu(vq, avail_idx); ··· 2369 2278 /* They could have slipped one in as we were doing that: make 2370 2279 * sure it's written, then check again. */ 2371 2280 smp_mb(); 2372 - r = vhost_get_user(vq, avail_idx, &vq->avail->idx); 2281 + r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); 2373 2282 if (r) { 2374 2283 vq_err(vq, "Failed to check avail idx at %p: %d\n", 2375 2284 &vq->avail->idx, r);
+8
drivers/vhost/vhost.h
··· 76 76 int numem; 77 77 }; 78 78 79 + enum vhost_uaddr_type { 80 + VHOST_ADDR_DESC = 0, 81 + VHOST_ADDR_AVAIL = 1, 82 + VHOST_ADDR_USED = 2, 83 + VHOST_NUM_ADDRS = 3, 84 + }; 85 + 79 86 /* The virtqueue structure describes a queue attached to a device. */ 80 87 struct vhost_virtqueue { 81 88 struct vhost_dev *dev; ··· 93 86 struct vring_desc __user *desc; 94 87 struct vring_avail __user *avail; 95 88 struct vring_used __user *used; 89 + const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; 96 90 struct file *kick; 97 91 struct file *call; 98 92 struct file *error;
+2 -1
drivers/virtio/virtio_balloon.c
··· 413 413 * optionally stat. 414 414 */ 415 415 nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; 416 - err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names); 416 + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, 417 + NULL); 417 418 if (err) 418 419 return err; 419 420
+2 -1
drivers/virtio/virtio_input.c
··· 173 173 static const char * const names[] = { "events", "status" }; 174 174 int err; 175 175 176 - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); 176 + err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names, 177 + NULL); 177 178 if (err) 178 179 return err; 179 180 vi->evt = vqs[0];
+3 -2
drivers/virtio/virtio_mmio.c
··· 70 70 #include <linux/spinlock.h> 71 71 #include <linux/virtio.h> 72 72 #include <linux/virtio_config.h> 73 - #include <linux/virtio_mmio.h> 73 + #include <uapi/linux/virtio_mmio.h> 74 74 #include <linux/virtio_ring.h> 75 75 76 76 ··· 446 446 static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, 447 447 struct virtqueue *vqs[], 448 448 vq_callback_t *callbacks[], 449 - const char * const names[]) 449 + const char * const names[], 450 + struct irq_affinity *desc) 450 451 { 451 452 struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); 452 453 unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
+160 -220
drivers/virtio/virtio_pci_common.c
··· 33 33 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 34 34 int i; 35 35 36 - if (vp_dev->intx_enabled) 37 - synchronize_irq(vp_dev->pci_dev->irq); 38 - 39 - for (i = 0; i < vp_dev->msix_vectors; ++i) 36 + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0)); 37 + for (i = 1; i < vp_dev->msix_vectors; i++) 40 38 synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); 41 39 } 42 40 ··· 60 62 static irqreturn_t vp_vring_interrupt(int irq, void *opaque) 61 63 { 62 64 struct virtio_pci_device *vp_dev = opaque; 63 - struct virtio_pci_vq_info *info; 64 65 irqreturn_t ret = IRQ_NONE; 65 - unsigned long flags; 66 + struct virtqueue *vq; 66 67 67 - spin_lock_irqsave(&vp_dev->lock, flags); 68 - list_for_each_entry(info, &vp_dev->virtqueues, node) { 69 - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) 68 + list_for_each_entry(vq, &vp_dev->vdev.vqs, list) { 69 + if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED) 70 70 ret = IRQ_HANDLED; 71 71 } 72 - spin_unlock_irqrestore(&vp_dev->lock, flags); 73 72 74 73 return ret; 75 74 } ··· 97 102 return vp_vring_interrupt(irq, opaque); 98 103 } 99 104 100 - static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, 101 - bool per_vq_vectors) 105 + static void vp_remove_vqs(struct virtio_device *vdev) 102 106 { 103 107 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 104 - const char *name = dev_name(&vp_dev->vdev.dev); 105 - unsigned i, v; 106 - int err = -ENOMEM; 108 + struct virtqueue *vq, *n; 107 109 108 - vp_dev->msix_vectors = nvectors; 110 + list_for_each_entry_safe(vq, n, &vdev->vqs, list) { 111 + if (vp_dev->msix_vector_map) { 112 + int v = vp_dev->msix_vector_map[vq->index]; 109 113 110 - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, 111 - GFP_KERNEL); 112 - if (!vp_dev->msix_names) 113 - goto error; 114 - vp_dev->msix_affinity_masks 115 - = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, 116 - GFP_KERNEL); 117 - if (!vp_dev->msix_affinity_masks) 118 - 
goto error; 119 - for (i = 0; i < nvectors; ++i) 120 - if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], 121 - GFP_KERNEL)) 122 - goto error; 123 - 124 - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, 125 - PCI_IRQ_MSIX); 126 - if (err < 0) 127 - goto error; 128 - vp_dev->msix_enabled = 1; 129 - 130 - /* Set the vector used for configuration */ 131 - v = vp_dev->msix_used_vectors; 132 - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, 133 - "%s-config", name); 134 - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), 135 - vp_config_changed, 0, vp_dev->msix_names[v], 136 - vp_dev); 137 - if (err) 138 - goto error; 139 - ++vp_dev->msix_used_vectors; 140 - 141 - v = vp_dev->config_vector(vp_dev, v); 142 - /* Verify we had enough resources to assign the vector */ 143 - if (v == VIRTIO_MSI_NO_VECTOR) { 144 - err = -EBUSY; 145 - goto error; 114 + if (v != VIRTIO_MSI_NO_VECTOR) 115 + free_irq(pci_irq_vector(vp_dev->pci_dev, v), 116 + vq); 117 + } 118 + vp_dev->del_vq(vq); 146 119 } 147 - 148 - if (!per_vq_vectors) { 149 - /* Shared vector for all VQs */ 150 - v = vp_dev->msix_used_vectors; 151 - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, 152 - "%s-virtqueues", name); 153 - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), 154 - vp_vring_interrupt, 0, vp_dev->msix_names[v], 155 - vp_dev); 156 - if (err) 157 - goto error; 158 - ++vp_dev->msix_used_vectors; 159 - } 160 - return 0; 161 - error: 162 - return err; 163 - } 164 - 165 - static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, 166 - void (*callback)(struct virtqueue *vq), 167 - const char *name, 168 - u16 msix_vec) 169 - { 170 - struct virtio_pci_device *vp_dev = to_vp_device(vdev); 171 - struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); 172 - struct virtqueue *vq; 173 - unsigned long flags; 174 - 175 - /* fill out our structure that represents an active queue */ 176 - if (!info) 177 - return ERR_PTR(-ENOMEM); 178 
- 179 - vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec); 180 - if (IS_ERR(vq)) 181 - goto out_info; 182 - 183 - info->vq = vq; 184 - if (callback) { 185 - spin_lock_irqsave(&vp_dev->lock, flags); 186 - list_add(&info->node, &vp_dev->virtqueues); 187 - spin_unlock_irqrestore(&vp_dev->lock, flags); 188 - } else { 189 - INIT_LIST_HEAD(&info->node); 190 - } 191 - 192 - vp_dev->vqs[index] = info; 193 - return vq; 194 - 195 - out_info: 196 - kfree(info); 197 - return vq; 198 - } 199 - 200 - static void vp_del_vq(struct virtqueue *vq) 201 - { 202 - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 203 - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; 204 - unsigned long flags; 205 - 206 - spin_lock_irqsave(&vp_dev->lock, flags); 207 - list_del(&info->node); 208 - spin_unlock_irqrestore(&vp_dev->lock, flags); 209 - 210 - vp_dev->del_vq(info); 211 - kfree(info); 212 120 } 213 121 214 122 /* the config->del_vqs() implementation */ 215 123 void vp_del_vqs(struct virtio_device *vdev) 216 124 { 217 125 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 218 - struct virtqueue *vq, *n; 219 126 int i; 220 127 221 - list_for_each_entry_safe(vq, n, &vdev->vqs, list) { 222 - if (vp_dev->per_vq_vectors) { 223 - int v = vp_dev->vqs[vq->index]->msix_vector; 128 + if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs))) 129 + return; 224 130 225 - if (v != VIRTIO_MSI_NO_VECTOR) 226 - free_irq(pci_irq_vector(vp_dev->pci_dev, v), 227 - vq); 228 - } 229 - vp_del_vq(vq); 230 - } 231 - vp_dev->per_vq_vectors = false; 131 + vp_remove_vqs(vdev); 232 132 233 - if (vp_dev->intx_enabled) { 234 - free_irq(vp_dev->pci_dev->irq, vp_dev); 235 - vp_dev->intx_enabled = 0; 236 - } 237 - 238 - for (i = 0; i < vp_dev->msix_used_vectors; ++i) 239 - free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); 240 - 241 - for (i = 0; i < vp_dev->msix_vectors; i++) 242 - if (vp_dev->msix_affinity_masks[i]) 133 + if (vp_dev->pci_dev->msix_enabled) { 134 + for (i = 0; i < 
vp_dev->msix_vectors; i++) 243 135 free_cpumask_var(vp_dev->msix_affinity_masks[i]); 244 136 245 - if (vp_dev->msix_enabled) { 246 137 /* Disable the vector used for configuration */ 247 138 vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); 248 139 249 - pci_free_irq_vectors(vp_dev->pci_dev); 250 - vp_dev->msix_enabled = 0; 140 + kfree(vp_dev->msix_affinity_masks); 141 + kfree(vp_dev->msix_names); 142 + kfree(vp_dev->msix_vector_map); 251 143 } 252 144 253 - vp_dev->msix_vectors = 0; 254 - vp_dev->msix_used_vectors = 0; 255 - kfree(vp_dev->msix_names); 256 - vp_dev->msix_names = NULL; 257 - kfree(vp_dev->msix_affinity_masks); 258 - vp_dev->msix_affinity_masks = NULL; 259 - kfree(vp_dev->vqs); 260 - vp_dev->vqs = NULL; 145 + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); 146 + pci_free_irq_vectors(vp_dev->pci_dev); 261 147 } 262 148 263 149 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, 264 - struct virtqueue *vqs[], 265 - vq_callback_t *callbacks[], 266 - const char * const names[], 267 - bool per_vq_vectors) 150 + struct virtqueue *vqs[], vq_callback_t *callbacks[], 151 + const char * const names[], struct irq_affinity *desc) 268 152 { 269 153 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 154 + const char *name = dev_name(&vp_dev->vdev.dev); 155 + int i, err = -ENOMEM, allocated_vectors, nvectors; 156 + unsigned flags = PCI_IRQ_MSIX; 157 + bool shared = false; 270 158 u16 msix_vec; 271 - int i, err, nvectors, allocated_vectors; 272 159 273 - vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); 274 - if (!vp_dev->vqs) 275 - return -ENOMEM; 276 - 277 - if (per_vq_vectors) { 278 - /* Best option: one for change interrupt, one per vq. */ 279 - nvectors = 1; 280 - for (i = 0; i < nvqs; ++i) 281 - if (callbacks[i]) 282 - ++nvectors; 283 - } else { 284 - /* Second best: one for change, shared for all vqs. 
*/ 285 - nvectors = 2; 160 + if (desc) { 161 + flags |= PCI_IRQ_AFFINITY; 162 + desc->pre_vectors++; /* virtio config vector */ 286 163 } 287 164 288 - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); 289 - if (err) 290 - goto error_find; 165 + nvectors = 1; 166 + for (i = 0; i < nvqs; i++) 167 + if (callbacks[i]) 168 + nvectors++; 291 169 292 - vp_dev->per_vq_vectors = per_vq_vectors; 293 - allocated_vectors = vp_dev->msix_used_vectors; 170 + /* Try one vector per queue first. */ 171 + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, 172 + nvectors, flags, desc); 173 + if (err < 0) { 174 + /* Fallback to one vector for config, one shared for queues. */ 175 + shared = true; 176 + err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, 177 + PCI_IRQ_MSIX); 178 + if (err < 0) 179 + return err; 180 + } 181 + if (err < 0) 182 + return err; 183 + 184 + vp_dev->msix_vectors = nvectors; 185 + vp_dev->msix_names = kmalloc_array(nvectors, 186 + sizeof(*vp_dev->msix_names), GFP_KERNEL); 187 + if (!vp_dev->msix_names) 188 + goto out_free_irq_vectors; 189 + 190 + vp_dev->msix_affinity_masks = kcalloc(nvectors, 191 + sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); 192 + if (!vp_dev->msix_affinity_masks) 193 + goto out_free_msix_names; 194 + 195 + for (i = 0; i < nvectors; ++i) { 196 + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], 197 + GFP_KERNEL)) 198 + goto out_free_msix_affinity_masks; 199 + } 200 + 201 + /* Set the vector used for configuration */ 202 + snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names), 203 + "%s-config", name); 204 + err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, 205 + 0, vp_dev->msix_names[0], vp_dev); 206 + if (err) 207 + goto out_free_msix_affinity_masks; 208 + 209 + /* Verify we had enough resources to assign the vector */ 210 + if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { 211 + err = -EBUSY; 212 + goto out_free_config_irq; 213 + } 214 + 215 + 
vp_dev->msix_vector_map = kmalloc_array(nvqs, 216 + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); 217 + if (!vp_dev->msix_vector_map) 218 + goto out_disable_config_irq; 219 + 220 + allocated_vectors = 1; /* vector 0 is the config interrupt */ 294 221 for (i = 0; i < nvqs; ++i) { 295 222 if (!names[i]) { 296 223 vqs[i] = NULL; 297 224 continue; 298 225 } 299 226 300 - if (!callbacks[i]) 301 - msix_vec = VIRTIO_MSI_NO_VECTOR; 302 - else if (vp_dev->per_vq_vectors) 303 - msix_vec = allocated_vectors++; 227 + if (callbacks[i]) 228 + msix_vec = allocated_vectors; 304 229 else 305 - msix_vec = VP_MSIX_VQ_VECTOR; 306 - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec); 230 + msix_vec = VIRTIO_MSI_NO_VECTOR; 231 + 232 + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], 233 + msix_vec); 307 234 if (IS_ERR(vqs[i])) { 308 235 err = PTR_ERR(vqs[i]); 309 - goto error_find; 236 + goto out_remove_vqs; 310 237 } 311 238 312 - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) 239 + if (msix_vec == VIRTIO_MSI_NO_VECTOR) { 240 + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; 313 241 continue; 242 + } 314 243 315 - /* allocate per-vq irq if available and necessary */ 316 - snprintf(vp_dev->msix_names[msix_vec], 317 - sizeof *vp_dev->msix_names, 318 - "%s-%s", 244 + snprintf(vp_dev->msix_names[i + 1], 245 + sizeof(*vp_dev->msix_names), "%s-%s", 319 246 dev_name(&vp_dev->vdev.dev), names[i]); 320 247 err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), 321 - vring_interrupt, 0, 322 - vp_dev->msix_names[msix_vec], 323 - vqs[i]); 324 - if (err) 325 - goto error_find; 248 + vring_interrupt, IRQF_SHARED, 249 + vp_dev->msix_names[i + 1], vqs[i]); 250 + if (err) { 251 + /* don't free this irq on error */ 252 + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; 253 + goto out_remove_vqs; 254 + } 255 + vp_dev->msix_vector_map[i] = msix_vec; 256 + 257 + /* 258 + * Use a different vector for each queue if they are available, 259 + * else share 
the same vector for all VQs. 260 + */ 261 + if (!shared) 262 + allocated_vectors++; 326 263 } 264 + 327 265 return 0; 328 266 329 - error_find: 330 - vp_del_vqs(vdev); 267 + out_remove_vqs: 268 + vp_remove_vqs(vdev); 269 + kfree(vp_dev->msix_vector_map); 270 + out_disable_config_irq: 271 + vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); 272 + out_free_config_irq: 273 + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); 274 + out_free_msix_affinity_masks: 275 + for (i = 0; i < nvectors; i++) { 276 + if (vp_dev->msix_affinity_masks[i]) 277 + free_cpumask_var(vp_dev->msix_affinity_masks[i]); 278 + } 279 + kfree(vp_dev->msix_affinity_masks); 280 + out_free_msix_names: 281 + kfree(vp_dev->msix_names); 282 + out_free_irq_vectors: 283 + pci_free_irq_vectors(vp_dev->pci_dev); 331 284 return err; 332 285 } 333 286 ··· 286 343 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 287 344 int i, err; 288 345 289 - vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); 290 - if (!vp_dev->vqs) 291 - return -ENOMEM; 292 - 293 346 err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, 294 347 dev_name(&vdev->dev), vp_dev); 295 348 if (err) 296 - goto out_del_vqs; 349 + return err; 297 350 298 - vp_dev->intx_enabled = 1; 299 - vp_dev->per_vq_vectors = false; 300 351 for (i = 0; i < nvqs; ++i) { 301 352 if (!names[i]) { 302 353 vqs[i] = NULL; 303 354 continue; 304 355 } 305 - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], 356 + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], 306 357 VIRTIO_MSI_NO_VECTOR); 307 358 if (IS_ERR(vqs[i])) { 308 359 err = PTR_ERR(vqs[i]); 309 - goto out_del_vqs; 360 + goto out_remove_vqs; 310 361 } 311 362 } 312 363 313 364 return 0; 314 - out_del_vqs: 315 - vp_del_vqs(vdev); 365 + 366 + out_remove_vqs: 367 + vp_remove_vqs(vdev); 368 + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); 316 369 return err; 317 370 } 318 371 319 372 /* the config->find_vqs() implementation */ 320 373 int vp_find_vqs(struct 
virtio_device *vdev, unsigned nvqs, 321 - struct virtqueue *vqs[], 322 - vq_callback_t *callbacks[], 323 - const char * const names[]) 374 + struct virtqueue *vqs[], vq_callback_t *callbacks[], 375 + const char * const names[], struct irq_affinity *desc) 324 376 { 325 377 int err; 326 378 327 - /* Try MSI-X with one vector per queue. */ 328 - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true); 379 + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc); 329 380 if (!err) 330 381 return 0; 331 - /* Fallback: MSI-X with one vector for config, one shared for queues. */ 332 - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false); 333 - if (!err) 334 - return 0; 335 - /* Finally fall back to regular interrupts. */ 336 382 return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); 337 383 } 338 384 ··· 341 409 { 342 410 struct virtio_device *vdev = vq->vdev; 343 411 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 344 - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; 345 - struct cpumask *mask; 346 - unsigned int irq; 347 412 348 413 if (!vq->callback) 349 414 return -EINVAL; 350 415 351 - if (vp_dev->msix_enabled) { 352 - mask = vp_dev->msix_affinity_masks[info->msix_vector]; 353 - irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); 416 + if (vp_dev->pci_dev->msix_enabled) { 417 + int vec = vp_dev->msix_vector_map[vq->index]; 418 + struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; 419 + unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); 420 + 354 421 if (cpu == -1) 355 422 irq_set_affinity_hint(irq, NULL); 356 423 else { ··· 359 428 } 360 429 } 361 430 return 0; 431 + } 432 + 433 + const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) 434 + { 435 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 436 + unsigned int *map = vp_dev->msix_vector_map; 437 + 438 + if (!map || map[index] == VIRTIO_MSI_NO_VECTOR) 439 + return NULL; 440 + 441 + return 
pci_irq_get_affinity(vp_dev->pci_dev, map[index]); 362 442 } 363 443 364 444 #ifdef CONFIG_PM_SLEEP ··· 440 498 vp_dev->vdev.dev.parent = &pci_dev->dev; 441 499 vp_dev->vdev.dev.release = virtio_pci_release_dev; 442 500 vp_dev->pci_dev = pci_dev; 443 - INIT_LIST_HEAD(&vp_dev->virtqueues); 444 - spin_lock_init(&vp_dev->lock); 445 501 446 502 /* enable the device */ 447 503 rc = pci_enable_device(pci_dev);
+9 -41
drivers/virtio/virtio_pci_common.h
··· 31 31 #include <linux/highmem.h> 32 32 #include <linux/spinlock.h> 33 33 34 - struct virtio_pci_vq_info { 35 - /* the actual virtqueue */ 36 - struct virtqueue *vq; 37 - 38 - /* the list node for the virtqueues list */ 39 - struct list_head node; 40 - 41 - /* MSI-X vector (or none) */ 42 - unsigned msix_vector; 43 - }; 44 - 45 34 /* Our device structure */ 46 35 struct virtio_pci_device { 47 36 struct virtio_device vdev; ··· 64 75 /* the IO mapping for the PCI config space */ 65 76 void __iomem *ioaddr; 66 77 67 - /* a list of queues so we can dispatch IRQs */ 68 - spinlock_t lock; 69 - struct list_head virtqueues; 70 - 71 - /* array of all queues for house-keeping */ 72 - struct virtio_pci_vq_info **vqs; 73 - 74 - /* MSI-X support */ 75 - int msix_enabled; 76 - int intx_enabled; 77 78 cpumask_var_t *msix_affinity_masks; 78 79 /* Name strings for interrupts. This size should be enough, 79 80 * and I'm too lazy to allocate each name separately. */ 80 81 char (*msix_names)[256]; 81 - /* Number of available vectors */ 82 - unsigned msix_vectors; 83 - /* Vectors allocated, excluding per-vq vectors if any */ 84 - unsigned msix_used_vectors; 85 - 86 - /* Whether we have vector per vq */ 87 - bool per_vq_vectors; 82 + /* Total Number of MSI-X vectors (including per-VQ ones). */ 83 + int msix_vectors; 84 + /* Map of per-VQ MSI-X vectors, may be NULL */ 85 + unsigned *msix_vector_map; 88 86 89 87 struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, 90 - struct virtio_pci_vq_info *info, 91 88 unsigned idx, 92 89 void (*callback)(struct virtqueue *vq), 93 90 const char *name, 94 91 u16 msix_vec); 95 - void (*del_vq)(struct virtio_pci_vq_info *info); 92 + void (*del_vq)(struct virtqueue *vq); 96 93 97 94 u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); 98 - }; 99 - 100 - /* Constants for MSI-X */ 101 - /* Use first vector for configuration changes, second and the rest for 102 - * virtqueues Thus, we need at least 2 vectors for MSI. 
*/ 103 - enum { 104 - VP_MSIX_CONFIG_VECTOR = 0, 105 - VP_MSIX_VQ_VECTOR = 1, 106 95 }; 107 96 108 97 /* Convert a generic virtio device to our structure */ ··· 97 130 void vp_del_vqs(struct virtio_device *vdev); 98 131 /* the config->find_vqs() implementation */ 99 132 int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, 100 - struct virtqueue *vqs[], 101 - vq_callback_t *callbacks[], 102 - const char * const names[]); 133 + struct virtqueue *vqs[], vq_callback_t *callbacks[], 134 + const char * const names[], struct irq_affinity *desc); 103 135 const char *vp_bus_name(struct virtio_device *vdev); 104 136 105 137 /* Setup the affinity for a virtqueue: ··· 107 141 * - ignore the affinity request if we're using INTX 108 142 */ 109 143 int vp_set_vq_affinity(struct virtqueue *vq, int cpu); 144 + 145 + const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index); 110 146 111 147 #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) 112 148 int virtio_pci_legacy_probe(struct virtio_pci_device *);
+3 -6
drivers/virtio/virtio_pci_legacy.c
··· 112 112 } 113 113 114 114 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, 115 - struct virtio_pci_vq_info *info, 116 115 unsigned index, 117 116 void (*callback)(struct virtqueue *vq), 118 117 const char *name, ··· 128 129 num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); 129 130 if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) 130 131 return ERR_PTR(-ENOENT); 131 - 132 - info->msix_vector = msix_vec; 133 132 134 133 /* create the vring */ 135 134 vq = vring_create_virtqueue(index, num, ··· 159 162 return ERR_PTR(err); 160 163 } 161 164 162 - static void del_vq(struct virtio_pci_vq_info *info) 165 + static void del_vq(struct virtqueue *vq) 163 166 { 164 - struct virtqueue *vq = info->vq; 165 167 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 166 168 167 169 iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 168 170 169 - if (vp_dev->msix_enabled) { 171 + if (vp_dev->pci_dev->msix_enabled) { 170 172 iowrite16(VIRTIO_MSI_NO_VECTOR, 171 173 vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); 172 174 /* Flush the write out to device */ ··· 190 194 .finalize_features = vp_finalize_features, 191 195 .bus_name = vp_bus_name, 192 196 .set_vq_affinity = vp_set_vq_affinity, 197 + .get_vq_affinity = vp_get_vq_affinity, 193 198 }; 194 199 195 200 /* the PCI probing function */
+7 -10
drivers/virtio/virtio_pci_modern.c
··· 293 293 } 294 294 295 295 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, 296 - struct virtio_pci_vq_info *info, 297 296 unsigned index, 298 297 void (*callback)(struct virtqueue *vq), 299 298 const char *name, ··· 321 322 322 323 /* get offset of notification word for this vq */ 323 324 off = vp_ioread16(&cfg->queue_notify_off); 324 - 325 - info->msix_vector = msix_vec; 326 325 327 326 /* create the vring */ 328 327 vq = vring_create_virtqueue(index, num, ··· 384 387 } 385 388 386 389 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, 387 - struct virtqueue *vqs[], 388 - vq_callback_t *callbacks[], 389 - const char * const names[]) 390 + struct virtqueue *vqs[], vq_callback_t *callbacks[], 391 + const char * const names[], struct irq_affinity *desc) 390 392 { 391 393 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 392 394 struct virtqueue *vq; 393 - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names); 395 + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc); 394 396 395 397 if (rc) 396 398 return rc; ··· 405 409 return 0; 406 410 } 407 411 408 - static void del_vq(struct virtio_pci_vq_info *info) 412 + static void del_vq(struct virtqueue *vq) 409 413 { 410 - struct virtqueue *vq = info->vq; 411 414 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 412 415 413 416 vp_iowrite16(vq->index, &vp_dev->common->queue_select); 414 417 415 - if (vp_dev->msix_enabled) { 418 + if (vp_dev->pci_dev->msix_enabled) { 416 419 vp_iowrite16(VIRTIO_MSI_NO_VECTOR, 417 420 &vp_dev->common->queue_msix_vector); 418 421 /* Flush the write out to device */ ··· 437 442 .finalize_features = vp_finalize_features, 438 443 .bus_name = vp_bus_name, 439 444 .set_vq_affinity = vp_set_vq_affinity, 445 + .get_vq_affinity = vp_get_vq_affinity, 440 446 }; 441 447 442 448 static const struct virtio_config_ops virtio_pci_config_ops = { ··· 453 457 .finalize_features = vp_finalize_features, 454 458 .bus_name = vp_bus_name, 455 
459 .set_vq_affinity = vp_set_vq_affinity, 460 + .get_vq_affinity = vp_get_vq_affinity, 456 461 }; 457 462 458 463 /**
+10
include/linux/blk-mq-virtio.h
··· 1 + #ifndef _LINUX_BLK_MQ_VIRTIO_H 2 + #define _LINUX_BLK_MQ_VIRTIO_H 3 + 4 + struct blk_mq_tag_set; 5 + struct virtio_device; 6 + 7 + int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, 8 + struct virtio_device *vdev, int first_vec); 9 + 10 + #endif /* _LINUX_BLK_MQ_VIRTIO_H */
-1
include/linux/cpuhotplug.h
··· 26 26 CPUHP_ARM_OMAP_WAKE_DEAD, 27 27 CPUHP_IRQ_POLL_DEAD, 28 28 CPUHP_BLOCK_SOFTIRQ_DEAD, 29 - CPUHP_VIRT_SCSI_DEAD, 30 29 CPUHP_ACPI_CPUDRV_DEAD, 31 30 CPUHP_S390_PFAULT_DEAD, 32 31 CPUHP_BLK_MQ_DEAD,
+8 -4
include/linux/virtio_config.h
··· 7 7 #include <linux/virtio_byteorder.h> 8 8 #include <uapi/linux/virtio_config.h> 9 9 10 + struct irq_affinity; 11 + 10 12 /** 11 13 * virtio_config_ops - operations for configuring a virtio device 12 14 * @get: read the value of a configuration field ··· 58 56 * This returns a pointer to the bus name a la pci_name from which 59 57 * the caller can then copy. 60 58 * @set_vq_affinity: set the affinity for a virtqueue. 59 + * @get_vq_affinity: get the affinity for a virtqueue (optional). 61 60 */ 62 61 typedef void vq_callback_t(struct virtqueue *); 63 62 struct virtio_config_ops { ··· 71 68 void (*set_status)(struct virtio_device *vdev, u8 status); 72 69 void (*reset)(struct virtio_device *vdev); 73 70 int (*find_vqs)(struct virtio_device *, unsigned nvqs, 74 - struct virtqueue *vqs[], 75 - vq_callback_t *callbacks[], 76 - const char * const names[]); 71 + struct virtqueue *vqs[], vq_callback_t *callbacks[], 72 + const char * const names[], struct irq_affinity *desc); 77 73 void (*del_vqs)(struct virtio_device *); 78 74 u64 (*get_features)(struct virtio_device *vdev); 79 75 int (*finalize_features)(struct virtio_device *vdev); 80 76 const char *(*bus_name)(struct virtio_device *vdev); 81 77 int (*set_vq_affinity)(struct virtqueue *vq, int cpu); 78 + const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, 79 + int index); 82 80 }; 83 81 84 82 /* If driver didn't advertise the feature, it will never appear. */ ··· 173 169 vq_callback_t *callbacks[] = { c }; 174 170 const char *names[] = { n }; 175 171 struct virtqueue *vq; 176 - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); 172 + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); 177 173 if (err < 0) 178 174 return ERR_PTR(err); 179 175 return vq;
include/linux/virtio_mmio.h include/uapi/linux/virtio_mmio.h
+1
include/uapi/linux/Kbuild
··· 466 466 header-y += virtio_gpu.h 467 467 header-y += virtio_ids.h 468 468 header-y += virtio_input.h 469 + header-y += virtio_mmio.h 469 470 header-y += virtio_net.h 470 471 header-y += virtio_pci.h 471 472 header-y += virtio_ring.h
+1 -1
include/uapi/linux/virtio_pci.h
··· 79 79 * configuration space */ 80 80 #define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) 81 81 /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ 82 - #define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) 82 + #define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled) 83 83 84 84 /* Virtio ABI version, this must match exactly */ 85 85 #define VIRTIO_PCI_ABI_VERSION 0
+2 -1
net/vmw_vsock/virtio_transport.c
··· 532 532 vsock->vdev = vdev; 533 533 534 534 ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, 535 - vsock->vqs, callbacks, names); 535 + vsock->vqs, callbacks, names, 536 + NULL); 536 537 if (ret < 0) 537 538 goto out; 538 539