Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vhost/vsock: add IOTLB API support

This patch enables IOTLB API support for vhost-vsock devices,
allowing userspace to emulate an IOMMU for the guest.

These changes follow the approach used by vhost-net. In detail, this patch:
- exposes the VIRTIO_F_ACCESS_PLATFORM feature and initializes the IOTLB
  device if the feature is acked
- implements the VHOST_GET_BACKEND_FEATURES and
  VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch the vq
  metadata addresses in the IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
  chardev; they are used by userspace to exchange IOTLB messages

This patch was tested by specifying "intel_iommu=strict" on the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
$ qemu -M q35,accel=kvm,kernel-irqchip=split \
-drive file=fedora.qcow2,format=qcow2,if=virtio \
-device intel-iommu,intremap=on,device-iotlb=on \
-device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on

[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>

Authored by Stefano Garzarella and committed by Michael S. Tsirkin
e13a6915 418eddef

+65 -3
+65 -3
drivers/vhost/vsock.c
··· 30 30 #define VHOST_VSOCK_PKT_WEIGHT 256 31 31 32 32 enum { 33 - VHOST_VSOCK_FEATURES = VHOST_FEATURES, 33 + VHOST_VSOCK_FEATURES = VHOST_FEATURES | 34 + (1ULL << VIRTIO_F_ACCESS_PLATFORM) 35 + }; 36 + 37 + enum { 38 + VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) 34 39 }; 35 40 36 41 /* Used to track all the vhost_vsock instances on the system. */ ··· 97 92 mutex_lock(&vq->mutex); 98 93 99 94 if (!vhost_vq_get_backend(vq)) 95 + goto out; 96 + 97 + if (!vq_meta_prefetch(vq)) 100 98 goto out; 101 99 102 100 /* Avoid further vmexits, we're already processing the virtqueue */ ··· 457 449 if (!vhost_vq_get_backend(vq)) 458 450 goto out; 459 451 452 + if (!vq_meta_prefetch(vq)) 453 + goto out; 454 + 460 455 vhost_disable_notify(&vsock->dev, vq); 461 456 do { 462 457 u32 len; ··· 777 766 mutex_lock(&vsock->dev.mutex); 778 767 if ((features & (1 << VHOST_F_LOG_ALL)) && 779 768 !vhost_log_access_ok(&vsock->dev)) { 780 - mutex_unlock(&vsock->dev.mutex); 781 - return -EFAULT; 769 + goto err; 770 + } 771 + 772 + if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { 773 + if (vhost_init_device_iotlb(&vsock->dev, true)) 774 + goto err; 782 775 } 783 776 784 777 for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { ··· 793 778 } 794 779 mutex_unlock(&vsock->dev.mutex); 795 780 return 0; 781 + 782 + err: 783 + mutex_unlock(&vsock->dev.mutex); 784 + return -EFAULT; 796 785 } 797 786 798 787 static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, ··· 830 811 if (copy_from_user(&features, argp, sizeof(features))) 831 812 return -EFAULT; 832 813 return vhost_vsock_set_features(vsock, features); 814 + case VHOST_GET_BACKEND_FEATURES: 815 + features = VHOST_VSOCK_BACKEND_FEATURES; 816 + if (copy_to_user(argp, &features, sizeof(features))) 817 + return -EFAULT; 818 + return 0; 819 + case VHOST_SET_BACKEND_FEATURES: 820 + if (copy_from_user(&features, argp, sizeof(features))) 821 + return -EFAULT; 822 + if (features & ~VHOST_VSOCK_BACKEND_FEATURES) 823 
+ return -EOPNOTSUPP; 824 + vhost_set_backend_features(&vsock->dev, features); 825 + return 0; 833 826 default: 834 827 mutex_lock(&vsock->dev.mutex); 835 828 r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); ··· 854 823 } 855 824 } 856 825 826 + static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) 827 + { 828 + struct file *file = iocb->ki_filp; 829 + struct vhost_vsock *vsock = file->private_data; 830 + struct vhost_dev *dev = &vsock->dev; 831 + int noblock = file->f_flags & O_NONBLOCK; 832 + 833 + return vhost_chr_read_iter(dev, to, noblock); 834 + } 835 + 836 + static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb, 837 + struct iov_iter *from) 838 + { 839 + struct file *file = iocb->ki_filp; 840 + struct vhost_vsock *vsock = file->private_data; 841 + struct vhost_dev *dev = &vsock->dev; 842 + 843 + return vhost_chr_write_iter(dev, from); 844 + } 845 + 846 + static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait) 847 + { 848 + struct vhost_vsock *vsock = file->private_data; 849 + struct vhost_dev *dev = &vsock->dev; 850 + 851 + return vhost_chr_poll(file, dev, wait); 852 + } 853 + 857 854 static const struct file_operations vhost_vsock_fops = { 858 855 .owner = THIS_MODULE, 859 856 .open = vhost_vsock_dev_open, ··· 889 830 .llseek = noop_llseek, 890 831 .unlocked_ioctl = vhost_vsock_dev_ioctl, 891 832 .compat_ioctl = compat_ptr_ioctl, 833 + .read_iter = vhost_vsock_chr_read_iter, 834 + .write_iter = vhost_vsock_chr_write_iter, 835 + .poll = vhost_vsock_chr_poll, 892 836 }; 893 837 894 838 static struct miscdevice vhost_vsock_misc = {