Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: Protect device ops->create and list_add with kvm->lock

KVM devices were manipulating list data structures without any form of
synchronization, and some implementations of the create operations also
suffered from a lack of synchronization.

Now when we've split the xics create operation into create and init, we
can hold the kvm->lock mutex while calling the create operation and when
manipulating the devices list.

The error path in the generic code gets slightly ugly because we have to
take the mutex again and delete the device from the list, but holding
the mutex during anon_inode_getfd or releasing/locking the mutex in the
common non-error path seemed wrong.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>

authored by

Christoffer Dall and committed by
Radim Krčmář
a28ebea2 023e9fdd

+27 -17
+5 -1
arch/arm/kvm/arm.c
··· 1009 1009 1010 1010 switch (ioctl) { 1011 1011 case KVM_CREATE_IRQCHIP: { 1012 + int ret; 1012 1013 if (!vgic_present) 1013 1014 return -ENXIO; 1014 - return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); 1015 + mutex_lock(&kvm->lock); 1016 + ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); 1017 + mutex_unlock(&kvm->lock); 1018 + return ret; 1015 1019 } 1016 1020 case KVM_ARM_SET_DEVICE_ADDR: { 1017 1021 struct kvm_arm_device_addr dev_addr;
-2
arch/powerpc/kvm/book3s_xics.c
··· 1329 1329 xics->kvm = kvm; 1330 1330 1331 1331 /* Already there ? */ 1332 - mutex_lock(&kvm->lock); 1333 1332 if (kvm->arch.xics) 1334 1333 ret = -EEXIST; 1335 1334 else 1336 1335 kvm->arch.xics = xics; 1337 - mutex_unlock(&kvm->lock); 1338 1336 1339 1337 if (ret) { 1340 1338 kfree(xics);
+6
include/linux/kvm_host.h
··· 1113 1113 /* create, destroy, and name are mandatory */ 1114 1114 struct kvm_device_ops { 1115 1115 const char *name; 1116 + 1117 + /* 1118 + * create is called holding kvm->lock and any operations not suitable 1119 + * to do while holding the lock should be deferred to init (see 1120 + * below). 1121 + */ 1116 1122 int (*create)(struct kvm_device *dev, u32 type); 1117 1123 1118 1124 /*
+4 -13
virt/kvm/arm/vgic/vgic-init.c
··· 73 73 int i, vcpu_lock_idx = -1, ret; 74 74 struct kvm_vcpu *vcpu; 75 75 76 - mutex_lock(&kvm->lock); 77 - 78 - if (irqchip_in_kernel(kvm)) { 79 - ret = -EEXIST; 80 - goto out; 81 - } 76 + if (irqchip_in_kernel(kvm)) 77 + return -EEXIST; 82 78 83 79 /* 84 80 * This function is also called by the KVM_CREATE_IRQCHIP handler, ··· 83 87 * the proper checks already. 84 88 */ 85 89 if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && 86 - !kvm_vgic_global_state.can_emulate_gicv2) { 87 - ret = -ENODEV; 88 - goto out; 89 - } 90 + !kvm_vgic_global_state.can_emulate_gicv2) 91 + return -ENODEV; 90 92 91 93 /* 92 94 * Any time a vcpu is run, vcpu_load is called which tries to grab the ··· 132 138 vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); 133 139 mutex_unlock(&vcpu->mutex); 134 140 } 135 - 136 - out: 137 - mutex_unlock(&kvm->lock); 138 141 return ret; 139 142 } 140 143
+12 -1
virt/kvm/kvm_main.c
··· 696 696 { 697 697 struct kvm_device *dev, *tmp; 698 698 699 + /* 700 + * We do not need to take the kvm->lock here, because nobody else 701 + * has a reference to the struct kvm at this point and therefore 702 + * cannot access the devices list anyhow. 703 + */ 699 704 list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { 700 705 list_del(&dev->vm_node); 701 706 dev->ops->destroy(dev); ··· 2837 2832 dev->ops = ops; 2838 2833 dev->kvm = kvm; 2839 2834 2835 + mutex_lock(&kvm->lock); 2840 2836 ret = ops->create(dev, cd->type); 2841 2837 if (ret < 0) { 2838 + mutex_unlock(&kvm->lock); 2842 2839 kfree(dev); 2843 2840 return ret; 2844 2841 } 2842 + list_add(&dev->vm_node, &kvm->devices); 2843 + mutex_unlock(&kvm->lock); 2845 2844 2846 2845 if (ops->init) 2847 2846 ops->init(dev); ··· 2853 2844 ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); 2854 2845 if (ret < 0) { 2855 2846 ops->destroy(dev); 2847 + mutex_lock(&kvm->lock); 2848 + list_del(&dev->vm_node); 2849 + mutex_unlock(&kvm->lock); 2856 2850 return ret; 2857 2851 } 2858 2852 2859 - list_add(&dev->vm_node, &kvm->devices); 2860 2853 kvm_get_kvm(kvm); 2861 2854 cd->fd = ret; 2862 2855 return 0;