Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

virtio: Add improved queue allocation API

This leaves vring_new_virtqueue alone for compatibility, but it
adds two new improved APIs:

vring_create_virtqueue: Creates a virtqueue backed by automatically
allocated coherent memory. (Some day this could be extended to
support non-coherent memory, too, if there ends up being a platform
on which it's worthwhile.)

__vring_new_virtqueue: Creates a virtqueue with a manually-specified
layout. This should allow mic_virtio to work much more cleanly.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

authored by

Andy Lutomirski and committed by
Michael S. Tsirkin
2a2d1382 780bc790

+236 -33
+180 -31
drivers/virtio/virtio_ring.c
··· 95 95 /* How to notify other side. FIXME: commonalize hcalls! */ 96 96 bool (*notify)(struct virtqueue *vq); 97 97 98 + /* DMA, allocation, and size information */ 99 + bool we_own_ring; 100 + size_t queue_size_in_bytes; 101 + dma_addr_t queue_dma_addr; 102 + 98 103 #ifdef DEBUG 99 104 /* They're supposed to lock for us. */ 100 105 unsigned int in_use; ··· 883 878 } 884 879 EXPORT_SYMBOL_GPL(vring_interrupt); 885 880 886 - struct virtqueue *vring_new_virtqueue(unsigned int index, 887 - unsigned int num, 888 - unsigned int vring_align, 889 - struct virtio_device *vdev, 890 - bool weak_barriers, 891 - void *pages, 892 - bool (*notify)(struct virtqueue *), 893 - void (*callback)(struct virtqueue *), 894 - const char *name) 881 + struct virtqueue *__vring_new_virtqueue(unsigned int index, 882 + struct vring vring, 883 + struct virtio_device *vdev, 884 + bool weak_barriers, 885 + bool (*notify)(struct virtqueue *), 886 + void (*callback)(struct virtqueue *), 887 + const char *name) 895 888 { 896 - struct vring_virtqueue *vq; 897 889 unsigned int i; 890 + struct vring_virtqueue *vq; 898 891 899 - /* We assume num is a power of 2. 
*/ 900 - if (num & (num - 1)) { 901 - dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 902 - return NULL; 903 - } 904 - 905 - vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state), 892 + vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state), 906 893 GFP_KERNEL); 907 894 if (!vq) 908 895 return NULL; 909 896 910 - vring_init(&vq->vring, num, pages, vring_align); 897 + vq->vring = vring; 911 898 vq->vq.callback = callback; 912 899 vq->vq.vdev = vdev; 913 900 vq->vq.name = name; 914 - vq->vq.num_free = num; 901 + vq->vq.num_free = vring.num; 915 902 vq->vq.index = index; 903 + vq->we_own_ring = false; 904 + vq->queue_dma_addr = 0; 905 + vq->queue_size_in_bytes = 0; 916 906 vq->notify = notify; 917 907 vq->weak_barriers = weak_barriers; 918 908 vq->broken = false; ··· 932 932 933 933 /* Put everything in free lists. */ 934 934 vq->free_head = 0; 935 - for (i = 0; i < num-1; i++) 935 + for (i = 0; i < vring.num-1; i++) 936 936 vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); 937 - memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state)); 937 + memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state)); 938 938 939 939 return &vq->vq; 940 940 } 941 + EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 942 + 943 + static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 944 + dma_addr_t *dma_handle, gfp_t flag) 945 + { 946 + if (vring_use_dma_api(vdev)) { 947 + return dma_alloc_coherent(vdev->dev.parent, size, 948 + dma_handle, flag); 949 + } else { 950 + void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 951 + if (queue) { 952 + phys_addr_t phys_addr = virt_to_phys(queue); 953 + *dma_handle = (dma_addr_t)phys_addr; 954 + 955 + /* 956 + * Sanity check: make sure we dind't truncate 957 + * the address. 
The only arches I can find that 958 + * have 64-bit phys_addr_t but 32-bit dma_addr_t 959 + * are certain non-highmem MIPS and x86 960 + * configurations, but these configurations 961 + * should never allocate physical pages above 32 962 + * bits, so this is fine. Just in case, throw a 963 + * warning and abort if we end up with an 964 + * unrepresentable address. 965 + */ 966 + if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 967 + free_pages_exact(queue, PAGE_ALIGN(size)); 968 + return NULL; 969 + } 970 + } 971 + return queue; 972 + } 973 + } 974 + 975 + static void vring_free_queue(struct virtio_device *vdev, size_t size, 976 + void *queue, dma_addr_t dma_handle) 977 + { 978 + if (vring_use_dma_api(vdev)) { 979 + dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 980 + } else { 981 + free_pages_exact(queue, PAGE_ALIGN(size)); 982 + } 983 + } 984 + 985 + struct virtqueue *vring_create_virtqueue( 986 + unsigned int index, 987 + unsigned int num, 988 + unsigned int vring_align, 989 + struct virtio_device *vdev, 990 + bool weak_barriers, 991 + bool may_reduce_num, 992 + bool (*notify)(struct virtqueue *), 993 + void (*callback)(struct virtqueue *), 994 + const char *name) 995 + { 996 + struct virtqueue *vq; 997 + void *queue; 998 + dma_addr_t dma_addr; 999 + size_t queue_size_in_bytes; 1000 + struct vring vring; 1001 + 1002 + /* We assume num is a power of 2. */ 1003 + if (num & (num - 1)) { 1004 + dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 1005 + return NULL; 1006 + } 1007 + 1008 + /* TODO: allocate each queue chunk individually */ 1009 + for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 1010 + queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1011 + &dma_addr, 1012 + GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1013 + if (queue) 1014 + break; 1015 + } 1016 + 1017 + if (!num) 1018 + return NULL; 1019 + 1020 + if (!queue) { 1021 + /* Try to get a single page. You are my only hope! 
*/ 1022 + queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1023 + &dma_addr, GFP_KERNEL|__GFP_ZERO); 1024 + } 1025 + if (!queue) 1026 + return NULL; 1027 + 1028 + queue_size_in_bytes = vring_size(num, vring_align); 1029 + vring_init(&vring, num, queue, vring_align); 1030 + 1031 + vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, 1032 + notify, callback, name); 1033 + if (!vq) { 1034 + vring_free_queue(vdev, queue_size_in_bytes, queue, 1035 + dma_addr); 1036 + return NULL; 1037 + } 1038 + 1039 + to_vvq(vq)->queue_dma_addr = dma_addr; 1040 + to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes; 1041 + to_vvq(vq)->we_own_ring = true; 1042 + 1043 + return vq; 1044 + } 1045 + EXPORT_SYMBOL_GPL(vring_create_virtqueue); 1046 + 1047 + struct virtqueue *vring_new_virtqueue(unsigned int index, 1048 + unsigned int num, 1049 + unsigned int vring_align, 1050 + struct virtio_device *vdev, 1051 + bool weak_barriers, 1052 + void *pages, 1053 + bool (*notify)(struct virtqueue *vq), 1054 + void (*callback)(struct virtqueue *vq), 1055 + const char *name) 1056 + { 1057 + struct vring vring; 1058 + vring_init(&vring, num, pages, vring_align); 1059 + return __vring_new_virtqueue(index, vring, vdev, weak_barriers, 1060 + notify, callback, name); 1061 + } 941 1062 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 942 1063 943 - void vring_del_virtqueue(struct virtqueue *vq) 1064 + void vring_del_virtqueue(struct virtqueue *_vq) 944 1065 { 945 - list_del(&vq->list); 946 - kfree(to_vvq(vq)); 1066 + struct vring_virtqueue *vq = to_vvq(_vq); 1067 + 1068 + if (vq->we_own_ring) { 1069 + vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes, 1070 + vq->vring.desc, vq->queue_dma_addr); 1071 + } 1072 + list_del(&_vq->list); 1073 + kfree(vq); 947 1074 } 948 1075 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 949 1076 ··· 1134 1007 } 1135 1008 EXPORT_SYMBOL_GPL(virtio_break_device); 1136 1009 1137 - void *virtqueue_get_avail(struct virtqueue *_vq) 1010 + dma_addr_t 
virtqueue_get_desc_addr(struct virtqueue *_vq) 1138 1011 { 1139 1012 struct vring_virtqueue *vq = to_vvq(_vq); 1140 1013 1141 - return vq->vring.avail; 1142 - } 1143 - EXPORT_SYMBOL_GPL(virtqueue_get_avail); 1014 + BUG_ON(!vq->we_own_ring); 1144 1015 1145 - void *virtqueue_get_used(struct virtqueue *_vq) 1016 + return vq->queue_dma_addr; 1017 + } 1018 + EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 1019 + 1020 + dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 1146 1021 { 1147 1022 struct vring_virtqueue *vq = to_vvq(_vq); 1148 1023 1149 - return vq->vring.used; 1024 + BUG_ON(!vq->we_own_ring); 1025 + 1026 + return vq->queue_dma_addr + 1027 + ((char *)vq->vring.avail - (char *)vq->vring.desc); 1150 1028 } 1151 - EXPORT_SYMBOL_GPL(virtqueue_get_used); 1029 + EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 1030 + 1031 + dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 1032 + { 1033 + struct vring_virtqueue *vq = to_vvq(_vq); 1034 + 1035 + BUG_ON(!vq->we_own_ring); 1036 + 1037 + return vq->queue_dma_addr + 1038 + ((char *)vq->vring.used - (char *)vq->vring.desc); 1039 + } 1040 + EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 1041 + 1042 + const struct vring *virtqueue_get_vring(struct virtqueue *vq) 1043 + { 1044 + return &to_vvq(vq)->vring; 1045 + } 1046 + EXPORT_SYMBOL_GPL(virtqueue_get_vring); 1152 1047 1153 1048 MODULE_LICENSE("GPL");
+21 -2
include/linux/virtio.h
··· 75 75 76 76 bool virtqueue_is_broken(struct virtqueue *vq); 77 77 78 - void *virtqueue_get_avail(struct virtqueue *vq); 79 - void *virtqueue_get_used(struct virtqueue *vq); 78 + const struct vring *virtqueue_get_vring(struct virtqueue *vq); 79 + dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq); 80 + dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq); 81 + dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); 82 + 83 + /* 84 + * Legacy accessors -- in almost all cases, these are the wrong functions 85 + * to use. 86 + */ 87 + static inline void *virtqueue_get_desc(struct virtqueue *vq) 88 + { 89 + return virtqueue_get_vring(vq)->desc; 90 + } 91 + static inline void *virtqueue_get_avail(struct virtqueue *vq) 92 + { 93 + return virtqueue_get_vring(vq)->avail; 94 + } 95 + static inline void *virtqueue_get_used(struct virtqueue *vq) 96 + { 97 + return virtqueue_get_vring(vq)->used; 98 + } 80 99 81 100 /** 82 101 * virtio_device - representation of a device using virtio
+35
include/linux/virtio_ring.h
··· 59 59 struct virtio_device; 60 60 struct virtqueue; 61 61 62 + /* 63 + * Creates a virtqueue and allocates the descriptor ring. If 64 + * may_reduce_num is set, then this may allocate a smaller ring than 65 + * expected. The caller should query virtqueue_get_ring_size to learn 66 + * the actual size of the ring. 67 + */ 68 + struct virtqueue *vring_create_virtqueue(unsigned int index, 69 + unsigned int num, 70 + unsigned int vring_align, 71 + struct virtio_device *vdev, 72 + bool weak_barriers, 73 + bool may_reduce_num, 74 + bool (*notify)(struct virtqueue *vq), 75 + void (*callback)(struct virtqueue *vq), 76 + const char *name); 77 + 78 + /* Creates a virtqueue with a custom layout. */ 79 + struct virtqueue *__vring_new_virtqueue(unsigned int index, 80 + struct vring vring, 81 + struct virtio_device *vdev, 82 + bool weak_barriers, 83 + bool (*notify)(struct virtqueue *), 84 + void (*callback)(struct virtqueue *), 85 + const char *name); 86 + 87 + /* 88 + * Creates a virtqueue with a standard layout but a caller-allocated 89 + * ring. 90 + */ 62 91 struct virtqueue *vring_new_virtqueue(unsigned int index, 63 92 unsigned int num, 64 93 unsigned int vring_align, ··· 97 68 bool (*notify)(struct virtqueue *vq), 98 69 void (*callback)(struct virtqueue *vq), 99 70 const char *name); 71 + 72 + /* 73 + * Destroys a virtqueue. If created with vring_create_virtqueue, this 74 + * also frees the ring. 75 + */ 100 76 void vring_del_virtqueue(struct virtqueue *vq); 77 + 101 78 /* Filter out transport-specific feature bits. */ 102 79 void vring_transport_features(struct virtio_device *vdev); 103 80