Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

+842 -30
+1 -8
drivers/vhost/net.c
··· 10 10 #include <linux/eventfd.h> 11 11 #include <linux/vhost.h> 12 12 #include <linux/virtio_net.h> 13 - #include <linux/mmu_context.h> 14 13 #include <linux/miscdevice.h> 15 14 #include <linux/module.h> 16 15 #include <linux/mutex.h> ··· 142 143 return; 143 144 } 144 145 145 - use_mm(net->dev.mm); 146 146 mutex_lock(&vq->mutex); 147 147 vhost_disable_notify(vq); 148 148 ··· 206 208 } 207 209 208 210 mutex_unlock(&vq->mutex); 209 - unuse_mm(net->dev.mm); 210 211 } 211 212 212 213 static int peek_head_len(struct sock *sk) ··· 310 313 if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) 311 314 return; 312 315 313 - use_mm(net->dev.mm); 314 316 mutex_lock(&vq->mutex); 315 317 vhost_disable_notify(vq); 316 318 hdr_size = vq->vhost_hlen; ··· 388 392 } 389 393 390 394 mutex_unlock(&vq->mutex); 391 - unuse_mm(net->dev.mm); 392 395 } 393 396 394 397 /* Expects to be always run from workqueue - which acts as ··· 419 424 if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) 420 425 return; 421 426 422 - use_mm(net->dev.mm); 423 427 mutex_lock(&vq->mutex); 424 428 vhost_disable_notify(vq); 425 429 vhost_hlen = vq->vhost_hlen; ··· 453 459 move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); 454 460 else 455 461 /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: 456 - * needed because sendmsg can modify msg_iov. */ 462 + * needed because recvmsg can modify msg_iov. */ 457 463 copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); 458 464 msg.msg_iovlen = in; 459 465 err = sock->ops->recvmsg(NULL, sock, &msg, ··· 495 501 } 496 502 497 503 mutex_unlock(&vq->mutex); 498 - unuse_mm(net->dev.mm); 499 504 } 500 505 501 506 static void handle_rx(struct vhost_net *net)
+320
drivers/vhost/test.c
··· 1 + /* Copyright (C) 2009 Red Hat, Inc. 2 + * Author: Michael S. Tsirkin <mst@redhat.com> 3 + * 4 + * This work is licensed under the terms of the GNU GPL, version 2. 5 + * 6 + * test virtio server in host kernel. 7 + */ 8 + 9 + #include <linux/compat.h> 10 + #include <linux/eventfd.h> 11 + #include <linux/vhost.h> 12 + #include <linux/miscdevice.h> 13 + #include <linux/module.h> 14 + #include <linux/mutex.h> 15 + #include <linux/workqueue.h> 16 + #include <linux/rcupdate.h> 17 + #include <linux/file.h> 18 + #include <linux/slab.h> 19 + 20 + #include "test.h" 21 + #include "vhost.c" 22 + 23 + /* Max number of bytes transferred before requeueing the job. 24 + * Using this limit prevents one virtqueue from starving others. */ 25 + #define VHOST_TEST_WEIGHT 0x80000 26 + 27 + enum { 28 + VHOST_TEST_VQ = 0, 29 + VHOST_TEST_VQ_MAX = 1, 30 + }; 31 + 32 + struct vhost_test { 33 + struct vhost_dev dev; 34 + struct vhost_virtqueue vqs[VHOST_TEST_VQ_MAX]; 35 + }; 36 + 37 + /* Expects to be always run from workqueue - which acts as 38 + * read-size critical section for our kind of RCU. */ 39 + static void handle_vq(struct vhost_test *n) 40 + { 41 + struct vhost_virtqueue *vq = &n->dev.vqs[VHOST_TEST_VQ]; 42 + unsigned out, in; 43 + int head; 44 + size_t len, total_len = 0; 45 + void *private; 46 + 47 + private = rcu_dereference_check(vq->private_data, 1); 48 + if (!private) 49 + return; 50 + 51 + mutex_lock(&vq->mutex); 52 + vhost_disable_notify(vq); 53 + 54 + for (;;) { 55 + head = vhost_get_vq_desc(&n->dev, vq, vq->iov, 56 + ARRAY_SIZE(vq->iov), 57 + &out, &in, 58 + NULL, NULL); 59 + /* On error, stop handling until the next kick. */ 60 + if (unlikely(head < 0)) 61 + break; 62 + /* Nothing new? Wait for eventfd to tell us they refilled. 
*/ 63 + if (head == vq->num) { 64 + if (unlikely(vhost_enable_notify(vq))) { 65 + vhost_disable_notify(vq); 66 + continue; 67 + } 68 + break; 69 + } 70 + if (in) { 71 + vq_err(vq, "Unexpected descriptor format for TX: " 72 + "out %d, int %d\n", out, in); 73 + break; 74 + } 75 + len = iov_length(vq->iov, out); 76 + /* Sanity check */ 77 + if (!len) { 78 + vq_err(vq, "Unexpected 0 len for TX\n"); 79 + break; 80 + } 81 + vhost_add_used_and_signal(&n->dev, vq, head, 0); 82 + total_len += len; 83 + if (unlikely(total_len >= VHOST_TEST_WEIGHT)) { 84 + vhost_poll_queue(&vq->poll); 85 + break; 86 + } 87 + } 88 + 89 + mutex_unlock(&vq->mutex); 90 + } 91 + 92 + static void handle_vq_kick(struct vhost_work *work) 93 + { 94 + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, 95 + poll.work); 96 + struct vhost_test *n = container_of(vq->dev, struct vhost_test, dev); 97 + 98 + handle_vq(n); 99 + } 100 + 101 + static int vhost_test_open(struct inode *inode, struct file *f) 102 + { 103 + struct vhost_test *n = kmalloc(sizeof *n, GFP_KERNEL); 104 + struct vhost_dev *dev; 105 + int r; 106 + 107 + if (!n) 108 + return -ENOMEM; 109 + 110 + dev = &n->dev; 111 + n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; 112 + r = vhost_dev_init(dev, n->vqs, VHOST_TEST_VQ_MAX); 113 + if (r < 0) { 114 + kfree(n); 115 + return r; 116 + } 117 + 118 + f->private_data = n; 119 + 120 + return 0; 121 + } 122 + 123 + static void *vhost_test_stop_vq(struct vhost_test *n, 124 + struct vhost_virtqueue *vq) 125 + { 126 + void *private; 127 + 128 + mutex_lock(&vq->mutex); 129 + private = rcu_dereference_protected(vq->private_data, 130 + lockdep_is_held(&vq->mutex)); 131 + rcu_assign_pointer(vq->private_data, NULL); 132 + mutex_unlock(&vq->mutex); 133 + return private; 134 + } 135 + 136 + static void vhost_test_stop(struct vhost_test *n, void **privatep) 137 + { 138 + *privatep = vhost_test_stop_vq(n, n->vqs + VHOST_TEST_VQ); 139 + } 140 + 141 + static void vhost_test_flush_vq(struct 
vhost_test *n, int index) 142 + { 143 + vhost_poll_flush(&n->dev.vqs[index].poll); 144 + } 145 + 146 + static void vhost_test_flush(struct vhost_test *n) 147 + { 148 + vhost_test_flush_vq(n, VHOST_TEST_VQ); 149 + } 150 + 151 + static int vhost_test_release(struct inode *inode, struct file *f) 152 + { 153 + struct vhost_test *n = f->private_data; 154 + void *private; 155 + 156 + vhost_test_stop(n, &private); 157 + vhost_test_flush(n); 158 + vhost_dev_cleanup(&n->dev); 159 + /* We do an extra flush before freeing memory, 160 + * since jobs can re-queue themselves. */ 161 + vhost_test_flush(n); 162 + kfree(n); 163 + return 0; 164 + } 165 + 166 + static long vhost_test_run(struct vhost_test *n, int test) 167 + { 168 + void *priv, *oldpriv; 169 + struct vhost_virtqueue *vq; 170 + int r, index; 171 + 172 + if (test < 0 || test > 1) 173 + return -EINVAL; 174 + 175 + mutex_lock(&n->dev.mutex); 176 + r = vhost_dev_check_owner(&n->dev); 177 + if (r) 178 + goto err; 179 + 180 + for (index = 0; index < n->dev.nvqs; ++index) { 181 + /* Verify that ring has been setup correctly. */ 182 + if (!vhost_vq_access_ok(&n->vqs[index])) { 183 + r = -EFAULT; 184 + goto err; 185 + } 186 + } 187 + 188 + for (index = 0; index < n->dev.nvqs; ++index) { 189 + vq = n->vqs + index; 190 + mutex_lock(&vq->mutex); 191 + priv = test ? 
n : NULL; 192 + 193 + /* start polling new socket */ 194 + oldpriv = rcu_dereference_protected(vq->private_data, 195 + lockdep_is_held(&vq->mutex)); 196 + rcu_assign_pointer(vq->private_data, priv); 197 + 198 + mutex_unlock(&vq->mutex); 199 + 200 + if (oldpriv) { 201 + vhost_test_flush_vq(n, index); 202 + } 203 + } 204 + 205 + mutex_unlock(&n->dev.mutex); 206 + return 0; 207 + 208 + err: 209 + mutex_unlock(&n->dev.mutex); 210 + return r; 211 + } 212 + 213 + static long vhost_test_reset_owner(struct vhost_test *n) 214 + { 215 + void *priv = NULL; 216 + long err; 217 + mutex_lock(&n->dev.mutex); 218 + err = vhost_dev_check_owner(&n->dev); 219 + if (err) 220 + goto done; 221 + vhost_test_stop(n, &priv); 222 + vhost_test_flush(n); 223 + err = vhost_dev_reset_owner(&n->dev); 224 + done: 225 + mutex_unlock(&n->dev.mutex); 226 + return err; 227 + } 228 + 229 + static int vhost_test_set_features(struct vhost_test *n, u64 features) 230 + { 231 + mutex_lock(&n->dev.mutex); 232 + if ((features & (1 << VHOST_F_LOG_ALL)) && 233 + !vhost_log_access_ok(&n->dev)) { 234 + mutex_unlock(&n->dev.mutex); 235 + return -EFAULT; 236 + } 237 + n->dev.acked_features = features; 238 + smp_wmb(); 239 + vhost_test_flush(n); 240 + mutex_unlock(&n->dev.mutex); 241 + return 0; 242 + } 243 + 244 + static long vhost_test_ioctl(struct file *f, unsigned int ioctl, 245 + unsigned long arg) 246 + { 247 + struct vhost_test *n = f->private_data; 248 + void __user *argp = (void __user *)arg; 249 + u64 __user *featurep = argp; 250 + int test; 251 + u64 features; 252 + int r; 253 + switch (ioctl) { 254 + case VHOST_TEST_RUN: 255 + if (copy_from_user(&test, argp, sizeof test)) 256 + return -EFAULT; 257 + return vhost_test_run(n, test); 258 + case VHOST_GET_FEATURES: 259 + features = VHOST_FEATURES; 260 + if (copy_to_user(featurep, &features, sizeof features)) 261 + return -EFAULT; 262 + return 0; 263 + case VHOST_SET_FEATURES: 264 + if (copy_from_user(&features, featurep, sizeof features)) 265 + return 
-EFAULT; 266 + if (features & ~VHOST_FEATURES) 267 + return -EOPNOTSUPP; 268 + return vhost_test_set_features(n, features); 269 + case VHOST_RESET_OWNER: 270 + return vhost_test_reset_owner(n); 271 + default: 272 + mutex_lock(&n->dev.mutex); 273 + r = vhost_dev_ioctl(&n->dev, ioctl, arg); 274 + vhost_test_flush(n); 275 + mutex_unlock(&n->dev.mutex); 276 + return r; 277 + } 278 + } 279 + 280 + #ifdef CONFIG_COMPAT 281 + static long vhost_test_compat_ioctl(struct file *f, unsigned int ioctl, 282 + unsigned long arg) 283 + { 284 + return vhost_test_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); 285 + } 286 + #endif 287 + 288 + static const struct file_operations vhost_test_fops = { 289 + .owner = THIS_MODULE, 290 + .release = vhost_test_release, 291 + .unlocked_ioctl = vhost_test_ioctl, 292 + #ifdef CONFIG_COMPAT 293 + .compat_ioctl = vhost_test_compat_ioctl, 294 + #endif 295 + .open = vhost_test_open, 296 + .llseek = noop_llseek, 297 + }; 298 + 299 + static struct miscdevice vhost_test_misc = { 300 + MISC_DYNAMIC_MINOR, 301 + "vhost-test", 302 + &vhost_test_fops, 303 + }; 304 + 305 + static int vhost_test_init(void) 306 + { 307 + return misc_register(&vhost_test_misc); 308 + } 309 + module_init(vhost_test_init); 310 + 311 + static void vhost_test_exit(void) 312 + { 313 + misc_deregister(&vhost_test_misc); 314 + } 315 + module_exit(vhost_test_exit); 316 + 317 + MODULE_VERSION("0.0.1"); 318 + MODULE_LICENSE("GPL v2"); 319 + MODULE_AUTHOR("Michael S. Tsirkin"); 320 + MODULE_DESCRIPTION("Host kernel side for virtio simulator");
+7
drivers/vhost/test.h
#ifndef LINUX_VHOST_TEST_H
#define LINUX_VHOST_TEST_H

/* Provides VHOST_VIRTIO and the _IOW() ioctl encoding macro used below,
 * so this header can be included on its own. */
#include <linux/vhost.h>

/* Start a given test on the virtio null device. 0 stops all tests.
 * The argument is a pointer to an int. */
#define VHOST_TEST_RUN _IOW(VHOST_VIRTIO, 0x31, int)

#endif
+23 -21
drivers/vhost/vhost.c
··· 15 15 #include <linux/vhost.h> 16 16 #include <linux/virtio_net.h> 17 17 #include <linux/mm.h> 18 + #include <linux/mmu_context.h> 18 19 #include <linux/miscdevice.h> 19 20 #include <linux/mutex.h> 20 21 #include <linux/rcupdate.h> ··· 29 28 #include <linux/net.h> 30 29 #include <linux/if_packet.h> 31 30 #include <linux/if_arp.h> 32 - 33 - #include <net/sock.h> 34 31 35 32 #include "vhost.h" 36 33 ··· 156 157 vq->avail_idx = 0; 157 158 vq->last_used_idx = 0; 158 159 vq->used_flags = 0; 159 - vq->used_flags = 0; 160 160 vq->log_used = false; 161 161 vq->log_addr = -1ull; 162 162 vq->vhost_hlen = 0; ··· 176 178 struct vhost_work *work = NULL; 177 179 unsigned uninitialized_var(seq); 178 180 181 + use_mm(dev->mm); 182 + 179 183 for (;;) { 180 184 /* mb paired w/ kthread_stop */ 181 185 set_current_state(TASK_INTERRUPTIBLE); ··· 192 192 if (kthread_should_stop()) { 193 193 spin_unlock_irq(&dev->work_lock); 194 194 __set_current_state(TASK_RUNNING); 195 - return 0; 195 + break; 196 196 } 197 197 if (!list_empty(&dev->work_list)) { 198 198 work = list_first_entry(&dev->work_list, ··· 210 210 schedule(); 211 211 212 212 } 213 + unuse_mm(dev->mm); 214 + return 0; 213 215 } 214 216 215 217 /* Helper to allocate iovec buffers for all vqs. 
*/ ··· 404 402 kfree(rcu_dereference_protected(dev->memory, 405 403 lockdep_is_held(&dev->mutex))); 406 404 RCU_INIT_POINTER(dev->memory, NULL); 407 - if (dev->mm) 408 - mmput(dev->mm); 409 - dev->mm = NULL; 410 - 411 405 WARN_ON(!list_empty(&dev->work_list)); 412 406 if (dev->worker) { 413 407 kthread_stop(dev->worker); 414 408 dev->worker = NULL; 415 409 } 410 + if (dev->mm) 411 + mmput(dev->mm); 412 + dev->mm = NULL; 416 413 } 417 414 418 415 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) ··· 882 881 static int log_write(void __user *log_base, 883 882 u64 write_address, u64 write_length) 884 883 { 884 + u64 write_page = write_address / VHOST_PAGE_SIZE; 885 885 int r; 886 886 if (!write_length) 887 887 return 0; 888 - write_address /= VHOST_PAGE_SIZE; 888 + write_length += write_address % VHOST_PAGE_SIZE; 889 889 for (;;) { 890 890 u64 base = (u64)(unsigned long)log_base; 891 - u64 log = base + write_address / 8; 892 - int bit = write_address % 8; 891 + u64 log = base + write_page / 8; 892 + int bit = write_page % 8; 893 893 if ((u64)(unsigned long)log != log) 894 894 return -EFAULT; 895 895 r = set_bit_to_user(bit, (void __user *)(unsigned long)log); ··· 899 897 if (write_length <= VHOST_PAGE_SIZE) 900 898 break; 901 899 write_length -= VHOST_PAGE_SIZE; 902 - write_address += VHOST_PAGE_SIZE; 900 + write_page += 1; 903 901 } 904 902 return r; 905 903 } ··· 1094 1092 1095 1093 /* Check it isn't doing very strange things with descriptor numbers. */ 1096 1094 last_avail_idx = vq->last_avail_idx; 1097 - if (unlikely(get_user(vq->avail_idx, &vq->avail->idx))) { 1095 + if (unlikely(__get_user(vq->avail_idx, &vq->avail->idx))) { 1098 1096 vq_err(vq, "Failed to access avail idx at %p\n", 1099 1097 &vq->avail->idx); 1100 1098 return -EFAULT; ··· 1115 1113 1116 1114 /* Grab the next descriptor number they're advertising, and increment 1117 1115 * the index we've seen. 
*/ 1118 - if (unlikely(get_user(head, 1119 - &vq->avail->ring[last_avail_idx % vq->num]))) { 1116 + if (unlikely(__get_user(head, 1117 + &vq->avail->ring[last_avail_idx % vq->num]))) { 1120 1118 vq_err(vq, "Failed to read head: idx %d address %p\n", 1121 1119 last_avail_idx, 1122 1120 &vq->avail->ring[last_avail_idx % vq->num]); ··· 1215 1213 /* The virtqueue contains a ring of used buffers. Get a pointer to the 1216 1214 * next entry in that used ring. */ 1217 1215 used = &vq->used->ring[vq->last_used_idx % vq->num]; 1218 - if (put_user(head, &used->id)) { 1216 + if (__put_user(head, &used->id)) { 1219 1217 vq_err(vq, "Failed to write used id"); 1220 1218 return -EFAULT; 1221 1219 } 1222 - if (put_user(len, &used->len)) { 1220 + if (__put_user(len, &used->len)) { 1223 1221 vq_err(vq, "Failed to write used len"); 1224 1222 return -EFAULT; 1225 1223 } 1226 1224 /* Make sure buffer is written before we update index. */ 1227 1225 smp_wmb(); 1228 - if (put_user(vq->last_used_idx + 1, &vq->used->idx)) { 1226 + if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) { 1229 1227 vq_err(vq, "Failed to increment used idx"); 1230 1228 return -EFAULT; 1231 1229 } ··· 1257 1255 1258 1256 start = vq->last_used_idx % vq->num; 1259 1257 used = vq->used->ring + start; 1260 - if (copy_to_user(used, heads, count * sizeof *used)) { 1258 + if (__copy_to_user(used, heads, count * sizeof *used)) { 1261 1259 vq_err(vq, "Failed to write used"); 1262 1260 return -EFAULT; 1263 1261 } ··· 1318 1316 * interrupts. */ 1319 1317 smp_mb(); 1320 1318 1321 - if (get_user(flags, &vq->avail->flags)) { 1319 + if (__get_user(flags, &vq->avail->flags)) { 1322 1320 vq_err(vq, "Failed to get flags"); 1323 1321 return; 1324 1322 } ··· 1369 1367 /* They could have slipped one in as we were doing that: make 1370 1368 * sure it's written, then check again. 
*/ 1371 1369 smp_mb(); 1372 - r = get_user(avail_idx, &vq->avail->idx); 1370 + r = __get_user(avail_idx, &vq->avail->idx); 1373 1371 if (r) { 1374 1372 vq_err(vq, "Failed to check avail idx at %p: %d\n", 1375 1373 &vq->avail->idx, r);
+1 -1
drivers/vhost/vhost.h
··· 102 102 * flush the vhost_work instead of synchronize_rcu. Therefore readers do 103 103 * not need to call rcu_read_lock/rcu_read_unlock: the beginning of 104 104 * vhost_work execution acts instead of rcu_read_lock() and the end of 105 - * vhost_work execution acts instead of rcu_read_lock(). 105 + * vhost_work execution acts instead of rcu_read_unlock(). 106 106 * Writers use virtqueue mutex. */ 107 107 void __rcu *private_data; 108 108 /* Log write descriptors */
+12
tools/virtio/Makefile
··· 1 + all: test mod 2 + test: virtio_test 3 + virtio_test: virtio_ring.o virtio_test.o 4 + CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -MMD 5 + vpath %.c ../../drivers/virtio 6 + mod: 7 + ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test 8 + .PHONY: all test mod clean 9 + clean: 10 + ${RM} *.o vhost_test/*.o vhost_test/.*.cmd \ 11 + vhost_test/Module.symvers vhost_test/modules.order *.d 12 + -include *.d
+2
tools/virtio/linux/device.h
··· 1 + #ifndef LINUX_DEVICE_H 2 + #endif
+2
tools/virtio/linux/slab.h
··· 1 + #ifndef LINUX_SLAB_H 2 + #endif
+223
tools/virtio/linux/virtio.h
#ifndef LINUX_VIRTIO_H
#define LINUX_VIRTIO_H

/*
 * Userspace stand-ins for the kernel interfaces that virtio_ring.c relies
 * on, so the ring code can be built and exercised as an ordinary program.
 */

#include <stdbool.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>

#include <linux/types.h>
#include <errno.h>

typedef unsigned long long dma_addr_t;

struct scatterlist {
	unsigned long	page_link;
	unsigned int	offset;
	unsigned int	length;
	dma_addr_t	dma_address;
};

struct page {
	unsigned long long dummy;
};

#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))

/* Physical == Virtual */
#define virt_to_phys(p) ((unsigned long)(p))
#define phys_to_virt(a) ((void *)(unsigned long)(a))
/* Page address: Virtual / 4K */
#define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \
					sizeof(struct page)))
#define offset_in_page(p) (((unsigned long)(p)) % 4096)
/* Inverse of virt_to_page() + offset; the low two bits of page_link are
 * flag bits and are masked off first. */
#define sg_phys(sg) (((sg)->page_link & ~0x3) / sizeof(struct page) * 4096 + \
		     (sg)->offset)

static inline void sg_mark_end(struct scatterlist *sg)
{
	/*
	 * Set termination bit, clear potential chain bit
	 */
	sg->page_link |= 0x02;
	sg->page_link &= ~0x01;
}

/* Zero nents entries and mark the last one as the end of the list.
 * nents must be at least 1. */
static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
	memset(sgl, 0, sizeof(*sgl) * nents);
	sg_mark_end(&sgl[nents - 1]);
}

static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
{
	/* Preserve the two flag bits already stored in page_link. */
	unsigned long page_link = sg->page_link & 0x3;

	/*
	 * In order for the low bit stealing approach to work, pages
	 * must be aligned at a 32-bit boundary as a minimum.
	 */
	BUG_ON((unsigned long) page & 0x03);
	sg->page_link = page_link | (unsigned long) page;
}

static inline void sg_set_page(struct scatterlist *sg, struct page *page,
			       unsigned int len, unsigned int offset)
{
	sg_assign_page(sg, page);
	sg->offset = offset;
	sg->length = len;
}

static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
			      unsigned int buflen)
{
	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
}

/* Initialize a single-entry list describing buf[0..buflen). */
static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	sg_init_table(sg, 1);
	sg_set_buf(sg, buf, buflen);
}

typedef __u16 u16;

typedef enum {
	GFP_KERNEL,
	GFP_ATOMIC,
} gfp_t;
typedef enum {
	IRQ_NONE,
	IRQ_HANDLED
} irqreturn_t;

/* The gfp argument is accepted for source compatibility and ignored. */
static inline void *kmalloc(size_t s, gfp_t gfp)
{
	return malloc(s);
}

static inline void kfree(void *p)
{
	free(p);
}

/* __typeof__ rather than typeof so this also builds in strict ISO mode. */
#define container_of(ptr, type, member) ({			\
	const __typeof__( ((type *)0)->member ) *__mptr = (ptr);	\
	(type *)( (char *)__mptr - offsetof(type,member) );})

#define uninitialized_var(x) x = x

# ifndef likely
#  define likely(x)	(__builtin_expect(!!(x), 1))
# endif
# ifndef unlikely
#  define unlikely(x)	(__builtin_expect(!!(x), 0))
# endif

#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#ifdef DEBUG
#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#else
#define pr_debug(format, ...) do {} while (0)
#endif
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)

/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
#define list_add_tail(a, b) do {} while (0)
#define list_del(a) do {} while (0)

#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
#define BITS_PER_BYTE		8
#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
/* TODO: Not atomic as it should be:
 * we don't use this for anything important. */
static inline void clear_bit(int nr, volatile unsigned long *addr)
{
	unsigned long mask = BIT_MASK(nr);
	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);

	*p &= ~mask;
}

static inline int test_bit(int nr, const volatile unsigned long *addr)
{
	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
}

/* The only feature we care to support */
#define virtio_has_feature(dev, feature) \
	test_bit((feature), (dev)->features)
/* end of stubs */

struct virtio_device {
	void *dev;
	/* One word only: feature bit numbers must be < BITS_PER_LONG. */
	unsigned long features[1];
};

struct virtqueue {
	/* TODO: commented as list macros are empty stubs for now.
	 * Broken but enough for virtio_ring.c
	 * struct list_head list; */
	void (*callback)(struct virtqueue *vq);
	const char *name;
	struct virtio_device *vdev;
	void *priv;
};

#define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \
	void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \
}
#define MODULE_LICENSE(__MODULE_LICENSE_value) \
	const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value

#define CONFIG_SMP

/* Compiler-only barrier; __asm__ spelling works in strict ISO mode too. */
#define barrier() __asm__ __volatile__("" ::: "memory")
#define mb() __sync_synchronize()

#define smp_mb()	mb()
#if defined(__i386__) || defined(__x86_64__)
/* On x86 a compiler barrier is used for the read and write barriers. */
# define smp_rmb()	barrier()
# define smp_wmb()	barrier()
#else
/* Other architectures: fall back to a full barrier, which is always at
 * least as strong as a read or write barrier (correct, possibly slower). */
# define smp_rmb()	mb()
# define smp_wmb()	mb()
#endif

/* Interfaces exported by virtio_ring. */
int virtqueue_add_buf_gfp(struct virtqueue *vq,
			  struct scatterlist sg[],
			  unsigned int out_num,
			  unsigned int in_num,
			  void *data,
			  gfp_t gfp);

static inline int virtqueue_add_buf(struct virtqueue *vq,
				    struct scatterlist sg[],
				    unsigned int out_num,
				    unsigned int in_num,
				    void *data)
{
	return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
}

void virtqueue_kick(struct virtqueue *vq);

void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);

void virtqueue_disable_cb(struct virtqueue *vq);

bool virtqueue_enable_cb(struct virtqueue *vq);

void *virtqueue_detach_unused_buf(struct virtqueue *vq);
struct virtqueue *vring_new_virtqueue(unsigned int num,
				      unsigned int vring_align,
				      struct virtio_device *vdev,
				      void *pages,
				      void (*notify)(struct virtqueue *vq),
				      void (*callback)(struct virtqueue *vq),
				      const char *name);
void vring_del_virtqueue(struct virtqueue *vq);

#endif
+2
tools/virtio/vhost_test/Makefile
··· 1 + obj-m += vhost_test.o 2 + EXTRA_CFLAGS += -Idrivers/vhost
+1
tools/virtio/vhost_test/vhost_test.c
··· 1 + #include "test.c"
+248
tools/virtio/virtio_test.c
··· 1 + #define _GNU_SOURCE 2 + #include <getopt.h> 3 + #include <string.h> 4 + #include <poll.h> 5 + #include <sys/eventfd.h> 6 + #include <stdlib.h> 7 + #include <assert.h> 8 + #include <unistd.h> 9 + #include <sys/ioctl.h> 10 + #include <sys/stat.h> 11 + #include <sys/types.h> 12 + #include <fcntl.h> 13 + #include <linux/vhost.h> 14 + #include <linux/virtio.h> 15 + #include <linux/virtio_ring.h> 16 + #include "../../drivers/vhost/test.h" 17 + 18 + struct vq_info { 19 + int kick; 20 + int call; 21 + int num; 22 + int idx; 23 + void *ring; 24 + /* copy used for control */ 25 + struct vring vring; 26 + struct virtqueue *vq; 27 + }; 28 + 29 + struct vdev_info { 30 + struct virtio_device vdev; 31 + int control; 32 + struct pollfd fds[1]; 33 + struct vq_info vqs[1]; 34 + int nvqs; 35 + void *buf; 36 + size_t buf_size; 37 + struct vhost_memory *mem; 38 + }; 39 + 40 + void vq_notify(struct virtqueue *vq) 41 + { 42 + struct vq_info *info = vq->priv; 43 + unsigned long long v = 1; 44 + int r; 45 + r = write(info->kick, &v, sizeof v); 46 + assert(r == sizeof v); 47 + } 48 + 49 + void vq_callback(struct virtqueue *vq) 50 + { 51 + } 52 + 53 + 54 + void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) 55 + { 56 + struct vhost_vring_state state = { .index = info->idx }; 57 + struct vhost_vring_file file = { .index = info->idx }; 58 + unsigned long long features = dev->vdev.features[0]; 59 + struct vhost_vring_addr addr = { 60 + .index = info->idx, 61 + .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, 62 + .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail, 63 + .used_user_addr = (uint64_t)(unsigned long)info->vring.used, 64 + }; 65 + int r; 66 + r = ioctl(dev->control, VHOST_SET_FEATURES, &features); 67 + assert(r >= 0); 68 + state.num = info->vring.num; 69 + r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); 70 + assert(r >= 0); 71 + state.num = 0; 72 + r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); 73 + assert(r >= 0); 74 + r = 
ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); 75 + assert(r >= 0); 76 + file.fd = info->kick; 77 + r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); 78 + assert(r >= 0); 79 + file.fd = info->call; 80 + r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file); 81 + assert(r >= 0); 82 + } 83 + 84 + static void vq_info_add(struct vdev_info *dev, int num) 85 + { 86 + struct vq_info *info = &dev->vqs[dev->nvqs]; 87 + int r; 88 + info->idx = dev->nvqs; 89 + info->kick = eventfd(0, EFD_NONBLOCK); 90 + info->call = eventfd(0, EFD_NONBLOCK); 91 + r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); 92 + assert(r >= 0); 93 + memset(info->ring, 0, vring_size(num, 4096)); 94 + vring_init(&info->vring, num, info->ring, 4096); 95 + info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring, 96 + vq_notify, vq_callback, "test"); 97 + assert(info->vq); 98 + info->vq->priv = info; 99 + vhost_vq_setup(dev, info); 100 + dev->fds[info->idx].fd = info->call; 101 + dev->fds[info->idx].events = POLLIN; 102 + dev->nvqs++; 103 + } 104 + 105 + static void vdev_info_init(struct vdev_info* dev, unsigned long long features) 106 + { 107 + int r; 108 + memset(dev, 0, sizeof *dev); 109 + dev->vdev.features[0] = features; 110 + dev->vdev.features[1] = features >> 32; 111 + dev->buf_size = 1024; 112 + dev->buf = malloc(dev->buf_size); 113 + assert(dev->buf); 114 + dev->control = open("/dev/vhost-test", O_RDWR); 115 + assert(dev->control >= 0); 116 + r = ioctl(dev->control, VHOST_SET_OWNER, NULL); 117 + assert(r >= 0); 118 + dev->mem = malloc(offsetof(struct vhost_memory, regions) + 119 + sizeof dev->mem->regions[0]); 120 + assert(dev->mem); 121 + memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + 122 + sizeof dev->mem->regions[0]); 123 + dev->mem->nregions = 1; 124 + dev->mem->regions[0].guest_phys_addr = (long)dev->buf; 125 + dev->mem->regions[0].userspace_addr = (long)dev->buf; 126 + dev->mem->regions[0].memory_size = dev->buf_size; 127 + r = 
ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); 128 + assert(r >= 0); 129 + } 130 + 131 + /* TODO: this is pretty bad: we get a cache line bounce 132 + * for the wait queue on poll and another one on read, 133 + * plus the read which is there just to clear the 134 + * current state. */ 135 + static void wait_for_interrupt(struct vdev_info *dev) 136 + { 137 + int i; 138 + unsigned long long val; 139 + poll(dev->fds, dev->nvqs, -1); 140 + for (i = 0; i < dev->nvqs; ++i) 141 + if (dev->fds[i].revents & POLLIN) { 142 + read(dev->fds[i].fd, &val, sizeof val); 143 + } 144 + } 145 + 146 + static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) 147 + { 148 + struct scatterlist sl; 149 + long started = 0, completed = 0; 150 + long completed_before; 151 + int r, test = 1; 152 + unsigned len; 153 + long long spurious = 0; 154 + r = ioctl(dev->control, VHOST_TEST_RUN, &test); 155 + assert(r >= 0); 156 + for (;;) { 157 + virtqueue_disable_cb(vq->vq); 158 + completed_before = completed; 159 + do { 160 + if (started < bufs) { 161 + sg_init_one(&sl, dev->buf, dev->buf_size); 162 + r = virtqueue_add_buf(vq->vq, &sl, 1, 0, 163 + dev->buf + started); 164 + if (likely(r >= 0)) { 165 + ++started; 166 + virtqueue_kick(vq->vq); 167 + } 168 + } else 169 + r = -1; 170 + 171 + /* Flush out completed bufs if any */ 172 + if (virtqueue_get_buf(vq->vq, &len)) { 173 + ++completed; 174 + r = 0; 175 + } 176 + 177 + } while (r >= 0); 178 + if (completed == completed_before) 179 + ++spurious; 180 + assert(completed <= bufs); 181 + assert(started <= bufs); 182 + if (completed == bufs) 183 + break; 184 + if (virtqueue_enable_cb(vq->vq)) { 185 + wait_for_interrupt(dev); 186 + } 187 + } 188 + test = 0; 189 + r = ioctl(dev->control, VHOST_TEST_RUN, &test); 190 + assert(r >= 0); 191 + fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious); 192 + } 193 + 194 + const char optstring[] = "h"; 195 + const struct option longopts[] = { 196 + { 197 + .name = "help", 198 + .val = 'h', 199 + 
}, 200 + { 201 + .name = "indirect", 202 + .val = 'I', 203 + }, 204 + { 205 + .name = "no-indirect", 206 + .val = 'i', 207 + }, 208 + { 209 + } 210 + }; 211 + 212 + static void help() 213 + { 214 + fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n"); 215 + } 216 + 217 + int main(int argc, char **argv) 218 + { 219 + struct vdev_info dev; 220 + unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC; 221 + int o; 222 + 223 + for (;;) { 224 + o = getopt_long(argc, argv, optstring, longopts, NULL); 225 + switch (o) { 226 + case -1: 227 + goto done; 228 + case '?': 229 + help(); 230 + exit(2); 231 + case 'h': 232 + help(); 233 + goto done; 234 + case 'i': 235 + features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); 236 + break; 237 + default: 238 + assert(0); 239 + break; 240 + } 241 + } 242 + 243 + done: 244 + vdev_info_init(&dev, features); 245 + vq_info_add(&dev, 256); 246 + run_test(&dev, &dev.vqs[0], 0x100000); 247 + return 0; 248 + }