Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio fixes from Michael Tsirkin:
"Fixes all over the place.

This includes a couple of tests that I would normally defer, but since
they have already been helpful in catching some bugs, don't build for
any users at all, and having them upstream makes life easier for
everyone, I think it's ok even at this late stage"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
tools/virtio: Use tools/include/list.h instead of stubs
tools/virtio: Reset index in virtio_test --reset.
tools/virtio: Extract virtqueue initialization in vq_reset
tools/virtio: Use __vring_new_virtqueue in virtio_test.c
tools/virtio: Add --reset
tools/virtio: Add --batch=random option
tools/virtio: Add --batch option
virtio-mem: add memory via add_memory_driver_managed()
virtio-mem: silence a static checker warning
vhost_vdpa: Fix potential underflow in vhost_vdpa_mmap()
vdpa: fix typos in the comments for __vdpa_alloc_device()

+207 -35
+1 -1
drivers/vdpa/vdpa.c
··· 63 63 * @config: the bus operations that is supported by this device 64 64 * @size: size of the parent structure that contains private data 65 65 * 66 - * Drvier should use vdap_alloc_device() wrapper macro instead of 66 + * Driver should use vdpa_alloc_device() wrapper macro instead of 67 67 * using this directly. 68 68 * 69 69 * Returns an error when parent/config/dma_dev is not set or fail to get
+57
drivers/vhost/test.c
··· 263 263 return 0; 264 264 } 265 265 266 + static long vhost_test_set_backend(struct vhost_test *n, unsigned index, int fd) 267 + { 268 + static void *backend; 269 + 270 + const bool enable = fd != -1; 271 + struct vhost_virtqueue *vq; 272 + int r; 273 + 274 + mutex_lock(&n->dev.mutex); 275 + r = vhost_dev_check_owner(&n->dev); 276 + if (r) 277 + goto err; 278 + 279 + if (index >= VHOST_TEST_VQ_MAX) { 280 + r = -ENOBUFS; 281 + goto err; 282 + } 283 + vq = &n->vqs[index]; 284 + mutex_lock(&vq->mutex); 285 + 286 + /* Verify that ring has been setup correctly. */ 287 + if (!vhost_vq_access_ok(vq)) { 288 + r = -EFAULT; 289 + goto err_vq; 290 + } 291 + if (!enable) { 292 + vhost_poll_stop(&vq->poll); 293 + backend = vhost_vq_get_backend(vq); 294 + vhost_vq_set_backend(vq, NULL); 295 + } else { 296 + vhost_vq_set_backend(vq, backend); 297 + r = vhost_vq_init_access(vq); 298 + if (r == 0) 299 + r = vhost_poll_start(&vq->poll, vq->kick); 300 + } 301 + 302 + mutex_unlock(&vq->mutex); 303 + 304 + if (enable) { 305 + vhost_test_flush_vq(n, index); 306 + } 307 + 308 + mutex_unlock(&n->dev.mutex); 309 + return 0; 310 + 311 + err_vq: 312 + mutex_unlock(&vq->mutex); 313 + err: 314 + mutex_unlock(&n->dev.mutex); 315 + return r; 316 + } 317 + 266 318 static long vhost_test_ioctl(struct file *f, unsigned int ioctl, 267 319 unsigned long arg) 268 320 { 321 + struct vhost_vring_file backend; 269 322 struct vhost_test *n = f->private_data; 270 323 void __user *argp = (void __user *)arg; 271 324 u64 __user *featurep = argp; ··· 330 277 if (copy_from_user(&test, argp, sizeof test)) 331 278 return -EFAULT; 332 279 return vhost_test_run(n, test); 280 + case VHOST_TEST_SET_BACKEND: 281 + if (copy_from_user(&backend, argp, sizeof backend)) 282 + return -EFAULT; 283 + return vhost_test_set_backend(n, backend.index, backend.fd); 333 284 case VHOST_GET_FEATURES: 334 285 features = VHOST_FEATURES; 335 286 if (copy_to_user(featurep, &features, sizeof features))
+1
drivers/vhost/test.h
··· 4 4 5 5 /* Start a given test on the virtio null device. 0 stops all tests. */ 6 6 #define VHOST_TEST_RUN _IOW(VHOST_VIRTIO, 0x31, int) 7 + #define VHOST_TEST_SET_BACKEND _IOW(VHOST_VIRTIO, 0x32, int) 7 8 8 9 #endif
+1 -1
drivers/vhost/vdpa.c
··· 818 818 struct vdpa_device *vdpa = v->vdpa; 819 819 const struct vdpa_config_ops *ops = vdpa->config; 820 820 struct vdpa_notification_area notify; 821 - int index = vma->vm_pgoff; 821 + unsigned long index = vma->vm_pgoff; 822 822 823 823 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 824 824 return -EINVAL;
+23 -4
drivers/virtio/virtio_mem.c
··· 101 101 102 102 /* The parent resource for all memory added via this device. */ 103 103 struct resource *parent_resource; 104 + /* 105 + * Copy of "System RAM (virtio_mem)" to be used for 106 + * add_memory_driver_managed(). 107 + */ 108 + const char *resource_name; 104 109 105 110 /* Summary of all memory block states. */ 106 111 unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT]; ··· 419 414 if (nid == NUMA_NO_NODE) 420 415 nid = memory_add_physaddr_to_nid(addr); 421 416 417 + /* 418 + * When force-unloading the driver and we still have memory added to 419 + * Linux, the resource name has to stay. 420 + */ 421 + if (!vm->resource_name) { 422 + vm->resource_name = kstrdup_const("System RAM (virtio_mem)", 423 + GFP_KERNEL); 424 + if (!vm->resource_name) 425 + return -ENOMEM; 426 + } 427 + 422 428 dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); 423 - return add_memory(nid, addr, memory_block_size_bytes()); 429 + return add_memory_driver_managed(nid, addr, memory_block_size_bytes(), 430 + vm->resource_name); 424 431 } 425 432 426 433 /* ··· 1209 1192 VIRTIO_MEM_MB_STATE_OFFLINE); 1210 1193 } 1211 1194 1212 - return rc; 1195 + return 0; 1213 1196 } 1214 1197 1215 1198 /* ··· 1907 1890 vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] || 1908 1891 vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] || 1909 1892 vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL] || 1910 - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE]) 1893 + vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE]) { 1911 1894 dev_warn(&vdev->dev, "device still has system memory added\n"); 1912 - else 1895 + } else { 1913 1896 virtio_mem_delete_resource(vm); 1897 + kfree_const(vm->resource_name); 1898 + } 1914 1899 1915 1900 /* remove all tracking data - no locking needed */ 1916 1901 vfree(vm->mb_state);
+1 -6
tools/virtio/linux/kernel.h
··· 11 11 12 12 #include <linux/compiler.h> 13 13 #include <linux/types.h> 14 + #include <linux/list.h> 14 15 #include <linux/printk.h> 15 16 #include <linux/bug.h> 16 17 #include <errno.h> ··· 135 134 typeof(y) _min2 = (y); \ 136 135 (void) (&_min1 == &_min2); \ 137 136 _min1 < _min2 ? _min1 : _min2; }) 138 - 139 - /* TODO: empty stubs for now. Broken but enough for virtio_ring.c */ 140 - #define list_add_tail(a, b) do {} while (0) 141 - #define list_del(a) do {} while (0) 142 - #define list_for_each_entry(a, b, c) while (0) 143 - /* end of stubs */ 144 137 145 138 #endif /* KERNEL_H */
+2 -3
tools/virtio/linux/virtio.h
··· 11 11 struct virtio_device { 12 12 struct device dev; 13 13 u64 features; 14 + struct list_head vqs; 14 15 }; 15 16 16 17 struct virtqueue { 17 - /* TODO: commented as list macros are empty stubs for now. 18 - * Broken but enough for virtio_ring.c 19 - * struct list_head list; */ 18 + struct list_head list; 20 19 void (*callback)(struct virtqueue *vq); 21 20 const char *name; 22 21 struct virtio_device *vdev;
+119 -20
tools/virtio/virtio_test.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #define _GNU_SOURCE 3 3 #include <getopt.h> 4 + #include <limits.h> 4 5 #include <string.h> 5 6 #include <poll.h> 6 7 #include <sys/eventfd.h> ··· 18 17 #include <linux/virtio.h> 19 18 #include <linux/virtio_ring.h> 20 19 #include "../../drivers/vhost/test.h" 21 + 20 + #define RANDOM_BATCH -1 21 22 22 23 /* Unused */ 23 24 void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; ··· 45 42 size_t buf_size; 46 43 struct vhost_memory *mem; 47 44 }; 45 + 46 + static const struct vhost_vring_file no_backend = { .fd = -1 }, 47 + backend = { .fd = 1 }; 48 + static const struct vhost_vring_state null_state = {}; 48 49 49 50 bool vq_notify(struct virtqueue *vq) 50 51 { ··· 95 88 assert(r >= 0); 96 89 } 97 90 91 + static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) 92 + { 93 + if (info->vq) 94 + vring_del_virtqueue(info->vq); 95 + 96 + memset(info->ring, 0, vring_size(num, 4096)); 97 + vring_init(&info->vring, num, info->ring, 4096); 98 + info->vq = __vring_new_virtqueue(info->idx, info->vring, vdev, true, 99 + false, vq_notify, vq_callback, "test"); 100 + assert(info->vq); 101 + info->vq->priv = info; 102 + } 103 + 98 104 static void vq_info_add(struct vdev_info *dev, int num) 99 105 { 100 106 struct vq_info *info = &dev->vqs[dev->nvqs]; ··· 117 97 info->call = eventfd(0, EFD_NONBLOCK); 118 98 r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); 119 99 assert(r >= 0); 120 - memset(info->ring, 0, vring_size(num, 4096)); 121 - vring_init(&info->vring, num, info->ring, 4096); 122 - info->vq = vring_new_virtqueue(info->idx, 123 - info->vring.num, 4096, &dev->vdev, 124 - true, false, info->ring, 125 - vq_notify, vq_callback, "test"); 126 - assert(info->vq); 127 - info->vq->priv = info; 100 + vq_reset(info, num, &dev->vdev); 128 101 vhost_vq_setup(dev, info); 129 102 dev->fds[info->idx].fd = info->call; 130 103 dev->fds[info->idx].events = POLLIN; ··· 129 116 int r; 130 117 memset(dev, 0, sizeof *dev); 131 118 dev->vdev.features = features; 119 + INIT_LIST_HEAD(&dev->vdev.vqs); 132 120 dev->buf_size = 1024; 133 121 dev->buf = malloc(dev->buf_size); 134 122 assert(dev->buf); ··· 166 152 } 167 153 168 154 static void run_test(struct vdev_info *dev, struct vq_info *vq, 169 - bool delayed, int bufs) 155 + bool delayed, int batch, int reset_n, int bufs) 170 156 { 171 157 struct scatterlist sl; 172 - long started = 0, completed = 0; 158 + long started = 0, completed = 0, next_reset = reset_n; 173 - long completed_before; 159 + long completed_before, started_before; 174 160 int r, test = 1; 175 161 unsigned len; 176 162 long long spurious = 0; 163 + const bool random_batch = batch == RANDOM_BATCH; 164 + 177 165 r = ioctl(dev->control, VHOST_TEST_RUN, &test); 178 166 assert(r >= 0); 167 + if (!reset_n) { 168 + next_reset = INT_MAX; 169 + } 170 + 179 171 for (;;) { 180 172 virtqueue_disable_cb(vq->vq); 181 173 completed_before = completed; 174 + started_before = started; 182 175 do { 183 - if (started < bufs) { 176 + const bool reset = completed > next_reset; 177 + if (random_batch) 178 + batch = (random() % vq->vring.num) + 1; 179 + 180 + while (started < bufs && 181 + (started - completed) < batch) { 184 182 sg_init_one(&sl, dev->buf, dev->buf_size); 185 183 r = virtqueue_add_outbuf(vq->vq, &sl, 1, 186 184 dev->buf + started, 187 185 GFP_ATOMIC); 188 - if (likely(r == 0)) { 189 - ++started; 190 - if (unlikely(!virtqueue_kick(vq->vq))) 186 + if (unlikely(r != 0)) { 187 + if (r == -ENOSPC && 188 + started > started_before) 189 + r = 0; 190 + else 191 191 r = -1; 192 + break; 192 193 } 193 - } else 194 + 195 + ++started; 196 + 197 + if (unlikely(!virtqueue_kick(vq->vq))) { 198 + r = -1; 199 + break; 200 + } 201 + } 202 + 203 + if (started >= bufs) 194 204 r = -1; 195 206 + if (reset) { 207 + r = ioctl(dev->control, VHOST_TEST_SET_BACKEND, 208 + &no_backend); 209 + assert(!r); 210 + } 211 + 196 212 /* Flush out completed bufs if any */ 197 - if (virtqueue_get_buf(vq->vq, &len)) { 213 + while (virtqueue_get_buf(vq->vq, &len)) { 198 214 ++completed; 199 215 r = 0; 200 216 } 201 217 218 + if (reset) { 219 + struct vhost_vring_state s = { .index = 0 }; 220 + 221 + vq_reset(vq, vq->vring.num, &dev->vdev); 222 + 223 + r = ioctl(dev->control, VHOST_GET_VRING_BASE, 224 + &s); 225 + assert(!r); 226 + 227 + s.num = 0; 228 + r = ioctl(dev->control, VHOST_SET_VRING_BASE, 229 + &null_state); 230 + assert(!r); 231 + 232 + r = ioctl(dev->control, VHOST_TEST_SET_BACKEND, 233 + &backend); 234 + assert(!r); 235 + 236 + started = completed; 237 + while (completed > next_reset) 238 + next_reset += completed; 239 + } 202 240 } while (r == 0); 203 - if (completed == completed_before) 241 + if (completed == completed_before && started == started_before) 204 242 ++spurious; 205 243 assert(completed <= bufs); 206 244 assert(started <= bufs); ··· 269 203 test = 0; 270 204 r = ioctl(dev->control, VHOST_TEST_RUN, &test); 271 205 assert(r >= 0); 272 - fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious); 206 + fprintf(stderr, 207 + "spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", 208 + spurious, started, completed); 273 209 } 274 210 275 211 const char optstring[] = "h"; ··· 313 245 .val = 'd', 314 246 }, 315 247 { 248 + .name = "batch", 249 + .val = 'b', 250 + .has_arg = required_argument, 251 + }, 252 + { 253 + .name = "reset", 254 + .val = 'r', 255 + .has_arg = optional_argument, 256 + }, 257 + { 316 258 } 317 259 }; ··· 333 255 " [--no-event-idx]" 334 256 " [--no-virtio-1]" 335 257 " [--delayed-interrupt]" 258 + " [--batch=random/N]" 259 + " [--reset=N]" 336 260 "\n"); 337 261 } ··· 343 263 struct vdev_info dev; 344 264 unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | 345 265 (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); 266 + long batch = 1, reset = 0; 346 267 int o; 347 268 bool delayed = false; ··· 370 289 case 'D': 371 290 delayed = true; 372 291 break; 292 + case 'b': 293 + if (0 == strcmp(optarg, "random")) { 294 + batch = RANDOM_BATCH; 295 + } else { 296 + batch = strtol(optarg, NULL, 10); 297 + assert(batch > 0); 298 + assert(batch < (long)INT_MAX + 1); 299 + } 300 + break; 301 + case 'r': 302 + if (!optarg) { 303 + reset = 1; 304 + } else { 305 + reset = strtol(optarg, NULL, 10); 306 + assert(reset > 0); 307 + assert(reset < (long)INT_MAX + 1); 308 + } 309 + break; 373 310 default: 374 311 assert(0); 375 312 break; ··· 397 298 done: 398 299 vdev_info_init(&dev, features); 399 300 vq_info_add(&dev, 256); 400 - run_test(&dev, &dev.vqs[0], delayed, 0x100000); 301 + run_test(&dev, &dev.vqs[0], delayed, batch, reset, 0x100000); 401 302 return 0; 402 303 }
+2
tools/virtio/vringh_test.c
··· 307 307 close(to_host[0]); 308 308 309 309 gvdev.vdev.features = features; 310 + INIT_LIST_HEAD(&gvdev.vdev.vqs); 310 311 gvdev.to_host_fd = to_host[1]; 311 312 gvdev.notifies = 0; 312 313 ··· 454 453 455 454 getrange = getrange_iov; 456 455 vdev.features = 0; 456 + INIT_LIST_HEAD(&vdev.vqs); 457 457 458 458 while (argv[1]) { 459 459 if (strcmp(argv[1], "--indirect") == 0)