Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tools/virtio: add ringtest utilities

This adds micro-benchmarks useful for tuning virtio ring layouts.
Three layouts are currently implemented:

- virtio 0.9 compatible one
- an experimental extension bypassing the ring index, polling ring
itself instead
- an experimental extension bypassing avail and used ring completely

Typical use:

sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring

It doesn't depend on the kernel directly, but it's handy
to have as much virtio stuff as possible in one tree.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

+1123
+22
tools/virtio/ringtest/Makefile
# Build the virtio ring layout micro-benchmarks.
# Every benchmark binary is one ring implementation linked with the shared
# command-line/benchmark driver in main.o.

# Declared first so that "all" is the default goal.
all:

PROGS := ring virtio_ring_0_9 virtio_ring_poll

all: $(PROGS)

CFLAGS += -Wall
CFLAGS += -pthread -O2 -ggdb
LDFLAGS += -pthread -O2 -ggdb

# Object dependencies. virtio_ring_poll.c #includes virtio_ring_0_9.c, so it
# must be rebuilt whenever that file changes.
main.o: main.c main.h
ring.o: ring.c main.h
virtio_ring_0_9.o: virtio_ring_0_9.c main.h
virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h

# Link rules (make's built-in recipes do the work).
ring: ring.o main.o
virtio_ring_0_9: virtio_ring_0_9.o main.o
virtio_ring_poll: virtio_ring_poll.o main.o

# The leading '-' lets clean continue when a file is already absent.
clean:
	-rm main.o
	-rm ring.o ring
	-rm virtio_ring_0_9.o virtio_ring_0_9
	-rm virtio_ring_poll.o virtio_ring_poll

.PHONY: all clean
+2
tools/virtio/ringtest/README
··· 1 + Partial implementation of various ring layouts, useful to tune virtio design. 2 + Uses shared memory heavily.
+366
tools/virtio/ringtest/main.c
··· 1 + /* 2 + * Copyright (C) 2016 Red Hat, Inc. 3 + * Author: Michael S. Tsirkin <mst@redhat.com> 4 + * This work is licensed under the terms of the GNU GPL, version 2. 5 + * 6 + * Command line processing and common functions for ring benchmarking. 7 + */ 8 + #define _GNU_SOURCE 9 + #include <getopt.h> 10 + #include <pthread.h> 11 + #include <assert.h> 12 + #include <sched.h> 13 + #include "main.h" 14 + #include <sys/eventfd.h> 15 + #include <stdlib.h> 16 + #include <stdio.h> 17 + #include <unistd.h> 18 + #include <limits.h> 19 + 20 + int runcycles = 10000000; 21 + int max_outstanding = INT_MAX; 22 + int batch = 1; 23 + 24 + bool do_sleep = false; 25 + bool do_relax = false; 26 + bool do_exit = true; 27 + 28 + unsigned ring_size = 256; 29 + 30 + static int kickfd = -1; 31 + static int callfd = -1; 32 + 33 + void notify(int fd) 34 + { 35 + unsigned long long v = 1; 36 + int r; 37 + 38 + vmexit(); 39 + r = write(fd, &v, sizeof v); 40 + assert(r == sizeof v); 41 + vmentry(); 42 + } 43 + 44 + void wait_for_notify(int fd) 45 + { 46 + unsigned long long v = 1; 47 + int r; 48 + 49 + vmexit(); 50 + r = read(fd, &v, sizeof v); 51 + assert(r == sizeof v); 52 + vmentry(); 53 + } 54 + 55 + void kick(void) 56 + { 57 + notify(kickfd); 58 + } 59 + 60 + void wait_for_kick(void) 61 + { 62 + wait_for_notify(kickfd); 63 + } 64 + 65 + void call(void) 66 + { 67 + notify(callfd); 68 + } 69 + 70 + void wait_for_call(void) 71 + { 72 + wait_for_notify(callfd); 73 + } 74 + 75 + void set_affinity(const char *arg) 76 + { 77 + cpu_set_t cpuset; 78 + int ret; 79 + pthread_t self; 80 + long int cpu; 81 + char *endptr; 82 + 83 + if (!arg) 84 + return; 85 + 86 + cpu = strtol(arg, &endptr, 0); 87 + assert(!*endptr); 88 + 89 + assert(cpu >= 0 || cpu < CPU_SETSIZE); 90 + 91 + self = pthread_self(); 92 + CPU_ZERO(&cpuset); 93 + CPU_SET(cpu, &cpuset); 94 + 95 + ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset); 96 + assert(!ret); 97 + } 98 + 99 + static void run_guest(void) 100 + { 101 + 
int completed_before; 102 + int completed = 0; 103 + int started = 0; 104 + int bufs = runcycles; 105 + int spurious = 0; 106 + int r; 107 + unsigned len; 108 + void *buf; 109 + int tokick = batch; 110 + 111 + for (;;) { 112 + if (do_sleep) 113 + disable_call(); 114 + completed_before = completed; 115 + do { 116 + if (started < bufs && 117 + started - completed < max_outstanding) { 118 + r = add_inbuf(0, NULL, "Hello, world!"); 119 + if (__builtin_expect(r == 0, true)) { 120 + ++started; 121 + if (!--tokick) { 122 + tokick = batch; 123 + if (do_sleep) 124 + kick_available(); 125 + } 126 + 127 + } 128 + } else 129 + r = -1; 130 + 131 + /* Flush out completed bufs if any */ 132 + if (get_buf(&len, &buf)) { 133 + ++completed; 134 + if (__builtin_expect(completed == bufs, false)) 135 + return; 136 + r = 0; 137 + } 138 + } while (r == 0); 139 + if (completed == completed_before) 140 + ++spurious; 141 + assert(completed <= bufs); 142 + assert(started <= bufs); 143 + if (do_sleep) { 144 + if (enable_call()) 145 + wait_for_call(); 146 + } else { 147 + poll_used(); 148 + } 149 + } 150 + } 151 + 152 + static void run_host(void) 153 + { 154 + int completed_before; 155 + int completed = 0; 156 + int spurious = 0; 157 + int bufs = runcycles; 158 + unsigned len; 159 + void *buf; 160 + 161 + for (;;) { 162 + if (do_sleep) { 163 + if (enable_kick()) 164 + wait_for_kick(); 165 + } else { 166 + poll_avail(); 167 + } 168 + if (do_sleep) 169 + disable_kick(); 170 + completed_before = completed; 171 + while (__builtin_expect(use_buf(&len, &buf), true)) { 172 + if (do_sleep) 173 + call_used(); 174 + ++completed; 175 + if (__builtin_expect(completed == bufs, false)) 176 + return; 177 + } 178 + if (completed == completed_before) 179 + ++spurious; 180 + assert(completed <= bufs); 181 + if (completed == bufs) 182 + break; 183 + } 184 + } 185 + 186 + void *start_guest(void *arg) 187 + { 188 + set_affinity(arg); 189 + run_guest(); 190 + pthread_exit(NULL); 191 + } 192 + 193 + void 
*start_host(void *arg) 194 + { 195 + set_affinity(arg); 196 + run_host(); 197 + pthread_exit(NULL); 198 + } 199 + 200 + static const char optstring[] = ""; 201 + static const struct option longopts[] = { 202 + { 203 + .name = "help", 204 + .has_arg = no_argument, 205 + .val = 'h', 206 + }, 207 + { 208 + .name = "host-affinity", 209 + .has_arg = required_argument, 210 + .val = 'H', 211 + }, 212 + { 213 + .name = "guest-affinity", 214 + .has_arg = required_argument, 215 + .val = 'G', 216 + }, 217 + { 218 + .name = "ring-size", 219 + .has_arg = required_argument, 220 + .val = 'R', 221 + }, 222 + { 223 + .name = "run-cycles", 224 + .has_arg = required_argument, 225 + .val = 'C', 226 + }, 227 + { 228 + .name = "outstanding", 229 + .has_arg = required_argument, 230 + .val = 'o', 231 + }, 232 + { 233 + .name = "batch", 234 + .has_arg = required_argument, 235 + .val = 'b', 236 + }, 237 + { 238 + .name = "sleep", 239 + .has_arg = no_argument, 240 + .val = 's', 241 + }, 242 + { 243 + .name = "relax", 244 + .has_arg = no_argument, 245 + .val = 'x', 246 + }, 247 + { 248 + .name = "exit", 249 + .has_arg = no_argument, 250 + .val = 'e', 251 + }, 252 + { 253 + } 254 + }; 255 + 256 + static void help(void) 257 + { 258 + fprintf(stderr, "Usage: <test> [--help]" 259 + " [--host-affinity H]" 260 + " [--guest-affinity G]" 261 + " [--ring-size R (default: %d)]" 262 + " [--run-cycles C (default: %d)]" 263 + " [--batch b]" 264 + " [--outstanding o]" 265 + " [--sleep]" 266 + " [--relax]" 267 + " [--exit]" 268 + "\n", 269 + ring_size, 270 + runcycles); 271 + } 272 + 273 + int main(int argc, char **argv) 274 + { 275 + int ret; 276 + pthread_t host, guest; 277 + void *tret; 278 + char *host_arg = NULL; 279 + char *guest_arg = NULL; 280 + char *endptr; 281 + long int c; 282 + 283 + kickfd = eventfd(0, 0); 284 + assert(kickfd >= 0); 285 + callfd = eventfd(0, 0); 286 + assert(callfd >= 0); 287 + 288 + for (;;) { 289 + int o = getopt_long(argc, argv, optstring, longopts, NULL); 290 + switch (o) 
{ 291 + case -1: 292 + goto done; 293 + case '?': 294 + help(); 295 + exit(2); 296 + case 'H': 297 + host_arg = optarg; 298 + break; 299 + case 'G': 300 + guest_arg = optarg; 301 + break; 302 + case 'R': 303 + ring_size = strtol(optarg, &endptr, 0); 304 + assert(ring_size && !(ring_size & (ring_size - 1))); 305 + assert(!*endptr); 306 + break; 307 + case 'C': 308 + c = strtol(optarg, &endptr, 0); 309 + assert(!*endptr); 310 + assert(c > 0 && c < INT_MAX); 311 + runcycles = c; 312 + break; 313 + case 'o': 314 + c = strtol(optarg, &endptr, 0); 315 + assert(!*endptr); 316 + assert(c > 0 && c < INT_MAX); 317 + max_outstanding = c; 318 + break; 319 + case 'b': 320 + c = strtol(optarg, &endptr, 0); 321 + assert(!*endptr); 322 + assert(c > 0 && c < INT_MAX); 323 + batch = c; 324 + break; 325 + case 's': 326 + do_sleep = true; 327 + break; 328 + case 'x': 329 + do_relax = true; 330 + break; 331 + case 'e': 332 + do_exit = true; 333 + break; 334 + default: 335 + help(); 336 + exit(4); 337 + break; 338 + } 339 + } 340 + 341 + /* does nothing here, used to make sure all smp APIs compile */ 342 + smp_acquire(); 343 + smp_release(); 344 + smp_mb(); 345 + done: 346 + 347 + if (batch > max_outstanding) 348 + batch = max_outstanding; 349 + 350 + if (optind < argc) { 351 + help(); 352 + exit(4); 353 + } 354 + alloc_ring(); 355 + 356 + ret = pthread_create(&host, NULL, start_host, host_arg); 357 + assert(!ret); 358 + ret = pthread_create(&guest, NULL, start_guest, guest_arg); 359 + assert(!ret); 360 + 361 + ret = pthread_join(guest, &tret); 362 + assert(!ret); 363 + ret = pthread_join(host, &tret); 364 + assert(!ret); 365 + return 0; 366 + }
+119
tools/virtio/ringtest/main.h
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Common macros and functions for ring benchmarking.
 */
#ifndef MAIN_H
#define MAIN_H

/* assert() and _Exit() are used by the non-x86 fallbacks below. */
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>

extern bool do_exit;

#if defined(__x86_64__) || defined(__i386__)
#include "x86intrin.h"

/* Spin until the time stamp counter has advanced by at least @cycles. */
static inline void wait_cycles(unsigned long long cycles)
{
	unsigned long long t;

	t = __rdtsc();
	while (__rdtsc() - t < cycles) {}
}

/* Delays, in TSC cycles, applied by vmexit() and vmentry(). */
#define VMEXIT_CYCLES 500
#define VMENTRY_CYCLES 500

#else
/* No cycle counter support here: terminate the process with status 5. */
static inline void wait_cycles(unsigned long long cycles)
{
	(void)cycles;
	_Exit(5);
}
#define VMEXIT_CYCLES 0
#define VMENTRY_CYCLES 0
#endif

/* Delay by VMEXIT_CYCLES, unless do_exit is false. */
static inline void vmexit(void)
{
	if (!do_exit)
		return;

	wait_cycles(VMEXIT_CYCLES);
}

/* Delay by VMENTRY_CYCLES, unless do_exit is false. */
static inline void vmentry(void)
{
	if (!do_exit)
		return;

	wait_cycles(VMENTRY_CYCLES);
}

/* implemented by ring */
void alloc_ring(void);
/* guest side */
int add_inbuf(unsigned, void *, void *);
void *get_buf(unsigned *, void **);
void disable_call(void);
bool enable_call(void);
void kick_available(void);
void poll_used(void);
/* host side */
void disable_kick(void);
bool enable_kick(void);
bool use_buf(unsigned *, void **);
void call_used(void);
void poll_avail(void);

/* implemented by main */
extern bool do_sleep;
void kick(void);
void wait_for_kick(void);
void call(void);
void wait_for_call(void);

extern unsigned ring_size;

/*
 * Compiler barrier - similar to what Linux uses.
 * Spelled __asm__ so that it also builds in strict ISO C modes.
 */
#define barrier() __asm__ __volatile__("" ::: "memory")

/* Is there a portable way to do this? */
#if defined(__x86_64__) || defined(__i386__)
#define cpu_relax() __asm__ __volatile__("rep; nop" ::: "memory")
#else
#define cpu_relax() assert(0)
#endif

extern bool do_relax;

/* Body of a busy-wait loop iteration. */
static inline void busy_wait(void)
{
	if (do_relax)
		cpu_relax();
	else
		/* prevent compiler from removing busy loops */
		barrier();
}

/*
 * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
 * with other __ATOMIC_SEQ_CST calls.
 */
#define smp_mb() __sync_synchronize()

/*
 * This abuses the atomic builtins for thread fences, and
 * adds a compiler barrier.
 */
#define smp_release() do { \
	barrier(); \
	__atomic_thread_fence(__ATOMIC_RELEASE); \
} while (0)

#define smp_acquire() do { \
	__atomic_thread_fence(__ATOMIC_ACQUIRE); \
	barrier(); \
} while (0)

#endif
+272
tools/virtio/ringtest/ring.c
··· 1 + /* 2 + * Copyright (C) 2016 Red Hat, Inc. 3 + * Author: Michael S. Tsirkin <mst@redhat.com> 4 + * This work is licensed under the terms of the GNU GPL, version 2. 5 + * 6 + * Simple descriptor-based ring. virtio 0.9 compatible event index is used for 7 + * signalling, unconditionally. 8 + */ 9 + #define _GNU_SOURCE 10 + #include "main.h" 11 + #include <stdlib.h> 12 + #include <stdio.h> 13 + #include <string.h> 14 + 15 + /* Next - Where next entry will be written. 16 + * Prev - "Next" value when event triggered previously. 17 + * Event - Peer requested event after writing this entry. 18 + */ 19 + static inline bool need_event(unsigned short event, 20 + unsigned short next, 21 + unsigned short prev) 22 + { 23 + return (unsigned short)(next - event - 1) < (unsigned short)(next - prev); 24 + } 25 + 26 + /* Design: 27 + * Guest adds descriptors with unique index values and DESC_HW in flags. 28 + * Host overwrites used descriptors with correct len, index, and DESC_HW clear. 29 + * Flags are always set last. 
30 + */ 31 + #define DESC_HW 0x1 32 + 33 + struct desc { 34 + unsigned short flags; 35 + unsigned short index; 36 + unsigned len; 37 + unsigned long long addr; 38 + }; 39 + 40 + /* how much padding is needed to avoid false cache sharing */ 41 + #define HOST_GUEST_PADDING 0x80 42 + 43 + /* Mostly read */ 44 + struct event { 45 + unsigned short kick_index; 46 + unsigned char reserved0[HOST_GUEST_PADDING - 2]; 47 + unsigned short call_index; 48 + unsigned char reserved1[HOST_GUEST_PADDING - 2]; 49 + }; 50 + 51 + struct data { 52 + void *buf; /* descriptor is writeable, we can't get buf from there */ 53 + void *data; 54 + } *data; 55 + 56 + struct desc *ring; 57 + struct event *event; 58 + 59 + struct guest { 60 + unsigned avail_idx; 61 + unsigned last_used_idx; 62 + unsigned num_free; 63 + unsigned kicked_avail_idx; 64 + unsigned char reserved[HOST_GUEST_PADDING - 12]; 65 + } guest; 66 + 67 + struct host { 68 + /* we do not need to track last avail index 69 + * unless we have more than one in flight. 
70 + */ 71 + unsigned used_idx; 72 + unsigned called_used_idx; 73 + unsigned char reserved[HOST_GUEST_PADDING - 4]; 74 + } host; 75 + 76 + /* implemented by ring */ 77 + void alloc_ring(void) 78 + { 79 + int ret; 80 + int i; 81 + 82 + ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring); 83 + if (ret) { 84 + perror("Unable to allocate ring buffer.\n"); 85 + exit(3); 86 + } 87 + event = malloc(sizeof *event); 88 + if (!event) { 89 + perror("Unable to allocate event buffer.\n"); 90 + exit(3); 91 + } 92 + memset(event, 0, sizeof *event); 93 + guest.avail_idx = 0; 94 + guest.kicked_avail_idx = -1; 95 + guest.last_used_idx = 0; 96 + host.used_idx = 0; 97 + host.called_used_idx = -1; 98 + for (i = 0; i < ring_size; ++i) { 99 + struct desc desc = { 100 + .index = i, 101 + }; 102 + ring[i] = desc; 103 + } 104 + guest.num_free = ring_size; 105 + data = malloc(ring_size * sizeof *data); 106 + if (!data) { 107 + perror("Unable to allocate data buffer.\n"); 108 + exit(3); 109 + } 110 + memset(data, 0, ring_size * sizeof *data); 111 + } 112 + 113 + /* guest side */ 114 + int add_inbuf(unsigned len, void *buf, void *datap) 115 + { 116 + unsigned head, index; 117 + 118 + if (!guest.num_free) 119 + return -1; 120 + 121 + guest.num_free--; 122 + head = (ring_size - 1) & (guest.avail_idx++); 123 + 124 + /* Start with a write. On MESI architectures this helps 125 + * avoid a shared state with consumer that is polling this descriptor. 126 + */ 127 + ring[head].addr = (unsigned long)(void*)buf; 128 + ring[head].len = len; 129 + /* read below might bypass write above. That is OK because it's just an 130 + * optimization. If this happens, we will get the cache line in a 131 + * shared state which is unfortunate, but probably not worth it to 132 + * add an explicit full barrier to avoid this. 
133 + */ 134 + barrier(); 135 + index = ring[head].index; 136 + data[index].buf = buf; 137 + data[index].data = datap; 138 + /* Barrier A (for pairing) */ 139 + smp_release(); 140 + ring[head].flags = DESC_HW; 141 + 142 + return 0; 143 + } 144 + 145 + void *get_buf(unsigned *lenp, void **bufp) 146 + { 147 + unsigned head = (ring_size - 1) & guest.last_used_idx; 148 + unsigned index; 149 + void *datap; 150 + 151 + if (ring[head].flags & DESC_HW) 152 + return NULL; 153 + /* Barrier B (for pairing) */ 154 + smp_acquire(); 155 + *lenp = ring[head].len; 156 + index = ring[head].index & (ring_size - 1); 157 + datap = data[index].data; 158 + *bufp = data[index].buf; 159 + data[index].buf = NULL; 160 + data[index].data = NULL; 161 + guest.num_free++; 162 + guest.last_used_idx++; 163 + return datap; 164 + } 165 + 166 + void poll_used(void) 167 + { 168 + unsigned head = (ring_size - 1) & guest.last_used_idx; 169 + 170 + while (ring[head].flags & DESC_HW) 171 + busy_wait(); 172 + } 173 + 174 + void disable_call() 175 + { 176 + /* Doing nothing to disable calls might cause 177 + * extra interrupts, but reduces the number of cache misses. 178 + */ 179 + } 180 + 181 + bool enable_call() 182 + { 183 + unsigned head = (ring_size - 1) & guest.last_used_idx; 184 + 185 + event->call_index = guest.last_used_idx; 186 + /* Flush call index write */ 187 + /* Barrier D (for pairing) */ 188 + smp_mb(); 189 + return ring[head].flags & DESC_HW; 190 + } 191 + 192 + void kick_available(void) 193 + { 194 + /* Flush in previous flags write */ 195 + /* Barrier C (for pairing) */ 196 + smp_mb(); 197 + if (!need_event(event->kick_index, 198 + guest.avail_idx, 199 + guest.kicked_avail_idx)) 200 + return; 201 + 202 + guest.kicked_avail_idx = guest.avail_idx; 203 + kick(); 204 + } 205 + 206 + /* host side */ 207 + void disable_kick() 208 + { 209 + /* Doing nothing to disable kicks might cause 210 + * extra interrupts, but reduces the number of cache misses. 
211 + */ 212 + } 213 + 214 + bool enable_kick() 215 + { 216 + unsigned head = (ring_size - 1) & host.used_idx; 217 + 218 + event->kick_index = host.used_idx; 219 + /* Barrier C (for pairing) */ 220 + smp_mb(); 221 + return !(ring[head].flags & DESC_HW); 222 + } 223 + 224 + void poll_avail(void) 225 + { 226 + unsigned head = (ring_size - 1) & host.used_idx; 227 + 228 + while (!(ring[head].flags & DESC_HW)) 229 + busy_wait(); 230 + } 231 + 232 + bool use_buf(unsigned *lenp, void **bufp) 233 + { 234 + unsigned head = (ring_size - 1) & host.used_idx; 235 + 236 + if (!(ring[head].flags & DESC_HW)) 237 + return false; 238 + 239 + /* make sure length read below is not speculated */ 240 + /* Barrier A (for pairing) */ 241 + smp_acquire(); 242 + 243 + /* simple in-order completion: we don't need 244 + * to touch index at all. This also means we 245 + * can just modify the descriptor in-place. 246 + */ 247 + ring[head].len--; 248 + /* Make sure len is valid before flags. 249 + * Note: alternative is to write len and flags in one access - 250 + * possible on 64 bit architectures but wmb is free on Intel anyway 251 + * so I have no way to test whether it's a gain. 252 + */ 253 + /* Barrier B (for pairing) */ 254 + smp_release(); 255 + ring[head].flags = 0; 256 + host.used_idx++; 257 + return true; 258 + } 259 + 260 + void call_used(void) 261 + { 262 + /* Flush in previous flags write */ 263 + /* Barrier D (for pairing) */ 264 + smp_mb(); 265 + if (!need_event(event->call_index, 266 + host.used_idx, 267 + host.called_used_idx)) 268 + return; 269 + 270 + host.called_used_idx = host.used_idx; 271 + call(); 272 + }
+24
tools/virtio/ringtest/run-on-all.sh
#!/bin/sh

# Run the given benchmark command once per guest CPU with the host pinned to
# the last CPU, then once with only the host pinned, then once unpinned.
#
# Usage: sh run-on-all.sh <command> [args...]

# Print the numeric entries of /dev/cpu, one per line, in ascending order.
# Prints nothing when /dev/cpu cannot be entered, instead of listing
# whatever directory happens to be current.
list_cpus()
{
	(cd /dev/cpu 2>/dev/null && ls | grep -v '[a-z]' | sort -n)
}

CPUS=$(list_cpus)

if [ -z "$CPUS" ]; then
	echo "$0: no CPUs found under /dev/cpu, skipping affinity runs" >&2
	echo "NO AFFINITY"
	"$@"
	exit
fi

#use last CPU for host. Why not the first?
#many devices tend to use cpu0 by default so
#it tends to be busier
HOST_AFFINITY=$(echo "$CPUS" | tail -n 1)

#run command on all cpus
for cpu in $CPUS
do
	#Don't run guest and host on same CPU
	#It actually works ok if using signalling
	if
		(echo "$@" | grep -e "--sleep" > /dev/null) || \
			test "$HOST_AFFINITY" '!=' "$cpu"
	then
		echo "GUEST AFFINITY $cpu"
		"$@" --host-affinity "$HOST_AFFINITY" --guest-affinity "$cpu"
	fi
done
echo "NO GUEST AFFINITY"
"$@" --host-affinity "$HOST_AFFINITY"
echo "NO AFFINITY"
"$@"
+316
tools/virtio/ringtest/virtio_ring_0_9.c
··· 1 + /* 2 + * Copyright (C) 2016 Red Hat, Inc. 3 + * Author: Michael S. Tsirkin <mst@redhat.com> 4 + * This work is licensed under the terms of the GNU GPL, version 2. 5 + * 6 + * Partial implementation of virtio 0.9. event index is used for signalling, 7 + * unconditionally. Design roughly follows linux kernel implementation in order 8 + * to be able to judge its performance. 9 + */ 10 + #define _GNU_SOURCE 11 + #include "main.h" 12 + #include <stdlib.h> 13 + #include <stdio.h> 14 + #include <assert.h> 15 + #include <string.h> 16 + #include <linux/virtio_ring.h> 17 + 18 + struct data { 19 + void *data; 20 + } *data; 21 + 22 + struct vring ring; 23 + 24 + /* enabling the below activates experimental ring polling code 25 + * (which skips index reads on consumer in favor of looking at 26 + * high bits of ring id ^ 0x8000). 27 + */ 28 + /* #ifdef RING_POLL */ 29 + 30 + /* how much padding is needed to avoid false cache sharing */ 31 + #define HOST_GUEST_PADDING 0x80 32 + 33 + struct guest { 34 + unsigned short avail_idx; 35 + unsigned short last_used_idx; 36 + unsigned short num_free; 37 + unsigned short kicked_avail_idx; 38 + unsigned short free_head; 39 + unsigned char reserved[HOST_GUEST_PADDING - 10]; 40 + } guest; 41 + 42 + struct host { 43 + /* we do not need to track last avail index 44 + * unless we have more than one in flight. 
45 + */ 46 + unsigned short used_idx; 47 + unsigned short called_used_idx; 48 + unsigned char reserved[HOST_GUEST_PADDING - 4]; 49 + } host; 50 + 51 + /* implemented by ring */ 52 + void alloc_ring(void) 53 + { 54 + int ret; 55 + int i; 56 + void *p; 57 + 58 + ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000)); 59 + if (ret) { 60 + perror("Unable to allocate ring buffer.\n"); 61 + exit(3); 62 + } 63 + memset(p, 0, vring_size(ring_size, 0x1000)); 64 + vring_init(&ring, ring_size, p, 0x1000); 65 + 66 + guest.avail_idx = 0; 67 + guest.kicked_avail_idx = -1; 68 + guest.last_used_idx = 0; 69 + /* Put everything in free lists. */ 70 + guest.free_head = 0; 71 + for (i = 0; i < ring_size - 1; i++) 72 + ring.desc[i].next = i + 1; 73 + host.used_idx = 0; 74 + host.called_used_idx = -1; 75 + guest.num_free = ring_size; 76 + data = malloc(ring_size * sizeof *data); 77 + if (!data) { 78 + perror("Unable to allocate data buffer.\n"); 79 + exit(3); 80 + } 81 + memset(data, 0, ring_size * sizeof *data); 82 + } 83 + 84 + /* guest side */ 85 + int add_inbuf(unsigned len, void *buf, void *datap) 86 + { 87 + unsigned head, avail; 88 + struct vring_desc *desc; 89 + 90 + if (!guest.num_free) 91 + return -1; 92 + 93 + head = guest.free_head; 94 + guest.num_free--; 95 + 96 + desc = ring.desc; 97 + desc[head].flags = VRING_DESC_F_NEXT; 98 + desc[head].addr = (unsigned long)(void *)buf; 99 + desc[head].len = len; 100 + /* We do it like this to simulate the way 101 + * we'd have to flip it if we had multiple 102 + * descriptors. 
103 + */ 104 + desc[head].flags &= ~VRING_DESC_F_NEXT; 105 + guest.free_head = desc[head].next; 106 + 107 + data[head].data = datap; 108 + 109 + #ifdef RING_POLL 110 + /* Barrier A (for pairing) */ 111 + smp_release(); 112 + avail = guest.avail_idx++; 113 + ring.avail->ring[avail & (ring_size - 1)] = 114 + (head | (avail & ~(ring_size - 1))) ^ 0x8000; 115 + #else 116 + avail = (ring_size - 1) & (guest.avail_idx++); 117 + ring.avail->ring[avail] = head; 118 + /* Barrier A (for pairing) */ 119 + smp_release(); 120 + #endif 121 + ring.avail->idx = guest.avail_idx; 122 + return 0; 123 + } 124 + 125 + void *get_buf(unsigned *lenp, void **bufp) 126 + { 127 + unsigned head; 128 + unsigned index; 129 + void *datap; 130 + 131 + #ifdef RING_POLL 132 + head = (ring_size - 1) & guest.last_used_idx; 133 + index = ring.used->ring[head].id; 134 + if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) 135 + return NULL; 136 + /* Barrier B (for pairing) */ 137 + smp_acquire(); 138 + index &= ring_size - 1; 139 + #else 140 + if (ring.used->idx == guest.last_used_idx) 141 + return NULL; 142 + /* Barrier B (for pairing) */ 143 + smp_acquire(); 144 + head = (ring_size - 1) & guest.last_used_idx; 145 + index = ring.used->ring[head].id; 146 + #endif 147 + *lenp = ring.used->ring[head].len; 148 + datap = data[index].data; 149 + *bufp = (void*)(unsigned long)ring.desc[index].addr; 150 + data[index].data = NULL; 151 + ring.desc[index].next = guest.free_head; 152 + guest.free_head = index; 153 + guest.num_free++; 154 + guest.last_used_idx++; 155 + return datap; 156 + } 157 + 158 + void poll_used(void) 159 + { 160 + #ifdef RING_POLL 161 + unsigned head = (ring_size - 1) & guest.last_used_idx; 162 + 163 + for (;;) { 164 + unsigned index = ring.used->ring[head].id; 165 + 166 + if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) 167 + busy_wait(); 168 + else 169 + break; 170 + } 171 + #else 172 + unsigned head = guest.last_used_idx; 173 + 174 + while (ring.used->idx == head) 
175 + busy_wait(); 176 + #endif 177 + } 178 + 179 + void disable_call() 180 + { 181 + /* Doing nothing to disable calls might cause 182 + * extra interrupts, but reduces the number of cache misses. 183 + */ 184 + } 185 + 186 + bool enable_call() 187 + { 188 + unsigned short last_used_idx; 189 + 190 + vring_used_event(&ring) = (last_used_idx = guest.last_used_idx); 191 + /* Flush call index write */ 192 + /* Barrier D (for pairing) */ 193 + smp_mb(); 194 + #ifdef RING_POLL 195 + { 196 + unsigned short head = last_used_idx & (ring_size - 1); 197 + unsigned index = ring.used->ring[head].id; 198 + 199 + return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1); 200 + } 201 + #else 202 + return ring.used->idx == last_used_idx; 203 + #endif 204 + } 205 + 206 + void kick_available(void) 207 + { 208 + /* Flush in previous flags write */ 209 + /* Barrier C (for pairing) */ 210 + smp_mb(); 211 + if (!vring_need_event(vring_avail_event(&ring), 212 + guest.avail_idx, 213 + guest.kicked_avail_idx)) 214 + return; 215 + 216 + guest.kicked_avail_idx = guest.avail_idx; 217 + kick(); 218 + } 219 + 220 + /* host side */ 221 + void disable_kick() 222 + { 223 + /* Doing nothing to disable kicks might cause 224 + * extra interrupts, but reduces the number of cache misses. 
225 + */ 226 + } 227 + 228 + bool enable_kick() 229 + { 230 + unsigned head = host.used_idx; 231 + 232 + vring_avail_event(&ring) = head; 233 + /* Barrier C (for pairing) */ 234 + smp_mb(); 235 + #ifdef RING_POLL 236 + { 237 + unsigned index = ring.avail->ring[head & (ring_size - 1)]; 238 + 239 + return (index ^ head ^ 0x8000) & ~(ring_size - 1); 240 + } 241 + #else 242 + return head == ring.avail->idx; 243 + #endif 244 + } 245 + 246 + void poll_avail(void) 247 + { 248 + unsigned head = host.used_idx; 249 + #ifdef RING_POLL 250 + for (;;) { 251 + unsigned index = ring.avail->ring[head & (ring_size - 1)]; 252 + if ((index ^ head ^ 0x8000) & ~(ring_size - 1)) 253 + busy_wait(); 254 + else 255 + break; 256 + } 257 + #else 258 + while (ring.avail->idx == head) 259 + busy_wait(); 260 + #endif 261 + } 262 + 263 + bool use_buf(unsigned *lenp, void **bufp) 264 + { 265 + unsigned used_idx = host.used_idx; 266 + struct vring_desc *desc; 267 + unsigned head; 268 + 269 + #ifdef RING_POLL 270 + head = ring.avail->ring[used_idx & (ring_size - 1)]; 271 + if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1)) 272 + return false; 273 + /* Barrier A (for pairing) */ 274 + smp_acquire(); 275 + 276 + used_idx &= ring_size - 1; 277 + desc = &ring.desc[head & (ring_size - 1)]; 278 + #else 279 + if (used_idx == ring.avail->idx) 280 + return false; 281 + 282 + /* Barrier A (for pairing) */ 283 + smp_acquire(); 284 + 285 + used_idx &= ring_size - 1; 286 + head = ring.avail->ring[used_idx]; 287 + desc = &ring.desc[head]; 288 + #endif 289 + 290 + *lenp = desc->len; 291 + *bufp = (void *)(unsigned long)desc->addr; 292 + 293 + /* now update used ring */ 294 + ring.used->ring[used_idx].id = head; 295 + ring.used->ring[used_idx].len = desc->len - 1; 296 + /* Barrier B (for pairing) */ 297 + smp_release(); 298 + host.used_idx++; 299 + ring.used->idx = host.used_idx; 300 + 301 + return true; 302 + } 303 + 304 + void call_used(void) 305 + { 306 + /* Flush in previous flags write */ 307 + /* Barrier D 
(for pairing) */ 308 + smp_mb(); 309 + if (!vring_need_event(vring_used_event(&ring), 310 + host.used_idx, 311 + host.called_used_idx)) 312 + return; 313 + 314 + host.called_used_idx = host.used_idx; 315 + call(); 316 + }
+2
tools/virtio/ringtest/virtio_ring_poll.c
/*
 * Ring-polling variant: compiles virtio_ring_0_9.c with RING_POLL defined,
 * which selects that file's #ifdef RING_POLL code paths.
 */
#define RING_POLL 1
#include "virtio_ring_0_9.c"