Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v4.6-rc6 379 lines 8.9 kB view raw
1/* 2 * mem-memcpy.c 3 * 4 * Simple memcpy() and memset() benchmarks 5 * 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 7 */ 8 9#include "../perf.h" 10#include "../util/util.h" 11#include <subcmd/parse-options.h> 12#include "../util/header.h" 13#include "../util/cloexec.h" 14#include "bench.h" 15#include "mem-memcpy-arch.h" 16#include "mem-memset-arch.h" 17 18#include <stdio.h> 19#include <stdlib.h> 20#include <string.h> 21#include <sys/time.h> 22#include <errno.h> 23 24#define K 1024 25 26static const char *size_str = "1MB"; 27static const char *function_str = "all"; 28static int nr_loops = 1; 29static bool use_cycles; 30static int cycles_fd; 31 32static const struct option options[] = { 33 OPT_STRING('s', "size", &size_str, "1MB", 34 "Specify the size of the memory buffers. " 35 "Available units: B, KB, MB, GB and TB (case insensitive)"), 36 37 OPT_STRING('f', "function", &function_str, "all", 38 "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), 39 40 OPT_INTEGER('l', "nr_loops", &nr_loops, 41 "Specify the number of loops to run. (default: 1)"), 42 43 OPT_BOOLEAN('c', "cycles", &use_cycles, 44 "Use a cycles event instead of gettimeofday() to measure performance"), 45 46 OPT_END() 47}; 48 49typedef void *(*memcpy_t)(void *, const void *, size_t); 50typedef void *(*memset_t)(void *, int, size_t); 51 52struct function { 53 const char *name; 54 const char *desc; 55 union { 56 memcpy_t memcpy; 57 memset_t memset; 58 } fn; 59}; 60 61static struct perf_event_attr cycle_attr = { 62 .type = PERF_TYPE_HARDWARE, 63 .config = PERF_COUNT_HW_CPU_CYCLES 64}; 65 66static void init_cycles(void) 67{ 68 cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); 69 70 if (cycles_fd < 0 && errno == ENOSYS) 71 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 72 else 73 BUG_ON(cycles_fd < 0); 74} 75 76static u64 get_cycles(void) 77{ 78 int ret; 79 u64 clk; 80 81 ret = read(cycles_fd, &clk, sizeof(u64)); 82 BUG_ON(ret != sizeof(u64)); 83 84 return clk; 85} 86 87static double timeval2double(struct timeval *ts) 88{ 89 return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; 90} 91 92#define print_bps(x) do { \ 93 if (x < K) \ 94 printf(" %14lf bytes/sec\n", x); \ 95 else if (x < K * K) \ 96 printf(" %14lfd KB/sec\n", x / K); \ 97 else if (x < K * K * K) \ 98 printf(" %14lf MB/sec\n", x / K / K); \ 99 else \ 100 printf(" %14lf GB/sec\n", x / K / K / K); \ 101 } while (0) 102 103struct bench_mem_info { 104 const struct function *functions; 105 u64 (*do_cycles)(const struct function *r, size_t size); 106 double (*do_gettimeofday)(const struct function *r, size_t size); 107 const char *const *usage; 108}; 109 110static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) 111{ 112 const struct function *r = &info->functions[r_idx]; 113 double result_bps = 0.0; 114 u64 result_cycles = 0; 115 116 printf("# function '%s' (%s)\n", r->name, r->desc); 117 118 if (bench_format == BENCH_FORMAT_DEFAULT) 119 printf("# Copying %s bytes ...\n\n", size_str); 120 121 if (use_cycles) { 122 result_cycles = info->do_cycles(r, size); 123 } else { 124 result_bps = info->do_gettimeofday(r, size); 125 } 126 127 switch (bench_format) { 128 case BENCH_FORMAT_DEFAULT: 129 if (use_cycles) { 130 printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); 131 } else { 132 print_bps(result_bps); 133 } 134 break; 135 136 case BENCH_FORMAT_SIMPLE: 137 if (use_cycles) { 138 printf("%lf\n", (double)result_cycles/size_total); 139 } else { 140 printf("%lf\n", result_bps); 141 } 142 break; 143 144 default: 145 BUG_ON(1); 146 break; 147 } 148} 149 150static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) 151{ 152 int i; 153 size_t size; 154 double size_total; 155 156 argc = parse_options(argc, argv, options, info->usage, 0); 157 158 if (use_cycles) 159 init_cycles(); 160 161 size = (size_t)perf_atoll((char *)size_str); 162 size_total = (double)size * nr_loops; 163 164 if ((s64)size <= 0) { 165 fprintf(stderr, "Invalid size:%s\n", size_str); 166 return 1; 167 } 168 169 if (!strncmp(function_str, "all", 3)) { 170 for (i = 0; info->functions[i].name; i++) 171 __bench_mem_function(info, i, size, size_total); 172 return 0; 173 } 174 175 for (i = 0; info->functions[i].name; i++) { 176 if (!strcmp(info->functions[i].name, function_str)) 177 break; 178 } 179 if (!info->functions[i].name) { 180 if (strcmp(function_str, "help") && strcmp(function_str, "h")) 181 printf("Unknown function: %s\n", function_str); 182 printf("Available functions:\n"); 183 for (i = 0; info->functions[i].name; i++) { 184 printf("\t%s ... %s\n", 185 info->functions[i].name, info->functions[i].desc); 186 } 187 return 1; 188 } 189 190 __bench_mem_function(info, i, size, size_total); 191 192 return 0; 193} 194 195static void memcpy_alloc_mem(void **dst, void **src, size_t size) 196{ 197 *dst = zalloc(size); 198 if (!*dst) 199 die("memory allocation failed - maybe size is too large?\n"); 200 201 *src = zalloc(size); 202 if (!*src) 203 die("memory allocation failed - maybe size is too large?\n"); 204 205 /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 206 memset(*src, 0, size); 207} 208 209static u64 do_memcpy_cycles(const struct function *r, size_t size) 210{ 211 u64 cycle_start = 0ULL, cycle_end = 0ULL; 212 void *src = NULL, *dst = NULL; 213 memcpy_t fn = r->fn.memcpy; 214 int i; 215 216 memcpy_alloc_mem(&dst, &src, size); 217 218 /* 219 * We prefault the freshly allocated memory range here, 220 * to not measure page fault overhead: 221 */ 222 fn(dst, src, size); 223 224 cycle_start = get_cycles(); 225 for (i = 0; i < nr_loops; ++i) 226 fn(dst, src, size); 227 cycle_end = get_cycles(); 228 229 free(src); 230 free(dst); 231 return cycle_end - cycle_start; 232} 233 234static double do_memcpy_gettimeofday(const struct function *r, size_t size) 235{ 236 struct timeval tv_start, tv_end, tv_diff; 237 memcpy_t fn = r->fn.memcpy; 238 void *src = NULL, *dst = NULL; 239 int i; 240 241 memcpy_alloc_mem(&dst, &src, size); 242 243 /* 244 * We prefault the freshly allocated memory range here, 245 * to not measure page fault overhead: 246 */ 247 fn(dst, src, size); 248 249 BUG_ON(gettimeofday(&tv_start, NULL)); 250 for (i = 0; i < nr_loops; ++i) 251 fn(dst, src, size); 252 BUG_ON(gettimeofday(&tv_end, NULL)); 253 254 timersub(&tv_end, &tv_start, &tv_diff); 255 256 free(src); 257 free(dst); 258 259 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 260} 261 262struct function memcpy_functions[] = { 263 { .name = "default", 264 .desc = "Default memcpy() provided by glibc", 265 .fn.memcpy = memcpy }, 266 267#ifdef HAVE_ARCH_X86_64_SUPPORT 268# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 269# include "mem-memcpy-x86-64-asm-def.h" 270# undef MEMCPY_FN 271#endif 272 273 { .name = NULL, } 274}; 275 276static const char * const bench_mem_memcpy_usage[] = { 277 "perf bench mem memcpy <options>", 278 NULL 279}; 280 281int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) 282{ 283 struct bench_mem_info info = { 284 .functions = memcpy_functions, 285 .do_cycles = do_memcpy_cycles, 286 .do_gettimeofday = do_memcpy_gettimeofday, 287 .usage = bench_mem_memcpy_usage, 288 }; 289 290 return bench_mem_common(argc, argv, &info); 291} 292 293static void memset_alloc_mem(void **dst, size_t size) 294{ 295 *dst = zalloc(size); 296 if (!*dst) 297 die("memory allocation failed - maybe size is too large?\n"); 298} 299 300static u64 do_memset_cycles(const struct function *r, size_t size) 301{ 302 u64 cycle_start = 0ULL, cycle_end = 0ULL; 303 memset_t fn = r->fn.memset; 304 void *dst = NULL; 305 int i; 306 307 memset_alloc_mem(&dst, size); 308 309 /* 310 * We prefault the freshly allocated memory range here, 311 * to not measure page fault overhead: 312 */ 313 fn(dst, -1, size); 314 315 cycle_start = get_cycles(); 316 for (i = 0; i < nr_loops; ++i) 317 fn(dst, i, size); 318 cycle_end = get_cycles(); 319 320 free(dst); 321 return cycle_end - cycle_start; 322} 323 324static double do_memset_gettimeofday(const struct function *r, size_t size) 325{ 326 struct timeval tv_start, tv_end, tv_diff; 327 memset_t fn = r->fn.memset; 328 void *dst = NULL; 329 int i; 330 331 memset_alloc_mem(&dst, size); 332 333 /* 334 * We prefault the freshly allocated memory range here, 335 * to not measure page fault overhead: 336 */ 337 fn(dst, -1, size); 338 339 BUG_ON(gettimeofday(&tv_start, NULL)); 340 for (i = 0; i < nr_loops; ++i) 341 fn(dst, i, size); 342 BUG_ON(gettimeofday(&tv_end, NULL)); 343 344 timersub(&tv_end, &tv_start, &tv_diff); 345 346 free(dst); 347 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 348} 349 350static const char * const bench_mem_memset_usage[] = { 351 "perf bench mem memset <options>", 352 NULL 353}; 354 355static const struct function memset_functions[] = { 356 { .name = "default", 357 .desc = "Default memset() provided by glibc", 358 .fn.memset = memset }, 359 360#ifdef HAVE_ARCH_X86_64_SUPPORT 361# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 362# include "mem-memset-x86-64-asm-def.h" 363# undef MEMSET_FN 364#endif 365 366 { .name = NULL, } 367}; 368 369int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) 370{ 371 struct bench_mem_info info = { 372 .functions = memset_functions, 373 .do_cycles = do_memset_cycles, 374 .do_gettimeofday = do_memset_gettimeofday, 375 .usage = bench_mem_memset_usage, 376 }; 377 378 return bench_mem_common(argc, argv, &info); 379}