mem-functions.c 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * mem-functions.c
  4. *
  5. * Simple memcpy() and memset() benchmarks
  6. *
  7. * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  8. */
  9. #include "debug.h"
  10. #include "../perf-sys.h"
  11. #include <subcmd/parse-options.h>
  12. #include "../util/header.h"
  13. #include "../util/cloexec.h"
  14. #include "../util/string2.h"
  15. #include "bench.h"
  16. #include "mem-memcpy-arch.h"
  17. #include "mem-memset-arch.h"
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include <unistd.h>
  22. #include <sys/time.h>
  23. #include <errno.h>
  24. #include <linux/time64.h>
  25. #include <linux/zalloc.h>
  26. #define K 1024
  27. static const char *size_str = "1MB";
  28. static const char *function_str = "all";
  29. static int nr_loops = 1;
  30. static bool use_cycles;
  31. static int cycles_fd;
  32. static const struct option options[] = {
  33. OPT_STRING('s', "size", &size_str, "1MB",
  34. "Specify the size of the memory buffers. "
  35. "Available units: B, KB, MB, GB and TB (case insensitive)"),
  36. OPT_STRING('f', "function", &function_str, "all",
  37. "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
  38. OPT_INTEGER('l', "nr_loops", &nr_loops,
  39. "Specify the number of loops to run. (default: 1)"),
  40. OPT_BOOLEAN('c', "cycles", &use_cycles,
  41. "Use a cycles event instead of gettimeofday() to measure performance"),
  42. OPT_END()
  43. };
  44. typedef void *(*memcpy_t)(void *, const void *, size_t);
  45. typedef void *(*memset_t)(void *, int, size_t);
  46. struct function {
  47. const char *name;
  48. const char *desc;
  49. union {
  50. memcpy_t memcpy;
  51. memset_t memset;
  52. } fn;
  53. };
  54. static struct perf_event_attr cycle_attr = {
  55. .type = PERF_TYPE_HARDWARE,
  56. .config = PERF_COUNT_HW_CPU_CYCLES
  57. };
  58. static int init_cycles(void)
  59. {
  60. cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
  61. if (cycles_fd < 0 && errno == ENOSYS) {
  62. pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  63. return -1;
  64. }
  65. return cycles_fd;
  66. }
  67. static u64 get_cycles(void)
  68. {
  69. int ret;
  70. u64 clk;
  71. ret = read(cycles_fd, &clk, sizeof(u64));
  72. BUG_ON(ret != sizeof(u64));
  73. return clk;
  74. }
  75. static double timeval2double(struct timeval *ts)
  76. {
  77. return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
  78. }
  79. #define print_bps(x) do { \
  80. if (x < K) \
  81. printf(" %14lf bytes/sec\n", x); \
  82. else if (x < K * K) \
  83. printf(" %14lfd KB/sec\n", x / K); \
  84. else if (x < K * K * K) \
  85. printf(" %14lf MB/sec\n", x / K / K); \
  86. else \
  87. printf(" %14lf GB/sec\n", x / K / K / K); \
  88. } while (0)
  89. struct bench_mem_info {
  90. const struct function *functions;
  91. u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
  92. double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
  93. const char *const *usage;
  94. bool alloc_src;
  95. };
  96. static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
  97. {
  98. const struct function *r = &info->functions[r_idx];
  99. double result_bps = 0.0;
  100. u64 result_cycles = 0;
  101. void *src = NULL, *dst = zalloc(size);
  102. printf("# function '%s' (%s)\n", r->name, r->desc);
  103. if (dst == NULL)
  104. goto out_alloc_failed;
  105. if (info->alloc_src) {
  106. src = zalloc(size);
  107. if (src == NULL)
  108. goto out_alloc_failed;
  109. }
  110. if (bench_format == BENCH_FORMAT_DEFAULT)
  111. printf("# Copying %s bytes ...\n\n", size_str);
  112. if (use_cycles) {
  113. result_cycles = info->do_cycles(r, size, src, dst);
  114. } else {
  115. result_bps = info->do_gettimeofday(r, size, src, dst);
  116. }
  117. switch (bench_format) {
  118. case BENCH_FORMAT_DEFAULT:
  119. if (use_cycles) {
  120. printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
  121. } else {
  122. print_bps(result_bps);
  123. }
  124. break;
  125. case BENCH_FORMAT_SIMPLE:
  126. if (use_cycles) {
  127. printf("%lf\n", (double)result_cycles/size_total);
  128. } else {
  129. printf("%lf\n", result_bps);
  130. }
  131. break;
  132. default:
  133. BUG_ON(1);
  134. break;
  135. }
  136. out_free:
  137. free(src);
  138. free(dst);
  139. return;
  140. out_alloc_failed:
  141. printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
  142. goto out_free;
  143. }
  144. static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
  145. {
  146. int i;
  147. size_t size;
  148. double size_total;
  149. argc = parse_options(argc, argv, options, info->usage, 0);
  150. if (use_cycles) {
  151. i = init_cycles();
  152. if (i < 0) {
  153. fprintf(stderr, "Failed to open cycles counter\n");
  154. return i;
  155. }
  156. }
  157. size = (size_t)perf_atoll((char *)size_str);
  158. size_total = (double)size * nr_loops;
  159. if ((s64)size <= 0) {
  160. fprintf(stderr, "Invalid size:%s\n", size_str);
  161. return 1;
  162. }
  163. if (!strncmp(function_str, "all", 3)) {
  164. for (i = 0; info->functions[i].name; i++)
  165. __bench_mem_function(info, i, size, size_total);
  166. return 0;
  167. }
  168. for (i = 0; info->functions[i].name; i++) {
  169. if (!strcmp(info->functions[i].name, function_str))
  170. break;
  171. }
  172. if (!info->functions[i].name) {
  173. if (strcmp(function_str, "help") && strcmp(function_str, "h"))
  174. printf("Unknown function: %s\n", function_str);
  175. printf("Available functions:\n");
  176. for (i = 0; info->functions[i].name; i++) {
  177. printf("\t%s ... %s\n",
  178. info->functions[i].name, info->functions[i].desc);
  179. }
  180. return 1;
  181. }
  182. __bench_mem_function(info, i, size, size_total);
  183. return 0;
  184. }
  185. static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
  186. {
  187. /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
  188. memset(src, 0, size);
  189. /*
  190. * We prefault the freshly allocated memory range here,
  191. * to not measure page fault overhead:
  192. */
  193. fn(dst, src, size);
  194. }
  195. static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
  196. {
  197. u64 cycle_start = 0ULL, cycle_end = 0ULL;
  198. memcpy_t fn = r->fn.memcpy;
  199. int i;
  200. memcpy_prefault(fn, size, src, dst);
  201. cycle_start = get_cycles();
  202. for (i = 0; i < nr_loops; ++i)
  203. fn(dst, src, size);
  204. cycle_end = get_cycles();
  205. return cycle_end - cycle_start;
  206. }
  207. static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
  208. {
  209. struct timeval tv_start, tv_end, tv_diff;
  210. memcpy_t fn = r->fn.memcpy;
  211. int i;
  212. memcpy_prefault(fn, size, src, dst);
  213. BUG_ON(gettimeofday(&tv_start, NULL));
  214. for (i = 0; i < nr_loops; ++i)
  215. fn(dst, src, size);
  216. BUG_ON(gettimeofday(&tv_end, NULL));
  217. timersub(&tv_end, &tv_start, &tv_diff);
  218. return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
  219. }
  220. struct function memcpy_functions[] = {
  221. { .name = "default",
  222. .desc = "Default memcpy() provided by glibc",
  223. .fn.memcpy = memcpy },
  224. #ifdef HAVE_ARCH_X86_64_SUPPORT
  225. # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
  226. # include "mem-memcpy-x86-64-asm-def.h"
  227. # undef MEMCPY_FN
  228. #endif
  229. { .name = NULL, }
  230. };
  231. static const char * const bench_mem_memcpy_usage[] = {
  232. "perf bench mem memcpy <options>",
  233. NULL
  234. };
  235. int bench_mem_memcpy(int argc, const char **argv)
  236. {
  237. struct bench_mem_info info = {
  238. .functions = memcpy_functions,
  239. .do_cycles = do_memcpy_cycles,
  240. .do_gettimeofday = do_memcpy_gettimeofday,
  241. .usage = bench_mem_memcpy_usage,
  242. .alloc_src = true,
  243. };
  244. return bench_mem_common(argc, argv, &info);
  245. }
  246. static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
  247. {
  248. u64 cycle_start = 0ULL, cycle_end = 0ULL;
  249. memset_t fn = r->fn.memset;
  250. int i;
  251. /*
  252. * We prefault the freshly allocated memory range here,
  253. * to not measure page fault overhead:
  254. */
  255. fn(dst, -1, size);
  256. cycle_start = get_cycles();
  257. for (i = 0; i < nr_loops; ++i)
  258. fn(dst, i, size);
  259. cycle_end = get_cycles();
  260. return cycle_end - cycle_start;
  261. }
  262. static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
  263. {
  264. struct timeval tv_start, tv_end, tv_diff;
  265. memset_t fn = r->fn.memset;
  266. int i;
  267. /*
  268. * We prefault the freshly allocated memory range here,
  269. * to not measure page fault overhead:
  270. */
  271. fn(dst, -1, size);
  272. BUG_ON(gettimeofday(&tv_start, NULL));
  273. for (i = 0; i < nr_loops; ++i)
  274. fn(dst, i, size);
  275. BUG_ON(gettimeofday(&tv_end, NULL));
  276. timersub(&tv_end, &tv_start, &tv_diff);
  277. return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
  278. }
  279. static const char * const bench_mem_memset_usage[] = {
  280. "perf bench mem memset <options>",
  281. NULL
  282. };
  283. static const struct function memset_functions[] = {
  284. { .name = "default",
  285. .desc = "Default memset() provided by glibc",
  286. .fn.memset = memset },
  287. #ifdef HAVE_ARCH_X86_64_SUPPORT
  288. # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
  289. # include "mem-memset-x86-64-asm-def.h"
  290. # undef MEMSET_FN
  291. #endif
  292. { .name = NULL, }
  293. };
  294. int bench_mem_memset(int argc, const char **argv)
  295. {
  296. struct bench_mem_info info = {
  297. .functions = memset_functions,
  298. .do_cycles = do_memset_cycles,
  299. .do_gettimeofday = do_memset_gettimeofday,
  300. .usage = bench_mem_memset_usage,
  301. };
  302. return bench_mem_common(argc, argv, &info);
  303. }