epoll-ctl.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2018 Davidlohr Bueso.
  4. *
  5. * Benchmark the various operations allowed for epoll_ctl(2).
  6. * The idea is to concurrently stress a single epoll instance
  7. */
  8. #ifdef HAVE_EVENTFD_SUPPORT
  9. /* For the CLR_() macros */
  10. #include <string.h>
  11. #include <pthread.h>
  12. #include <errno.h>
  13. #include <inttypes.h>
  14. #include <signal.h>
  15. #include <stdlib.h>
  16. #include <unistd.h>
  17. #include <linux/compiler.h>
  18. #include <linux/kernel.h>
  19. #include <sys/time.h>
  20. #include <sys/resource.h>
  21. #include <sys/epoll.h>
  22. #include <sys/eventfd.h>
  23. #include <perf/cpumap.h>
  24. #include "../util/mutex.h"
  25. #include "../util/stat.h"
  26. #include <subcmd/parse-options.h>
  27. #include "bench.h"
  28. #include <err.h>
  /* printf() wrapper that only emits output when -v/--verbose was given. */
  #define printinfo(fmt, ...)                         \
      do {                                            \
          if (__verbose)                              \
              printf(fmt, ##__VA_ARGS__);             \
      } while (0)
  /* -t: number of worker threads; 0 means "use the size of the online CPU map". */
  static unsigned int nthreads;
  /* -r: how long the benchmark runs, in seconds. */
  static unsigned int nsecs = 8;
  /* Set by toggle_done(); workers leave their loop once they see it. */
  static bool done;
  /* -v: enables printinfo() output. */
  static bool __verbose;
  /* -R: perform random operations on random fds instead of sweeping the map. */
  static bool randomize;
  /*
   * epoll related shared variables.
   */

  /* Maximum number of nesting allowed inside epoll sets */
  #define EPOLL_MAXNESTS 4

  /*
   * The epoll_ctl(2) operations being benchmarked. The values double as
   * indexes into worker->ops[] and all_stats[].
   */
  enum {
      OP_EPOLL_ADD = 0,
      OP_EPOLL_MOD,
      OP_EPOLL_DEL,
      EPOLL_NR_OPS,   /* count of the operations above, keep last */
  };
  45. static int epollfd;
  46. static int *epollfdp;
  47. static bool noaffinity;
  48. static unsigned int nested = 0;
  49. /* amount of fds to monitor, per thread */
  50. static unsigned int nfds = 64;
  51. static struct mutex thread_lock;
  52. static unsigned int threads_starting;
  53. static struct stats all_stats[EPOLL_NR_OPS];
  54. static struct cond thread_parent, thread_worker;
  55. struct worker {
  56. int tid;
  57. pthread_t thread;
  58. unsigned long ops[EPOLL_NR_OPS];
  59. int *fdmap;
  60. };
  61. static const struct option options[] = {
  62. OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
  63. OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
  64. OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
  65. OPT_BOOLEAN( 'n', "noaffinity", &noaffinity, "Disables CPU affinity"),
  66. OPT_UINTEGER( 'N', "nested", &nested, "Nesting level epoll hierarchy (default is 0, no nesting)"),
  67. OPT_BOOLEAN( 'R', "randomize", &randomize, "Perform random operations on random fds"),
  68. OPT_BOOLEAN( 'v', "verbose", &__verbose, "Verbose mode"),
  69. OPT_END()
  70. };
  /* NULL-terminated usage text handed to the option parser. */
  static const char * const bench_epoll_ctl_usage[] = {
      "perf bench epoll ctl <options>",
      NULL
  };
  75. static void toggle_done(int sig __maybe_unused,
  76. siginfo_t *info __maybe_unused,
  77. void *uc __maybe_unused)
  78. {
  79. /* inform all threads that we're done for the day */
  80. done = true;
  81. gettimeofday(&bench__end, NULL);
  82. timersub(&bench__end, &bench__start, &bench__runtime);
  83. }
  84. static void nest_epollfd(void)
  85. {
  86. unsigned int i;
  87. struct epoll_event ev;
  88. if (nested > EPOLL_MAXNESTS)
  89. nested = EPOLL_MAXNESTS;
  90. printinfo("Nesting level(s): %d\n", nested);
  91. epollfdp = calloc(nested, sizeof(int));
  92. if (!epollfdp)
  93. err(EXIT_FAILURE, "calloc");
  94. for (i = 0; i < nested; i++) {
  95. epollfdp[i] = epoll_create(1);
  96. if (epollfd < 0)
  97. err(EXIT_FAILURE, "epoll_create");
  98. }
  99. ev.events = EPOLLHUP; /* anything */
  100. ev.data.u64 = i; /* any number */
  101. for (i = nested - 1; i; i--) {
  102. if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
  103. epollfdp[i], &ev) < 0)
  104. err(EXIT_FAILURE, "epoll_ctl");
  105. }
  106. if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
  107. err(EXIT_FAILURE, "epoll_ctl");
  108. }
  109. static inline void do_epoll_op(struct worker *w, int op, int fd)
  110. {
  111. int error;
  112. struct epoll_event ev;
  113. ev.events = EPOLLIN;
  114. ev.data.u64 = fd;
  115. switch (op) {
  116. case OP_EPOLL_ADD:
  117. error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
  118. break;
  119. case OP_EPOLL_MOD:
  120. ev.events = EPOLLOUT;
  121. error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev);
  122. break;
  123. case OP_EPOLL_DEL:
  124. error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
  125. break;
  126. default:
  127. error = 1;
  128. break;
  129. }
  130. if (!error)
  131. w->ops[op]++;
  132. }
  133. static inline void do_random_epoll_op(struct worker *w)
  134. {
  135. unsigned long rnd1 = random(), rnd2 = random();
  136. int op, fd;
  137. fd = w->fdmap[rnd1 % nfds];
  138. op = rnd2 % EPOLL_NR_OPS;
  139. do_epoll_op(w, op, fd);
  140. }
  141. static void *workerfn(void *arg)
  142. {
  143. unsigned int i;
  144. struct worker *w = (struct worker *) arg;
  145. struct timespec ts = { .tv_sec = 0,
  146. .tv_nsec = 250 };
  147. mutex_lock(&thread_lock);
  148. threads_starting--;
  149. if (!threads_starting)
  150. cond_signal(&thread_parent);
  151. cond_wait(&thread_worker, &thread_lock);
  152. mutex_unlock(&thread_lock);
  153. /* Let 'em loose */
  154. do {
  155. /* random */
  156. if (randomize) {
  157. do_random_epoll_op(w);
  158. } else {
  159. for (i = 0; i < nfds; i++) {
  160. do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]);
  161. do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]);
  162. do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]);
  163. }
  164. }
  165. nanosleep(&ts, NULL);
  166. } while (!done);
  167. return NULL;
  168. }
  169. static void init_fdmaps(struct worker *w, int pct)
  170. {
  171. unsigned int i;
  172. int inc;
  173. struct epoll_event ev;
  174. if (!pct)
  175. return;
  176. inc = 100/pct;
  177. for (i = 0; i < nfds; i+=inc) {
  178. ev.data.fd = w->fdmap[i];
  179. ev.events = EPOLLIN;
  180. if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0)
  181. err(EXIT_FAILURE, "epoll_ct");
  182. }
  183. }
  184. static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
  185. {
  186. pthread_attr_t thread_attr, *attrp = NULL;
  187. cpu_set_t *cpuset;
  188. unsigned int i, j;
  189. int ret = 0;
  190. int nrcpus;
  191. size_t size;
  192. if (!noaffinity)
  193. pthread_attr_init(&thread_attr);
  194. nrcpus = cpu__max_cpu().cpu;
  195. cpuset = CPU_ALLOC(nrcpus);
  196. BUG_ON(!cpuset);
  197. size = CPU_ALLOC_SIZE(nrcpus);
  198. for (i = 0; i < nthreads; i++) {
  199. struct worker *w = &worker[i];
  200. w->tid = i;
  201. w->fdmap = calloc(nfds, sizeof(int));
  202. if (!w->fdmap)
  203. return 1;
  204. for (j = 0; j < nfds; j++) {
  205. w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
  206. if (w->fdmap[j] < 0)
  207. err(EXIT_FAILURE, "eventfd");
  208. }
  209. /*
  210. * Lets add 50% of the fdmap to the epoll instance, and
  211. * do it before any threads are started; otherwise there is
  212. * an initial bias of the call failing (mod and del ops).
  213. */
  214. if (randomize)
  215. init_fdmaps(w, 50);
  216. if (!noaffinity) {
  217. CPU_ZERO_S(size, cpuset);
  218. CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
  219. size, cpuset);
  220. ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
  221. if (ret) {
  222. CPU_FREE(cpuset);
  223. err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
  224. }
  225. attrp = &thread_attr;
  226. }
  227. ret = pthread_create(&w->thread, attrp, workerfn,
  228. (void *)(struct worker *) w);
  229. if (ret) {
  230. CPU_FREE(cpuset);
  231. err(EXIT_FAILURE, "pthread_create");
  232. }
  233. }
  234. CPU_FREE(cpuset);
  235. if (!noaffinity)
  236. pthread_attr_destroy(&thread_attr);
  237. return ret;
  238. }
  239. static void print_summary(void)
  240. {
  241. int i;
  242. unsigned long avg[EPOLL_NR_OPS];
  243. double stddev[EPOLL_NR_OPS];
  244. for (i = 0; i < EPOLL_NR_OPS; i++) {
  245. avg[i] = avg_stats(&all_stats[i]);
  246. stddev[i] = stddev_stats(&all_stats[i]);
  247. }
  248. printf("\nAveraged %ld ADD operations (+- %.2f%%)\n",
  249. avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD],
  250. avg[OP_EPOLL_ADD]));
  251. printf("Averaged %ld MOD operations (+- %.2f%%)\n",
  252. avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD],
  253. avg[OP_EPOLL_MOD]));
  254. printf("Averaged %ld DEL operations (+- %.2f%%)\n",
  255. avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL],
  256. avg[OP_EPOLL_DEL]));
  257. }
  258. int bench_epoll_ctl(int argc, const char **argv)
  259. {
  260. int j, ret = 0;
  261. struct sigaction act;
  262. struct worker *worker = NULL;
  263. struct perf_cpu_map *cpu;
  264. struct rlimit rl, prevrl;
  265. unsigned int i;
  266. argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0);
  267. if (argc) {
  268. usage_with_options(bench_epoll_ctl_usage, options);
  269. exit(EXIT_FAILURE);
  270. }
  271. memset(&act, 0, sizeof(act));
  272. sigfillset(&act.sa_mask);
  273. act.sa_sigaction = toggle_done;
  274. sigaction(SIGINT, &act, NULL);
  275. cpu = perf_cpu_map__new_online_cpus();
  276. if (!cpu)
  277. goto errmem;
  278. /* a single, main epoll instance */
  279. epollfd = epoll_create(1);
  280. if (epollfd < 0)
  281. err(EXIT_FAILURE, "epoll_create");
  282. /*
  283. * Deal with nested epolls, if any.
  284. */
  285. if (nested)
  286. nest_epollfd();
  287. /* default to the number of CPUs */
  288. if (!nthreads)
  289. nthreads = perf_cpu_map__nr(cpu);
  290. worker = calloc(nthreads, sizeof(*worker));
  291. if (!worker)
  292. goto errmem;
  293. if (getrlimit(RLIMIT_NOFILE, &prevrl))
  294. err(EXIT_FAILURE, "getrlimit");
  295. rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50;
  296. printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
  297. (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
  298. if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
  299. err(EXIT_FAILURE, "setrlimit");
  300. printf("Run summary [PID %d]: %d threads doing epoll_ctl ops "
  301. "%d file-descriptors for %d secs.\n\n",
  302. getpid(), nthreads, nfds, nsecs);
  303. for (i = 0; i < EPOLL_NR_OPS; i++)
  304. init_stats(&all_stats[i]);
  305. mutex_init(&thread_lock);
  306. cond_init(&thread_parent);
  307. cond_init(&thread_worker);
  308. threads_starting = nthreads;
  309. gettimeofday(&bench__start, NULL);
  310. do_threads(worker, cpu);
  311. mutex_lock(&thread_lock);
  312. while (threads_starting)
  313. cond_wait(&thread_parent, &thread_lock);
  314. cond_broadcast(&thread_worker);
  315. mutex_unlock(&thread_lock);
  316. sleep(nsecs);
  317. toggle_done(0, NULL, NULL);
  318. printinfo("main thread: toggling done\n");
  319. for (i = 0; i < nthreads; i++) {
  320. ret = pthread_join(worker[i].thread, NULL);
  321. if (ret)
  322. err(EXIT_FAILURE, "pthread_join");
  323. }
  324. /* cleanup & report results */
  325. cond_destroy(&thread_parent);
  326. cond_destroy(&thread_worker);
  327. mutex_destroy(&thread_lock);
  328. for (i = 0; i < nthreads; i++) {
  329. unsigned long t[EPOLL_NR_OPS];
  330. for (j = 0; j < EPOLL_NR_OPS; j++) {
  331. t[j] = worker[i].ops[j];
  332. update_stats(&all_stats[j], t[j]);
  333. }
  334. if (nfds == 1)
  335. printf("[thread %2d] fdmap: %p [ add: %04ld; mod: %04ld; del: %04lds ops ]\n",
  336. worker[i].tid, &worker[i].fdmap[0],
  337. t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
  338. else
  339. printf("[thread %2d] fdmap: %p ... %p [ add: %04ld ops; mod: %04ld ops; del: %04ld ops ]\n",
  340. worker[i].tid, &worker[i].fdmap[0],
  341. &worker[i].fdmap[nfds-1],
  342. t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
  343. }
  344. print_summary();
  345. close(epollfd);
  346. perf_cpu_map__put(cpu);
  347. for (i = 0; i < nthreads; i++)
  348. free(worker[i].fdmap);
  349. free(worker);
  350. return ret;
  351. errmem:
  352. err(EXIT_FAILURE, "calloc");
  353. }
  354. #endif // HAVE_EVENTFD_SUPPORT