fuse_test.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * memfd GUP test-case
  4. * This tests memfd interactions with get_user_pages(). We require the
  5. * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
  6. * file-system delays _all_ reads by 1s and forces direct-IO. This means, any
  7. * read() on files in that file-system will pin the receive-buffer pages for at
  8. * least 1s via get_user_pages().
  9. *
  10. * We use this trick to race ADD_SEALS against a write on a memfd object. The
  11. * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
  12. * the read() syscall with our memory-mapped memfd object as receive buffer to
  13. * force the kernel to write into our memfd object.
  14. */
  15. #define _GNU_SOURCE
  16. #define __EXPORTED_HEADERS__
  17. #include <errno.h>
  18. #include <inttypes.h>
  19. #include <limits.h>
  20. #include <linux/falloc.h>
  21. #include <linux/fcntl.h>
  22. #include <linux/memfd.h>
  23. #include <sched.h>
  24. #include <stdio.h>
  25. #include <stdlib.h>
  26. #include <signal.h>
  27. #include <string.h>
  28. #include <sys/mman.h>
  29. #include <sys/stat.h>
  30. #include <sys/syscall.h>
  31. #include <sys/wait.h>
  32. #include <unistd.h>
  33. #include "common.h"
  /* Default size, in bytes, of the memfd object used by this test. */
  #define MFD_DEF_SIZE 8192
  /* Size, in bytes, of the malloc()ed stack handed to the clone()d sealing thread. */
  #define STACK_SIZE 65536
  /*
   * Size actually used when sizing, mapping, reading into and comparing the
   * memfd. Starts at MFD_DEF_SIZE; main() replaces it with twice the default
   * huge page size when the "hugetlbfs" option is given.
   */
  static size_t mfd_def_size = MFD_DEF_SIZE;
  /*
   * Create a memfd called @name with creation @flags and resize it to @sz
   * bytes.
   *
   * Returns the new file descriptor. Any failure is printed to stdout and
   * ends the test through abort().
   */
  static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
  {
      int memfd = sys_memfd_create(name, flags);

      if (memfd < 0) {
          printf("memfd_create(\"%s\", %u) failed: %m\n", name, flags);
          abort();
      }

      if (ftruncate(memfd, sz) < 0) {
          printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
          abort();
      }

      return memfd;
  }
  /*
   * Query the seals currently set on @fd with F_GET_SEALS.
   *
   * Returns the seal bitmask. If the fcntl() fails, the error is printed and
   * the test is terminated through abort().
   */
  static __u64 mfd_assert_get_seals(int fd)
  {
      long seals = fcntl(fd, F_GET_SEALS);

      if (seals >= 0) {
          return seals;
      }

      printf("GET_SEALS(%d) failed: %m\n", fd);
      abort();
  }
  /*
   * Verify that the seals on @fd are exactly @seals (no more, no fewer).
   * A mismatch prints both values and terminates the test through abort().
   */
  static void mfd_assert_has_seals(int fd, __u64 seals)
  {
      __u64 actual = mfd_assert_get_seals(fd);

      if (actual == seals) {
          return;
      }

      printf("%llu != %llu = GET_SEALS(%d)\n",
             (unsigned long long)seals, (unsigned long long)actual, fd);
      abort();
  }
  /*
   * Add @seals to @fd with F_ADD_SEALS and require that it succeeds.
   * On failure the previous and requested seals are printed and the test is
   * terminated through abort().
   */
  static void mfd_assert_add_seals(int fd, __u64 seals)
  {
      /* Read the seals first; the value is only used by the failure message. */
      __u64 before = mfd_assert_get_seals(fd);

      if (fcntl(fd, F_ADD_SEALS, seals) >= 0) {
          return;
      }

      printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
             fd, (unsigned long long)before, (unsigned long long)seals);
      abort();
  }
  /*
   * Try to add @seals to @fd, tolerating exactly one kind of failure: EBUSY.
   *
   * Returns the fcntl(F_ADD_SEALS) result: non-negative on success, negative
   * when sealing was refused with EBUSY. Any other error is printed and the
   * test is terminated through abort().
   */
  static int mfd_busy_add_seals(int fd, __u64 seals)
  {
      /* Current seals are informational only; a failed query is shown as 0. */
      long cur = fcntl(fd, F_GET_SEALS);
      __u64 before = (cur < 0) ? 0 : (__u64)cur;
      long ret = fcntl(fd, F_ADD_SEALS, seals);

      if (ret >= 0 || errno == EBUSY) {
          return ret;
      }

      printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
             fd, (unsigned long long)before, (unsigned long long)seals);
      abort();
  }
  102. static void *mfd_assert_mmap_shared(int fd)
  103. {
  104. void *p;
  105. p = mmap(NULL,
  106. mfd_def_size,
  107. PROT_READ | PROT_WRITE,
  108. MAP_SHARED,
  109. fd,
  110. 0);
  111. if (p == MAP_FAILED) {
  112. printf("mmap() failed: %m\n");
  113. abort();
  114. }
  115. return p;
  116. }
  117. static void *mfd_assert_mmap_private(int fd)
  118. {
  119. void *p;
  120. p = mmap(NULL,
  121. mfd_def_size,
  122. PROT_READ | PROT_WRITE,
  123. MAP_PRIVATE,
  124. fd,
  125. 0);
  126. if (p == MAP_FAILED) {
  127. printf("mmap() failed: %m\n");
  128. abort();
  129. }
  130. return p;
  131. }
  /* memfd that the sealing thread tries to seal; -1 until main() assigns it. */
  static int global_mfd = -1;
  /* Shared mapping of that memfd; the sealing thread munmap()s it before sealing. */
  static void *global_p = NULL;
  134. static int sealing_thread_fn(void *arg)
  135. {
  136. int sig, r;
  137. /*
  138. * This thread first waits 200ms so any pending operation in the parent
  139. * is correctly started. After that, it tries to seal @global_mfd as
  140. * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
  141. * that memory mapped object still ongoing.
  142. * We then wait one more second and try sealing again. This time it
  143. * must succeed as there shouldn't be anyone else pinning the pages.
  144. */
  145. /* wait 200ms for FUSE-request to be active */
  146. usleep(200000);
  147. /* unmount mapping before sealing to avoid i_mmap_writable failures */
  148. munmap(global_p, mfd_def_size);
  149. /* Try sealing the global file; expect EBUSY or success. Current
  150. * kernels will never succeed, but in the future, kernels might
  151. * implement page-replacements or other fancy ways to avoid racing
  152. * writes. */
  153. r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
  154. if (r >= 0) {
  155. printf("HURRAY! This kernel fixed GUP races!\n");
  156. } else {
  157. /* wait 1s more so the FUSE-request is done */
  158. sleep(1);
  159. /* try sealing the global file again */
  160. mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
  161. }
  162. return 0;
  163. }
  164. static pid_t spawn_sealing_thread(void)
  165. {
  166. uint8_t *stack;
  167. pid_t pid;
  168. stack = malloc(STACK_SIZE);
  169. if (!stack) {
  170. printf("malloc(STACK_SIZE) failed: %m\n");
  171. abort();
  172. }
  173. pid = clone(sealing_thread_fn,
  174. stack + STACK_SIZE,
  175. SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
  176. NULL);
  177. if (pid < 0) {
  178. printf("clone() failed: %m\n");
  179. abort();
  180. }
  181. return pid;
  182. }
  /*
   * Block until the sealing thread @pid has terminated and reap it.
   *
   * A waitpid() interrupted by a signal is restarted so the caller never
   * proceeds while the thread is still running. Any other waitpid() failure
   * is printed and terminates the test through abort().
   */
  static void join_sealing_thread(pid_t pid)
  {
      pid_t r;

      do {
          r = waitpid(pid, NULL, 0);
      } while (r < 0 && errno == EINTR);

      if (r < 0) {
          printf("waitpid(%d) failed: %m\n", (int)pid);
          abort();
      }
  }
  187. int main(int argc, char **argv)
  188. {
  189. char *zero;
  190. int fd, mfd, r;
  191. void *p;
  192. int was_sealed;
  193. pid_t pid;
  194. if (argc < 2) {
  195. printf("error: please pass path to file in fuse_mnt mount-point\n");
  196. abort();
  197. }
  198. if (argc >= 3) {
  199. if (!strcmp(argv[2], "hugetlbfs")) {
  200. unsigned long hpage_size = default_huge_page_size();
  201. if (!hpage_size) {
  202. printf("Unable to determine huge page size\n");
  203. abort();
  204. }
  205. hugetlbfs_test = 1;
  206. mfd_def_size = hpage_size * 2;
  207. } else {
  208. printf("Unknown option: %s\n", argv[2]);
  209. abort();
  210. }
  211. }
  212. zero = calloc(sizeof(*zero), mfd_def_size);
  213. /* open FUSE memfd file for GUP testing */
  214. printf("opening: %s\n", argv[1]);
  215. fd = open(argv[1], O_RDONLY | O_CLOEXEC);
  216. if (fd < 0) {
  217. printf("cannot open(\"%s\"): %m\n", argv[1]);
  218. abort();
  219. }
  220. /* create new memfd-object */
  221. mfd = mfd_assert_new("kern_memfd_fuse",
  222. mfd_def_size,
  223. MFD_CLOEXEC | MFD_ALLOW_SEALING);
  224. /* mmap memfd-object for writing */
  225. p = mfd_assert_mmap_shared(mfd);
  226. /* pass mfd+mapping to a separate sealing-thread which tries to seal
  227. * the memfd objects with SEAL_WRITE while we write into it */
  228. global_mfd = mfd;
  229. global_p = p;
  230. pid = spawn_sealing_thread();
  231. /* Use read() on the FUSE file to read into our memory-mapped memfd
  232. * object. This races the other thread which tries to seal the
  233. * memfd-object.
  234. * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
  235. * This guarantees that the receive-buffer is pinned for 1s until the
  236. * data is written into it. The racing ADD_SEALS should thus fail as
  237. * the pages are still pinned. */
  238. r = read(fd, p, mfd_def_size);
  239. if (r < 0) {
  240. printf("read() failed: %m\n");
  241. abort();
  242. } else if (!r) {
  243. printf("unexpected EOF on read()\n");
  244. abort();
  245. }
  246. was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
  247. /* Wait for sealing-thread to finish and verify that it
  248. * successfully sealed the file after the second try. */
  249. join_sealing_thread(pid);
  250. mfd_assert_has_seals(mfd, F_SEAL_WRITE);
  251. /* *IF* the memfd-object was sealed at the time our read() returned,
  252. * then the kernel did a page-replacement or canceled the read() (or
  253. * whatever magic it did..). In that case, the memfd object is still
  254. * all zero.
  255. * In case the memfd-object was *not* sealed, the read() was successfull
  256. * and the memfd object must *not* be all zero.
  257. * Note that in real scenarios, there might be a mixture of both, but
  258. * in this test-cases, we have explicit 200ms delays which should be
  259. * enough to avoid any in-flight writes. */
  260. p = mfd_assert_mmap_private(mfd);
  261. if (was_sealed && memcmp(p, zero, mfd_def_size)) {
  262. printf("memfd sealed during read() but data not discarded\n");
  263. abort();
  264. } else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) {
  265. printf("memfd sealed after read() but data discarded\n");
  266. abort();
  267. }
  268. close(mfd);
  269. close(fd);
  270. printf("fuse: DONE\n");
  271. free(zero);
  272. return 0;
  273. }