xfile.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_log_format.h"
  11. #include "xfs_trans_resv.h"
  12. #include "xfs_mount.h"
  13. #include "scrub/scrub.h"
  14. #include "scrub/xfile.h"
  15. #include "scrub/xfarray.h"
  16. #include "scrub/trace.h"
  17. #include <linux/shmem_fs.h>
  18. /*
  19. * Swappable Temporary Memory
  20. * ==========================
  21. *
  22. * Online checking sometimes needs to be able to stage a large amount of data
  23. * in memory. This information might not fit in the available memory and it
  24. * doesn't all need to be accessible at all times. In other words, we want an
  25. * indexed data buffer to store data that can be paged out.
  26. *
  27. * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
  28. * requirements. Therefore, the xfile mechanism uses an unlinked shmem file to
  29. * store our staging data. This file is not installed in the file descriptor
  30. * table so that user programs cannot access the data, which means that the
  31. * xfile must be freed with xfile_destroy.
  32. *
  33. * xfiles assume that the caller will handle all required concurrency
  34. * management; standard vfs locks (freezer and inode) are not taken. Reads
  35. * and writes are satisfied directly from the page cache.
  36. */
  /*
   * xfiles must not be exposed to userspace and require upper layers to
   * coordinate access to the one handle returned by the constructor, so
   * establish a separate lock class for xfiles to avoid confusing lockdep.
   *
   * xfile_create() assigns this class to the i_rwsem of the backing inode;
   * xfile_destroy() switches the inode back to its filesystem type's
   * i_mutex_key before dropping the file.
   */
  static struct lock_class_key xfile_i_mutex_key;
  43. /*
  44. * Create an xfile of the given size. The description will be used in the
  45. * trace output.
  46. */
  47. int
  48. xfile_create(
  49. const char *description,
  50. loff_t isize,
  51. struct xfile **xfilep)
  52. {
  53. struct inode *inode;
  54. struct xfile *xf;
  55. int error;
  56. xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
  57. if (!xf)
  58. return -ENOMEM;
  59. xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
  60. if (IS_ERR(xf->file)) {
  61. error = PTR_ERR(xf->file);
  62. goto out_xfile;
  63. }
  64. inode = file_inode(xf->file);
  65. lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
  66. /*
  67. * We don't want to bother with kmapping data during repair, so don't
  68. * allow highmem pages to back this mapping.
  69. */
  70. mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
  71. trace_xfile_create(xf);
  72. *xfilep = xf;
  73. return 0;
  74. out_xfile:
  75. kfree(xf);
  76. return error;
  77. }
  78. /* Close the file and release all resources. */
  79. void
  80. xfile_destroy(
  81. struct xfile *xf)
  82. {
  83. struct inode *inode = file_inode(xf->file);
  84. trace_xfile_destroy(xf);
  85. lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
  86. fput(xf->file);
  87. kfree(xf);
  88. }
  89. /*
  90. * Load an object. Since we're treating this file as "memory", any error or
  91. * short IO is treated as a failure to allocate memory.
  92. */
  93. int
  94. xfile_load(
  95. struct xfile *xf,
  96. void *buf,
  97. size_t count,
  98. loff_t pos)
  99. {
  100. struct inode *inode = file_inode(xf->file);
  101. unsigned int pflags;
  102. if (count > MAX_RW_COUNT)
  103. return -ENOMEM;
  104. if (inode->i_sb->s_maxbytes - pos < count)
  105. return -ENOMEM;
  106. trace_xfile_load(xf, pos, count);
  107. pflags = memalloc_nofs_save();
  108. while (count > 0) {
  109. struct folio *folio;
  110. unsigned int len;
  111. unsigned int offset;
  112. if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
  113. SGP_READ) < 0)
  114. break;
  115. if (!folio) {
  116. /*
  117. * No data stored at this offset, just zero the output
  118. * buffer until the next page boundary.
  119. */
  120. len = min_t(ssize_t, count,
  121. PAGE_SIZE - offset_in_page(pos));
  122. memset(buf, 0, len);
  123. } else {
  124. if (filemap_check_wb_err(inode->i_mapping, 0)) {
  125. folio_unlock(folio);
  126. folio_put(folio);
  127. break;
  128. }
  129. offset = offset_in_folio(folio, pos);
  130. len = min_t(ssize_t, count, folio_size(folio) - offset);
  131. memcpy(buf, folio_address(folio) + offset, len);
  132. folio_unlock(folio);
  133. folio_put(folio);
  134. }
  135. count -= len;
  136. pos += len;
  137. buf += len;
  138. }
  139. memalloc_nofs_restore(pflags);
  140. if (count)
  141. return -ENOMEM;
  142. return 0;
  143. }
  144. /*
  145. * Store an object. Since we're treating this file as "memory", any error or
  146. * short IO is treated as a failure to allocate memory.
  147. */
  148. int
  149. xfile_store(
  150. struct xfile *xf,
  151. const void *buf,
  152. size_t count,
  153. loff_t pos)
  154. {
  155. struct inode *inode = file_inode(xf->file);
  156. unsigned int pflags;
  157. if (count > MAX_RW_COUNT)
  158. return -ENOMEM;
  159. if (inode->i_sb->s_maxbytes - pos < count)
  160. return -ENOMEM;
  161. trace_xfile_store(xf, pos, count);
  162. /*
  163. * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
  164. * actually allocates a folio instead of erroring out.
  165. */
  166. if (pos + count > i_size_read(inode))
  167. i_size_write(inode, pos + count);
  168. pflags = memalloc_nofs_save();
  169. while (count > 0) {
  170. struct folio *folio;
  171. unsigned int len;
  172. unsigned int offset;
  173. if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
  174. SGP_CACHE) < 0)
  175. break;
  176. if (filemap_check_wb_err(inode->i_mapping, 0)) {
  177. folio_unlock(folio);
  178. folio_put(folio);
  179. break;
  180. }
  181. offset = offset_in_folio(folio, pos);
  182. len = min_t(ssize_t, count, folio_size(folio) - offset);
  183. memcpy(folio_address(folio) + offset, buf, len);
  184. folio_mark_dirty(folio);
  185. folio_unlock(folio);
  186. folio_put(folio);
  187. count -= len;
  188. pos += len;
  189. buf += len;
  190. }
  191. memalloc_nofs_restore(pflags);
  192. if (count)
  193. return -ENOMEM;
  194. return 0;
  195. }
  196. /* Find the next written area in the xfile data for a given offset. */
  197. loff_t
  198. xfile_seek_data(
  199. struct xfile *xf,
  200. loff_t pos)
  201. {
  202. loff_t ret;
  203. ret = vfs_llseek(xf->file, pos, SEEK_DATA);
  204. trace_xfile_seek_data(xf, pos, ret);
  205. return ret;
  206. }
  207. /*
  208. * Grab the (locked) folio for a memory object. The object cannot span a folio
  209. * boundary. Returns the locked folio if successful, NULL if there was no
  210. * folio or it didn't cover the range requested, or an ERR_PTR on failure.
  211. */
  212. struct folio *
  213. xfile_get_folio(
  214. struct xfile *xf,
  215. loff_t pos,
  216. size_t len,
  217. unsigned int flags)
  218. {
  219. struct inode *inode = file_inode(xf->file);
  220. struct folio *folio = NULL;
  221. unsigned int pflags;
  222. int error;
  223. if (inode->i_sb->s_maxbytes - pos < len)
  224. return ERR_PTR(-ENOMEM);
  225. trace_xfile_get_folio(xf, pos, len);
  226. /*
  227. * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
  228. * actually allocates a folio instead of erroring out.
  229. */
  230. if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
  231. i_size_write(inode, pos + len);
  232. pflags = memalloc_nofs_save();
  233. error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
  234. (flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
  235. memalloc_nofs_restore(pflags);
  236. if (error)
  237. return ERR_PTR(error);
  238. if (!folio)
  239. return NULL;
  240. if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
  241. folio_unlock(folio);
  242. folio_put(folio);
  243. return NULL;
  244. }
  245. if (filemap_check_wb_err(inode->i_mapping, 0)) {
  246. folio_unlock(folio);
  247. folio_put(folio);
  248. return ERR_PTR(-EIO);
  249. }
  250. /*
  251. * Mark the folio dirty so that it won't be reclaimed once we drop the
  252. * (potentially last) reference in xfile_put_folio.
  253. */
  254. if (flags & XFILE_ALLOC)
  255. folio_mark_dirty(folio);
  256. return folio;
  257. }
  258. /*
  259. * Release the (locked) folio for a memory object.
  260. */
  261. void
  262. xfile_put_folio(
  263. struct xfile *xf,
  264. struct folio *folio)
  265. {
  266. trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
  267. folio_unlock(folio);
  268. folio_put(folio);
  269. }
  270. /* Discard the page cache that's backing a range of the xfile. */
  271. void
  272. xfile_discard(
  273. struct xfile *xf,
  274. loff_t pos,
  275. u64 count)
  276. {
  277. trace_xfile_discard(xf, pos, count);
  278. shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
  279. }