/* fs/xfs/xfs_buf_mem.c */
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_buf.h"
  9. #include "xfs_buf_mem.h"
  10. #include "xfs_trace.h"
  11. #include <linux/shmem_fs.h>
  12. #include "xfs_log_format.h"
  13. #include "xfs_trans.h"
  14. #include "xfs_buf_item.h"
  15. #include "xfs_error.h"
  16. /*
  17. * Buffer Cache for In-Memory Files
  18. * ================================
  19. *
  20. * Online fsck wants to create ephemeral ordered recordsets. The existing
  21. * btree infrastructure can do this, but we need the buffer cache to target
  22. * memory instead of block devices.
  23. *
  24. * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
  25. * requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file to
  26. * store our staging data. This file is not installed in the file descriptor
  27. * table so that user programs cannot access the data, which means that the
  28. * xmbuf must be freed with xmbuf_destroy.
  29. *
  30. * xmbufs assume that the caller will handle all required concurrency
  31. * management; standard vfs locks (freezer and inode) are not taken. Reads
  32. * and writes are satisfied directly from the page cache.
  33. *
  34. * The only supported block size is PAGE_SIZE, and we cannot use highmem.
  35. */
/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace.  Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;
  43. /*
  44. * Allocate a buffer cache target for a memory-backed file and set up the
  45. * buffer target.
  46. */
  47. int
  48. xmbuf_alloc(
  49. struct xfs_mount *mp,
  50. const char *descr,
  51. struct xfs_buftarg **btpp)
  52. {
  53. struct file *file;
  54. struct inode *inode;
  55. struct xfs_buftarg *btp;
  56. int error;
  57. btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
  58. if (!btp)
  59. return -ENOMEM;
  60. file = shmem_kernel_file_setup(descr, 0, 0);
  61. if (IS_ERR(file)) {
  62. error = PTR_ERR(file);
  63. goto out_free_btp;
  64. }
  65. inode = file_inode(file);
  66. /* private file, private locking */
  67. lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);
  68. /*
  69. * We don't want to bother with kmapping data during repair, so don't
  70. * allow highmem pages to back this mapping.
  71. */
  72. mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
  73. /* ensure all writes are below EOF to avoid pagecache zeroing */
  74. i_size_write(inode, inode->i_sb->s_maxbytes);
  75. error = xfs_buf_cache_init(btp->bt_cache);
  76. if (error)
  77. goto out_file;
  78. /* Initialize buffer target */
  79. btp->bt_mount = mp;
  80. btp->bt_dev = (dev_t)-1U;
  81. btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
  82. btp->bt_file = file;
  83. btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
  84. btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;
  85. error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
  86. if (error)
  87. goto out_bcache;
  88. trace_xmbuf_create(btp);
  89. *btpp = btp;
  90. return 0;
  91. out_bcache:
  92. xfs_buf_cache_destroy(btp->bt_cache);
  93. out_file:
  94. fput(file);
  95. out_free_btp:
  96. kfree(btp);
  97. return error;
  98. }
  99. /* Free a buffer cache target for a memory-backed buffer cache. */
  100. void
  101. xmbuf_free(
  102. struct xfs_buftarg *btp)
  103. {
  104. ASSERT(xfs_buftarg_is_mem(btp));
  105. ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
  106. trace_xmbuf_free(btp);
  107. xfs_destroy_buftarg(btp);
  108. xfs_buf_cache_destroy(btp->bt_cache);
  109. fput(btp->bt_file);
  110. kfree(btp);
  111. }
  112. /* Directly map a shmem page into the buffer cache. */
  113. int
  114. xmbuf_map_page(
  115. struct xfs_buf *bp)
  116. {
  117. struct inode *inode = file_inode(bp->b_target->bt_file);
  118. struct folio *folio = NULL;
  119. struct page *page;
  120. loff_t pos = BBTOB(xfs_buf_daddr(bp));
  121. int error;
  122. ASSERT(xfs_buftarg_is_mem(bp->b_target));
  123. if (bp->b_map_count != 1)
  124. return -ENOMEM;
  125. if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
  126. return -ENOMEM;
  127. if (offset_in_page(pos) != 0) {
  128. ASSERT(offset_in_page(pos));
  129. return -ENOMEM;
  130. }
  131. error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
  132. if (error)
  133. return error;
  134. if (filemap_check_wb_err(inode->i_mapping, 0)) {
  135. folio_unlock(folio);
  136. folio_put(folio);
  137. return -EIO;
  138. }
  139. page = folio_file_page(folio, pos >> PAGE_SHIFT);
  140. /*
  141. * Mark the page dirty so that it won't be reclaimed once we drop the
  142. * (potentially last) reference in xmbuf_unmap_page.
  143. */
  144. set_page_dirty(page);
  145. unlock_page(page);
  146. bp->b_addr = page_address(page);
  147. bp->b_pages = bp->b_page_array;
  148. bp->b_pages[0] = page;
  149. bp->b_page_count = 1;
  150. return 0;
  151. }
  152. /* Unmap a shmem page that was mapped into the buffer cache. */
  153. void
  154. xmbuf_unmap_page(
  155. struct xfs_buf *bp)
  156. {
  157. struct page *page = bp->b_pages[0];
  158. ASSERT(xfs_buftarg_is_mem(bp->b_target));
  159. put_page(page);
  160. bp->b_addr = NULL;
  161. bp->b_pages[0] = NULL;
  162. bp->b_pages = NULL;
  163. bp->b_page_count = 0;
  164. }
  165. /* Is this a valid daddr within the buftarg? */
  166. bool
  167. xmbuf_verify_daddr(
  168. struct xfs_buftarg *btp,
  169. xfs_daddr_t daddr)
  170. {
  171. struct inode *inode = file_inode(btp->bt_file);
  172. ASSERT(xfs_buftarg_is_mem(btp));
  173. return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
  174. }
  175. /* Discard the page backing this buffer. */
  176. static void
  177. xmbuf_stale(
  178. struct xfs_buf *bp)
  179. {
  180. struct inode *inode = file_inode(bp->b_target->bt_file);
  181. loff_t pos;
  182. ASSERT(xfs_buftarg_is_mem(bp->b_target));
  183. pos = BBTOB(xfs_buf_daddr(bp));
  184. shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
  185. }
  186. /*
  187. * Finalize a buffer -- discard the backing page if it's stale, or run the
  188. * write verifier to detect problems.
  189. */
  190. int
  191. xmbuf_finalize(
  192. struct xfs_buf *bp)
  193. {
  194. xfs_failaddr_t fa;
  195. int error = 0;
  196. if (bp->b_flags & XBF_STALE) {
  197. xmbuf_stale(bp);
  198. return 0;
  199. }
  200. /*
  201. * Although this btree is ephemeral, validate the buffer structure so
  202. * that we can detect memory corruption errors and software bugs.
  203. */
  204. fa = bp->b_ops->verify_struct(bp);
  205. if (fa) {
  206. error = -EFSCORRUPTED;
  207. xfs_verifier_error(bp, error, fa);
  208. }
  209. return error;
  210. }
/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	/*
	 * Clear all dirty/logging state up front so that the detach below
	 * cannot conclude the buffer still needs writing.
	 */
	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

	/*
	 * Each xfs_trans_bdetach call drops one transaction reference; loop
	 * until the log item is gone.  NOTE(review): presumably the log item
	 * can be pinned to the transaction multiple times — confirm against
	 * xfs_trans_bdetach.
	 */
	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}