| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270 |
- // SPDX-License-Identifier: GPL-2.0-or-later
- /*
- * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <djwong@kernel.org>
- */
- #include "xfs.h"
- #include "xfs_fs.h"
- #include "xfs_buf.h"
- #include "xfs_buf_mem.h"
- #include "xfs_trace.h"
- #include <linux/shmem_fs.h>
- #include "xfs_log_format.h"
- #include "xfs_trans.h"
- #include "xfs_buf_item.h"
- #include "xfs_error.h"
- /*
- * Buffer Cache for In-Memory Files
- * ================================
- *
- * Online fsck wants to create ephemeral ordered recordsets. The existing
- * btree infrastructure can do this, but we need the buffer cache to target
- * memory instead of block devices.
- *
- * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
- * requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file to
- * store our staging data. This file is not installed in the file descriptor
- * table so that user programs cannot access the data, which means that the
- * xmbuf must be freed with xmbuf_destroy.
- *
- * xmbufs assume that the caller will handle all required concurrency
- * management; standard vfs locks (freezer and inode) are not taken. Reads
- * and writes are satisfied directly from the page cache.
- *
- * The only supported block size is PAGE_SIZE, and we cannot use highmem.
- */
- /*
- * shmem files used to back an in-memory buffer cache must not be exposed to
- * userspace. Upper layers must coordinate access to the one handle returned
- * by the constructor, so establish a separate lock class for xmbufs to avoid
- * confusing lockdep.
- */
- static struct lock_class_key xmbuf_i_mutex_key;
- /*
- * Allocate a buffer cache target for a memory-backed file and set up the
- * buffer target.
- */
- int
- xmbuf_alloc(
- struct xfs_mount *mp,
- const char *descr,
- struct xfs_buftarg **btpp)
- {
- struct file *file;
- struct inode *inode;
- struct xfs_buftarg *btp;
- int error;
- btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
- if (!btp)
- return -ENOMEM;
- file = shmem_kernel_file_setup(descr, 0, 0);
- if (IS_ERR(file)) {
- error = PTR_ERR(file);
- goto out_free_btp;
- }
- inode = file_inode(file);
- /* private file, private locking */
- lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);
- /*
- * We don't want to bother with kmapping data during repair, so don't
- * allow highmem pages to back this mapping.
- */
- mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
- /* ensure all writes are below EOF to avoid pagecache zeroing */
- i_size_write(inode, inode->i_sb->s_maxbytes);
- error = xfs_buf_cache_init(btp->bt_cache);
- if (error)
- goto out_file;
- /* Initialize buffer target */
- btp->bt_mount = mp;
- btp->bt_dev = (dev_t)-1U;
- btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
- btp->bt_file = file;
- btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
- btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;
- error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
- if (error)
- goto out_bcache;
- trace_xmbuf_create(btp);
- *btpp = btp;
- return 0;
- out_bcache:
- xfs_buf_cache_destroy(btp->bt_cache);
- out_file:
- fput(file);
- out_free_btp:
- kfree(btp);
- return error;
- }
- /* Free a buffer cache target for a memory-backed buffer cache. */
- void
- xmbuf_free(
- struct xfs_buftarg *btp)
- {
- ASSERT(xfs_buftarg_is_mem(btp));
- ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
- trace_xmbuf_free(btp);
- xfs_destroy_buftarg(btp);
- xfs_buf_cache_destroy(btp->bt_cache);
- fput(btp->bt_file);
- kfree(btp);
- }
- /* Directly map a shmem page into the buffer cache. */
- int
- xmbuf_map_page(
- struct xfs_buf *bp)
- {
- struct inode *inode = file_inode(bp->b_target->bt_file);
- struct folio *folio = NULL;
- struct page *page;
- loff_t pos = BBTOB(xfs_buf_daddr(bp));
- int error;
- ASSERT(xfs_buftarg_is_mem(bp->b_target));
- if (bp->b_map_count != 1)
- return -ENOMEM;
- if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
- return -ENOMEM;
- if (offset_in_page(pos) != 0) {
- ASSERT(offset_in_page(pos));
- return -ENOMEM;
- }
- error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
- if (error)
- return error;
- if (filemap_check_wb_err(inode->i_mapping, 0)) {
- folio_unlock(folio);
- folio_put(folio);
- return -EIO;
- }
- page = folio_file_page(folio, pos >> PAGE_SHIFT);
- /*
- * Mark the page dirty so that it won't be reclaimed once we drop the
- * (potentially last) reference in xmbuf_unmap_page.
- */
- set_page_dirty(page);
- unlock_page(page);
- bp->b_addr = page_address(page);
- bp->b_pages = bp->b_page_array;
- bp->b_pages[0] = page;
- bp->b_page_count = 1;
- return 0;
- }
- /* Unmap a shmem page that was mapped into the buffer cache. */
- void
- xmbuf_unmap_page(
- struct xfs_buf *bp)
- {
- struct page *page = bp->b_pages[0];
- ASSERT(xfs_buftarg_is_mem(bp->b_target));
- put_page(page);
- bp->b_addr = NULL;
- bp->b_pages[0] = NULL;
- bp->b_pages = NULL;
- bp->b_page_count = 0;
- }
- /* Is this a valid daddr within the buftarg? */
- bool
- xmbuf_verify_daddr(
- struct xfs_buftarg *btp,
- xfs_daddr_t daddr)
- {
- struct inode *inode = file_inode(btp->bt_file);
- ASSERT(xfs_buftarg_is_mem(btp));
- return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
- }
- /* Discard the page backing this buffer. */
- static void
- xmbuf_stale(
- struct xfs_buf *bp)
- {
- struct inode *inode = file_inode(bp->b_target->bt_file);
- loff_t pos;
- ASSERT(xfs_buftarg_is_mem(bp->b_target));
- pos = BBTOB(xfs_buf_daddr(bp));
- shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
- }
- /*
- * Finalize a buffer -- discard the backing page if it's stale, or run the
- * write verifier to detect problems.
- */
- int
- xmbuf_finalize(
- struct xfs_buf *bp)
- {
- xfs_failaddr_t fa;
- int error = 0;
- if (bp->b_flags & XBF_STALE) {
- xmbuf_stale(bp);
- return 0;
- }
- /*
- * Although this btree is ephemeral, validate the buffer structure so
- * that we can detect memory corruption errors and software bugs.
- */
- fa = bp->b_ops->verify_struct(bp);
- if (fa) {
- error = -EFSCORRUPTED;
- xfs_verifier_error(bp, error, fa);
- }
- return error;
- }
- /*
- * Detach this xmbuf buffer from the transaction by any means necessary.
- * All buffers are direct-mapped, so they do not need bwrite.
- */
- void
- xmbuf_trans_bdetach(
- struct xfs_trans *tp,
- struct xfs_buf *bp)
- {
- struct xfs_buf_log_item *bli = bp->b_log_item;
- ASSERT(bli != NULL);
- bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
- XFS_BLI_LOGGED | XFS_BLI_STALE);
- clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);
- while (bp->b_log_item != NULL)
- xfs_trans_bdetach(tp, bp);
- }
|