| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614 |
- // SPDX-License-Identifier: GPL-2.0-or-later
- /*
- * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <djwong@kernel.org>
- */
- #include "xfs.h"
- #include "xfs_fs.h"
- #include "xfs_shared.h"
- #include "xfs_format.h"
- #include "xfs_trans_resv.h"
- #include "xfs_mount.h"
- #include "xfs_defer.h"
- #include "xfs_btree.h"
- #include "xfs_log_format.h"
- #include "xfs_trans.h"
- #include "xfs_inode.h"
- #include "xfs_inode_fork.h"
- #include "xfs_alloc.h"
- #include "xfs_bmap.h"
- #include "xfs_rmap.h"
- #include "xfs_refcount.h"
- #include "xfs_quota.h"
- #include "xfs_ialloc.h"
- #include "xfs_ag.h"
- #include "xfs_error.h"
- #include "xfs_errortag.h"
- #include "xfs_icache.h"
- #include "xfs_refcount_btree.h"
- #include "scrub/xfs_scrub.h"
- #include "scrub/scrub.h"
- #include "scrub/common.h"
- #include "scrub/trace.h"
- #include "scrub/repair.h"
- #include "scrub/bitmap.h"
- #include "scrub/off_bitmap.h"
- #include "scrub/fsb_bitmap.h"
- #include "scrub/reap.h"
- /*
- * CoW Fork Mapping Repair
- * =======================
- *
- * Although CoW staging extents are owned by incore CoW inode forks, on disk
- * they are owned by the refcount btree. The ondisk metadata does not record
- * any ownership information, which limits what we can do to repair the
- * mappings in the CoW fork. At most, we can replace ifork mappings that lack
- * an entry in the refcount btree or are described by a reverse mapping record
- * whose owner is not OWN_COW.
- *
- * Replacing extents is also tricky -- we can't touch written CoW fork extents
- * since they are undergoing writeback, and delalloc extents do not require
- * repair since they only exist incore. Hence the most we can do is find the
- * bad parts of unwritten mappings, allocate a replacement set of blocks, and
- * replace the incore mapping. We use the regular reaping process to unmap
- * or free the discarded blocks, as appropriate.
- */
- struct xrep_cow {
- struct xfs_scrub *sc;
- /* Bitmap of file offset ranges that need replacing. */
- struct xoff_bitmap bad_fileoffs;
- /* Bitmap of fsblocks that were removed from the CoW fork. */
- struct xfsb_bitmap old_cowfork_fsblocks;
- /* CoW fork mappings used to scan for bad CoW staging extents. */
- struct xfs_bmbt_irec irec;
- /* refcount btree block number of irec.br_startblock */
- unsigned int irec_startbno;
- /* refcount btree block number of the next refcount record we expect */
- unsigned int next_bno;
- };
- /* CoW staging extent. */
- struct xrep_cow_extent {
- xfs_fsblock_t fsbno;
- xfs_extlen_t len;
- };
- /*
- * Mark the part of the file range that corresponds to the given physical
- * space. Caller must ensure that the physical range is within xc->irec.
- */
- STATIC int
- xrep_cow_mark_file_range(
- struct xrep_cow *xc,
- xfs_fsblock_t startblock,
- xfs_filblks_t blockcount)
- {
- xfs_fileoff_t startoff;
- startoff = xc->irec.br_startoff +
- (startblock - xc->irec.br_startblock);
- trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff,
- blockcount);
- return xoff_bitmap_set(&xc->bad_fileoffs, startoff, blockcount);
- }
- /*
- * Trim @src to fit within the CoW fork mapping being examined, and put the
- * result in @dst.
- */
- static inline void
- xrep_cow_trim_refcount(
- struct xrep_cow *xc,
- struct xfs_refcount_irec *dst,
- const struct xfs_refcount_irec *src)
- {
- unsigned int adj;
- memcpy(dst, src, sizeof(*dst));
- if (dst->rc_startblock < xc->irec_startbno) {
- adj = xc->irec_startbno - dst->rc_startblock;
- dst->rc_blockcount -= adj;
- dst->rc_startblock += adj;
- }
- if (dst->rc_startblock + dst->rc_blockcount >
- xc->irec_startbno + xc->irec.br_blockcount) {
- adj = (dst->rc_startblock + dst->rc_blockcount) -
- (xc->irec_startbno + xc->irec.br_blockcount);
- dst->rc_blockcount -= adj;
- }
- }
- /* Mark any shared CoW staging extents. */
- STATIC int
- xrep_cow_mark_shared_staging(
- struct xfs_btree_cur *cur,
- const struct xfs_refcount_irec *rec,
- void *priv)
- {
- struct xrep_cow *xc = priv;
- struct xfs_refcount_irec rrec;
- xfs_fsblock_t fsbno;
- if (!xfs_refcount_check_domain(rec) ||
- rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
- return -EFSCORRUPTED;
- xrep_cow_trim_refcount(xc, &rrec, rec);
- fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
- rrec.rc_startblock);
- return xrep_cow_mark_file_range(xc, fsbno, rrec.rc_blockcount);
- }
- /*
- * Mark any portion of the CoW fork file offset range where there is not a CoW
- * staging extent record in the refcountbt, and keep a record of where we did
- * find correct refcountbt records. Staging records are always cleaned out at
- * mount time, so any two inodes trying to map the same staging area would have
- * already taken the fs down due to refcount btree verifier errors. Hence this
- * inode should be the sole creator of the staging extent records ondisk.
- */
- STATIC int
- xrep_cow_mark_missing_staging(
- struct xfs_btree_cur *cur,
- const struct xfs_refcount_irec *rec,
- void *priv)
- {
- struct xrep_cow *xc = priv;
- struct xfs_refcount_irec rrec;
- int error;
- if (!xfs_refcount_check_domain(rec) ||
- rec->rc_domain != XFS_REFC_DOMAIN_COW)
- return -EFSCORRUPTED;
- xrep_cow_trim_refcount(xc, &rrec, rec);
- if (xc->next_bno >= rrec.rc_startblock)
- goto next;
- error = xrep_cow_mark_file_range(xc,
- XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
- xc->next_bno),
- rrec.rc_startblock - xc->next_bno);
- if (error)
- return error;
- next:
- xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount;
- return 0;
- }
- /*
- * Mark any area that does not correspond to a CoW staging rmap. These are
- * cross-linked areas that must be avoided.
- */
- STATIC int
- xrep_cow_mark_missing_staging_rmap(
- struct xfs_btree_cur *cur,
- const struct xfs_rmap_irec *rec,
- void *priv)
- {
- struct xrep_cow *xc = priv;
- xfs_fsblock_t fsbno;
- xfs_agblock_t rec_bno;
- xfs_extlen_t rec_len;
- unsigned int adj;
- if (rec->rm_owner == XFS_RMAP_OWN_COW)
- return 0;
- rec_bno = rec->rm_startblock;
- rec_len = rec->rm_blockcount;
- if (rec_bno < xc->irec_startbno) {
- adj = xc->irec_startbno - rec_bno;
- rec_len -= adj;
- rec_bno += adj;
- }
- if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) {
- adj = (rec_bno + rec_len) -
- (xc->irec_startbno + xc->irec.br_blockcount);
- rec_len -= adj;
- }
- fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno, rec_bno);
- return xrep_cow_mark_file_range(xc, fsbno, rec_len);
- }
- /*
- * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
- * extent and mark the corresponding part of the file range in the bitmap.
- */
- STATIC int
- xrep_cow_find_bad(
- struct xrep_cow *xc)
- {
- struct xfs_refcount_irec rc_low = { 0 };
- struct xfs_refcount_irec rc_high = { 0 };
- struct xfs_rmap_irec rm_low = { 0 };
- struct xfs_rmap_irec rm_high = { 0 };
- struct xfs_perag *pag;
- struct xfs_scrub *sc = xc->sc;
- xfs_agnumber_t agno;
- int error;
- agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock);
- xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock);
- pag = xfs_perag_get(sc->mp, agno);
- if (!pag)
- return -EFSCORRUPTED;
- error = xrep_ag_init(sc, pag, &sc->sa);
- if (error)
- goto out_pag;
- /* Mark any CoW fork extents that are shared. */
- rc_low.rc_startblock = xc->irec_startbno;
- rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
- rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
- error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
- xrep_cow_mark_shared_staging, xc);
- if (error)
- goto out_sa;
- /* Make sure there are CoW staging extents for the whole mapping. */
- rc_low.rc_startblock = xc->irec_startbno;
- rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
- rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
- xc->next_bno = xc->irec_startbno;
- error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
- xrep_cow_mark_missing_staging, xc);
- if (error)
- goto out_sa;
- if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
- error = xrep_cow_mark_file_range(xc,
- XFS_AGB_TO_FSB(sc->mp, pag->pag_agno,
- xc->next_bno),
- xc->irec_startbno + xc->irec.br_blockcount -
- xc->next_bno);
- if (error)
- goto out_sa;
- }
- /* Mark any area has an rmap that isn't a COW staging extent. */
- rm_low.rm_startblock = xc->irec_startbno;
- memset(&rm_high, 0xFF, sizeof(rm_high));
- rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
- error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high,
- xrep_cow_mark_missing_staging_rmap, xc);
- if (error)
- goto out_sa;
- /*
- * If userspace is forcing us to rebuild the CoW fork or someone turned
- * on the debugging knob, replace everything in the CoW fork.
- */
- if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
- XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
- error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
- xc->irec.br_blockcount);
- if (error)
- return error;
- }
- out_sa:
- xchk_ag_free(sc, &sc->sa);
- out_pag:
- xfs_perag_put(pag);
- return 0;
- }
- /*
- * Allocate a replacement CoW staging extent of up to the given number of
- * blocks, and fill out the mapping.
- */
- STATIC int
- xrep_cow_alloc(
- struct xfs_scrub *sc,
- xfs_extlen_t maxlen,
- struct xrep_cow_extent *repl)
- {
- struct xfs_alloc_arg args = {
- .tp = sc->tp,
- .mp = sc->mp,
- .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE,
- .minlen = 1,
- .maxlen = maxlen,
- .prod = 1,
- .resv = XFS_AG_RESV_NONE,
- .datatype = XFS_ALLOC_USERDATA,
- };
- int error;
- error = xfs_trans_reserve_more(sc->tp, maxlen, 0);
- if (error)
- return error;
- error = xfs_alloc_vextent_start_ag(&args,
- XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
- if (error)
- return error;
- if (args.fsbno == NULLFSBLOCK)
- return -ENOSPC;
- xfs_refcount_alloc_cow_extent(sc->tp, args.fsbno, args.len);
- repl->fsbno = args.fsbno;
- repl->len = args.len;
- return 0;
- }
- /*
- * Look up the current CoW fork mapping so that we only allocate enough to
- * replace a single mapping. If we don't find a mapping that covers the start
- * of the file range, or we find a delalloc or written extent, something is
- * seriously wrong, since we didn't drop the ILOCK.
- */
- static inline int
- xrep_cow_find_mapping(
- struct xrep_cow *xc,
- struct xfs_iext_cursor *icur,
- xfs_fileoff_t startoff,
- struct xfs_bmbt_irec *got)
- {
- struct xfs_inode *ip = xc->sc->ip;
- struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
- if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got))
- goto bad;
- if (got->br_startoff > startoff)
- goto bad;
- if (got->br_blockcount == 0)
- goto bad;
- if (isnullstartblock(got->br_startblock))
- goto bad;
- if (xfs_bmap_is_written_extent(got))
- goto bad;
- return 0;
- bad:
- ASSERT(0);
- return -EFSCORRUPTED;
- }
- #define REPLACE_LEFT_SIDE (1U << 0)
- #define REPLACE_RIGHT_SIDE (1U << 1)
- /*
- * Given a CoW fork mapping @got and a replacement mapping @repl, remap the
- * beginning of @got with the space described by @rep.
- */
- static inline void
- xrep_cow_replace_mapping(
- struct xfs_inode *ip,
- struct xfs_iext_cursor *icur,
- const struct xfs_bmbt_irec *got,
- const struct xrep_cow_extent *repl)
- {
- struct xfs_bmbt_irec new = *got; /* struct copy */
- ASSERT(repl->len > 0);
- ASSERT(!isnullstartblock(got->br_startblock));
- trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len);
- if (got->br_blockcount == repl->len) {
- /*
- * The new extent is a complete replacement for the existing
- * extent. Update the COW fork record.
- */
- new.br_startblock = repl->fsbno;
- xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
- return;
- }
- /*
- * The new extent can replace the beginning of the COW fork record.
- * Move the left side of @got upwards, then insert the new record.
- */
- new.br_startoff += repl->len;
- new.br_startblock += repl->len;
- new.br_blockcount -= repl->len;
- xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
- new.br_startoff = got->br_startoff;
- new.br_startblock = repl->fsbno;
- new.br_blockcount = repl->len;
- xfs_iext_insert(ip, icur, &new, BMAP_COWFORK);
- }
- /*
- * Replace the unwritten CoW staging extent backing the given file range with a
- * new space extent that isn't as problematic.
- */
- STATIC int
- xrep_cow_replace_range(
- struct xrep_cow *xc,
- xfs_fileoff_t startoff,
- xfs_extlen_t *blockcount)
- {
- struct xfs_iext_cursor icur;
- struct xrep_cow_extent repl;
- struct xfs_bmbt_irec got;
- struct xfs_scrub *sc = xc->sc;
- xfs_fileoff_t nextoff;
- xfs_extlen_t alloc_len;
- int error;
- /*
- * Put the existing CoW fork mapping in @got. If @got ends before
- * @rep, truncate @rep so we only replace one extent mapping at a time.
- */
- error = xrep_cow_find_mapping(xc, &icur, startoff, &got);
- if (error)
- return error;
- nextoff = min(startoff + *blockcount,
- got.br_startoff + got.br_blockcount);
- /*
- * Allocate a replacement extent. If we don't fill all the blocks,
- * shorten the quantity that will be deleted in this step.
- */
- alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN,
- nextoff - startoff);
- error = xrep_cow_alloc(sc, alloc_len, &repl);
- if (error)
- return error;
- /*
- * Replace the old mapping with the new one, and commit the metadata
- * changes made so far.
- */
- xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl);
- xfs_inode_set_cowblocks_tag(sc->ip);
- error = xfs_defer_finish(&sc->tp);
- if (error)
- return error;
- /* Note the old CoW staging extents; we'll reap them all later. */
- error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks, got.br_startblock,
- repl.len);
- if (error)
- return error;
- *blockcount = repl.len;
- return 0;
- }
- /*
- * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc
- * reservation.
- */
- STATIC int
- xrep_cow_replace(
- uint64_t startoff,
- uint64_t blockcount,
- void *priv)
- {
- struct xrep_cow *xc = priv;
- int error = 0;
- while (blockcount > 0) {
- xfs_extlen_t len = min_t(xfs_filblks_t, blockcount,
- XFS_MAX_BMBT_EXTLEN);
- error = xrep_cow_replace_range(xc, startoff, &len);
- if (error)
- break;
- blockcount -= len;
- startoff += len;
- }
- return error;
- }
- /*
- * Repair an inode's CoW fork. The CoW fork is an in-core structure, so
- * there's no btree to rebuid. Instead, we replace any mappings that are
- * cross-linked or lack ondisk CoW fork records in the refcount btree.
- */
- int
- xrep_bmap_cow(
- struct xfs_scrub *sc)
- {
- struct xrep_cow *xc;
- struct xfs_iext_cursor icur;
- struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK);
- int error;
- if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp))
- return -EOPNOTSUPP;
- if (!ifp)
- return 0;
- /* realtime files aren't supported yet */
- if (XFS_IS_REALTIME_INODE(sc->ip))
- return -EOPNOTSUPP;
- /*
- * If we're somehow not in extents format, then reinitialize it to
- * an empty extent mapping fork and exit.
- */
- if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
- ifp->if_format = XFS_DINODE_FMT_EXTENTS;
- ifp->if_nextents = 0;
- return 0;
- }
- xc = kzalloc(sizeof(struct xrep_cow), XCHK_GFP_FLAGS);
- if (!xc)
- return -ENOMEM;
- xfs_trans_ijoin(sc->tp, sc->ip, 0);
- xc->sc = sc;
- xoff_bitmap_init(&xc->bad_fileoffs);
- xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
- for_each_xfs_iext(ifp, &icur, &xc->irec) {
- if (xchk_should_terminate(sc, &error))
- goto out_bitmap;
- /*
- * delalloc reservations only exist incore, so there is no
- * ondisk metadata that we can examine. Hence we leave them
- * alone.
- */
- if (isnullstartblock(xc->irec.br_startblock))
- continue;
- /*
- * COW fork extents are only in the written state if writeback
- * is actively writing to disk. We cannot restart the write
- * at a different disk address since we've already issued the
- * IO, so we leave these alone and hope for the best.
- */
- if (xfs_bmap_is_written_extent(&xc->irec))
- continue;
- error = xrep_cow_find_bad(xc);
- if (error)
- goto out_bitmap;
- }
- /* Replace any bad unwritten mappings with fresh reservations. */
- error = xoff_bitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc);
- if (error)
- goto out_bitmap;
- /*
- * Reap as many of the old CoW blocks as we can. They are owned ondisk
- * by the refcount btree, not the inode, so it is correct to treat them
- * like inode metadata.
- */
- error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
- &XFS_RMAP_OINFO_COW);
- if (error)
- goto out_bitmap;
- out_bitmap:
- xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
- xoff_bitmap_destroy(&xc->bad_fileoffs);
- kfree(xc);
- return error;
- }
|