// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/newbt.h"

/*
 * Estimate proper slack values for a btree that's being reloaded.
 *
 * Under most circumstances, we'll take whatever default loading value the
 * btree bulk loading code calculates for us. However, there are some
 * exceptions to this rule:
 *
 * (0) If someone turned one of the debug knobs.
 * (1) If this is a per-AG btree and the AG has less than 10% space free.
 * (2) If this is an inode btree and the FS has less than 10% space free.
 *
 * In cases (1) and (2), format the new btree blocks almost completely full
 * to minimize space usage.
 */
static void
xrep_newbt_estimate_slack(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_btree_bload	*bload = &xnr->bload;
	uint64_t		free;
	uint64_t		sz;

	/*
	 * The xfs_globals values are set to -1 (i.e. take the bload defaults)
	 * unless someone has set them otherwise, so we just pull the values
	 * here.
	 */
	bload->leaf_slack = xfs_globals.bload_leaf_slack;
	bload->node_slack = xfs_globals.bload_node_slack;

	if (sc->ops->type == ST_PERAG) {
		free = sc->sa.pag->pagf_freeblks;
		sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
	} else {
		free = percpu_counter_sum(&sc->mp->m_fdblocks);
		sz = sc->mp->m_sb.sb_dblocks;
	}

	/* No further changes if there's more than 10% free space left. */
	if (free >= div_u64(sz, 10))
		return;

	/*
	 * We're low on space; load the btrees as tightly as possible. Leave
	 * a couple of open slots in each btree block so that we don't end up
	 * splitting the btrees like crazy after a mount.
	 */
	if (bload->leaf_slack < 0)
		bload->leaf_slack = 2;
	if (bload->node_slack < 0)
		bload->node_slack = 2;
}

/* Initialize accounting resources for staging a new AG btree. */
void
xrep_newbt_init_ag(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc,
	const struct xfs_owner_info	*oinfo,
	xfs_fsblock_t			alloc_hint,
	enum xfs_ag_resv_type		resv)
{
	memset(xnr, 0, sizeof(struct xrep_newbt));
	xnr->sc = sc;
	xnr->oinfo = *oinfo; /* structure copy */
	xnr->alloc_hint = alloc_hint;
	xnr->resv = resv;
	INIT_LIST_HEAD(&xnr->resv_list);
	xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
	xrep_newbt_estimate_slack(xnr);
}

/* Initialize accounting resources for staging a new inode fork btree. */
int
xrep_newbt_init_inode(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc,
	int				whichfork,
	const struct xfs_owner_info	*oinfo)
{
	struct xfs_ifork		*ifp;

	ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
	if (!ifp)
		return -ENOMEM;

	xrep_newbt_init_ag(xnr, sc, oinfo,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
			XFS_AG_RESV_NONE);
	xnr->ifake.if_fork = ifp;
	xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
	return 0;
}

/*
 * Initialize accounting resources for staging a new btree. Callers are
 * expected to add their own reservations (and clean them up) manually.
 */
void
xrep_newbt_init_bare(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc)
{
	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
			XFS_AG_RESV_NONE);
}

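/*
 * Example: choosing an initializer. A per-AG btree repair would typically
 * call xrep_newbt_init_ag() with the appropriate owner info and reservation
 * type, while a repair that rebuilds an inode-fork btree would use
 * xrep_newbt_init_inode() so that a fake ifork is staged for it. This is an
 * illustrative sketch only; the xre->new_bmapbt caller field is hypothetical
 * and not part of this file:
 *
 *	struct xfs_owner_info	oinfo;
 *
 *	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
 *	error = xrep_newbt_init_inode(&xre->new_bmapbt, sc, XFS_DATA_FORK,
 *			&oinfo);
 *	if (error)
 *		return error;
 *	...allocate blocks, bulk load the new btree, then commit or cancel...
 */
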
/*
 * Designate specific blocks to be used to build our new btree. @pag must be
 * a passive reference.
 */
STATIC int
xrep_newbt_add_blocks(
	struct xrep_newbt		*xnr,
	struct xfs_perag		*pag,
	const struct xfs_alloc_arg	*args)
{
	struct xfs_mount		*mp = xnr->sc->mp;
	struct xrep_newbt_resv		*resv;
	int				error;

	resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
	if (!resv)
		return -ENOMEM;

	INIT_LIST_HEAD(&resv->list);
	resv->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
	resv->len = args->len;
	resv->used = 0;
	resv->pag = xfs_perag_hold(pag);

	if (args->tp) {
		ASSERT(xnr->oinfo.oi_offset == 0);

		error = xfs_alloc_schedule_autoreap(args,
				XFS_FREE_EXTENT_SKIP_DISCARD, &resv->autoreap);
		if (error)
			goto out_pag;
	}

	list_add_tail(&resv->list, &xnr->resv_list);
	return 0;
out_pag:
	xfs_perag_put(resv->pag);
	kfree(resv);
	return error;
}

/*
 * Add an extent to the new btree reservation pool. Callers are required to
 * reap this reservation manually if the repair is cancelled. @pag must be a
 * passive reference.
 */
int
xrep_newbt_add_extent(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xfs_alloc_arg	args = {
		.tp		= NULL, /* no autoreap */
		.oinfo		= xnr->oinfo,
		.fsbno		= XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno),
		.len		= len,
		.resv		= xnr->resv,
	};

	return xrep_newbt_add_blocks(xnr, pag, &args);
}

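/*
 * Example: a repair that has already located a run of blocks it may reuse
 * (rather than allocating fresh space) can hand the extent to the pool
 * directly. Sketch only; the found_agbno/found_len values and the cleanup
 * policy are hypothetical:
 *
 *	error = xrep_newbt_add_extent(&xre->new_btree, sc->sa.pag,
 *			found_agbno, found_len);
 *	if (error)
 *		return error;
 *
 * Because no transaction is attached (.tp == NULL above), no automatic
 * reaping is scheduled; if the repair is cancelled, the caller must reap
 * these blocks itself.
 */
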
/* Don't let our allocation hint take us beyond this AG */
static inline void
xrep_newbt_validate_ag_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);

	if (agno == sc->sa.pag->pag_agno &&
	    xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
			XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for a new per-AG btree. */
STATIC int
xrep_newbt_alloc_ag_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	ASSERT(sc->sa.pag != NULL);

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp	= sc->tp,
			.mp	= mp,
			.oinfo	= xnr->oinfo,
			.minlen	= 1,
			.maxlen	= nr_blocks,
			.prod	= 1,
			.resv	= xnr->resv,
		};
		xfs_agnumber_t	agno;

		xrep_newbt_validate_ag_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_near_bno(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_ag_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		if (agno != sc->sa.pag->pag_agno) {
			ASSERT(agno == sc->sa.pag->pag_agno);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, sc->sa.pag, &args);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Don't let our allocation hint take us beyond EOFS */
static inline void
xrep_newbt_validate_file_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;

	if (xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for our new file-based btree. */
STATIC int
xrep_newbt_alloc_file_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp	= sc->tp,
			.mp	= mp,
			.oinfo	= xnr->oinfo,
			.minlen	= 1,
			.maxlen	= nr_blocks,
			.prod	= 1,
			.resv	= xnr->resv,
		};
		struct xfs_perag	*pag;
		xfs_agnumber_t		agno;

		xrep_newbt_validate_file_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_start_ag(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_file_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		pag = xfs_perag_get(mp, agno);
		if (!pag) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, pag, &args);
		xfs_perag_put(pag);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Allocate disk space for our new btree. */
int
xrep_newbt_alloc_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	if (xnr->sc->ip)
		return xrep_newbt_alloc_file_blocks(xnr, nr_blocks);
	return xrep_newbt_alloc_ag_blocks(xnr, nr_blocks);
}

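/*
 * Example: typical flow for rebuilding a btree with this machinery. This is
 * an illustrative sketch only; the xre->new_btree caller field, staging_cur,
 * and nr_records are hypothetical, though xfs_btree_bload_compute_geometry()
 * and the xrep_newbt_*() helpers named here are the real entry points:
 *
 *	xrep_newbt_init_ag(&xre->new_btree, sc, &oinfo, alloc_hint, resv);
 *
 *	error = xfs_btree_bload_compute_geometry(staging_cur,
 *			&xre->new_btree.bload, nr_records);
 *	if (error)
 *		goto err_cancel;
 *
 *	error = xrep_newbt_alloc_blocks(&xre->new_btree,
 *			xre->new_btree.bload.nr_blocks);
 *	if (error)
 *		goto err_cancel;
 *
 *	...bulk load the records and commit the staged root, then...
 *	return xrep_newbt_commit(&xre->new_btree);
 * err_cancel:
 *	xrep_newbt_cancel(&xre->new_btree);
 *	return error;
 */
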
/*
 * Free the unused part of a space extent that was reserved for a new ondisk
 * structure. Returns the number of EFIs logged or a negative errno.
 */
STATIC int
xrep_newbt_free_extent(
	struct xrep_newbt	*xnr,
	struct xrep_newbt_resv	*resv,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agblock_t		free_agbno = resv->agbno;
	xfs_extlen_t		free_aglen = resv->len;
	xfs_fsblock_t		fsbno;
	int			error;

	if (!btree_committed || resv->used == 0) {
		/*
		 * If we're not committing a new btree or we didn't use the
		 * space reservation, let the existing EFI free the entire
		 * space extent.
		 */
		trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno,
				free_agbno, free_aglen, xnr->oinfo.oi_owner);
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		return 1;
	}

	/*
	 * We used space and committed the btree. Cancel the autoreap, remove
	 * the written blocks from the reservation, and possibly log a new EFI
	 * to free any unused reservation space.
	 */
	xfs_alloc_cancel_autoreap(sc->tp, &resv->autoreap);
	free_agbno += resv->used;
	free_aglen -= resv->used;

	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, xnr->oinfo.oi_owner);

	ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
	ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);

	/*
	 * Use EFIs to free the reservations. This reduces the chance
	 * that we leak blocks if the system goes down.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
	error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
			xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
	if (error)
		return error;

	return 1;
}

/* Free all the accounting info and disk space we reserved for a new btree. */
STATIC int
xrep_newbt_free(
	struct xrep_newbt	*xnr,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xrep_newbt_resv	*resv, *n;
	unsigned int		freed = 0;
	int			error = 0;

	/*
	 * If the filesystem already went down, we can't free the blocks. Skip
	 * ahead to freeing the incore metadata because we can't fix anything.
	 */
	if (xfs_is_shutdown(sc->mp))
		goto junkit;

	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		int ret;

		ret = xrep_newbt_free_extent(xnr, resv, btree_committed);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);

		if (ret < 0) {
			error = ret;
			goto junkit;
		}

		freed += ret;
		if (freed >= XREP_MAX_ITRUNCATE_EFIS) {
			error = xrep_defer_finish(sc);
			if (error)
				goto junkit;
			freed = 0;
		}
	}

	if (freed)
		error = xrep_defer_finish(sc);

junkit:
	/*
	 * If we still have reservations attached to @xnr, cleanup must have
	 * failed and the filesystem is about to go down. Clean up the incore
	 * reservations and try to commit to freeing the space we used.
	 */
	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
	}

	if (sc->ip) {
		kmem_cache_free(xfs_ifork_cache, xnr->ifake.if_fork);
		xnr->ifake.if_fork = NULL;
	}

	return error;
}

/*
 * Free all the accounting info and unused disk space allocations after
 * committing a new btree.
 */
int
xrep_newbt_commit(
	struct xrep_newbt	*xnr)
{
	return xrep_newbt_free(xnr, true);
}

/*
 * Free all the accounting info and all of the disk space we reserved for a new
 * btree that we're not going to commit. We want to be able to roll things
 * back cleanly if, for example, we hit ENOSPC midway through allocation.
 */
void
xrep_newbt_cancel(
	struct xrep_newbt	*xnr)
{
	xrep_newbt_free(xnr, false);
}

/* Feed one of the reserved btree blocks to the bulk loader. */
int
xrep_newbt_claim_block(
	struct xfs_btree_cur	*cur,
	struct xrep_newbt	*xnr,
	union xfs_btree_ptr	*ptr)
{
	struct xrep_newbt_resv	*resv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_agblock_t		agbno;

	/*
	 * The first item in the list should always have a free block unless
	 * we're completely out.
	 */
	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
	if (resv->used == resv->len)
		return -ENOSPC;

	/*
	 * Peel off a block from the start of the reservation. We allocate
	 * blocks in order to place blocks on disk in increasing record or key
	 * order. The block reservations tend to end up on the list in
	 * decreasing order, which hopefully results in leaf blocks ending up
	 * together.
	 */
	agbno = resv->agbno + resv->used;
	resv->used++;

	/* If we used all the blocks in this reservation, move it to the end. */
	if (resv->used == resv->len)
		list_move_tail(&resv->list, &xnr->resv_list);

	trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
			xnr->oinfo.oi_owner);

	if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
		ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
				agbno));
	else
		ptr->s = cpu_to_be32(agbno);

	/* Relog all the EFIs. */
	return xrep_defer_finish(xnr->sc);
}

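/*
 * Example: wiring this into the bulk loader. xfs_btree_bload() takes a
 * claim_block callback of the form (cur, ptr, priv), so a caller typically
 * wraps this function with a thin adapter. Sketch only; xrep_example and its
 * new_btree member are hypothetical:
 *
 *	static int
 *	xrep_example_claim_block(
 *		struct xfs_btree_cur	*cur,
 *		union xfs_btree_ptr	*ptr,
 *		void			*priv)
 *	{
 *		struct xrep_example	*xre = priv;
 *
 *		return xrep_newbt_claim_block(cur, &xre->new_btree, ptr);
 *	}
 *
 *	...
 *	xre->new_btree.bload.claim_block = xrep_example_claim_block;
 *	error = xfs_btree_bload(staging_cur, &xre->new_btree.bload, xre);
 */
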
/* How many reserved blocks are unused? */
unsigned int
xrep_newbt_unused_blocks(
	struct xrep_newbt	*xnr)
{
	struct xrep_newbt_resv	*resv;
	unsigned int		unused = 0;

	list_for_each_entry(resv, &xnr->resv_list, list)
		unused += resv->len - resv->used;
	return unused;
}
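
/*
 * Example: a caller that sized its reservation exactly (for instance when the
 * new btree root must fit in an inode fork) might verify that the bulk load
 * consumed everything before committing. Sketch only; the error policy shown
 * here is a hypothetical caller's choice:
 *
 *	if (xrep_newbt_unused_blocks(&xre->new_btree) > 0) {
 *		error = -EFSCORRUPTED;
 *		goto err_cancel;
 *	}
 *	error = xrep_newbt_commit(&xre->new_btree);
 */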