| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077 |
- // SPDX-License-Identifier: GPL-2.0-or-later
- /*
- * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <djwong@kernel.org>
- */
- #include "xfs.h"
- #include "xfs_fs.h"
- #include "xfs_shared.h"
- #include "xfs_format.h"
- #include "xfs_trans_resv.h"
- #include "xfs_mount.h"
- #include "xfs_log_format.h"
- #include "xfs_trans.h"
- #include "xfs_inode.h"
- #include "xfs_icache.h"
- #include "xfs_iwalk.h"
- #include "xfs_ialloc.h"
- #include "xfs_dir2.h"
- #include "xfs_dir2_priv.h"
- #include "xfs_ag.h"
- #include "xfs_parent.h"
- #include "scrub/scrub.h"
- #include "scrub/common.h"
- #include "scrub/repair.h"
- #include "scrub/xfile.h"
- #include "scrub/xfarray.h"
- #include "scrub/iscan.h"
- #include "scrub/orphanage.h"
- #include "scrub/nlinks.h"
- #include "scrub/trace.h"
- #include "scrub/readdir.h"
- #include "scrub/tempfile.h"
- #include "scrub/listxattr.h"
- /*
- * Live Inode Link Count Checking
- * ==============================
- *
- * Inode link counts are "summary" metadata, in the sense that they are
- * computed as the number of directory entries referencing each file on the
- * filesystem. Therefore, we compute the correct link counts by creating a
- * shadow link count structure and walking every inode.
- */
- /* Set us up to scrub inode link counts. */
- int
- xchk_setup_nlinks(
- struct xfs_scrub *sc)
- {
- struct xchk_nlink_ctrs *xnc;
- int error;
- xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
- if (xchk_could_repair(sc)) {
- error = xrep_setup_nlinks(sc);
- if (error)
- return error;
- }
- xnc = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
- if (!xnc)
- return -ENOMEM;
- xnc->xname.name = xnc->namebuf;
- xnc->sc = sc;
- sc->buf = xnc;
- return xchk_setup_fs(sc);
- }
- /*
- * Part 1: Collecting file link counts. For each file, we create a shadow link
- * counting structure, then walk the entire directory tree, incrementing parent
- * and child link counts for each directory entry seen.
- *
- * To avoid false corruption reports in part 2, any failure in this part must
- * set the INCOMPLETE flag even when a negative errno is returned. This care
- * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
- * ECANCELED) that are absorbed into a scrub state flag update by
- * xchk_*_process_error. Scrub and repair share the same incore data
- * structures, so the INCOMPLETE flag is critical to prevent a repair based on
- * insufficient information.
- *
- * Because we are scanning a live filesystem, it's possible that another thread
- * will try to update the link counts for an inode that we've already scanned.
- * This will cause our counts to be incorrect. Therefore, we hook all
- * directory entry updates because that is when link count updates occur. By
- * shadowing transaction updates in this manner, live nlink check can ensure by
- * locking the inode and the shadow structure that its own copies are not out
- * of date. Because the hook code runs in a different process context from the
- * scrub code and the scrub state flags are not accessed atomically, failures
- * in the hook code must abort the iscan and the scrubber must notice the
- * aborted scan and set the incomplete flag.
- *
- * Note that we use jump labels and srcu notifier hooks to minimize the
- * overhead when live nlinks is /not/ running. Locking order for nlink
- * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
- */
- /*
- * Add a delta to an nlink counter, clamping the value to U32_MAX. Because
- * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results
- * even if we lose some precision.
- */
- static inline void
- careful_add(
- xfs_nlink_t *nlinkp,
- int delta)
- {
- uint64_t new_value = (uint64_t)(*nlinkp) + delta;
- BUILD_BUG_ON(XFS_MAXLINK > U32_MAX);
- *nlinkp = min_t(uint64_t, new_value, U32_MAX);
- }
- /* Update incore link count information. Caller must hold the nlinks lock. */
- STATIC int
- xchk_nlinks_update_incore(
- struct xchk_nlink_ctrs *xnc,
- xfs_ino_t ino,
- int parents_delta,
- int backrefs_delta,
- int children_delta)
- {
- struct xchk_nlink nl;
- int error;
- if (!xnc->nlinks)
- return 0;
- error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
- if (error)
- return error;
- trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
- backrefs_delta, children_delta);
- careful_add(&nl.parents, parents_delta);
- careful_add(&nl.backrefs, backrefs_delta);
- careful_add(&nl.children, children_delta);
- nl.flags |= XCHK_NLINK_WRITTEN;
- error = xfarray_store(xnc->nlinks, ino, &nl);
- if (error == -EFBIG) {
- /*
- * EFBIG means we tried to store data at too high a byte offset
- * in the sparse array. IOWs, we cannot complete the check and
- * must notify userspace that the check was incomplete.
- */
- error = -ECANCELED;
- }
- return error;
- }
- /*
- * Apply a link count change from the regular filesystem into our shadow link
- * count structure based on a directory update in progress.
- */
- STATIC int
- xchk_nlinks_live_update(
- struct notifier_block *nb,
- unsigned long action,
- void *data)
- {
- struct xfs_dir_update_params *p = data;
- struct xchk_nlink_ctrs *xnc;
- int error;
- xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb);
- /*
- * Ignore temporary directories being used to stage dir repairs, since
- * we don't bump the link counts of the children.
- */
- if (xrep_is_tempfile(p->dp))
- return NOTIFY_DONE;
- trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
- p->delta, p->name->name, p->name->len);
- /*
- * If we've already scanned @dp, update the number of parents that link
- * to @ip. If @ip is a subdirectory, update the number of child links
- * going out of @dp.
- */
- if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
- mutex_lock(&xnc->lock);
- error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
- 0, 0);
- if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
- error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
- 0, p->delta);
- mutex_unlock(&xnc->lock);
- if (error)
- goto out_abort;
- }
- /*
- * If @ip is a subdirectory and we've already scanned it, update the
- * number of backrefs pointing to @dp.
- */
- if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
- xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
- mutex_lock(&xnc->lock);
- error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
- p->delta, 0);
- mutex_unlock(&xnc->lock);
- if (error)
- goto out_abort;
- }
- return NOTIFY_DONE;
- out_abort:
- xchk_iscan_abort(&xnc->collect_iscan);
- return NOTIFY_DONE;
- }
- /* Bump the observed link count for the inode referenced by this entry. */
- STATIC int
- xchk_nlinks_collect_dirent(
- struct xfs_scrub *sc,
- struct xfs_inode *dp,
- xfs_dir2_dataptr_t dapos,
- const struct xfs_name *name,
- xfs_ino_t ino,
- void *priv)
- {
- struct xchk_nlink_ctrs *xnc = priv;
- bool dot = false, dotdot = false;
- int error;
- /* Does this name make sense? */
- if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
- error = -ECANCELED;
- goto out_abort;
- }
- if (name->len == 1 && name->name[0] == '.')
- dot = true;
- else if (name->len == 2 && name->name[0] == '.' &&
- name->name[1] == '.')
- dotdot = true;
- /* Don't accept a '.' entry that points somewhere else. */
- if (dot && ino != dp->i_ino) {
- error = -ECANCELED;
- goto out_abort;
- }
- /* Don't accept an invalid inode number. */
- if (!xfs_verify_dir_ino(sc->mp, ino)) {
- error = -ECANCELED;
- goto out_abort;
- }
- /* Update the shadow link counts if we haven't already failed. */
- if (xchk_iscan_aborted(&xnc->collect_iscan)) {
- error = -ECANCELED;
- goto out_incomplete;
- }
- trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
- mutex_lock(&xnc->lock);
- /*
- * If this is a dotdot entry, it is a back link from dp to ino. How
- * we handle this depends on whether or not dp is the root directory.
- *
- * The root directory is its own parent, so we pretend the dotdot entry
- * establishes the "parent" of the root directory. Increment the
- * number of parents of the root directory.
- *
- * Otherwise, increment the number of backrefs pointing back to ino.
- *
- * If the filesystem has parent pointers, we walk the pptrs to
- * determine the backref count.
- */
- if (dotdot) {
- if (dp == sc->mp->m_rootip)
- error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
- else if (!xfs_has_parent(sc->mp))
- error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
- else
- error = 0;
- if (error)
- goto out_unlock;
- }
- /*
- * If this dirent is a forward link from dp to ino, increment the
- * number of parents linking into ino.
- */
- if (!dot && !dotdot) {
- error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
- if (error)
- goto out_unlock;
- }
- /*
- * If this dirent is a forward link to a subdirectory, increment the
- * number of child links of dp.
- */
- if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
- error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
- if (error)
- goto out_unlock;
- }
- mutex_unlock(&xnc->lock);
- return 0;
- out_unlock:
- mutex_unlock(&xnc->lock);
- out_abort:
- xchk_iscan_abort(&xnc->collect_iscan);
- out_incomplete:
- xchk_set_incomplete(sc);
- return error;
- }
- /* Bump the backref count for the inode referenced by this parent pointer. */
- STATIC int
- xchk_nlinks_collect_pptr(
- struct xfs_scrub *sc,
- struct xfs_inode *ip,
- unsigned int attr_flags,
- const unsigned char *name,
- unsigned int namelen,
- const void *value,
- unsigned int valuelen,
- void *priv)
- {
- struct xfs_name xname = {
- .name = name,
- .len = namelen,
- };
- struct xchk_nlink_ctrs *xnc = priv;
- const struct xfs_parent_rec *pptr_rec = value;
- xfs_ino_t parent_ino;
- int error;
- /* Update the shadow link counts if we haven't already failed. */
- if (xchk_iscan_aborted(&xnc->collect_iscan)) {
- error = -ECANCELED;
- goto out_incomplete;
- }
- if (!(attr_flags & XFS_ATTR_PARENT))
- return 0;
- error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
- valuelen, &parent_ino, NULL);
- if (error)
- return error;
- trace_xchk_nlinks_collect_pptr(sc->mp, ip, &xname, pptr_rec);
- mutex_lock(&xnc->lock);
- error = xchk_nlinks_update_incore(xnc, parent_ino, 0, 1, 0);
- if (error)
- goto out_unlock;
- mutex_unlock(&xnc->lock);
- return 0;
- out_unlock:
- mutex_unlock(&xnc->lock);
- xchk_iscan_abort(&xnc->collect_iscan);
- out_incomplete:
- xchk_set_incomplete(sc);
- return error;
- }
- static uint
- xchk_nlinks_ilock_dir(
- struct xfs_inode *ip)
- {
- uint lock_mode = XFS_ILOCK_SHARED;
- /*
- * We're going to scan the directory entries, so we must be ready to
- * pull the data fork mappings into memory if they aren't already.
- */
- if (xfs_need_iread_extents(&ip->i_df))
- lock_mode = XFS_ILOCK_EXCL;
- /*
- * We're going to scan the parent pointers, so we must be ready to
- * pull the attr fork mappings into memory if they aren't already.
- */
- if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) &&
- xfs_need_iread_extents(&ip->i_af))
- lock_mode = XFS_ILOCK_EXCL;
- /*
- * Take the IOLOCK so that other threads cannot start a directory
- * update while we're scanning.
- */
- lock_mode |= XFS_IOLOCK_SHARED;
- xfs_ilock(ip, lock_mode);
- return lock_mode;
- }
- /* Walk a directory to bump the observed link counts of the children. */
- STATIC int
- xchk_nlinks_collect_dir(
- struct xchk_nlink_ctrs *xnc,
- struct xfs_inode *dp)
- {
- struct xfs_scrub *sc = xnc->sc;
- unsigned int lock_mode;
- int error = 0;
- /*
- * Ignore temporary directories being used to stage dir repairs, since
- * we don't bump the link counts of the children.
- */
- if (xrep_is_tempfile(dp))
- return 0;
- /* Prevent anyone from changing this directory while we walk it. */
- lock_mode = xchk_nlinks_ilock_dir(dp);
- /*
- * The dotdot entry of an unlinked directory still points to the last
- * parent, but the parent no longer links to this directory. Skip the
- * directory to avoid overcounting.
- */
- if (VFS_I(dp)->i_nlink == 0)
- goto out_unlock;
- /*
- * We cannot count file links if the directory looks as though it has
- * been zapped by the inode record repair code.
- */
- if (xchk_dir_looks_zapped(dp)) {
- error = -EBUSY;
- goto out_abort;
- }
- error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
- if (error == -ECANCELED) {
- error = 0;
- goto out_unlock;
- }
- if (error)
- goto out_abort;
- /* Walk the parent pointers to get real backref counts. */
- if (xfs_has_parent(sc->mp)) {
- /*
- * If the extended attributes look as though they has been
- * zapped by the inode record repair code, we cannot scan for
- * parent pointers.
- */
- if (xchk_pptr_looks_zapped(dp)) {
- error = -EBUSY;
- goto out_unlock;
- }
- error = xchk_xattr_walk(sc, dp, xchk_nlinks_collect_pptr, NULL,
- xnc);
- if (error == -ECANCELED) {
- error = 0;
- goto out_unlock;
- }
- if (error)
- goto out_abort;
- }
- xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
- goto out_unlock;
- out_abort:
- xchk_set_incomplete(sc);
- xchk_iscan_abort(&xnc->collect_iscan);
- out_unlock:
- xfs_iunlock(dp, lock_mode);
- return error;
- }
- /* If this looks like a valid pointer, count it. */
- static inline int
- xchk_nlinks_collect_metafile(
- struct xchk_nlink_ctrs *xnc,
- xfs_ino_t ino)
- {
- if (!xfs_verify_ino(xnc->sc->mp, ino))
- return 0;
- trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
- return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
- }
- /* Bump the link counts of metadata files rooted in the superblock. */
- STATIC int
- xchk_nlinks_collect_metafiles(
- struct xchk_nlink_ctrs *xnc)
- {
- struct xfs_mount *mp = xnc->sc->mp;
- int error = -ECANCELED;
- if (xchk_iscan_aborted(&xnc->collect_iscan))
- goto out_incomplete;
- mutex_lock(&xnc->lock);
- error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
- if (error)
- goto out_abort;
- error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
- if (error)
- goto out_abort;
- error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
- if (error)
- goto out_abort;
- error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
- if (error)
- goto out_abort;
- error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
- if (error)
- goto out_abort;
- mutex_unlock(&xnc->lock);
- return 0;
- out_abort:
- mutex_unlock(&xnc->lock);
- xchk_iscan_abort(&xnc->collect_iscan);
- out_incomplete:
- xchk_set_incomplete(xnc->sc);
- return error;
- }
- /* Advance the collection scan cursor for this non-directory file. */
- static inline int
- xchk_nlinks_collect_file(
- struct xchk_nlink_ctrs *xnc,
- struct xfs_inode *ip)
- {
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return 0;
- }
- /* Walk all directories and count inode links. */
- STATIC int
- xchk_nlinks_collect(
- struct xchk_nlink_ctrs *xnc)
- {
- struct xfs_scrub *sc = xnc->sc;
- struct xfs_inode *ip;
- int error;
- /* Count the rt and quota files that are rooted in the superblock. */
- error = xchk_nlinks_collect_metafiles(xnc);
- if (error)
- return error;
- /*
- * Set up for a potentially lengthy filesystem scan by reducing our
- * transaction resource usage for the duration. Specifically:
- *
- * Cancel the transaction to release the log grant space while we scan
- * the filesystem.
- *
- * Create a new empty transaction to eliminate the possibility of the
- * inode scan deadlocking on cyclical metadata.
- *
- * We pass the empty transaction to the file scanning function to avoid
- * repeatedly cycling empty transactions. This can be done even though
- * we take the IOLOCK to quiesce the file because empty transactions
- * do not take sb_internal.
- */
- xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
- while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
- if (S_ISDIR(VFS_I(ip)->i_mode))
- error = xchk_nlinks_collect_dir(xnc, ip);
- else
- error = xchk_nlinks_collect_file(xnc, ip);
- xchk_irele(sc, ip);
- if (error)
- break;
- if (xchk_should_terminate(sc, &error))
- break;
- }
- xchk_iscan_iter_finish(&xnc->collect_iscan);
- if (error) {
- xchk_set_incomplete(sc);
- /*
- * If we couldn't grab an inode that was busy with a state
- * change, change the error code so that we exit to userspace
- * as quickly as possible.
- */
- if (error == -EBUSY)
- return -ECANCELED;
- return error;
- }
- /*
- * Switch out for a real transaction in preparation for building a new
- * tree.
- */
- xchk_trans_cancel(sc);
- return xchk_setup_fs(sc);
- }
- /*
- * Part 2: Comparing file link counters. Walk each inode and compare the link
- * counts against our shadow information; and then walk each shadow link count
- * structure (that wasn't covered in the first part), comparing it against the
- * file.
- */
- /* Read the observed link count for comparison with the actual inode. */
- STATIC int
- xchk_nlinks_comparison_read(
- struct xchk_nlink_ctrs *xnc,
- xfs_ino_t ino,
- struct xchk_nlink *obs)
- {
- struct xchk_nlink nl;
- int error;
- error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
- if (error)
- return error;
- nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
- error = xfarray_store(xnc->nlinks, ino, &nl);
- if (error == -EFBIG) {
- /*
- * EFBIG means we tried to store data at too high a byte offset
- * in the sparse array. IOWs, we cannot complete the check and
- * must notify userspace that the check was incomplete. This
- * shouldn't really happen outside of the collection phase.
- */
- xchk_set_incomplete(xnc->sc);
- return -ECANCELED;
- }
- if (error)
- return error;
- /* Copy the counters, but do not expose the internal state. */
- obs->parents = nl.parents;
- obs->backrefs = nl.backrefs;
- obs->children = nl.children;
- obs->flags = 0;
- return 0;
- }
- /* Check our link count against an inode. */
- STATIC int
- xchk_nlinks_compare_inode(
- struct xchk_nlink_ctrs *xnc,
- struct xfs_inode *ip)
- {
- struct xchk_nlink obs;
- struct xfs_scrub *sc = xnc->sc;
- uint64_t total_links;
- unsigned int actual_nlink;
- int error;
- /*
- * Ignore temporary files being used to stage repairs, since we assume
- * they're correct for non-directories, and the directory repair code
- * doesn't bump the link counts for the children.
- */
- if (xrep_is_tempfile(ip))
- return 0;
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- mutex_lock(&xnc->lock);
- if (xchk_iscan_aborted(&xnc->collect_iscan)) {
- xchk_set_incomplete(xnc->sc);
- error = -ECANCELED;
- goto out_scanlock;
- }
- error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
- if (error)
- goto out_scanlock;
- /*
- * If we don't have ftype to get an accurate count of the subdirectory
- * entries in this directory, take advantage of the fact that on a
- * consistent ftype=0 filesystem, the number of subdirectory
- * backreferences (dotdot entries) pointing towards this directory
- * should be equal to the number of subdirectory entries in the
- * directory.
- */
- if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
- obs.children = obs.backrefs;
- total_links = xchk_nlink_total(ip, &obs);
- actual_nlink = VFS_I(ip)->i_nlink;
- trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);
- /*
- * If we found so many parents that we'd overflow i_nlink, we must flag
- * this as a corruption. The VFS won't let users increase the link
- * count, but it will let them decrease it.
- */
- if (total_links > XFS_NLINK_PINNED) {
- xchk_ino_set_corrupt(sc, ip->i_ino);
- goto out_corrupt;
- } else if (total_links > XFS_MAXLINK) {
- xchk_ino_set_warning(sc, ip->i_ino);
- }
- /* Link counts should match. */
- if (total_links != actual_nlink) {
- xchk_ino_set_corrupt(sc, ip->i_ino);
- goto out_corrupt;
- }
- if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
- /*
- * The collection phase ignores directories with zero link
- * count, so we ignore them here too.
- *
- * The number of subdirectory backreferences (dotdot entries)
- * pointing towards this directory should be equal to the
- * number of subdirectory entries in the directory.
- */
- if (obs.children != obs.backrefs)
- xchk_ino_xref_set_corrupt(sc, ip->i_ino);
- } else {
- /*
- * Non-directories and unlinked directories should not have
- * back references.
- */
- if (obs.backrefs != 0) {
- xchk_ino_set_corrupt(sc, ip->i_ino);
- goto out_corrupt;
- }
- /*
- * Non-directories and unlinked directories should not have
- * children.
- */
- if (obs.children != 0) {
- xchk_ino_set_corrupt(sc, ip->i_ino);
- goto out_corrupt;
- }
- }
- if (ip == sc->mp->m_rootip) {
- /*
- * For the root of a directory tree, both the '.' and '..'
- * entries should point to the root directory. The dotdot
- * entry is counted as a parent of the root /and/ a backref of
- * the root directory.
- */
- if (obs.parents != 1) {
- xchk_ino_set_corrupt(sc, ip->i_ino);
- goto out_corrupt;
- }
- } else if (actual_nlink > 0) {
- /*
- * Linked files that are not the root directory should have at
- * least one parent.
- */
- if (obs.parents == 0) {
- xchk_ino_set_corrupt(sc, ip->i_ino);
- goto out_corrupt;
- }
- }
- out_corrupt:
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- error = -ECANCELED;
- out_scanlock:
- mutex_unlock(&xnc->lock);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return error;
- }
- /*
- * Check our link count against an inode that wasn't checked previously. This
- * is intended to catch directories with dangling links, though we could be
- * racing with inode allocation in other threads.
- */
- STATIC int
- xchk_nlinks_compare_inum(
- struct xchk_nlink_ctrs *xnc,
- xfs_ino_t ino)
- {
- struct xchk_nlink obs;
- struct xfs_mount *mp = xnc->sc->mp;
- struct xfs_trans *tp = xnc->sc->tp;
- struct xfs_buf *agi_bp;
- struct xfs_inode *ip;
- int error;
- /*
- * The first iget failed, so try again with the variant that returns
- * either an incore inode or the AGI buffer. If the function returns
- * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
- * can guarantee that the inode won't be allocated while we check for
- * a zero link count in the observed link count data.
- */
- error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
- if (!error) {
- /* Actually got an inode, so use the inode compare. */
- error = xchk_nlinks_compare_inode(xnc, ip);
- xchk_irele(xnc->sc, ip);
- return error;
- }
- if (error == -ENOENT || error == -EINVAL) {
- /* No inode was found. Check for zero link count below. */
- error = 0;
- }
- if (error)
- goto out_agi;
- /* Ensure that we have protected against inode allocation/freeing. */
- if (agi_bp == NULL) {
- ASSERT(agi_bp != NULL);
- xchk_set_incomplete(xnc->sc);
- return -ECANCELED;
- }
- if (xchk_iscan_aborted(&xnc->collect_iscan)) {
- xchk_set_incomplete(xnc->sc);
- error = -ECANCELED;
- goto out_agi;
- }
- mutex_lock(&xnc->lock);
- error = xchk_nlinks_comparison_read(xnc, ino, &obs);
- if (error)
- goto out_scanlock;
- trace_xchk_nlinks_check_zero(mp, ino, &obs);
- /*
- * If we can't grab the inode, the link count had better be zero. We
- * still hold the AGI to prevent inode allocation/freeing.
- */
- if (xchk_nlink_total(NULL, &obs) != 0) {
- xchk_ino_set_corrupt(xnc->sc, ino);
- error = -ECANCELED;
- }
- out_scanlock:
- mutex_unlock(&xnc->lock);
- out_agi:
- if (agi_bp)
- xfs_trans_brelse(tp, agi_bp);
- return error;
- }
- /*
- * Try to visit every inode in the filesystem to compare the link count. Move
- * on if we can't grab an inode, since we'll revisit unchecked nlink records in
- * the second part.
- */
- static int
- xchk_nlinks_compare_iter(
- struct xchk_nlink_ctrs *xnc,
- struct xfs_inode **ipp)
- {
- int error;
- do {
- error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
- } while (error == -EBUSY);
- return error;
- }
- /* Compare the link counts we observed against the live information. */
- STATIC int
- xchk_nlinks_compare(
- struct xchk_nlink_ctrs *xnc)
- {
- struct xchk_nlink nl;
- struct xfs_scrub *sc = xnc->sc;
- struct xfs_inode *ip;
- xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
- int error;
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return 0;
- /*
- * Create a new empty transaction so that we can advance the iscan
- * cursor without deadlocking if the inobt has a cycle and push on the
- * inactivation workqueue.
- */
- xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
- /*
- * Use the inobt to walk all allocated inodes to compare the link
- * counts. Inodes skipped by _compare_iter will be tried again in the
- * next phase of the scan.
- */
- xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
- while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
- error = xchk_nlinks_compare_inode(xnc, ip);
- xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
- xchk_irele(sc, ip);
- if (error)
- break;
- if (xchk_should_terminate(sc, &error))
- break;
- }
- xchk_iscan_iter_finish(&xnc->compare_iscan);
- xchk_iscan_teardown(&xnc->compare_iscan);
- if (error)
- return error;
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return 0;
- /*
- * Walk all the non-null nlink observations that weren't checked in the
- * previous step.
- */
- mutex_lock(&xnc->lock);
- while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
- xfs_ino_t ino = cur - 1;
- if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
- continue;
- mutex_unlock(&xnc->lock);
- error = xchk_nlinks_compare_inum(xnc, ino);
- if (error)
- return error;
- if (xchk_should_terminate(xnc->sc, &error))
- return error;
- mutex_lock(&xnc->lock);
- }
- mutex_unlock(&xnc->lock);
- return error;
- }
- /* Tear down everything associated with a nlinks check. */
- static void
- xchk_nlinks_teardown_scan(
- void *priv)
- {
- struct xchk_nlink_ctrs *xnc = priv;
- /* Discourage any hook functions that might be running. */
- xchk_iscan_abort(&xnc->collect_iscan);
- xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook);
- xfarray_destroy(xnc->nlinks);
- xnc->nlinks = NULL;
- xchk_iscan_teardown(&xnc->collect_iscan);
- mutex_destroy(&xnc->lock);
- xnc->sc = NULL;
- }
- /*
- * Scan all inodes in the entire filesystem to generate link count data. If
- * the scan is successful, the counts will be left alive for a repair. If any
- * error occurs, we'll tear everything down.
- */
- STATIC int
- xchk_nlinks_setup_scan(
- struct xfs_scrub *sc,
- struct xchk_nlink_ctrs *xnc)
- {
- struct xfs_mount *mp = sc->mp;
- char *descr;
- unsigned long long max_inos;
- xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1;
- xfs_agino_t first_agino, last_agino;
- int error;
- mutex_init(&xnc->lock);
- /* Retry iget every tenth of a second for up to 30 seconds. */
- xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
- /*
- * Set up enough space to store an nlink record for the highest
- * possible inode number in this system.
- */
- xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
- max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
- descr = xchk_xfile_descr(sc, "file link counts");
- error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
- sizeof(struct xchk_nlink), &xnc->nlinks);
- kfree(descr);
- if (error)
- goto out_teardown;
- /*
- * Hook into the directory entry code so that we can capture updates to
- * file link counts. The hook only triggers for inodes that were
- * already scanned, and the scanner thread takes each inode's ILOCK,
- * which means that any in-progress inode updates will finish before we
- * can scan the inode.
- */
- ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
- xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update);
- error = xfs_dir_hook_add(mp, &xnc->dhook);
- if (error)
- goto out_teardown;
- /* Use deferred cleanup to pass the inode link count data to repair. */
- sc->buf_cleanup = xchk_nlinks_teardown_scan;
- return 0;
- out_teardown:
- xchk_nlinks_teardown_scan(xnc);
- return error;
- }
- /* Scrub the link count of all inodes on the filesystem. */
- int
- xchk_nlinks(
- struct xfs_scrub *sc)
- {
- struct xchk_nlink_ctrs *xnc = sc->buf;
- int error = 0;
- /* Set ourselves up to check link counts on the live filesystem. */
- error = xchk_nlinks_setup_scan(sc, xnc);
- if (error)
- return error;
- /* Walk all inodes, picking up link count information. */
- error = xchk_nlinks_collect(xnc);
- if (!xchk_xref_process_error(sc, 0, 0, &error))
- return error;
- /* Fail fast if we're not playing with a full dataset. */
- if (xchk_iscan_aborted(&xnc->collect_iscan))
- xchk_set_incomplete(sc);
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
- return 0;
- /* Compare link counts. */
- error = xchk_nlinks_compare(xnc);
- if (!xchk_xref_process_error(sc, 0, 0, &error))
- return error;
- /* Check one last time for an incomplete dataset. */
- if (xchk_iscan_aborted(&xnc->collect_iscan))
- xchk_set_incomplete(sc);
- return 0;
- }
|