nlinks.c 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_log_format.h"
  13. #include "xfs_trans.h"
  14. #include "xfs_inode.h"
  15. #include "xfs_icache.h"
  16. #include "xfs_iwalk.h"
  17. #include "xfs_ialloc.h"
  18. #include "xfs_dir2.h"
  19. #include "xfs_dir2_priv.h"
  20. #include "xfs_ag.h"
  21. #include "xfs_parent.h"
  22. #include "scrub/scrub.h"
  23. #include "scrub/common.h"
  24. #include "scrub/repair.h"
  25. #include "scrub/xfile.h"
  26. #include "scrub/xfarray.h"
  27. #include "scrub/iscan.h"
  28. #include "scrub/orphanage.h"
  29. #include "scrub/nlinks.h"
  30. #include "scrub/trace.h"
  31. #include "scrub/readdir.h"
  32. #include "scrub/tempfile.h"
  33. #include "scrub/listxattr.h"
  34. /*
  35. * Live Inode Link Count Checking
  36. * ==============================
  37. *
  38. * Inode link counts are "summary" metadata, in the sense that they are
  39. * computed as the number of directory entries referencing each file on the
  40. * filesystem. Therefore, we compute the correct link counts by creating a
  41. * shadow link count structure and walking every inode.
  42. */
  43. /* Set us up to scrub inode link counts. */
  44. int
  45. xchk_setup_nlinks(
  46. struct xfs_scrub *sc)
  47. {
  48. struct xchk_nlink_ctrs *xnc;
  49. int error;
  50. xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
  51. if (xchk_could_repair(sc)) {
  52. error = xrep_setup_nlinks(sc);
  53. if (error)
  54. return error;
  55. }
  56. xnc = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
  57. if (!xnc)
  58. return -ENOMEM;
  59. xnc->xname.name = xnc->namebuf;
  60. xnc->sc = sc;
  61. sc->buf = xnc;
  62. return xchk_setup_fs(sc);
  63. }
  64. /*
  65. * Part 1: Collecting file link counts. For each file, we create a shadow link
  66. * counting structure, then walk the entire directory tree, incrementing parent
  67. * and child link counts for each directory entry seen.
  68. *
  69. * To avoid false corruption reports in part 2, any failure in this part must
  70. * set the INCOMPLETE flag even when a negative errno is returned. This care
  71. * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
  72. * ECANCELED) that are absorbed into a scrub state flag update by
  73. * xchk_*_process_error. Scrub and repair share the same incore data
  74. * structures, so the INCOMPLETE flag is critical to prevent a repair based on
  75. * insufficient information.
  76. *
  77. * Because we are scanning a live filesystem, it's possible that another thread
  78. * will try to update the link counts for an inode that we've already scanned.
  79. * This will cause our counts to be incorrect. Therefore, we hook all
  80. * directory entry updates because that is when link count updates occur. By
  81. * shadowing transaction updates in this manner, live nlink check can ensure by
  82. * locking the inode and the shadow structure that its own copies are not out
  83. * of date. Because the hook code runs in a different process context from the
  84. * scrub code and the scrub state flags are not accessed atomically, failures
  85. * in the hook code must abort the iscan and the scrubber must notice the
  86. * aborted scan and set the incomplete flag.
  87. *
  88. * Note that we use jump labels and srcu notifier hooks to minimize the
  89. * overhead when live nlinks is /not/ running. Locking order for nlink
  90. * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
  91. */
  92. /*
  93. * Add a delta to an nlink counter, clamping the value to U32_MAX. Because
  94. * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results
  95. * even if we lose some precision.
  96. */
  97. static inline void
  98. careful_add(
  99. xfs_nlink_t *nlinkp,
  100. int delta)
  101. {
  102. uint64_t new_value = (uint64_t)(*nlinkp) + delta;
  103. BUILD_BUG_ON(XFS_MAXLINK > U32_MAX);
  104. *nlinkp = min_t(uint64_t, new_value, U32_MAX);
  105. }
  106. /* Update incore link count information. Caller must hold the nlinks lock. */
  107. STATIC int
  108. xchk_nlinks_update_incore(
  109. struct xchk_nlink_ctrs *xnc,
  110. xfs_ino_t ino,
  111. int parents_delta,
  112. int backrefs_delta,
  113. int children_delta)
  114. {
  115. struct xchk_nlink nl;
  116. int error;
  117. if (!xnc->nlinks)
  118. return 0;
  119. error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
  120. if (error)
  121. return error;
  122. trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
  123. backrefs_delta, children_delta);
  124. careful_add(&nl.parents, parents_delta);
  125. careful_add(&nl.backrefs, backrefs_delta);
  126. careful_add(&nl.children, children_delta);
  127. nl.flags |= XCHK_NLINK_WRITTEN;
  128. error = xfarray_store(xnc->nlinks, ino, &nl);
  129. if (error == -EFBIG) {
  130. /*
  131. * EFBIG means we tried to store data at too high a byte offset
  132. * in the sparse array. IOWs, we cannot complete the check and
  133. * must notify userspace that the check was incomplete.
  134. */
  135. error = -ECANCELED;
  136. }
  137. return error;
  138. }
  139. /*
  140. * Apply a link count change from the regular filesystem into our shadow link
  141. * count structure based on a directory update in progress.
  142. */
  143. STATIC int
  144. xchk_nlinks_live_update(
  145. struct notifier_block *nb,
  146. unsigned long action,
  147. void *data)
  148. {
  149. struct xfs_dir_update_params *p = data;
  150. struct xchk_nlink_ctrs *xnc;
  151. int error;
  152. xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb);
  153. /*
  154. * Ignore temporary directories being used to stage dir repairs, since
  155. * we don't bump the link counts of the children.
  156. */
  157. if (xrep_is_tempfile(p->dp))
  158. return NOTIFY_DONE;
  159. trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
  160. p->delta, p->name->name, p->name->len);
  161. /*
  162. * If we've already scanned @dp, update the number of parents that link
  163. * to @ip. If @ip is a subdirectory, update the number of child links
  164. * going out of @dp.
  165. */
  166. if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
  167. mutex_lock(&xnc->lock);
  168. error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
  169. 0, 0);
  170. if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
  171. error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
  172. 0, p->delta);
  173. mutex_unlock(&xnc->lock);
  174. if (error)
  175. goto out_abort;
  176. }
  177. /*
  178. * If @ip is a subdirectory and we've already scanned it, update the
  179. * number of backrefs pointing to @dp.
  180. */
  181. if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
  182. xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
  183. mutex_lock(&xnc->lock);
  184. error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
  185. p->delta, 0);
  186. mutex_unlock(&xnc->lock);
  187. if (error)
  188. goto out_abort;
  189. }
  190. return NOTIFY_DONE;
  191. out_abort:
  192. xchk_iscan_abort(&xnc->collect_iscan);
  193. return NOTIFY_DONE;
  194. }
  195. /* Bump the observed link count for the inode referenced by this entry. */
  196. STATIC int
  197. xchk_nlinks_collect_dirent(
  198. struct xfs_scrub *sc,
  199. struct xfs_inode *dp,
  200. xfs_dir2_dataptr_t dapos,
  201. const struct xfs_name *name,
  202. xfs_ino_t ino,
  203. void *priv)
  204. {
  205. struct xchk_nlink_ctrs *xnc = priv;
  206. bool dot = false, dotdot = false;
  207. int error;
  208. /* Does this name make sense? */
  209. if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
  210. error = -ECANCELED;
  211. goto out_abort;
  212. }
  213. if (name->len == 1 && name->name[0] == '.')
  214. dot = true;
  215. else if (name->len == 2 && name->name[0] == '.' &&
  216. name->name[1] == '.')
  217. dotdot = true;
  218. /* Don't accept a '.' entry that points somewhere else. */
  219. if (dot && ino != dp->i_ino) {
  220. error = -ECANCELED;
  221. goto out_abort;
  222. }
  223. /* Don't accept an invalid inode number. */
  224. if (!xfs_verify_dir_ino(sc->mp, ino)) {
  225. error = -ECANCELED;
  226. goto out_abort;
  227. }
  228. /* Update the shadow link counts if we haven't already failed. */
  229. if (xchk_iscan_aborted(&xnc->collect_iscan)) {
  230. error = -ECANCELED;
  231. goto out_incomplete;
  232. }
  233. trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
  234. mutex_lock(&xnc->lock);
  235. /*
  236. * If this is a dotdot entry, it is a back link from dp to ino. How
  237. * we handle this depends on whether or not dp is the root directory.
  238. *
  239. * The root directory is its own parent, so we pretend the dotdot entry
  240. * establishes the "parent" of the root directory. Increment the
  241. * number of parents of the root directory.
  242. *
  243. * Otherwise, increment the number of backrefs pointing back to ino.
  244. *
  245. * If the filesystem has parent pointers, we walk the pptrs to
  246. * determine the backref count.
  247. */
  248. if (dotdot) {
  249. if (dp == sc->mp->m_rootip)
  250. error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
  251. else if (!xfs_has_parent(sc->mp))
  252. error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
  253. else
  254. error = 0;
  255. if (error)
  256. goto out_unlock;
  257. }
  258. /*
  259. * If this dirent is a forward link from dp to ino, increment the
  260. * number of parents linking into ino.
  261. */
  262. if (!dot && !dotdot) {
  263. error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
  264. if (error)
  265. goto out_unlock;
  266. }
  267. /*
  268. * If this dirent is a forward link to a subdirectory, increment the
  269. * number of child links of dp.
  270. */
  271. if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
  272. error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
  273. if (error)
  274. goto out_unlock;
  275. }
  276. mutex_unlock(&xnc->lock);
  277. return 0;
  278. out_unlock:
  279. mutex_unlock(&xnc->lock);
  280. out_abort:
  281. xchk_iscan_abort(&xnc->collect_iscan);
  282. out_incomplete:
  283. xchk_set_incomplete(sc);
  284. return error;
  285. }
  286. /* Bump the backref count for the inode referenced by this parent pointer. */
  287. STATIC int
  288. xchk_nlinks_collect_pptr(
  289. struct xfs_scrub *sc,
  290. struct xfs_inode *ip,
  291. unsigned int attr_flags,
  292. const unsigned char *name,
  293. unsigned int namelen,
  294. const void *value,
  295. unsigned int valuelen,
  296. void *priv)
  297. {
  298. struct xfs_name xname = {
  299. .name = name,
  300. .len = namelen,
  301. };
  302. struct xchk_nlink_ctrs *xnc = priv;
  303. const struct xfs_parent_rec *pptr_rec = value;
  304. xfs_ino_t parent_ino;
  305. int error;
  306. /* Update the shadow link counts if we haven't already failed. */
  307. if (xchk_iscan_aborted(&xnc->collect_iscan)) {
  308. error = -ECANCELED;
  309. goto out_incomplete;
  310. }
  311. if (!(attr_flags & XFS_ATTR_PARENT))
  312. return 0;
  313. error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
  314. valuelen, &parent_ino, NULL);
  315. if (error)
  316. return error;
  317. trace_xchk_nlinks_collect_pptr(sc->mp, ip, &xname, pptr_rec);
  318. mutex_lock(&xnc->lock);
  319. error = xchk_nlinks_update_incore(xnc, parent_ino, 0, 1, 0);
  320. if (error)
  321. goto out_unlock;
  322. mutex_unlock(&xnc->lock);
  323. return 0;
  324. out_unlock:
  325. mutex_unlock(&xnc->lock);
  326. xchk_iscan_abort(&xnc->collect_iscan);
  327. out_incomplete:
  328. xchk_set_incomplete(sc);
  329. return error;
  330. }
  331. static uint
  332. xchk_nlinks_ilock_dir(
  333. struct xfs_inode *ip)
  334. {
  335. uint lock_mode = XFS_ILOCK_SHARED;
  336. /*
  337. * We're going to scan the directory entries, so we must be ready to
  338. * pull the data fork mappings into memory if they aren't already.
  339. */
  340. if (xfs_need_iread_extents(&ip->i_df))
  341. lock_mode = XFS_ILOCK_EXCL;
  342. /*
  343. * We're going to scan the parent pointers, so we must be ready to
  344. * pull the attr fork mappings into memory if they aren't already.
  345. */
  346. if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) &&
  347. xfs_need_iread_extents(&ip->i_af))
  348. lock_mode = XFS_ILOCK_EXCL;
  349. /*
  350. * Take the IOLOCK so that other threads cannot start a directory
  351. * update while we're scanning.
  352. */
  353. lock_mode |= XFS_IOLOCK_SHARED;
  354. xfs_ilock(ip, lock_mode);
  355. return lock_mode;
  356. }
  357. /* Walk a directory to bump the observed link counts of the children. */
  358. STATIC int
  359. xchk_nlinks_collect_dir(
  360. struct xchk_nlink_ctrs *xnc,
  361. struct xfs_inode *dp)
  362. {
  363. struct xfs_scrub *sc = xnc->sc;
  364. unsigned int lock_mode;
  365. int error = 0;
  366. /*
  367. * Ignore temporary directories being used to stage dir repairs, since
  368. * we don't bump the link counts of the children.
  369. */
  370. if (xrep_is_tempfile(dp))
  371. return 0;
  372. /* Prevent anyone from changing this directory while we walk it. */
  373. lock_mode = xchk_nlinks_ilock_dir(dp);
  374. /*
  375. * The dotdot entry of an unlinked directory still points to the last
  376. * parent, but the parent no longer links to this directory. Skip the
  377. * directory to avoid overcounting.
  378. */
  379. if (VFS_I(dp)->i_nlink == 0)
  380. goto out_unlock;
  381. /*
  382. * We cannot count file links if the directory looks as though it has
  383. * been zapped by the inode record repair code.
  384. */
  385. if (xchk_dir_looks_zapped(dp)) {
  386. error = -EBUSY;
  387. goto out_abort;
  388. }
  389. error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
  390. if (error == -ECANCELED) {
  391. error = 0;
  392. goto out_unlock;
  393. }
  394. if (error)
  395. goto out_abort;
  396. /* Walk the parent pointers to get real backref counts. */
  397. if (xfs_has_parent(sc->mp)) {
  398. /*
  399. * If the extended attributes look as though they has been
  400. * zapped by the inode record repair code, we cannot scan for
  401. * parent pointers.
  402. */
  403. if (xchk_pptr_looks_zapped(dp)) {
  404. error = -EBUSY;
  405. goto out_unlock;
  406. }
  407. error = xchk_xattr_walk(sc, dp, xchk_nlinks_collect_pptr, NULL,
  408. xnc);
  409. if (error == -ECANCELED) {
  410. error = 0;
  411. goto out_unlock;
  412. }
  413. if (error)
  414. goto out_abort;
  415. }
  416. xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
  417. goto out_unlock;
  418. out_abort:
  419. xchk_set_incomplete(sc);
  420. xchk_iscan_abort(&xnc->collect_iscan);
  421. out_unlock:
  422. xfs_iunlock(dp, lock_mode);
  423. return error;
  424. }
  425. /* If this looks like a valid pointer, count it. */
  426. static inline int
  427. xchk_nlinks_collect_metafile(
  428. struct xchk_nlink_ctrs *xnc,
  429. xfs_ino_t ino)
  430. {
  431. if (!xfs_verify_ino(xnc->sc->mp, ino))
  432. return 0;
  433. trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
  434. return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
  435. }
  436. /* Bump the link counts of metadata files rooted in the superblock. */
  437. STATIC int
  438. xchk_nlinks_collect_metafiles(
  439. struct xchk_nlink_ctrs *xnc)
  440. {
  441. struct xfs_mount *mp = xnc->sc->mp;
  442. int error = -ECANCELED;
  443. if (xchk_iscan_aborted(&xnc->collect_iscan))
  444. goto out_incomplete;
  445. mutex_lock(&xnc->lock);
  446. error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
  447. if (error)
  448. goto out_abort;
  449. error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
  450. if (error)
  451. goto out_abort;
  452. error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
  453. if (error)
  454. goto out_abort;
  455. error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
  456. if (error)
  457. goto out_abort;
  458. error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
  459. if (error)
  460. goto out_abort;
  461. mutex_unlock(&xnc->lock);
  462. return 0;
  463. out_abort:
  464. mutex_unlock(&xnc->lock);
  465. xchk_iscan_abort(&xnc->collect_iscan);
  466. out_incomplete:
  467. xchk_set_incomplete(xnc->sc);
  468. return error;
  469. }
  470. /* Advance the collection scan cursor for this non-directory file. */
  471. static inline int
  472. xchk_nlinks_collect_file(
  473. struct xchk_nlink_ctrs *xnc,
  474. struct xfs_inode *ip)
  475. {
  476. xfs_ilock(ip, XFS_IOLOCK_SHARED);
  477. xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
  478. xfs_iunlock(ip, XFS_IOLOCK_SHARED);
  479. return 0;
  480. }
  481. /* Walk all directories and count inode links. */
  482. STATIC int
  483. xchk_nlinks_collect(
  484. struct xchk_nlink_ctrs *xnc)
  485. {
  486. struct xfs_scrub *sc = xnc->sc;
  487. struct xfs_inode *ip;
  488. int error;
  489. /* Count the rt and quota files that are rooted in the superblock. */
  490. error = xchk_nlinks_collect_metafiles(xnc);
  491. if (error)
  492. return error;
  493. /*
  494. * Set up for a potentially lengthy filesystem scan by reducing our
  495. * transaction resource usage for the duration. Specifically:
  496. *
  497. * Cancel the transaction to release the log grant space while we scan
  498. * the filesystem.
  499. *
  500. * Create a new empty transaction to eliminate the possibility of the
  501. * inode scan deadlocking on cyclical metadata.
  502. *
  503. * We pass the empty transaction to the file scanning function to avoid
  504. * repeatedly cycling empty transactions. This can be done even though
  505. * we take the IOLOCK to quiesce the file because empty transactions
  506. * do not take sb_internal.
  507. */
  508. xchk_trans_cancel(sc);
  509. error = xchk_trans_alloc_empty(sc);
  510. if (error)
  511. return error;
  512. while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
  513. if (S_ISDIR(VFS_I(ip)->i_mode))
  514. error = xchk_nlinks_collect_dir(xnc, ip);
  515. else
  516. error = xchk_nlinks_collect_file(xnc, ip);
  517. xchk_irele(sc, ip);
  518. if (error)
  519. break;
  520. if (xchk_should_terminate(sc, &error))
  521. break;
  522. }
  523. xchk_iscan_iter_finish(&xnc->collect_iscan);
  524. if (error) {
  525. xchk_set_incomplete(sc);
  526. /*
  527. * If we couldn't grab an inode that was busy with a state
  528. * change, change the error code so that we exit to userspace
  529. * as quickly as possible.
  530. */
  531. if (error == -EBUSY)
  532. return -ECANCELED;
  533. return error;
  534. }
  535. /*
  536. * Switch out for a real transaction in preparation for building a new
  537. * tree.
  538. */
  539. xchk_trans_cancel(sc);
  540. return xchk_setup_fs(sc);
  541. }
  542. /*
  543. * Part 2: Comparing file link counters. Walk each inode and compare the link
  544. * counts against our shadow information; and then walk each shadow link count
  545. * structure (that wasn't covered in the first part), comparing it against the
  546. * file.
  547. */
  548. /* Read the observed link count for comparison with the actual inode. */
  549. STATIC int
  550. xchk_nlinks_comparison_read(
  551. struct xchk_nlink_ctrs *xnc,
  552. xfs_ino_t ino,
  553. struct xchk_nlink *obs)
  554. {
  555. struct xchk_nlink nl;
  556. int error;
  557. error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
  558. if (error)
  559. return error;
  560. nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
  561. error = xfarray_store(xnc->nlinks, ino, &nl);
  562. if (error == -EFBIG) {
  563. /*
  564. * EFBIG means we tried to store data at too high a byte offset
  565. * in the sparse array. IOWs, we cannot complete the check and
  566. * must notify userspace that the check was incomplete. This
  567. * shouldn't really happen outside of the collection phase.
  568. */
  569. xchk_set_incomplete(xnc->sc);
  570. return -ECANCELED;
  571. }
  572. if (error)
  573. return error;
  574. /* Copy the counters, but do not expose the internal state. */
  575. obs->parents = nl.parents;
  576. obs->backrefs = nl.backrefs;
  577. obs->children = nl.children;
  578. obs->flags = 0;
  579. return 0;
  580. }
  581. /* Check our link count against an inode. */
  582. STATIC int
  583. xchk_nlinks_compare_inode(
  584. struct xchk_nlink_ctrs *xnc,
  585. struct xfs_inode *ip)
  586. {
  587. struct xchk_nlink obs;
  588. struct xfs_scrub *sc = xnc->sc;
  589. uint64_t total_links;
  590. unsigned int actual_nlink;
  591. int error;
  592. /*
  593. * Ignore temporary files being used to stage repairs, since we assume
  594. * they're correct for non-directories, and the directory repair code
  595. * doesn't bump the link counts for the children.
  596. */
  597. if (xrep_is_tempfile(ip))
  598. return 0;
  599. xfs_ilock(ip, XFS_ILOCK_SHARED);
  600. mutex_lock(&xnc->lock);
  601. if (xchk_iscan_aborted(&xnc->collect_iscan)) {
  602. xchk_set_incomplete(xnc->sc);
  603. error = -ECANCELED;
  604. goto out_scanlock;
  605. }
  606. error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
  607. if (error)
  608. goto out_scanlock;
  609. /*
  610. * If we don't have ftype to get an accurate count of the subdirectory
  611. * entries in this directory, take advantage of the fact that on a
  612. * consistent ftype=0 filesystem, the number of subdirectory
  613. * backreferences (dotdot entries) pointing towards this directory
  614. * should be equal to the number of subdirectory entries in the
  615. * directory.
  616. */
  617. if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
  618. obs.children = obs.backrefs;
  619. total_links = xchk_nlink_total(ip, &obs);
  620. actual_nlink = VFS_I(ip)->i_nlink;
  621. trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);
  622. /*
  623. * If we found so many parents that we'd overflow i_nlink, we must flag
  624. * this as a corruption. The VFS won't let users increase the link
  625. * count, but it will let them decrease it.
  626. */
  627. if (total_links > XFS_NLINK_PINNED) {
  628. xchk_ino_set_corrupt(sc, ip->i_ino);
  629. goto out_corrupt;
  630. } else if (total_links > XFS_MAXLINK) {
  631. xchk_ino_set_warning(sc, ip->i_ino);
  632. }
  633. /* Link counts should match. */
  634. if (total_links != actual_nlink) {
  635. xchk_ino_set_corrupt(sc, ip->i_ino);
  636. goto out_corrupt;
  637. }
  638. if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
  639. /*
  640. * The collection phase ignores directories with zero link
  641. * count, so we ignore them here too.
  642. *
  643. * The number of subdirectory backreferences (dotdot entries)
  644. * pointing towards this directory should be equal to the
  645. * number of subdirectory entries in the directory.
  646. */
  647. if (obs.children != obs.backrefs)
  648. xchk_ino_xref_set_corrupt(sc, ip->i_ino);
  649. } else {
  650. /*
  651. * Non-directories and unlinked directories should not have
  652. * back references.
  653. */
  654. if (obs.backrefs != 0) {
  655. xchk_ino_set_corrupt(sc, ip->i_ino);
  656. goto out_corrupt;
  657. }
  658. /*
  659. * Non-directories and unlinked directories should not have
  660. * children.
  661. */
  662. if (obs.children != 0) {
  663. xchk_ino_set_corrupt(sc, ip->i_ino);
  664. goto out_corrupt;
  665. }
  666. }
  667. if (ip == sc->mp->m_rootip) {
  668. /*
  669. * For the root of a directory tree, both the '.' and '..'
  670. * entries should point to the root directory. The dotdot
  671. * entry is counted as a parent of the root /and/ a backref of
  672. * the root directory.
  673. */
  674. if (obs.parents != 1) {
  675. xchk_ino_set_corrupt(sc, ip->i_ino);
  676. goto out_corrupt;
  677. }
  678. } else if (actual_nlink > 0) {
  679. /*
  680. * Linked files that are not the root directory should have at
  681. * least one parent.
  682. */
  683. if (obs.parents == 0) {
  684. xchk_ino_set_corrupt(sc, ip->i_ino);
  685. goto out_corrupt;
  686. }
  687. }
  688. out_corrupt:
  689. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
  690. error = -ECANCELED;
  691. out_scanlock:
  692. mutex_unlock(&xnc->lock);
  693. xfs_iunlock(ip, XFS_ILOCK_SHARED);
  694. return error;
  695. }
  696. /*
  697. * Check our link count against an inode that wasn't checked previously. This
  698. * is intended to catch directories with dangling links, though we could be
  699. * racing with inode allocation in other threads.
  700. */
  701. STATIC int
  702. xchk_nlinks_compare_inum(
  703. struct xchk_nlink_ctrs *xnc,
  704. xfs_ino_t ino)
  705. {
  706. struct xchk_nlink obs;
  707. struct xfs_mount *mp = xnc->sc->mp;
  708. struct xfs_trans *tp = xnc->sc->tp;
  709. struct xfs_buf *agi_bp;
  710. struct xfs_inode *ip;
  711. int error;
  712. /*
  713. * The first iget failed, so try again with the variant that returns
  714. * either an incore inode or the AGI buffer. If the function returns
  715. * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
  716. * can guarantee that the inode won't be allocated while we check for
  717. * a zero link count in the observed link count data.
  718. */
  719. error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
  720. if (!error) {
  721. /* Actually got an inode, so use the inode compare. */
  722. error = xchk_nlinks_compare_inode(xnc, ip);
  723. xchk_irele(xnc->sc, ip);
  724. return error;
  725. }
  726. if (error == -ENOENT || error == -EINVAL) {
  727. /* No inode was found. Check for zero link count below. */
  728. error = 0;
  729. }
  730. if (error)
  731. goto out_agi;
  732. /* Ensure that we have protected against inode allocation/freeing. */
  733. if (agi_bp == NULL) {
  734. ASSERT(agi_bp != NULL);
  735. xchk_set_incomplete(xnc->sc);
  736. return -ECANCELED;
  737. }
  738. if (xchk_iscan_aborted(&xnc->collect_iscan)) {
  739. xchk_set_incomplete(xnc->sc);
  740. error = -ECANCELED;
  741. goto out_agi;
  742. }
  743. mutex_lock(&xnc->lock);
  744. error = xchk_nlinks_comparison_read(xnc, ino, &obs);
  745. if (error)
  746. goto out_scanlock;
  747. trace_xchk_nlinks_check_zero(mp, ino, &obs);
  748. /*
  749. * If we can't grab the inode, the link count had better be zero. We
  750. * still hold the AGI to prevent inode allocation/freeing.
  751. */
  752. if (xchk_nlink_total(NULL, &obs) != 0) {
  753. xchk_ino_set_corrupt(xnc->sc, ino);
  754. error = -ECANCELED;
  755. }
  756. out_scanlock:
  757. mutex_unlock(&xnc->lock);
  758. out_agi:
  759. if (agi_bp)
  760. xfs_trans_brelse(tp, agi_bp);
  761. return error;
  762. }
  763. /*
  764. * Try to visit every inode in the filesystem to compare the link count. Move
  765. * on if we can't grab an inode, since we'll revisit unchecked nlink records in
  766. * the second part.
  767. */
  768. static int
  769. xchk_nlinks_compare_iter(
  770. struct xchk_nlink_ctrs *xnc,
  771. struct xfs_inode **ipp)
  772. {
  773. int error;
  774. do {
  775. error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
  776. } while (error == -EBUSY);
  777. return error;
  778. }
  779. /* Compare the link counts we observed against the live information. */
  780. STATIC int
  781. xchk_nlinks_compare(
  782. struct xchk_nlink_ctrs *xnc)
  783. {
  784. struct xchk_nlink nl;
  785. struct xfs_scrub *sc = xnc->sc;
  786. struct xfs_inode *ip;
  787. xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
  788. int error;
  789. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
  790. return 0;
  791. /*
  792. * Create a new empty transaction so that we can advance the iscan
  793. * cursor without deadlocking if the inobt has a cycle and push on the
  794. * inactivation workqueue.
  795. */
  796. xchk_trans_cancel(sc);
  797. error = xchk_trans_alloc_empty(sc);
  798. if (error)
  799. return error;
  800. /*
  801. * Use the inobt to walk all allocated inodes to compare the link
  802. * counts. Inodes skipped by _compare_iter will be tried again in the
  803. * next phase of the scan.
  804. */
  805. xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
  806. while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
  807. error = xchk_nlinks_compare_inode(xnc, ip);
  808. xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
  809. xchk_irele(sc, ip);
  810. if (error)
  811. break;
  812. if (xchk_should_terminate(sc, &error))
  813. break;
  814. }
  815. xchk_iscan_iter_finish(&xnc->compare_iscan);
  816. xchk_iscan_teardown(&xnc->compare_iscan);
  817. if (error)
  818. return error;
  819. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
  820. return 0;
  821. /*
  822. * Walk all the non-null nlink observations that weren't checked in the
  823. * previous step.
  824. */
  825. mutex_lock(&xnc->lock);
  826. while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
  827. xfs_ino_t ino = cur - 1;
  828. if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
  829. continue;
  830. mutex_unlock(&xnc->lock);
  831. error = xchk_nlinks_compare_inum(xnc, ino);
  832. if (error)
  833. return error;
  834. if (xchk_should_terminate(xnc->sc, &error))
  835. return error;
  836. mutex_lock(&xnc->lock);
  837. }
  838. mutex_unlock(&xnc->lock);
  839. return error;
  840. }
  841. /* Tear down everything associated with a nlinks check. */
  842. static void
  843. xchk_nlinks_teardown_scan(
  844. void *priv)
  845. {
  846. struct xchk_nlink_ctrs *xnc = priv;
  847. /* Discourage any hook functions that might be running. */
  848. xchk_iscan_abort(&xnc->collect_iscan);
  849. xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook);
  850. xfarray_destroy(xnc->nlinks);
  851. xnc->nlinks = NULL;
  852. xchk_iscan_teardown(&xnc->collect_iscan);
  853. mutex_destroy(&xnc->lock);
  854. xnc->sc = NULL;
  855. }
  856. /*
  857. * Scan all inodes in the entire filesystem to generate link count data. If
  858. * the scan is successful, the counts will be left alive for a repair. If any
  859. * error occurs, we'll tear everything down.
  860. */
  861. STATIC int
  862. xchk_nlinks_setup_scan(
  863. struct xfs_scrub *sc,
  864. struct xchk_nlink_ctrs *xnc)
  865. {
  866. struct xfs_mount *mp = sc->mp;
  867. char *descr;
  868. unsigned long long max_inos;
  869. xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1;
  870. xfs_agino_t first_agino, last_agino;
  871. int error;
  872. mutex_init(&xnc->lock);
  873. /* Retry iget every tenth of a second for up to 30 seconds. */
  874. xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
  875. /*
  876. * Set up enough space to store an nlink record for the highest
  877. * possible inode number in this system.
  878. */
  879. xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
  880. max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
  881. descr = xchk_xfile_descr(sc, "file link counts");
  882. error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
  883. sizeof(struct xchk_nlink), &xnc->nlinks);
  884. kfree(descr);
  885. if (error)
  886. goto out_teardown;
  887. /*
  888. * Hook into the directory entry code so that we can capture updates to
  889. * file link counts. The hook only triggers for inodes that were
  890. * already scanned, and the scanner thread takes each inode's ILOCK,
  891. * which means that any in-progress inode updates will finish before we
  892. * can scan the inode.
  893. */
  894. ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
  895. xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update);
  896. error = xfs_dir_hook_add(mp, &xnc->dhook);
  897. if (error)
  898. goto out_teardown;
  899. /* Use deferred cleanup to pass the inode link count data to repair. */
  900. sc->buf_cleanup = xchk_nlinks_teardown_scan;
  901. return 0;
  902. out_teardown:
  903. xchk_nlinks_teardown_scan(xnc);
  904. return error;
  905. }
  906. /* Scrub the link count of all inodes on the filesystem. */
  907. int
  908. xchk_nlinks(
  909. struct xfs_scrub *sc)
  910. {
  911. struct xchk_nlink_ctrs *xnc = sc->buf;
  912. int error = 0;
  913. /* Set ourselves up to check link counts on the live filesystem. */
  914. error = xchk_nlinks_setup_scan(sc, xnc);
  915. if (error)
  916. return error;
  917. /* Walk all inodes, picking up link count information. */
  918. error = xchk_nlinks_collect(xnc);
  919. if (!xchk_xref_process_error(sc, 0, 0, &error))
  920. return error;
  921. /* Fail fast if we're not playing with a full dataset. */
  922. if (xchk_iscan_aborted(&xnc->collect_iscan))
  923. xchk_set_incomplete(sc);
  924. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
  925. return 0;
  926. /* Compare link counts. */
  927. error = xchk_nlinks_compare(xnc);
  928. if (!xchk_xref_process_error(sc, 0, 0, &error))
  929. return error;
  930. /* Check one last time for an incomplete dataset. */
  931. if (xchk_iscan_aborted(&xnc->collect_iscan))
  932. xchk_set_incomplete(sc);
  933. return 0;
  934. }