common.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * Copyright (C) 2017 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_defer.h"
  13. #include "xfs_btree.h"
  14. #include "xfs_bit.h"
  15. #include "xfs_log_format.h"
  16. #include "xfs_trans.h"
  17. #include "xfs_sb.h"
  18. #include "xfs_inode.h"
  19. #include "xfs_icache.h"
  20. #include "xfs_itable.h"
  21. #include "xfs_alloc.h"
  22. #include "xfs_alloc_btree.h"
  23. #include "xfs_bmap.h"
  24. #include "xfs_bmap_btree.h"
  25. #include "xfs_ialloc.h"
  26. #include "xfs_ialloc_btree.h"
  27. #include "xfs_refcount.h"
  28. #include "xfs_refcount_btree.h"
  29. #include "xfs_rmap.h"
  30. #include "xfs_rmap_btree.h"
  31. #include "xfs_log.h"
  32. #include "xfs_trans_priv.h"
  33. #include "xfs_attr.h"
  34. #include "xfs_reflink.h"
  35. #include "scrub/xfs_scrub.h"
  36. #include "scrub/scrub.h"
  37. #include "scrub/common.h"
  38. #include "scrub/trace.h"
  39. #include "scrub/btree.h"
  40. #include "scrub/repair.h"
  41. /* Common code for the metadata scrubbers. */
  42. /*
  43. * Handling operational errors.
  44. *
  45. * The *_process_error() family of functions are used to process error return
  46. * codes from functions called as part of a scrub operation.
  47. *
  48. * If there's no error, we return true to tell the caller that it's ok
  49. * to move on to the next check in its list.
  50. *
  51. * For non-verifier errors (e.g. ENOMEM) we return false to tell the
  52. * caller that something bad happened, and we preserve *error so that
  53. * the caller can return the *error up the stack to userspace.
  54. *
  55. * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
  56. * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words,
  57. * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
  58. * not via return codes. We return false to tell the caller that
  59. * something bad happened. Since the error has been cleared, the caller
  60. * will (presumably) return that zero and scrubbing will move on to
  61. * whatever's next.
  62. *
  63. * ftrace can be used to record the precise metadata location and the
  64. * approximate code location of the failed operation.
  65. */
  66. /* Check for operational errors. */
  67. static bool
  68. __xchk_process_error(
  69. struct xfs_scrub *sc,
  70. xfs_agnumber_t agno,
  71. xfs_agblock_t bno,
  72. int *error,
  73. __u32 errflag,
  74. void *ret_ip)
  75. {
  76. switch (*error) {
  77. case 0:
  78. return true;
  79. case -EDEADLOCK:
  80. /* Used to restart an op with deadlock avoidance. */
  81. trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
  82. break;
  83. case -EFSBADCRC:
  84. case -EFSCORRUPTED:
  85. /* Note the badness but don't abort. */
  86. sc->sm->sm_flags |= errflag;
  87. *error = 0;
  88. /* fall through */
  89. default:
  90. trace_xchk_op_error(sc, agno, bno, *error,
  91. ret_ip);
  92. break;
  93. }
  94. return false;
  95. }
  96. bool
  97. xchk_process_error(
  98. struct xfs_scrub *sc,
  99. xfs_agnumber_t agno,
  100. xfs_agblock_t bno,
  101. int *error)
  102. {
  103. return __xchk_process_error(sc, agno, bno, error,
  104. XFS_SCRUB_OFLAG_CORRUPT, __return_address);
  105. }
  106. bool
  107. xchk_xref_process_error(
  108. struct xfs_scrub *sc,
  109. xfs_agnumber_t agno,
  110. xfs_agblock_t bno,
  111. int *error)
  112. {
  113. return __xchk_process_error(sc, agno, bno, error,
  114. XFS_SCRUB_OFLAG_XFAIL, __return_address);
  115. }
  116. /* Check for operational errors for a file offset. */
  117. static bool
  118. __xchk_fblock_process_error(
  119. struct xfs_scrub *sc,
  120. int whichfork,
  121. xfs_fileoff_t offset,
  122. int *error,
  123. __u32 errflag,
  124. void *ret_ip)
  125. {
  126. switch (*error) {
  127. case 0:
  128. return true;
  129. case -EDEADLOCK:
  130. /* Used to restart an op with deadlock avoidance. */
  131. trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
  132. break;
  133. case -EFSBADCRC:
  134. case -EFSCORRUPTED:
  135. /* Note the badness but don't abort. */
  136. sc->sm->sm_flags |= errflag;
  137. *error = 0;
  138. /* fall through */
  139. default:
  140. trace_xchk_file_op_error(sc, whichfork, offset, *error,
  141. ret_ip);
  142. break;
  143. }
  144. return false;
  145. }
  146. bool
  147. xchk_fblock_process_error(
  148. struct xfs_scrub *sc,
  149. int whichfork,
  150. xfs_fileoff_t offset,
  151. int *error)
  152. {
  153. return __xchk_fblock_process_error(sc, whichfork, offset, error,
  154. XFS_SCRUB_OFLAG_CORRUPT, __return_address);
  155. }
  156. bool
  157. xchk_fblock_xref_process_error(
  158. struct xfs_scrub *sc,
  159. int whichfork,
  160. xfs_fileoff_t offset,
  161. int *error)
  162. {
  163. return __xchk_fblock_process_error(sc, whichfork, offset, error,
  164. XFS_SCRUB_OFLAG_XFAIL, __return_address);
  165. }
  166. /*
  167. * Handling scrub corruption/optimization/warning checks.
  168. *
  169. * The *_set_{corrupt,preen,warning}() family of functions are used to
  170. * record the presence of metadata that is incorrect (corrupt), could be
  171. * optimized somehow (preen), or should be flagged for administrative
  172. * review but is not incorrect (warn).
  173. *
  174. * ftrace can be used to record the precise metadata location and
  175. * approximate code location of the failed check.
  176. */
  177. /* Record a block which could be optimized. */
  178. void
  179. xchk_block_set_preen(
  180. struct xfs_scrub *sc,
  181. struct xfs_buf *bp)
  182. {
  183. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
  184. trace_xchk_block_preen(sc, bp->b_bn, __return_address);
  185. }
  186. /*
  187. * Record an inode which could be optimized. The trace data will
  188. * include the block given by bp if bp is given; otherwise it will use
  189. * the block location of the inode record itself.
  190. */
  191. void
  192. xchk_ino_set_preen(
  193. struct xfs_scrub *sc,
  194. xfs_ino_t ino)
  195. {
  196. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
  197. trace_xchk_ino_preen(sc, ino, __return_address);
  198. }
  199. /* Record a corrupt block. */
  200. void
  201. xchk_block_set_corrupt(
  202. struct xfs_scrub *sc,
  203. struct xfs_buf *bp)
  204. {
  205. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  206. trace_xchk_block_error(sc, bp->b_bn, __return_address);
  207. }
  208. /* Record a corruption while cross-referencing. */
  209. void
  210. xchk_block_xref_set_corrupt(
  211. struct xfs_scrub *sc,
  212. struct xfs_buf *bp)
  213. {
  214. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
  215. trace_xchk_block_error(sc, bp->b_bn, __return_address);
  216. }
  217. /*
  218. * Record a corrupt inode. The trace data will include the block given
  219. * by bp if bp is given; otherwise it will use the block location of the
  220. * inode record itself.
  221. */
  222. void
  223. xchk_ino_set_corrupt(
  224. struct xfs_scrub *sc,
  225. xfs_ino_t ino)
  226. {
  227. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  228. trace_xchk_ino_error(sc, ino, __return_address);
  229. }
  230. /* Record a corruption while cross-referencing with an inode. */
  231. void
  232. xchk_ino_xref_set_corrupt(
  233. struct xfs_scrub *sc,
  234. xfs_ino_t ino)
  235. {
  236. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
  237. trace_xchk_ino_error(sc, ino, __return_address);
  238. }
  239. /* Record corruption in a block indexed by a file fork. */
  240. void
  241. xchk_fblock_set_corrupt(
  242. struct xfs_scrub *sc,
  243. int whichfork,
  244. xfs_fileoff_t offset)
  245. {
  246. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  247. trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
  248. }
  249. /* Record a corruption while cross-referencing a fork block. */
  250. void
  251. xchk_fblock_xref_set_corrupt(
  252. struct xfs_scrub *sc,
  253. int whichfork,
  254. xfs_fileoff_t offset)
  255. {
  256. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
  257. trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
  258. }
  259. /*
  260. * Warn about inodes that need administrative review but is not
  261. * incorrect.
  262. */
  263. void
  264. xchk_ino_set_warning(
  265. struct xfs_scrub *sc,
  266. xfs_ino_t ino)
  267. {
  268. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
  269. trace_xchk_ino_warning(sc, ino, __return_address);
  270. }
  271. /* Warn about a block indexed by a file fork that needs review. */
  272. void
  273. xchk_fblock_set_warning(
  274. struct xfs_scrub *sc,
  275. int whichfork,
  276. xfs_fileoff_t offset)
  277. {
  278. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
  279. trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
  280. }
  281. /* Signal an incomplete scrub. */
  282. void
  283. xchk_set_incomplete(
  284. struct xfs_scrub *sc)
  285. {
  286. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
  287. trace_xchk_incomplete(sc, __return_address);
  288. }
  289. /*
  290. * rmap scrubbing -- compute the number of blocks with a given owner,
  291. * at least according to the reverse mapping data.
  292. */
  293. struct xchk_rmap_ownedby_info {
  294. struct xfs_owner_info *oinfo;
  295. xfs_filblks_t *blocks;
  296. };
  297. STATIC int
  298. xchk_count_rmap_ownedby_irec(
  299. struct xfs_btree_cur *cur,
  300. struct xfs_rmap_irec *rec,
  301. void *priv)
  302. {
  303. struct xchk_rmap_ownedby_info *sroi = priv;
  304. bool irec_attr;
  305. bool oinfo_attr;
  306. irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
  307. oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
  308. if (rec->rm_owner != sroi->oinfo->oi_owner)
  309. return 0;
  310. if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
  311. (*sroi->blocks) += rec->rm_blockcount;
  312. return 0;
  313. }
  314. /*
  315. * Calculate the number of blocks the rmap thinks are owned by something.
  316. * The caller should pass us an rmapbt cursor.
  317. */
  318. int
  319. xchk_count_rmap_ownedby_ag(
  320. struct xfs_scrub *sc,
  321. struct xfs_btree_cur *cur,
  322. struct xfs_owner_info *oinfo,
  323. xfs_filblks_t *blocks)
  324. {
  325. struct xchk_rmap_ownedby_info sroi;
  326. sroi.oinfo = oinfo;
  327. *blocks = 0;
  328. sroi.blocks = blocks;
  329. return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
  330. &sroi);
  331. }
  332. /*
  333. * AG scrubbing
  334. *
  335. * These helpers facilitate locking an allocation group's header
  336. * buffers, setting up cursors for all btrees that are present, and
  337. * cleaning everything up once we're through.
  338. */
  339. /* Decide if we want to return an AG header read failure. */
  340. static inline bool
  341. want_ag_read_header_failure(
  342. struct xfs_scrub *sc,
  343. unsigned int type)
  344. {
  345. /* Return all AG header read failures when scanning btrees. */
  346. if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
  347. sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
  348. sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
  349. return true;
  350. /*
  351. * If we're scanning a given type of AG header, we only want to
  352. * see read failures from that specific header. We'd like the
  353. * other headers to cross-check them, but this isn't required.
  354. */
  355. if (sc->sm->sm_type == type)
  356. return true;
  357. return false;
  358. }
  359. /*
  360. * Grab all the headers for an AG.
  361. *
  362. * The headers should be released by xchk_ag_free, but as a fail
  363. * safe we attach all the buffers we grab to the scrub transaction so
  364. * they'll all be freed when we cancel it.
  365. */
  366. int
  367. xchk_ag_read_headers(
  368. struct xfs_scrub *sc,
  369. xfs_agnumber_t agno,
  370. struct xfs_buf **agi,
  371. struct xfs_buf **agf,
  372. struct xfs_buf **agfl)
  373. {
  374. struct xfs_mount *mp = sc->mp;
  375. int error;
  376. error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
  377. if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
  378. goto out;
  379. error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
  380. if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
  381. goto out;
  382. error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
  383. if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
  384. goto out;
  385. error = 0;
  386. out:
  387. return error;
  388. }
  389. /* Release all the AG btree cursors. */
  390. void
  391. xchk_ag_btcur_free(
  392. struct xchk_ag *sa)
  393. {
  394. if (sa->refc_cur)
  395. xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
  396. if (sa->rmap_cur)
  397. xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
  398. if (sa->fino_cur)
  399. xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
  400. if (sa->ino_cur)
  401. xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
  402. if (sa->cnt_cur)
  403. xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
  404. if (sa->bno_cur)
  405. xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
  406. sa->refc_cur = NULL;
  407. sa->rmap_cur = NULL;
  408. sa->fino_cur = NULL;
  409. sa->ino_cur = NULL;
  410. sa->bno_cur = NULL;
  411. sa->cnt_cur = NULL;
  412. }
  413. /* Initialize all the btree cursors for an AG. */
  414. int
  415. xchk_ag_btcur_init(
  416. struct xfs_scrub *sc,
  417. struct xchk_ag *sa)
  418. {
  419. struct xfs_mount *mp = sc->mp;
  420. xfs_agnumber_t agno = sa->agno;
  421. if (sa->agf_bp) {
  422. /* Set up a bnobt cursor for cross-referencing. */
  423. sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
  424. agno, XFS_BTNUM_BNO);
  425. if (!sa->bno_cur)
  426. goto err;
  427. /* Set up a cntbt cursor for cross-referencing. */
  428. sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
  429. agno, XFS_BTNUM_CNT);
  430. if (!sa->cnt_cur)
  431. goto err;
  432. }
  433. /* Set up a inobt cursor for cross-referencing. */
  434. if (sa->agi_bp) {
  435. sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
  436. agno, XFS_BTNUM_INO);
  437. if (!sa->ino_cur)
  438. goto err;
  439. }
  440. /* Set up a finobt cursor for cross-referencing. */
  441. if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
  442. sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
  443. agno, XFS_BTNUM_FINO);
  444. if (!sa->fino_cur)
  445. goto err;
  446. }
  447. /* Set up a rmapbt cursor for cross-referencing. */
  448. if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
  449. sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
  450. agno);
  451. if (!sa->rmap_cur)
  452. goto err;
  453. }
  454. /* Set up a refcountbt cursor for cross-referencing. */
  455. if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
  456. sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
  457. sa->agf_bp, agno);
  458. if (!sa->refc_cur)
  459. goto err;
  460. }
  461. return 0;
  462. err:
  463. return -ENOMEM;
  464. }
  465. /* Release the AG header context and btree cursors. */
  466. void
  467. xchk_ag_free(
  468. struct xfs_scrub *sc,
  469. struct xchk_ag *sa)
  470. {
  471. xchk_ag_btcur_free(sa);
  472. if (sa->agfl_bp) {
  473. xfs_trans_brelse(sc->tp, sa->agfl_bp);
  474. sa->agfl_bp = NULL;
  475. }
  476. if (sa->agf_bp) {
  477. xfs_trans_brelse(sc->tp, sa->agf_bp);
  478. sa->agf_bp = NULL;
  479. }
  480. if (sa->agi_bp) {
  481. xfs_trans_brelse(sc->tp, sa->agi_bp);
  482. sa->agi_bp = NULL;
  483. }
  484. if (sa->pag) {
  485. xfs_perag_put(sa->pag);
  486. sa->pag = NULL;
  487. }
  488. sa->agno = NULLAGNUMBER;
  489. }
  490. /*
  491. * For scrub, grab the AGI and the AGF headers, in that order. Locking
  492. * order requires us to get the AGI before the AGF. We use the
  493. * transaction to avoid deadlocking on crosslinked metadata buffers;
  494. * either the caller passes one in (bmap scrub) or we have to create a
  495. * transaction ourselves.
  496. */
  497. int
  498. xchk_ag_init(
  499. struct xfs_scrub *sc,
  500. xfs_agnumber_t agno,
  501. struct xchk_ag *sa)
  502. {
  503. int error;
  504. sa->agno = agno;
  505. error = xchk_ag_read_headers(sc, agno, &sa->agi_bp,
  506. &sa->agf_bp, &sa->agfl_bp);
  507. if (error)
  508. return error;
  509. return xchk_ag_btcur_init(sc, sa);
  510. }
  511. /*
  512. * Grab the per-ag structure if we haven't already gotten it. Teardown of the
  513. * xchk_ag will release it for us.
  514. */
  515. void
  516. xchk_perag_get(
  517. struct xfs_mount *mp,
  518. struct xchk_ag *sa)
  519. {
  520. if (!sa->pag)
  521. sa->pag = xfs_perag_get(mp, sa->agno);
  522. }
  523. /* Per-scrubber setup functions */
  524. /*
  525. * Grab an empty transaction so that we can re-grab locked buffers if
  526. * one of our btrees turns out to be cyclic.
  527. *
  528. * If we're going to repair something, we need to ask for the largest possible
  529. * log reservation so that we can handle the worst case scenario for metadata
  530. * updates while rebuilding a metadata item. We also need to reserve as many
  531. * blocks in the head transaction as we think we're going to need to rebuild
  532. * the metadata object.
  533. */
  534. int
  535. xchk_trans_alloc(
  536. struct xfs_scrub *sc,
  537. uint resblks)
  538. {
  539. if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
  540. return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
  541. resblks, 0, 0, &sc->tp);
  542. return xfs_trans_alloc_empty(sc->mp, &sc->tp);
  543. }
  544. /* Set us up with a transaction and an empty context. */
  545. int
  546. xchk_setup_fs(
  547. struct xfs_scrub *sc,
  548. struct xfs_inode *ip)
  549. {
  550. uint resblks;
  551. resblks = xrep_calc_ag_resblks(sc);
  552. return xchk_trans_alloc(sc, resblks);
  553. }
  554. /* Set us up with AG headers and btree cursors. */
  555. int
  556. xchk_setup_ag_btree(
  557. struct xfs_scrub *sc,
  558. struct xfs_inode *ip,
  559. bool force_log)
  560. {
  561. struct xfs_mount *mp = sc->mp;
  562. int error;
  563. /*
  564. * If the caller asks us to checkpont the log, do so. This
  565. * expensive operation should be performed infrequently and only
  566. * as a last resort. Any caller that sets force_log should
  567. * document why they need to do so.
  568. */
  569. if (force_log) {
  570. error = xchk_checkpoint_log(mp);
  571. if (error)
  572. return error;
  573. }
  574. error = xchk_setup_fs(sc, ip);
  575. if (error)
  576. return error;
  577. return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
  578. }
  579. /* Push everything out of the log onto disk. */
  580. int
  581. xchk_checkpoint_log(
  582. struct xfs_mount *mp)
  583. {
  584. int error;
  585. error = xfs_log_force(mp, XFS_LOG_SYNC);
  586. if (error)
  587. return error;
  588. xfs_ail_push_all_sync(mp->m_ail);
  589. return 0;
  590. }
  591. /*
  592. * Given an inode and the scrub control structure, grab either the
  593. * inode referenced in the control structure or the inode passed in.
  594. * The inode is not locked.
  595. */
  596. int
  597. xchk_get_inode(
  598. struct xfs_scrub *sc,
  599. struct xfs_inode *ip_in)
  600. {
  601. struct xfs_imap imap;
  602. struct xfs_mount *mp = sc->mp;
  603. struct xfs_inode *ip = NULL;
  604. int error;
  605. /* We want to scan the inode we already had opened. */
  606. if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
  607. sc->ip = ip_in;
  608. return 0;
  609. }
  610. /* Look up the inode, see if the generation number matches. */
  611. if (xfs_internal_inum(mp, sc->sm->sm_ino))
  612. return -ENOENT;
  613. error = xfs_iget(mp, NULL, sc->sm->sm_ino,
  614. XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
  615. switch (error) {
  616. case -ENOENT:
  617. /* Inode doesn't exist, just bail out. */
  618. return error;
  619. case 0:
  620. /* Got an inode, continue. */
  621. break;
  622. case -EINVAL:
  623. /*
  624. * -EINVAL with IGET_UNTRUSTED could mean one of several
  625. * things: userspace gave us an inode number that doesn't
  626. * correspond to fs space, or doesn't have an inobt entry;
  627. * or it could simply mean that the inode buffer failed the
  628. * read verifiers.
  629. *
  630. * Try just the inode mapping lookup -- if it succeeds, then
  631. * the inode buffer verifier failed and something needs fixing.
  632. * Otherwise, we really couldn't find it so tell userspace
  633. * that it no longer exists.
  634. */
  635. error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
  636. XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
  637. if (error)
  638. return -ENOENT;
  639. error = -EFSCORRUPTED;
  640. /* fall through */
  641. default:
  642. trace_xchk_op_error(sc,
  643. XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
  644. XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
  645. error, __return_address);
  646. return error;
  647. }
  648. if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
  649. xfs_irele(ip);
  650. return -ENOENT;
  651. }
  652. sc->ip = ip;
  653. return 0;
  654. }
  655. /* Set us up to scrub a file's contents. */
  656. int
  657. xchk_setup_inode_contents(
  658. struct xfs_scrub *sc,
  659. struct xfs_inode *ip,
  660. unsigned int resblks)
  661. {
  662. int error;
  663. error = xchk_get_inode(sc, ip);
  664. if (error)
  665. return error;
  666. /* Got the inode, lock it and we're ready to go. */
  667. sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
  668. xfs_ilock(sc->ip, sc->ilock_flags);
  669. error = xchk_trans_alloc(sc, resblks);
  670. if (error)
  671. goto out;
  672. sc->ilock_flags |= XFS_ILOCK_EXCL;
  673. xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
  674. out:
  675. /* scrub teardown will unlock and release the inode for us */
  676. return error;
  677. }
  678. /*
  679. * Predicate that decides if we need to evaluate the cross-reference check.
  680. * If there was an error accessing the cross-reference btree, just delete
  681. * the cursor and skip the check.
  682. */
  683. bool
  684. xchk_should_check_xref(
  685. struct xfs_scrub *sc,
  686. int *error,
  687. struct xfs_btree_cur **curpp)
  688. {
  689. /* No point in xref if we already know we're corrupt. */
  690. if (xchk_skip_xref(sc->sm))
  691. return false;
  692. if (*error == 0)
  693. return true;
  694. if (curpp) {
  695. /* If we've already given up on xref, just bail out. */
  696. if (!*curpp)
  697. return false;
  698. /* xref error, delete cursor and bail out. */
  699. xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
  700. *curpp = NULL;
  701. }
  702. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
  703. trace_xchk_xref_error(sc, *error, __return_address);
  704. /*
  705. * Errors encountered during cross-referencing with another
  706. * data structure should not cause this scrubber to abort.
  707. */
  708. *error = 0;
  709. return false;
  710. }
  711. /* Run the structure verifiers on in-memory buffers to detect bad memory. */
  712. void
  713. xchk_buffer_recheck(
  714. struct xfs_scrub *sc,
  715. struct xfs_buf *bp)
  716. {
  717. xfs_failaddr_t fa;
  718. if (bp->b_ops == NULL) {
  719. xchk_block_set_corrupt(sc, bp);
  720. return;
  721. }
  722. if (bp->b_ops->verify_struct == NULL) {
  723. xchk_set_incomplete(sc);
  724. return;
  725. }
  726. fa = bp->b_ops->verify_struct(bp);
  727. if (!fa)
  728. return;
  729. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  730. trace_xchk_block_error(sc, bp->b_bn, fa);
  731. }
  732. /*
  733. * Scrub the attr/data forks of a metadata inode. The metadata inode must be
  734. * pointed to by sc->ip and the ILOCK must be held.
  735. */
  736. int
  737. xchk_metadata_inode_forks(
  738. struct xfs_scrub *sc)
  739. {
  740. __u32 smtype;
  741. bool shared;
  742. int error;
  743. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
  744. return 0;
  745. /* Metadata inodes don't live on the rt device. */
  746. if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
  747. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  748. return 0;
  749. }
  750. /* They should never participate in reflink. */
  751. if (xfs_is_reflink_inode(sc->ip)) {
  752. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  753. return 0;
  754. }
  755. /* They also should never have extended attributes. */
  756. if (xfs_inode_hasattr(sc->ip)) {
  757. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  758. return 0;
  759. }
  760. /* Invoke the data fork scrubber. */
  761. smtype = sc->sm->sm_type;
  762. sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
  763. error = xchk_bmap_data(sc);
  764. sc->sm->sm_type = smtype;
  765. if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
  766. return error;
  767. /* Look for incorrect shared blocks. */
  768. if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
  769. error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
  770. &shared);
  771. if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
  772. &error))
  773. return error;
  774. if (shared)
  775. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  776. }
  777. return error;
  778. }
  779. /*
  780. * Try to lock an inode in violation of the usual locking order rules. For
  781. * example, trying to get the IOLOCK while in transaction context, or just
  782. * plain breaking AG-order or inode-order inode locking rules. Either way,
  783. * the only way to avoid an ABBA deadlock is to use trylock and back off if
  784. * we can't.
  785. */
  786. int
  787. xchk_ilock_inverted(
  788. struct xfs_inode *ip,
  789. uint lock_mode)
  790. {
  791. int i;
  792. for (i = 0; i < 20; i++) {
  793. if (xfs_ilock_nowait(ip, lock_mode))
  794. return 0;
  795. delay(1);
  796. }
  797. return -EDEADLOCK;
  798. }