common.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_btree.h"
  13. #include "xfs_log_format.h"
  14. #include "xfs_trans.h"
  15. #include "xfs_inode.h"
  16. #include "xfs_icache.h"
  17. #include "xfs_alloc.h"
  18. #include "xfs_alloc_btree.h"
  19. #include "xfs_ialloc.h"
  20. #include "xfs_ialloc_btree.h"
  21. #include "xfs_refcount_btree.h"
  22. #include "xfs_rmap.h"
  23. #include "xfs_rmap_btree.h"
  24. #include "xfs_log.h"
  25. #include "xfs_trans_priv.h"
  26. #include "xfs_da_format.h"
  27. #include "xfs_da_btree.h"
  28. #include "xfs_dir2_priv.h"
  29. #include "xfs_dir2.h"
  30. #include "xfs_attr.h"
  31. #include "xfs_reflink.h"
  32. #include "xfs_ag.h"
  33. #include "xfs_error.h"
  34. #include "xfs_quota.h"
  35. #include "xfs_exchmaps.h"
  36. #include "xfs_rtbitmap.h"
  37. #include "scrub/scrub.h"
  38. #include "scrub/common.h"
  39. #include "scrub/trace.h"
  40. #include "scrub/repair.h"
  41. #include "scrub/health.h"
  42. /* Common code for the metadata scrubbers. */
  43. /*
  44. * Handling operational errors.
  45. *
  46. * The *_process_error() family of functions are used to process error return
  47. * codes from functions called as part of a scrub operation.
  48. *
  49. * If there's no error, we return true to tell the caller that it's ok
  50. * to move on to the next check in its list.
  51. *
  52. * For non-verifier errors (e.g. ENOMEM) we return false to tell the
  53. * caller that something bad happened, and we preserve *error so that
  54. * the caller can return the *error up the stack to userspace.
  55. *
  56. * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
  57. * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words,
  58. * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
  59. * not via return codes. We return false to tell the caller that
  60. * something bad happened. Since the error has been cleared, the caller
  61. * will (presumably) return that zero and scrubbing will move on to
  62. * whatever's next.
  63. *
  64. * ftrace can be used to record the precise metadata location and the
  65. * approximate code location of the failed operation.
  66. */
/*
 * Check for operational errors.
 *
 * Returns true if *error is zero, meaning the caller may continue with the
 * next check.  Otherwise returns false:
 *
 *  - EDEADLOCK/ECHRNG are passed back unchanged so the op can be restarted
 *    with deadlock avoidance.
 *  - ECANCELED means the caller already recorded a scrub outcome flag and
 *    wants to bail out quickly; *error is cleared.
 *  - EFSBADCRC/EFSCORRUPTED record @errflag in sm_flags and clear *error,
 *    since verifier failures are tracked via outcome flags, not errno.
 *  - Anything else is preserved for the caller to return to userspace.
 *
 * @ret_ip is the caller's return address, used only for tracing.
 */
static bool
__xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(
				sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
				sc->sm, *error);
		break;
	case -ECANCELED:
		/*
		 * ECANCELED here means that the caller set one of the scrub
		 * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
		 * quickly.  Set error to zero and do not continue.
		 */
		trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
		*error = 0;
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
		break;
	}
	return false;
}
/* Process an operational error for a primary metadata check. */
bool
xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	/* Verifier failures mark the primary object as corrupt. */
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
/* Process an operational error during a cross-reference check. */
bool
xchk_xref_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	/* Verifier failures mark the xref as failed, not the primary. */
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
/*
 * Check for operational errors for a file offset.
 *
 * Same contract as __xchk_process_error(), but the trace data is keyed by
 * (@whichfork, @offset) within sc->ip instead of an AG block address.
 */
static bool
__xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -ECANCELED:
		/*
		 * ECANCELED here means that the caller set one of the scrub
		 * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
		 * quickly.  Set error to zero and do not continue.
		 */
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		*error = 0;
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		break;
	}
	return false;
}
/* Process an operational error for a file fork block check. */
bool
xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	/* Verifier failures mark the primary object as corrupt. */
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
/* Process an operational error while cross-referencing a file fork block. */
bool
xchk_fblock_xref_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	/* Verifier failures mark the xref as failed, not the primary. */
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
  189. /*
  190. * Handling scrub corruption/optimization/warning checks.
  191. *
  192. * The *_set_{corrupt,preen,warning}() family of functions are used to
  193. * record the presence of metadata that is incorrect (corrupt), could be
  194. * optimized somehow (preen), or should be flagged for administrative
  195. * review but is not incorrect (warn).
  196. *
  197. * ftrace can be used to record the precise metadata location and
  198. * approximate code location of the failed check.
  199. */
/* Record a block which could be optimized. */
void
xchk_block_set_preen(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	/* Trace by the buffer's daddr so userspace can find the block. */
	trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
}
/* Record an inode which could be optimized. */
/* Record an inode which could be optimized. */
void
xchk_ino_set_preen(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_ino_preen(sc, ino, __return_address);
}
/* Record something being wrong with the filesystem primary superblock. */
void
xchk_set_corrupt(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_fs_error(sc, 0, __return_address);
}
/* Record a corrupt block. */
void
xchk_block_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}
#ifdef CONFIG_XFS_QUOTA
/* Record a corrupt quota counter of the given type and dquot id. */
void
xchk_qcheck_set_corrupt(
	struct xfs_scrub	*sc,
	unsigned int		dqtype,
	xfs_dqid_t		id)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_qcheck_error(sc, dqtype, id, __return_address);
}
#endif
/* Record a corruption while cross-referencing. */
void
xchk_block_xref_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}
/* Record a corrupt inode. */
/* Record a corrupt inode. */
void
xchk_ino_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_ino_error(sc, ino, __return_address);
}
/* Record a corruption while cross-referencing with an inode. */
void
xchk_ino_xref_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_ino_error(sc, ino, __return_address);
}
/* Record corruption in a block indexed by a file fork. */
void
xchk_fblock_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}
/* Record a corruption while cross-referencing a fork block. */
void
xchk_fblock_xref_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}
/*
 * Warn about inodes that need administrative review but are not
 * incorrect.
 */
/* Warn about an inode that needs administrative review. */
void
xchk_ino_set_warning(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xchk_ino_warning(sc, ino, __return_address);
}
/* Warn about a block indexed by a file fork that needs review. */
void
xchk_fblock_set_warning(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
}
/* Signal an incomplete scrub. */
void
xchk_set_incomplete(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
	trace_xchk_incomplete(sc, __return_address);
}
/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

struct xchk_rmap_ownedby_info {
	const struct xfs_owner_info	*oinfo;		/* owner to match */
	xfs_filblks_t			*blocks;	/* running block total */
};
  340. STATIC int
  341. xchk_count_rmap_ownedby_irec(
  342. struct xfs_btree_cur *cur,
  343. const struct xfs_rmap_irec *rec,
  344. void *priv)
  345. {
  346. struct xchk_rmap_ownedby_info *sroi = priv;
  347. bool irec_attr;
  348. bool oinfo_attr;
  349. irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
  350. oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
  351. if (rec->rm_owner != sroi->oinfo->oi_owner)
  352. return 0;
  353. if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
  354. (*sroi->blocks) += rec->rm_blockcount;
  355. return 0;
  356. }
/*
 * Calculate the number of blocks the rmap thinks are owned by something.
 * The caller should pass us an rmapbt cursor.  On success, *blocks holds
 * the total block count for @oinfo's owner.
 */
int
xchk_count_rmap_ownedby_ag(
	struct xfs_scrub		*sc,
	struct xfs_btree_cur		*cur,
	const struct xfs_owner_info	*oinfo,
	xfs_filblks_t			*blocks)
{
	struct xchk_rmap_ownedby_info	sroi = {
		.oinfo			= oinfo,
		.blocks			= blocks,
	};

	/* Start from zero; the callback accumulates into *blocks. */
	*blocks = 0;
	return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
			&sroi);
}
  376. /*
  377. * AG scrubbing
  378. *
  379. * These helpers facilitate locking an allocation group's header
  380. * buffers, setting up cursors for all btrees that are present, and
  381. * cleaning everything up once we're through.
  382. */
  383. /* Decide if we want to return an AG header read failure. */
  384. static inline bool
  385. want_ag_read_header_failure(
  386. struct xfs_scrub *sc,
  387. unsigned int type)
  388. {
  389. /* Return all AG header read failures when scanning btrees. */
  390. if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
  391. sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
  392. sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
  393. return true;
  394. /*
  395. * If we're scanning a given type of AG header, we only want to
  396. * see read failures from that specific header. We'd like the
  397. * other headers to cross-check them, but this isn't required.
  398. */
  399. if (sc->sm->sm_type == type)
  400. return true;
  401. return false;
  402. }
/*
 * Grab the AG header buffers for the attached perag structure.
 *
 * The headers should be released by xchk_ag_free, but as a fail safe we
 * attach all the buffers we grab to the scrub transaction so they'll all be
 * freed when we cancel it.  Read failures are only returned when the caller
 * actually cares about that header (see want_ag_read_header_failure).
 */
static inline int
xchk_perag_read_headers(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	int			error;

	/* AGI before AGF: this is the established locking order. */
	error = xfs_ialloc_read_agi(sa->pag, sc->tp, 0, &sa->agi_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
		return error;

	error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
		return error;

	return 0;
}
/*
 * Grab the AG headers for the attached perag structure and wait for pending
 * intents to drain.  Returns 0 with both headers locked, -ECHRNG if the
 * drain fsgate hasn't been enabled yet, or a negative errno.
 */
int
xchk_perag_drain_and_lock(
	struct xfs_scrub	*sc)
{
	struct xchk_ag		*sa = &sc->sa;
	int			error = 0;

	ASSERT(sa->pag != NULL);
	ASSERT(sa->agi_bp == NULL);
	ASSERT(sa->agf_bp == NULL);

	do {
		if (xchk_should_terminate(sc, &error))
			return error;

		error = xchk_perag_read_headers(sc, sa);
		if (error)
			return error;

		/*
		 * If we've grabbed an inode for scrubbing then we assume that
		 * holding its ILOCK will suffice to coordinate with any intent
		 * chains involving this inode.
		 */
		if (sc->ip)
			return 0;

		/*
		 * Decide if this AG is quiet enough for all metadata to be
		 * consistent with each other.  XFS allows the AG header buffer
		 * locks to cycle across transaction rolls while processing
		 * chains of deferred ops, which means that there could be
		 * other threads in the middle of processing a chain of
		 * deferred ops.  For regular operations we are careful about
		 * ordering operations to prevent collisions between threads
		 * (which is why we don't need a per-AG lock), but scrub and
		 * repair have to serialize against chained operations.
		 *
		 * We just locked all the AG headers buffers; now take a look
		 * to see if there are any intents in progress.  If there are,
		 * drop the AG headers and wait for the intents to drain.
		 * Since we hold all the AG header locks for the duration of
		 * the scrub, this is the only time we have to sample the
		 * intents counter; any threads increasing it after this point
		 * can't possibly be in the middle of a chain of AG metadata
		 * updates.
		 *
		 * Obviously, this should be slanted against scrub and in favor
		 * of runtime threads.
		 */
		if (!xfs_perag_intent_busy(sa->pag))
			return 0;

		/* Drop both headers before sleeping on the drain. */
		if (sa->agf_bp) {
			xfs_trans_brelse(sc->tp, sa->agf_bp);
			sa->agf_bp = NULL;
		}

		if (sa->agi_bp) {
			xfs_trans_brelse(sc->tp, sa->agi_bp);
			sa->agi_bp = NULL;
		}

		/*
		 * Without the drain fsgate enabled we cannot wait; ask the
		 * caller to restart the scrub with it turned on.
		 */
		if (!(sc->flags & XCHK_FSGATES_DRAIN))
			return -ECHRNG;
		error = xfs_perag_intent_drain(sa->pag);
		if (error == -ERESTARTSYS)
			error = -EINTR;
	} while (!error);

	return error;
}
/*
 * Grab the per-AG structure, grab all AG header buffers, and wait until there
 * aren't any pending intents.  Returns -ENOENT if we can't grab the perag
 * structure.
 */
int
xchk_ag_read_headers(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;

	/* We must not already hold a perag reference. */
	ASSERT(!sa->pag);
	sa->pag = xfs_perag_get(mp, agno);
	if (!sa->pag)
		return -ENOENT;

	return xchk_perag_drain_and_lock(sc);
}
/* Release all the AG btree cursors. */
void
xchk_ag_btcur_free(
	struct xchk_ag		*sa)
{
	/* Tear down in reverse of the init order in xchk_ag_btcur_init. */
	if (sa->refc_cur)
		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
	if (sa->rmap_cur)
		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
	if (sa->fino_cur)
		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
	if (sa->ino_cur)
		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
	if (sa->cnt_cur)
		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
	if (sa->bno_cur)
		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);

	/* Clear the pointers so a later free is a no-op. */
	sa->refc_cur = NULL;
	sa->rmap_cur = NULL;
	sa->fino_cur = NULL;
	sa->ino_cur = NULL;
	sa->bno_cur = NULL;
	sa->cnt_cur = NULL;
}
/*
 * Initialize all the btree cursors for an AG.  Cursors are only set up for
 * the btrees whose header buffer was successfully read and whose feature is
 * enabled; cursors for trees already known to be sick are torn down again
 * so we don't cross-reference against bad metadata.
 */
void
xchk_ag_btcur_init(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;

	if (sa->agf_bp) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_bnobt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag);
		xchk_ag_btree_del_cursor_if_sick(sc, &sa->bno_cur,
				XFS_SCRUB_TYPE_BNOBT);

		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_cntbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag);
		xchk_ag_btree_del_cursor_if_sick(sc, &sa->cnt_cur,
				XFS_SCRUB_TYPE_CNTBT);

		/* Set up a rmapbt cursor for cross-referencing. */
		if (xfs_has_rmapbt(mp)) {
			sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp,
					sa->agf_bp, sa->pag);
			xchk_ag_btree_del_cursor_if_sick(sc, &sa->rmap_cur,
					XFS_SCRUB_TYPE_RMAPBT);
		}

		/* Set up a refcountbt cursor for cross-referencing. */
		if (xfs_has_reflink(mp)) {
			sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
					sa->agf_bp, sa->pag);
			xchk_ag_btree_del_cursor_if_sick(sc, &sa->refc_cur,
					XFS_SCRUB_TYPE_REFCNTBT);
		}
	}

	if (sa->agi_bp) {
		/* Set up a inobt cursor for cross-referencing. */
		sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp,
				sa->agi_bp);
		xchk_ag_btree_del_cursor_if_sick(sc, &sa->ino_cur,
				XFS_SCRUB_TYPE_INOBT);

		/* Set up a finobt cursor for cross-referencing. */
		if (xfs_has_finobt(mp)) {
			sa->fino_cur = xfs_finobt_init_cursor(sa->pag, sc->tp,
					sa->agi_bp);
			xchk_ag_btree_del_cursor_if_sick(sc, &sa->fino_cur,
					XFS_SCRUB_TYPE_FINOBT);
		}
	}
}
/* Release the AG header context and btree cursors. */
void
xchk_ag_free(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	/* Cursors first; they reference the header buffers below. */
	xchk_ag_btcur_free(sa);
	xrep_reset_perag_resv(sc);
	if (sa->agf_bp) {
		xfs_trans_brelse(sc->tp, sa->agf_bp);
		sa->agf_bp = NULL;
	}
	if (sa->agi_bp) {
		xfs_trans_brelse(sc->tp, sa->agi_bp);
		sa->agi_bp = NULL;
	}
	if (sa->pag) {
		xfs_perag_put(sa->pag);
		sa->pag = NULL;
	}
}
/*
 * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
 * order.  Locking order requires us to get the AGI before the AGF.  We use
 * the transaction to avoid deadlocking on crosslinked metadata buffers;
 * either the caller passes one in (bmap scrub) or we have to create a
 * transaction ourselves.  Returns ENOENT if the perag struct cannot be
 * grabbed.
 */
int
xchk_ag_init(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{
	int			error;

	error = xchk_ag_read_headers(sc, agno, sa);
	if (error)
		return error;

	/* Headers are locked; safe to build the cross-reference cursors. */
	xchk_ag_btcur_init(sc, sa);
	return 0;
}
/* Per-scrubber setup functions */

/* Cancel the scrub transaction and clear the pointer so it isn't reused. */
void
xchk_trans_cancel(
	struct xfs_scrub	*sc)
{
	xfs_trans_cancel(sc->tp);
	sc->tp = NULL;
}
/* Allocate an empty (no-reservation) transaction for this scrub. */
int
xchk_trans_alloc_empty(
	struct xfs_scrub	*sc)
{
	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}
/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
 *
 * If we're going to repair something, we need to ask for the largest possible
 * log reservation so that we can handle the worst case scenario for metadata
 * updates while rebuilding a metadata item.  We also need to reserve as many
 * blocks in the head transaction as we think we're going to need to rebuild
 * the metadata object.
 */
int
xchk_trans_alloc(
	struct xfs_scrub	*sc,
	uint			resblks)
{
	/* Repair needs a real reservation; scrub-only can run empty. */
	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
				resblks, 0, 0, &sc->tp);

	return xchk_trans_alloc_empty(sc);
}
/* Set us up with a transaction and an empty context. */
int
xchk_setup_fs(
	struct xfs_scrub	*sc)
{
	uint			resblks;

	/* Size the reservation for a possible per-AG repair. */
	resblks = xrep_calc_ag_resblks(sc);
	return xchk_trans_alloc(sc, resblks);
}
/* Set us up with AG headers and btree cursors. */
int
xchk_setup_ag_btree(
	struct xfs_scrub	*sc,
	bool			force_log)
{
	struct xfs_mount	*mp = sc->mp;
	int			error;

	/*
	 * If the caller asks us to checkpoint the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
		error = xchk_checkpoint_log(mp);
		if (error)
			return error;
	}

	error = xchk_setup_fs(sc);
	if (error)
		return error;

	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
}
/* Push everything out of the log onto disk. */
int
xchk_checkpoint_log(
	struct xfs_mount	*mp)
{
	int			error;

	/* Force the log to disk, then wait for the AIL to empty. */
	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;
	xfs_ail_push_all_sync(mp->m_ail);
	return 0;
}
/* Verify that an inode is allocated ondisk, then return its cached inode. */
int
xchk_iget(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_inode	**ipp)
{
	/* The transaction is required to avoid iget deadlocks. */
	ASSERT(sc->tp != NULL);

	return xfs_iget(sc->mp, sc->tp, inum, XCHK_IGET_FLAGS, 0, ipp);
}
/*
 * Try to grab an inode in a manner that avoids races with physical inode
 * allocation.  If we can't, return the locked AGI buffer so that the caller
 * can single-step the loading process to see where things went wrong.
 * Callers must have a valid scrub transaction.
 *
 * If the iget succeeds, return 0, a NULL AGI, and the inode.
 *
 * If the iget fails, return the error, the locked AGI, and a NULL inode.  This
 * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
 * no longer allocated; or any other corruption or runtime error.
 *
 * If the AGI read fails, return the error, a NULL AGI, and NULL inode.
 *
 * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
 */
int
xchk_iget_agi(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_buf		**agi_bpp,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	struct xfs_perag	*pag;
	int			error;

	ASSERT(sc->tp != NULL);

again:
	/* Reset the outparams on every pass through the retry loop. */
	*agi_bpp = NULL;
	*ipp = NULL;
	error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Attach the AGI buffer to the scrub transaction to avoid deadlocks
	 * in the iget cache miss path.
	 */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
	error = xfs_ialloc_read_agi(pag, tp, 0, agi_bpp);
	xfs_perag_put(pag);
	if (error)
		return error;

	error = xfs_iget(mp, tp, inum, XFS_IGET_NORETRY | XCHK_IGET_FLAGS, 0,
			ipp);
	if (error == -EAGAIN) {
		/*
		 * The inode may be in core but temporarily unavailable and may
		 * require the AGI buffer before it can be returned.  Drop the
		 * AGI buffer and retry the lookup.
		 *
		 * Incore lookup will fail with EAGAIN on a cache hit if the
		 * inode is queued to the inactivation list.  The inactivation
		 * worker may remove the inode from the unlinked list and hence
		 * needs the AGI.
		 *
		 * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
		 * to allow inodegc to make progress and move the inode to
		 * IRECLAIMABLE state where xfs_iget will be able to return it
		 * again if it can lock the inode.
		 */
		xfs_trans_brelse(tp, *agi_bpp);
		delay(1);
		goto again;
	}
	if (error)
		return error;

	/* We got the inode, so we can release the AGI. */
	ASSERT(*ipp != NULL);
	xfs_trans_brelse(tp, *agi_bpp);
	*agi_bpp = NULL;
	return 0;
}
#ifdef CONFIG_XFS_QUOTA
/*
 * Try to attach dquots to this inode if we think we might want to repair it.
 * Callers must not hold any ILOCKs.  If the dquots are broken and cannot be
 * attached, a quotacheck will be scheduled.
 */
int
xchk_ino_dqattach(
	struct xfs_scrub	*sc)
{
	ASSERT(sc->tp != NULL);
	ASSERT(sc->ip != NULL);

	/* Scrub-only runs never dirty quota state, so skip the attach. */
	if (!xchk_could_repair(sc))
		return 0;

	return xrep_ino_dqattach(sc);
}
#endif
/*
 * Install an inode that we opened by handle for scrubbing.  Returns -ENOENT
 * (and drops the reference) if the caller-supplied generation number doesn't
 * match, which means the handle is stale.
 */
int
xchk_install_handle_inode(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
		xchk_irele(sc, ip);
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}
/*
 * Install an already-referenced inode for scrubbing.  Get our own reference
 * to the inode to make disposal simpler.  The inode must not be in I_FREEING
 * or I_WILL_FREE state!
 */
int
xchk_install_live_inode(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	/* igrab failure means the VFS inode is being torn down. */
	if (!igrab(VFS_I(ip))) {
		xchk_ino_set_corrupt(sc, ip->i_ino);
		return -EFSCORRUPTED;
	}

	sc->ip = ip;
	return 0;
}
  831. /*
  832. * In preparation to scrub metadata structures that hang off of an inode,
  833. * grab either the inode referenced in the scrub control structure or the
  834. * inode passed in. If the inumber does not reference an allocated inode
  835. * record, the function returns ENOENT to end the scrub early. The inode
  836. * is not locked.
  837. */
  838. int
  839. xchk_iget_for_scrubbing(
  840. struct xfs_scrub *sc)
  841. {
  842. struct xfs_imap imap;
  843. struct xfs_mount *mp = sc->mp;
  844. struct xfs_perag *pag;
  845. struct xfs_buf *agi_bp;
  846. struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
  847. struct xfs_inode *ip = NULL;
  848. xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
  849. int error;
  850. ASSERT(sc->tp == NULL);
  851. /* We want to scan the inode we already had opened. */
  852. if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
  853. return xchk_install_live_inode(sc, ip_in);
  854. /* Reject internal metadata files and obviously bad inode numbers. */
  855. if (xfs_internal_inum(mp, sc->sm->sm_ino))
  856. return -ENOENT;
  857. if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
  858. return -ENOENT;
  859. /* Try a safe untrusted iget. */
  860. error = xchk_iget_safe(sc, sc->sm->sm_ino, &ip);
  861. if (!error)
  862. return xchk_install_handle_inode(sc, ip);
  863. if (error == -ENOENT)
  864. return error;
  865. if (error != -EINVAL)
  866. goto out_error;
  867. /*
  868. * EINVAL with IGET_UNTRUSTED probably means one of several things:
  869. * userspace gave us an inode number that doesn't correspond to fs
  870. * space; the inode btree lacks a record for this inode; or there is a
  871. * record, and it says this inode is free.
  872. *
  873. * We want to look up this inode in the inobt to distinguish two
  874. * scenarios: (1) the inobt says the inode is free, in which case
  875. * there's nothing to do; and (2) the inobt says the inode is
  876. * allocated, but loading it failed due to corruption.
  877. *
  878. * Allocate a transaction and grab the AGI to prevent inobt activity
  879. * in this AG. Retry the iget in case someone allocated a new inode
  880. * after the first iget failed.
  881. */
  882. error = xchk_trans_alloc(sc, 0);
  883. if (error)
  884. goto out_error;
  885. error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
  886. if (error == 0) {
  887. /* Actually got the inode, so install it. */
  888. xchk_trans_cancel(sc);
  889. return xchk_install_handle_inode(sc, ip);
  890. }
  891. if (error == -ENOENT)
  892. goto out_gone;
  893. if (error != -EINVAL)
  894. goto out_cancel;
  895. /* Ensure that we have protected against inode allocation/freeing. */
  896. if (agi_bp == NULL) {
  897. ASSERT(agi_bp != NULL);
  898. error = -ECANCELED;
  899. goto out_cancel;
  900. }
  901. /*
  902. * Untrusted iget failed a second time. Let's try an inobt lookup.
  903. * If the inobt thinks this the inode neither can exist inside the
  904. * filesystem nor is allocated, return ENOENT to signal that the check
  905. * can be skipped.
  906. *
  907. * If the lookup returns corruption, we'll mark this inode corrupt and
  908. * exit to userspace. There's little chance of fixing anything until
  909. * the inobt is straightened out, but there's nothing we can do here.
  910. *
  911. * If the lookup encounters any other error, exit to userspace.
  912. *
  913. * If the lookup succeeds, something else must be very wrong in the fs
  914. * such that setting up the incore inode failed in some strange way.
  915. * Treat those as corruptions.
  916. */
  917. pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
  918. if (!pag) {
  919. error = -EFSCORRUPTED;
  920. goto out_cancel;
  921. }
  922. error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
  923. XFS_IGET_UNTRUSTED);
  924. xfs_perag_put(pag);
  925. if (error == -EINVAL || error == -ENOENT)
  926. goto out_gone;
  927. if (!error)
  928. error = -EFSCORRUPTED;
  929. out_cancel:
  930. xchk_trans_cancel(sc);
  931. out_error:
  932. trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
  933. error, __return_address);
  934. return error;
  935. out_gone:
  936. /* The file is gone, so there's nothing to check. */
  937. xchk_trans_cancel(sc);
  938. return -ENOENT;
  939. }
  940. /* Release an inode, possibly dropping it in the process. */
  941. void
  942. xchk_irele(
  943. struct xfs_scrub *sc,
  944. struct xfs_inode *ip)
  945. {
  946. if (sc->tp) {
  947. /*
  948. * If we are in a transaction, we /cannot/ drop the inode
  949. * ourselves, because the VFS will trigger writeback, which
  950. * can require a transaction. Clear DONTCACHE to force the
  951. * inode to the LRU, where someone else can take care of
  952. * dropping it.
  953. *
  954. * Note that when we grabbed our reference to the inode, it
  955. * could have had an active ref and DONTCACHE set if a sysadmin
  956. * is trying to coerce a change in file access mode. icache
  957. * hits do not clear DONTCACHE, so we must do it here.
  958. */
  959. spin_lock(&VFS_I(ip)->i_lock);
  960. VFS_I(ip)->i_state &= ~I_DONTCACHE;
  961. spin_unlock(&VFS_I(ip)->i_lock);
  962. }
  963. xfs_irele(ip);
  964. }
  965. /*
  966. * Set us up to scrub metadata mapped by a file's fork. Callers must not use
  967. * this to operate on user-accessible regular file data because the MMAPLOCK is
  968. * not taken.
  969. */
  970. int
  971. xchk_setup_inode_contents(
  972. struct xfs_scrub *sc,
  973. unsigned int resblks)
  974. {
  975. int error;
  976. error = xchk_iget_for_scrubbing(sc);
  977. if (error)
  978. return error;
  979. /* Lock the inode so the VFS cannot touch this file. */
  980. xchk_ilock(sc, XFS_IOLOCK_EXCL);
  981. error = xchk_trans_alloc(sc, resblks);
  982. if (error)
  983. goto out;
  984. error = xchk_ino_dqattach(sc);
  985. if (error)
  986. goto out;
  987. xchk_ilock(sc, XFS_ILOCK_EXCL);
  988. out:
  989. /* scrub teardown will unlock and release the inode for us */
  990. return error;
  991. }
/* Take the inode lock(s) given by ilock_flags and record them for teardown. */
void
xchk_ilock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	xfs_ilock(sc->ip, ilock_flags);
	/* Track held locks so teardown knows what to release. */
	sc->ilock_flags |= ilock_flags;
}
  1000. bool
  1001. xchk_ilock_nowait(
  1002. struct xfs_scrub *sc,
  1003. unsigned int ilock_flags)
  1004. {
  1005. if (xfs_ilock_nowait(sc->ip, ilock_flags)) {
  1006. sc->ilock_flags |= ilock_flags;
  1007. return true;
  1008. }
  1009. return false;
  1010. }
/* Release the given inode lock(s) and clear them from our tracking mask. */
void
xchk_iunlock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	/* Clear the flags before dropping the lock itself. */
	sc->ilock_flags &= ~ilock_flags;
	xfs_iunlock(sc->ip, ilock_flags);
}
  1019. /*
  1020. * Predicate that decides if we need to evaluate the cross-reference check.
  1021. * If there was an error accessing the cross-reference btree, just delete
  1022. * the cursor and skip the check.
  1023. */
  1024. bool
  1025. xchk_should_check_xref(
  1026. struct xfs_scrub *sc,
  1027. int *error,
  1028. struct xfs_btree_cur **curpp)
  1029. {
  1030. /* No point in xref if we already know we're corrupt. */
  1031. if (xchk_skip_xref(sc->sm))
  1032. return false;
  1033. if (*error == 0)
  1034. return true;
  1035. if (curpp) {
  1036. /* If we've already given up on xref, just bail out. */
  1037. if (!*curpp)
  1038. return false;
  1039. /* xref error, delete cursor and bail out. */
  1040. xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
  1041. *curpp = NULL;
  1042. }
  1043. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
  1044. trace_xchk_xref_error(sc, *error, __return_address);
  1045. /*
  1046. * Errors encountered during cross-referencing with another
  1047. * data structure should not cause this scrubber to abort.
  1048. */
  1049. *error = 0;
  1050. return false;
  1051. }
  1052. /* Run the structure verifiers on in-memory buffers to detect bad memory. */
  1053. void
  1054. xchk_buffer_recheck(
  1055. struct xfs_scrub *sc,
  1056. struct xfs_buf *bp)
  1057. {
  1058. xfs_failaddr_t fa;
  1059. if (bp->b_ops == NULL) {
  1060. xchk_block_set_corrupt(sc, bp);
  1061. return;
  1062. }
  1063. if (bp->b_ops->verify_struct == NULL) {
  1064. xchk_set_incomplete(sc);
  1065. return;
  1066. }
  1067. fa = bp->b_ops->verify_struct(bp);
  1068. if (!fa)
  1069. return;
  1070. sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  1071. trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
  1072. }
/*
 * Run the scrubber for the given scrub_type as a subordinate scrub of the
 * current one, returning its error code.
 *
 * NOTE(review): the return of xchk_scrub_create_subord() is dereferenced
 * without a NULL check -- presumably it cannot fail, but confirm against
 * its definition.
 */
static inline int
xchk_metadata_inode_subtype(
	struct xfs_scrub	*sc,
	unsigned int		scrub_type)
{
	struct xfs_scrub_subord	*sub;
	int			error;

	sub = xchk_scrub_create_subord(sc, scrub_type);
	error = sub->sc.ops->scrub(&sub->sc);
	xchk_scrub_free_subord(sub);
	return error;
}
  1085. /*
  1086. * Scrub the attr/data forks of a metadata inode. The metadata inode must be
  1087. * pointed to by sc->ip and the ILOCK must be held.
  1088. */
  1089. int
  1090. xchk_metadata_inode_forks(
  1091. struct xfs_scrub *sc)
  1092. {
  1093. bool shared;
  1094. int error;
  1095. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
  1096. return 0;
  1097. /* Check the inode record. */
  1098. error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
  1099. if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
  1100. return error;
  1101. /* Metadata inodes don't live on the rt device. */
  1102. if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
  1103. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  1104. return 0;
  1105. }
  1106. /* They should never participate in reflink. */
  1107. if (xfs_is_reflink_inode(sc->ip)) {
  1108. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  1109. return 0;
  1110. }
  1111. /* They also should never have extended attributes. */
  1112. if (xfs_inode_hasattr(sc->ip)) {
  1113. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  1114. return 0;
  1115. }
  1116. /* Invoke the data fork scrubber. */
  1117. error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
  1118. if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
  1119. return error;
  1120. /* Look for incorrect shared blocks. */
  1121. if (xfs_has_reflink(sc->mp)) {
  1122. error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
  1123. &shared);
  1124. if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
  1125. &error))
  1126. return error;
  1127. if (shared)
  1128. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  1129. }
  1130. return 0;
  1131. }
  1132. /*
  1133. * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
  1134. * operation. Callers must not hold any locks that intersect with the CPU
  1135. * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs
  1136. * to change kernel code.
  1137. */
  1138. void
  1139. xchk_fsgates_enable(
  1140. struct xfs_scrub *sc,
  1141. unsigned int scrub_fsgates)
  1142. {
  1143. ASSERT(!(scrub_fsgates & ~XCHK_FSGATES_ALL));
  1144. ASSERT(!(sc->flags & scrub_fsgates));
  1145. trace_xchk_fsgates_enable(sc, scrub_fsgates);
  1146. if (scrub_fsgates & XCHK_FSGATES_DRAIN)
  1147. xfs_drain_wait_enable();
  1148. if (scrub_fsgates & XCHK_FSGATES_QUOTA)
  1149. xfs_dqtrx_hook_enable();
  1150. if (scrub_fsgates & XCHK_FSGATES_DIRENTS)
  1151. xfs_dir_hook_enable();
  1152. if (scrub_fsgates & XCHK_FSGATES_RMAP)
  1153. xfs_rmap_hook_enable();
  1154. sc->flags |= scrub_fsgates;
  1155. }
  1156. /*
  1157. * Decide if this is this a cached inode that's also allocated. The caller
  1158. * must hold a reference to an AG and the AGI buffer lock to prevent inodes
  1159. * from being allocated or freed.
  1160. *
  1161. * Look up an inode by number in the given file system. If the inode number
  1162. * is invalid, return -EINVAL. If the inode is not in cache, return -ENODATA.
  1163. * If the inode is being reclaimed, return -ENODATA because we know the inode
  1164. * cache cannot be updating the ondisk metadata.
  1165. *
  1166. * Otherwise, the incore inode is the one we want, and it is either live,
  1167. * somewhere in the inactivation machinery, or reclaimable. The inode is
  1168. * allocated if i_mode is nonzero. In all three cases, the cached inode will
  1169. * be more up to date than the ondisk inode buffer, so we must use the incore
  1170. * i_mode.
  1171. */
  1172. int
  1173. xchk_inode_is_allocated(
  1174. struct xfs_scrub *sc,
  1175. xfs_agino_t agino,
  1176. bool *inuse)
  1177. {
  1178. struct xfs_mount *mp = sc->mp;
  1179. struct xfs_perag *pag = sc->sa.pag;
  1180. xfs_ino_t ino;
  1181. struct xfs_inode *ip;
  1182. int error;
  1183. /* caller must hold perag reference */
  1184. if (pag == NULL) {
  1185. ASSERT(pag != NULL);
  1186. return -EINVAL;
  1187. }
  1188. /* caller must have AGI buffer */
  1189. if (sc->sa.agi_bp == NULL) {
  1190. ASSERT(sc->sa.agi_bp != NULL);
  1191. return -EINVAL;
  1192. }
  1193. /* reject inode numbers outside existing AGs */
  1194. ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
  1195. if (!xfs_verify_ino(mp, ino))
  1196. return -EINVAL;
  1197. error = -ENODATA;
  1198. rcu_read_lock();
  1199. ip = radix_tree_lookup(&pag->pag_ici_root, agino);
  1200. if (!ip) {
  1201. /* cache miss */
  1202. goto out_rcu;
  1203. }
  1204. /*
  1205. * If the inode number doesn't match, the incore inode got reused
  1206. * during an RCU grace period and the radix tree hasn't been updated.
  1207. * This isn't the inode we want.
  1208. */
  1209. spin_lock(&ip->i_flags_lock);
  1210. if (ip->i_ino != ino)
  1211. goto out_skip;
  1212. trace_xchk_inode_is_allocated(ip);
  1213. /*
  1214. * We have an incore inode that matches the inode we want, and the
  1215. * caller holds the perag structure and the AGI buffer. Let's check
  1216. * our assumptions below:
  1217. */
  1218. #ifdef DEBUG
  1219. /*
  1220. * (1) If the incore inode is live (i.e. referenced from the dcache),
  1221. * it will not be INEW, nor will it be in the inactivation or reclaim
  1222. * machinery. The ondisk inode had better be allocated. This is the
  1223. * most trivial case.
  1224. */
  1225. if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
  1226. XFS_INACTIVATING))) {
  1227. /* live inode */
  1228. ASSERT(VFS_I(ip)->i_mode != 0);
  1229. }
  1230. /*
  1231. * If the incore inode is INEW, there are several possibilities:
  1232. *
  1233. * (2) For a file that is being created, note that we allocate the
  1234. * ondisk inode before allocating, initializing, and adding the incore
  1235. * inode to the radix tree.
  1236. *
  1237. * (3) If the incore inode is being recycled, the inode has to be
  1238. * allocated because we don't allow freed inodes to be recycled.
  1239. * Recycling doesn't touch i_mode.
  1240. */
  1241. if (ip->i_flags & XFS_INEW) {
  1242. /* created on disk already or recycling */
  1243. ASSERT(VFS_I(ip)->i_mode != 0);
  1244. }
  1245. /*
  1246. * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
  1247. * inactivation has not started (!INACTIVATING), it is still allocated.
  1248. */
  1249. if ((ip->i_flags & XFS_NEED_INACTIVE) &&
  1250. !(ip->i_flags & XFS_INACTIVATING)) {
  1251. /* definitely before difree */
  1252. ASSERT(VFS_I(ip)->i_mode != 0);
  1253. }
  1254. #endif
  1255. /*
  1256. * If the incore inode is undergoing inactivation (INACTIVATING), there
  1257. * are two possibilities:
  1258. *
  1259. * (5) It is before the point where it would get freed ondisk, in which
  1260. * case i_mode is still nonzero.
  1261. *
  1262. * (6) It has already been freed, in which case i_mode is zero.
  1263. *
  1264. * We don't take the ILOCK here, but difree and dialloc update the AGI,
  1265. * and we've taken the AGI buffer lock, which prevents that from
  1266. * happening.
  1267. */
  1268. /*
  1269. * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
  1270. * reclaim (IRECLAIMABLE) could be allocated or free. i_mode still
  1271. * reflects the ondisk state.
  1272. */
  1273. /*
  1274. * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
  1275. * the flush code uses i_mode to format the ondisk inode.
  1276. */
  1277. /*
  1278. * (9) If the inode is in IRECLAIM and was reachable via the radix
  1279. * tree, it still has the same i_mode as it did before it entered
  1280. * reclaim. The inode object is still alive because we hold the RCU
  1281. * read lock.
  1282. */
  1283. *inuse = VFS_I(ip)->i_mode != 0;
  1284. error = 0;
  1285. out_skip:
  1286. spin_unlock(&ip->i_flags_lock);
  1287. out_rcu:
  1288. rcu_read_unlock();
  1289. return error;
  1290. }