bmap.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_btree.h"
  13. #include "xfs_bit.h"
  14. #include "xfs_log_format.h"
  15. #include "xfs_trans.h"
  16. #include "xfs_inode.h"
  17. #include "xfs_alloc.h"
  18. #include "xfs_bmap.h"
  19. #include "xfs_bmap_btree.h"
  20. #include "xfs_rmap.h"
  21. #include "xfs_rmap_btree.h"
  22. #include "xfs_health.h"
  23. #include "scrub/scrub.h"
  24. #include "scrub/common.h"
  25. #include "scrub/btree.h"
  26. #include "scrub/health.h"
  27. #include "xfs_ag.h"
/* Set us up with an inode's bmap. */
int
xchk_setup_inode_bmap(
	struct xfs_scrub	*sc)
{
	int			error;

	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	error = xchk_iget_for_scrubbing(sc);
	if (error)
		goto out;

	/* Lock order: IOLOCK first, then MMAPLOCK, then ILOCK below. */
	xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * We don't want any ephemeral data/cow fork updates sitting around
	 * while we inspect block mappings, so wait for directio to finish
	 * and flush dirty data if we have delalloc reservations.
	 */
	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;
		bool			is_repair = xchk_could_repair(sc);

		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);

		/* Break all our leases, we're going to mess with things. */
		if (is_repair) {
			error = xfs_break_layouts(VFS_I(sc->ip),
					&sc->ilock_flags, BREAK_WRITE);
			if (error)
				goto out;
		}

		inode_dio_wait(VFS_I(sc->ip));

		/*
		 * Try to flush all incore state to disk before we examine the
		 * space mappings for the data fork. Leave accumulated errors
		 * in the mapping for the writer threads to consume.
		 *
		 * On ENOSPC or EIO writeback errors, we continue into the
		 * extent mapping checks because write failures do not
		 * necessarily imply anything about the correctness of the file
		 * metadata. The metadata and the file data could be on
		 * completely separate devices; a media failure might only
		 * affect a subset of the disk, etc. We can handle delalloc
		 * extents in the scrubber, so leaving them in memory is fine.
		 */
		error = filemap_fdatawrite(mapping);
		if (!error)
			error = filemap_fdatawait_keep_errors(mapping);
		if (error && (error != -ENOSPC && error != -EIO))
			goto out;

		/* Drop the page cache if we're repairing block mappings. */
		if (is_repair) {
			error = invalidate_inode_pages2(
					VFS_I(sc->ip)->i_mapping);
			if (error)
				goto out;
		}
	}

	/* Got the inode, lock it and we're ready to go. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out;

	error = xchk_ino_dqattach(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode */
	return error;
}
/*
 * Inode fork block mapping (BMBT) scrubber.
 * More complex than the others because we have to scrub
 * all the extents regardless of whether or not the fork
 * is in btree format.
 */

/* State shared by all the per-extent checks of a single fork scrub. */
struct xchk_bmap_info {
	struct xfs_scrub	*sc;

	/* Incore extent tree cursor */
	struct xfs_iext_cursor	icur;

	/* Previous fork mapping that we examined */
	struct xfs_bmbt_irec	prev_rec;

	/* Is this a realtime fork? */
	bool			is_rt;

	/* May mappings point to shared space? */
	bool			is_shared;

	/* Was the incore extent tree loaded? */
	bool			was_loaded;

	/* Which inode fork are we checking? */
	int			whichfork;
};
/*
 * Look for a corresponding rmap for this irec.
 *
 * Returns true (and fills @rmap) if a candidate reverse mapping was found;
 * returns false after recording a cross-reference corruption or a cursor
 * error. @agbno/@owner locate the mapping within the current AG.
 */
static inline bool
xchk_bmap_get_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno,
	uint64_t		owner,
	struct xfs_rmap_irec	*rmap)
{
	xfs_fileoff_t		offset;
	unsigned int		rflags = 0;
	int			has_rmap;
	int			error;

	/* Mirror the fork/state of the bmap record in the rmap lookup keys. */
	if (info->whichfork == XFS_ATTR_FORK)
		rflags |= XFS_RMAP_ATTR_FORK;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rflags |= XFS_RMAP_UNWRITTEN;

	/*
	 * CoW staging extents are owned (on disk) by the refcountbt, so
	 * their rmaps do not have offsets.
	 */
	if (info->whichfork == XFS_COW_FORK)
		offset = 0;
	else
		offset = irec->br_startoff;

	/*
	 * If the caller thinks this could be a shared bmbt extent (IOWs,
	 * any data fork extent of a reflink inode) then we have to use the
	 * range rmap lookup to make sure we get the correct owner/offset.
	 */
	if (info->is_shared) {
		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
				owner, offset, rflags, rmap, &has_rmap);
	} else {
		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
				owner, offset, rflags, rmap, &has_rmap);
	}
	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
		return false;

	if (!has_rmap)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
			irec->br_startoff);
	return has_rmap;
}
/*
 * Make sure that we have rmapbt records for this data/attr fork extent.
 *
 * Any mismatch between the bmap record and the found rmap (start, length,
 * offset, owner, or flags) marks the fork mapping cross-ref corrupt.
 */
STATIC void
xchk_bmap_xref_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = info->sc->ip->i_ino;

	/* Skip if there's no rmap cursor or xrefs are being skipped. */
	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * The rmap must be an exact match for this incore file mapping record,
	 * which may have arisen from multiple ondisk records.
	 */
	if (rmap.rm_startblock != agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Widen to 64 bits so the end computation cannot overflow. */
	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end != agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the logical offsets. */
	if (rmap.rm_offset != irec->br_startoff)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
	if (rmap_end != irec->br_startoff + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * Check for discrepancies between the unwritten flag in the irec and
	 * the rmap. Note that the (in-memory) CoW fork distinguishes between
	 * unwritten and written extents, but we don't track that in the rmap
	 * records because the blocks are owned (on-disk) by the refcountbt,
	 * which doesn't track unwritten state.
	 */
	if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
	    !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	if (!!(info->whichfork == XFS_ATTR_FORK) !=
	    !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	/* A file mapping must never claim a bmbt-block rmap. */
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}
/*
 * Make sure that we have rmapbt records for this COW fork extent.
 *
 * CoW staging space is recorded in the rmapbt with the special owner
 * XFS_RMAP_OWN_COW, so the checks here are looser than the data/attr
 * fork version: the rmap only has to contain the mapping, not match it.
 */
STATIC void
xchk_bmap_xref_rmap_cow(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = XFS_RMAP_OWN_COW;

	/* Skip if there's no rmap cursor or xrefs are being skipped. */
	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * CoW staging extents are owned by the refcount btree, so the rmap
	 * can start before and end after the physical space allocated to this
	 * mapping. There are no offsets to check.
	 */
	if (rmap.rm_startblock > agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Widen to 64 bits so the end computation cannot overflow. */
	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end < agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * No flags allowed. Note that the (in-memory) CoW fork distinguishes
	 * between unwritten and written extents, but we don't track that in
	 * the rmap records because the blocks are owned (on-disk) by the
	 * refcountbt, which doesn't track unwritten state.
	 */
	if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}
/*
 * Cross-reference a single rtdev extent record.
 *
 * Only the "is this rt space in use" check is performed here; there are
 * no rtdev rmap/refcount cross-references in this code.
 */
STATIC void
xchk_bmap_rt_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
			irec->br_blockcount);
}
/*
 * Cross-reference a single datadev extent record.
 *
 * Initializes the AG headers for the extent's AG, runs the per-fork
 * cross-reference checks against the space btrees, and releases the AG
 * state again before returning.
 */
STATIC void
xchk_bmap_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_owner_info	oinfo;
	struct xfs_mount	*mp = info->sc->mp;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		len;
	int			error;

	/* Convert the fsblock mapping into AG coordinates. */
	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	len = irec->br_blockcount;

	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
	if (!xchk_fblock_process_error(info->sc, info->whichfork,
			irec->br_startoff, &error))
		goto out_free;

	/* Checks common to every fork type. */
	xchk_xref_is_used_space(info->sc, agbno, len);
	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
	switch (info->whichfork) {
	case XFS_DATA_FORK:
		xchk_bmap_xref_rmap(info, irec, agbno);
		/* Reflink files may share blocks, so skip the exclusivity
		 * checks for them. */
		if (!xfs_is_reflink_inode(info->sc->ip)) {
			xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
					info->whichfork, irec->br_startoff);
			xchk_xref_is_only_owned_by(info->sc, agbno,
					irec->br_blockcount, &oinfo);
			xchk_xref_is_not_shared(info->sc, agbno,
					irec->br_blockcount);
		}
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_ATTR_FORK:
		/* Attr blocks are never shared or CoW staged. */
		xchk_bmap_xref_rmap(info, irec, agbno);
		xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
				info->whichfork, irec->br_startoff);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&oinfo);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_COW_FORK:
		/* CoW staging blocks must be owned by OWN_COW and must be
		 * marked as staging in the refcount btree. */
		xchk_bmap_xref_rmap_cow(info, irec, agbno);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&XFS_RMAP_OINFO_COW);
		xchk_xref_is_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		break;
	}

out_free:
	xchk_ag_free(info->sc, &info->sc->sa);
}
/*
 * Directories and attr forks should never have blocks that can't be addressed
 * by a xfs_dablk_t.
 *
 * Checks both the first and the last file offset of the mapping; anything
 * out of dablk range marks the fork corrupt.
 */
STATIC void
xchk_bmap_dirattr_extent(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		off;

	/* Only dir data forks and attr forks are subject to this check. */
	if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
		return;

	if (!xfs_verify_dablk(mp, irec->br_startoff))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Last block covered by this mapping. */
	off = irec->br_startoff + irec->br_blockcount - 1;
	if (!xfs_verify_dablk(mp, off))
		xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
}
/*
 * Scrub a single extent record.
 *
 * Validates ordering against the previous record, the logical and physical
 * ranges, and fork-specific constraints, then cross-references the mapping
 * against the other space metadata unless corruption was already found.
 */
STATIC void
xchk_bmap_iextent(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = info->sc->mp;

	/*
	 * Check for out-of-order extents. This record could have come
	 * from the incore list, for which there is no ordering check.
	 */
	if (irec->br_startoff < info->prev_rec.br_startoff +
				info->prev_rec.br_blockcount)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	xchk_bmap_dirattr_extent(ip, info, irec);

	/* Make sure the extent points to a valid place. */
	if (info->is_rt &&
	    !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (!info->is_rt &&
	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* We don't allow unwritten extents on attr forks. */
	if (irec->br_state == XFS_EXT_UNWRITTEN &&
	    info->whichfork == XFS_ATTR_FORK)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Don't bother cross-referencing a mapping we already know is bad. */
	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	if (info->is_rt)
		xchk_bmap_rt_iextent_xref(ip, info, irec);
	else
		xchk_bmap_iextent_xref(ip, info, irec);
}
/*
 * Scrub a bmbt record.
 *
 * Called once per on-disk bmbt record by xchk_btree. Verifies the btree
 * block owners (once, on the leftmost leaf path) and that each ondisk
 * record has an exactly matching record in the incore extent tree.
 */
STATIC int
xchk_bmapbt_rec(
	struct xchk_btree	*bs,
	const union xfs_btree_rec *rec)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_bmbt_irec	iext_irec;
	struct xfs_iext_cursor	icur;
	struct xchk_bmap_info	*info = bs->private;
	struct xfs_inode	*ip = bs->cur->bc_ino.ip;
	struct xfs_buf		*bp = NULL;
	struct xfs_btree_block	*block;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, info->whichfork);
	uint64_t		owner;
	int			i;

	/*
	 * Check the owners of the btree blocks up to the level below
	 * the root since the verifiers don't do that.
	 */
	if (xfs_has_crc(bs->cur->bc_mp) &&
	    bs->cur->bc_levels[0].ptr == 1) {
		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
			block = xfs_btree_get_block(bs->cur, i, &bp);
			owner = be64_to_cpu(block->bb_u.l.bb_owner);
			if (owner != ip->i_ino)
				xchk_fblock_set_corrupt(bs->sc,
						info->whichfork, 0);
		}
	}

	/*
	 * Check that the incore extent tree contains an extent that matches
	 * this one exactly. We validate those cached bmaps later, so we don't
	 * need to check them here. If the incore extent tree was just loaded
	 * from disk by the scrubber, we assume that its contents match what's
	 * on disk (we still hold the ILOCK) and skip the equivalence check.
	 */
	if (!info->was_loaded)
		return 0;

	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
		return 0;
	}

	/* Incore record must match the ondisk record field-for-field. */
	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
				&iext_irec) ||
	    irec.br_startoff != iext_irec.br_startoff ||
	    irec.br_startblock != iext_irec.br_startblock ||
	    irec.br_blockcount != iext_irec.br_blockcount ||
	    irec.br_state != iext_irec.br_state)
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
	return 0;
}
/*
 * Scan the btree records.
 *
 * Loads the incore extent cache (remembering whether it was already
 * loaded, which controls the ondisk/incore equivalence check in
 * xchk_bmapbt_rec) and then walks the bmbt with the generic btree scrubber.
 */
STATIC int
xchk_bmap_btree(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xchk_bmap_info	*info)
{
	struct xfs_owner_info	oinfo;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	struct xfs_btree_cur	*cur;
	int			error;

	/* Load the incore bmap cache if it's not loaded. */
	info->was_loaded = !xfs_need_iread_extents(ifp);

	error = xfs_iread_extents(sc->tp, ip, whichfork);
	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
		goto out;

	/* Check the btree structure. */
	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
	xfs_btree_del_cursor(cur, error);
out:
	return error;
}
/* Context for walking rmap records to look up matching fork mappings. */
struct xchk_bmap_check_rmap_info {
	struct xfs_scrub	*sc;

	/* Which fork are we checking against the rmaps? */
	int			whichfork;

	/* Cursor into the fork's incore extent tree. */
	struct xfs_iext_cursor	icur;
};
/*
 * Can we find bmaps that fit this rmap?
 *
 * xfs_rmap_query_all callback. For each rmap record owned by the inode
 * being scrubbed, walk the incore extent tree and make sure the whole
 * rmap range is covered by fork mappings. Returns -ECANCELED to stop the
 * query as soon as corruption has been flagged.
 */
STATIC int
xchk_bmap_check_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_rmap_irec	check_rec;
	struct xchk_bmap_check_rmap_info *sbcri = priv;
	struct xfs_ifork	*ifp;
	struct xfs_scrub	*sc = sbcri->sc;
	bool			have_map;

	/* Is this even the right fork? */
	if (rec->rm_owner != sc->ip->i_ino)
		return 0;
	/* Skip rmaps whose attr-fork flag doesn't match the fork we want. */
	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	/* bmbt blocks are not file data; ignore them. */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return 0;

	/* Now look up the bmbt record. */
	ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
	if (!ifp) {
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
		goto out;
	}
	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
			&sbcri->icur, &irec);
	if (!have_map)
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
	/*
	 * bmap extent record lengths are constrained to 2^21 blocks in length
	 * because of space constraints in the on-disk metadata structure.
	 * However, rmap extent record lengths are constrained only by AG
	 * length, so we have to loop through the bmbt to make sure that the
	 * entire rmap is covered by bmbt records.
	 */
	check_rec = *rec;
	while (have_map) {
		/* Each bmap piece must line up with the rmap remainder. */
		if (irec.br_startoff != check_rec.rm_offset)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
				cur->bc_ag.pag->pag_agno,
				check_rec.rm_startblock))
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_blockcount > check_rec.rm_blockcount)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			break;
		/* Consume this bmap piece from the rmap range. */
		check_rec.rm_startblock += irec.br_blockcount;
		check_rec.rm_offset += irec.br_blockcount;
		check_rec.rm_blockcount -= irec.br_blockcount;
		if (check_rec.rm_blockcount == 0)
			break;
		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
		if (!have_map)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
	}

out:
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return -ECANCELED;
	return 0;
}
/*
 * Make sure each rmap has a corresponding bmbt entry.
 *
 * Walks one AG's rmap btree with xchk_bmap_check_rmap; a query aborted
 * with -ECANCELED means corruption was flagged, not a hard error, so it
 * is translated back to success here.
 */
STATIC int
xchk_bmap_check_ag_rmaps(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xfs_perag	*pag)
{
	struct xchk_bmap_check_rmap_info	sbcri;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agf;
	int			error;

	error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
	if (error)
		return error;

	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);

	sbcri.sc = sc;
	sbcri.whichfork = whichfork;
	error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
	if (error == -ECANCELED)
		error = 0;

	/* Release the cursor and AGF buffer in acquisition order. */
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(sc->tp, agf);
	return error;
}
  577. /*
  578. * Decide if we want to scan the reverse mappings to determine if the attr
  579. * fork /really/ has zero space mappings.
  580. */
  581. STATIC bool
  582. xchk_bmap_check_empty_attrfork(
  583. struct xfs_inode *ip)
  584. {
  585. struct xfs_ifork *ifp = &ip->i_af;
  586. /*
  587. * If the dinode repair found a bad attr fork, it will reset the fork
  588. * to extents format with zero records and wait for the this scrubber
  589. * to reconstruct the block mappings. If the fork is not in this
  590. * state, then the fork cannot have been zapped.
  591. */
  592. if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
  593. return false;
  594. /*
  595. * Files can have an attr fork in EXTENTS format with zero records for
  596. * several reasons:
  597. *
  598. * a) an attr set created a fork but ran out of space
  599. * b) attr replace deleted an old attr but failed during the set step
  600. * c) the data fork was in btree format when all attrs were deleted, so
  601. * the fork was left in place
  602. * d) the inode repair code zapped the fork
  603. *
  604. * Only in case (d) do we want to scan the rmapbt to see if we need to
  605. * rebuild the attr fork. The fork zap code clears all DAC permission
  606. * bits and zeroes the uid and gid, so avoid the scan if any of those
  607. * three conditions are not met.
  608. */
  609. if ((VFS_I(ip)->i_mode & 0777) != 0)
  610. return false;
  611. if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID))
  612. return false;
  613. if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID))
  614. return false;
  615. return true;
  616. }
  617. /*
  618. * Decide if we want to scan the reverse mappings to determine if the data
  619. * fork /really/ has zero space mappings.
  620. */
  621. STATIC bool
  622. xchk_bmap_check_empty_datafork(
  623. struct xfs_inode *ip)
  624. {
  625. struct xfs_ifork *ifp = &ip->i_df;
  626. /* Don't support realtime rmap checks yet. */
  627. if (XFS_IS_REALTIME_INODE(ip))
  628. return false;
  629. /*
  630. * If the dinode repair found a bad data fork, it will reset the fork
  631. * to extents format with zero records and wait for the this scrubber
  632. * to reconstruct the block mappings. If the fork is not in this
  633. * state, then the fork cannot have been zapped.
  634. */
  635. if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
  636. return false;
  637. /*
  638. * If we encounter an empty data fork along with evidence that the fork
  639. * might not really be empty, we need to scan the reverse mappings to
  640. * decide if we're going to rebuild the fork. Data forks with nonzero
  641. * file size are scanned.
  642. */
  643. return i_size_read(VFS_I(ip)) != 0;
  644. }
  645. /*
  646. * Decide if we want to walk every rmap btree in the fs to make sure that each
  647. * rmap for this file fork has corresponding bmbt entries.
  648. */
  649. static bool
  650. xchk_bmap_want_check_rmaps(
  651. struct xchk_bmap_info *info)
  652. {
  653. struct xfs_scrub *sc = info->sc;
  654. if (!xfs_has_rmapbt(sc->mp))
  655. return false;
  656. if (info->whichfork == XFS_COW_FORK)
  657. return false;
  658. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
  659. return false;
  660. if (info->whichfork == XFS_ATTR_FORK)
  661. return xchk_bmap_check_empty_attrfork(sc->ip);
  662. return xchk_bmap_check_empty_datafork(sc->ip);
  663. }
/*
 * Make sure each rmap has a corresponding bmbt entry.
 *
 * Walks every AG's rmap btree. The perag reference must be dropped
 * explicitly when bailing out of the for_each_perag loop early.
 */
STATIC int
xchk_bmap_check_rmaps(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error;

	for_each_perag(sc->mp, agno, pag) {
		error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
		if (error ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
			/* Early exit: release the perag the loop holds. */
			xfs_perag_rele(pag);
			return error;
		}
	}

	return 0;
}
  683. /* Scrub a delalloc reservation from the incore extent map tree. */
  684. STATIC void
  685. xchk_bmap_iextent_delalloc(
  686. struct xfs_inode *ip,
  687. struct xchk_bmap_info *info,
  688. struct xfs_bmbt_irec *irec)
  689. {
  690. struct xfs_mount *mp = info->sc->mp;
  691. /*
  692. * Check for out-of-order extents. This record could have come
  693. * from the incore list, for which there is no ordering check.
  694. */
  695. if (irec->br_startoff < info->prev_rec.br_startoff +
  696. info->prev_rec.br_blockcount)
  697. xchk_fblock_set_corrupt(info->sc, info->whichfork,
  698. irec->br_startoff);
  699. if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
  700. xchk_fblock_set_corrupt(info->sc, info->whichfork,
  701. irec->br_startoff);
  702. /* Make sure the extent points to a valid place. */
  703. if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
  704. xchk_fblock_set_corrupt(info->sc, info->whichfork,
  705. irec->br_startoff);
  706. }
  707. /* Decide if this individual fork mapping is ok. */
  708. static bool
  709. xchk_bmap_iext_mapping(
  710. struct xchk_bmap_info *info,
  711. const struct xfs_bmbt_irec *irec)
  712. {
  713. /* There should never be a "hole" extent in either extent list. */
  714. if (irec->br_startblock == HOLESTARTBLOCK)
  715. return false;
  716. if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
  717. return false;
  718. return true;
  719. }
  720. /* Are these two mappings contiguous with each other? */
  721. static inline bool
  722. xchk_are_bmaps_contiguous(
  723. const struct xfs_bmbt_irec *b1,
  724. const struct xfs_bmbt_irec *b2)
  725. {
  726. /* Don't try to combine unallocated mappings. */
  727. if (!xfs_bmap_is_real_extent(b1))
  728. return false;
  729. if (!xfs_bmap_is_real_extent(b2))
  730. return false;
  731. /* Does b2 come right after b1 in the logical and physical range? */
  732. if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
  733. return false;
  734. if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
  735. return false;
  736. if (b1->br_state != b2->br_state)
  737. return false;
  738. return true;
  739. }
/*
 * Walk the incore extent records, accumulating consecutive contiguous records
 * into a single incore mapping. Returns true if @irec has been set to a
 * mapping or false if there are no more mappings. Caller must ensure that
 * @info.icur is zeroed before the first call.
 */
static bool
xchk_bmap_iext_iter(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_bmbt_irec	got;
	struct xfs_ifork	*ifp;
	/* Number of raw iextent records folded into the returned mapping. */
	unsigned int		nr = 0;

	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);

	/* Advance to the next iextent record and check the mapping. */
	xfs_iext_next(ifp, &info->icur);
	if (!xfs_iext_get_extent(ifp, &info->icur, irec))
		return false;

	if (!xchk_bmap_iext_mapping(info, irec)) {
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
		return false;
	}
	nr++;

	/*
	 * Iterate subsequent iextent records and merge them with the one
	 * that we just read, if possible.
	 */
	while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
		if (!xchk_are_bmaps_contiguous(irec, &got))
			break;

		if (!xchk_bmap_iext_mapping(info, &got)) {
			xchk_fblock_set_corrupt(info->sc, info->whichfork,
					got.br_startoff);
			return false;
		}
		nr++;

		/* Fold the contiguous record into the accumulated mapping. */
		irec->br_blockcount += got.br_blockcount;
		xfs_iext_next(ifp, &info->icur);
	}

	/*
	 * If the merged mapping could be expressed with fewer bmbt records
	 * than we actually found, notify the user that this fork could be
	 * optimized. CoW forks only exist in memory so we ignore them.
	 */
	if (nr > 1 && info->whichfork != XFS_COW_FORK &&
	    howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
		xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);

	return true;
}
  791. /*
  792. * Scrub an inode fork's block mappings.
  793. *
  794. * First we scan every record in every btree block, if applicable.
  795. * Then we unconditionally scan the incore extent cache.
  796. */
  797. STATIC int
  798. xchk_bmap(
  799. struct xfs_scrub *sc,
  800. int whichfork)
  801. {
  802. struct xfs_bmbt_irec irec;
  803. struct xchk_bmap_info info = { NULL };
  804. struct xfs_mount *mp = sc->mp;
  805. struct xfs_inode *ip = sc->ip;
  806. struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
  807. xfs_fileoff_t endoff;
  808. int error = 0;
  809. /* Non-existent forks can be ignored. */
  810. if (!ifp)
  811. return -ENOENT;
  812. info.is_rt = xfs_ifork_is_realtime(ip, whichfork);
  813. info.whichfork = whichfork;
  814. info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
  815. info.sc = sc;
  816. switch (whichfork) {
  817. case XFS_COW_FORK:
  818. /* No CoW forks on non-reflink filesystems. */
  819. if (!xfs_has_reflink(mp)) {
  820. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  821. return 0;
  822. }
  823. break;
  824. case XFS_ATTR_FORK:
  825. /*
  826. * "attr" means that an attr fork was created at some point in
  827. * the life of this filesystem. "attr2" means that inodes have
  828. * variable-sized data/attr fork areas. Hence we only check
  829. * attr here.
  830. */
  831. if (!xfs_has_attr(mp))
  832. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  833. break;
  834. default:
  835. ASSERT(whichfork == XFS_DATA_FORK);
  836. break;
  837. }
  838. /* Check the fork values */
  839. switch (ifp->if_format) {
  840. case XFS_DINODE_FMT_UUID:
  841. case XFS_DINODE_FMT_DEV:
  842. case XFS_DINODE_FMT_LOCAL:
  843. /* No mappings to check. */
  844. if (whichfork == XFS_COW_FORK)
  845. xchk_fblock_set_corrupt(sc, whichfork, 0);
  846. return 0;
  847. case XFS_DINODE_FMT_EXTENTS:
  848. break;
  849. case XFS_DINODE_FMT_BTREE:
  850. if (whichfork == XFS_COW_FORK) {
  851. xchk_fblock_set_corrupt(sc, whichfork, 0);
  852. return 0;
  853. }
  854. error = xchk_bmap_btree(sc, whichfork, &info);
  855. if (error)
  856. return error;
  857. break;
  858. default:
  859. xchk_fblock_set_corrupt(sc, whichfork, 0);
  860. return 0;
  861. }
  862. if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
  863. return 0;
  864. /* Find the offset of the last extent in the mapping. */
  865. error = xfs_bmap_last_offset(ip, &endoff, whichfork);
  866. if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
  867. return error;
  868. /*
  869. * Scrub extent records. We use a special iterator function here that
  870. * combines adjacent mappings if they are logically and physically
  871. * contiguous. For large allocations that require multiple bmbt
  872. * records, this reduces the number of cross-referencing calls, which
  873. * reduces runtime. Cross referencing with the rmap is simpler because
  874. * the rmap must match the combined mapping exactly.
  875. */
  876. while (xchk_bmap_iext_iter(&info, &irec)) {
  877. if (xchk_should_terminate(sc, &error) ||
  878. (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
  879. return 0;
  880. if (irec.br_startoff >= endoff) {
  881. xchk_fblock_set_corrupt(sc, whichfork,
  882. irec.br_startoff);
  883. return 0;
  884. }
  885. if (isnullstartblock(irec.br_startblock))
  886. xchk_bmap_iextent_delalloc(ip, &info, &irec);
  887. else
  888. xchk_bmap_iextent(ip, &info, &irec);
  889. memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
  890. }
  891. if (xchk_bmap_want_check_rmaps(&info)) {
  892. error = xchk_bmap_check_rmaps(sc, whichfork);
  893. if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
  894. return error;
  895. }
  896. return 0;
  897. }
  898. /* Scrub an inode's data fork. */
  899. int
  900. xchk_bmap_data(
  901. struct xfs_scrub *sc)
  902. {
  903. int error;
  904. if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) {
  905. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  906. return 0;
  907. }
  908. error = xchk_bmap(sc, XFS_DATA_FORK);
  909. if (error)
  910. return error;
  911. /* If the data fork is clean, it is clearly not zapped. */
  912. xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED);
  913. return 0;
  914. }
  915. /* Scrub an inode's attr fork. */
  916. int
  917. xchk_bmap_attr(
  918. struct xfs_scrub *sc)
  919. {
  920. int error;
  921. /*
  922. * If the attr fork has been zapped, it's possible that forkoff was
  923. * reset to zero and hence sc->ip->i_afp is NULL. We don't want the
  924. * NULL ifp check in xchk_bmap to conclude that the attr fork is ok,
  925. * so short circuit that logic by setting the corruption flag and
  926. * returning immediately.
  927. */
  928. if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) {
  929. xchk_ino_set_corrupt(sc, sc->ip->i_ino);
  930. return 0;
  931. }
  932. error = xchk_bmap(sc, XFS_ATTR_FORK);
  933. if (error)
  934. return error;
  935. /* If the attr fork is clean, it is clearly not zapped. */
  936. xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED);
  937. return 0;
  938. }
  939. /* Scrub an inode's CoW fork. */
  940. int
  941. xchk_bmap_cow(
  942. struct xfs_scrub *sc)
  943. {
  944. return xchk_bmap(sc, XFS_COW_FORK);
  945. }