refcount_repair.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_defer.h"
  13. #include "xfs_btree.h"
  14. #include "xfs_btree_staging.h"
  15. #include "xfs_inode.h"
  16. #include "xfs_bit.h"
  17. #include "xfs_log_format.h"
  18. #include "xfs_trans.h"
  19. #include "xfs_sb.h"
  20. #include "xfs_alloc.h"
  21. #include "xfs_ialloc.h"
  22. #include "xfs_rmap.h"
  23. #include "xfs_rmap_btree.h"
  24. #include "xfs_refcount.h"
  25. #include "xfs_refcount_btree.h"
  26. #include "xfs_error.h"
  27. #include "xfs_ag.h"
  28. #include "xfs_health.h"
  29. #include "scrub/xfs_scrub.h"
  30. #include "scrub/scrub.h"
  31. #include "scrub/common.h"
  32. #include "scrub/btree.h"
  33. #include "scrub/trace.h"
  34. #include "scrub/repair.h"
  35. #include "scrub/bitmap.h"
  36. #include "scrub/agb_bitmap.h"
  37. #include "scrub/xfile.h"
  38. #include "scrub/xfarray.h"
  39. #include "scrub/newbt.h"
  40. #include "scrub/reap.h"
  41. #include "scrub/rcbag.h"
  42. /*
  43. * Rebuilding the Reference Count Btree
  44. * ====================================
  45. *
  46. * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
  47. * entries as rectangles representing extents of physical blocks, and
  48. * that the rectangles can be laid down to allow them to overlap each
  49. * other; then we know that we must emit a refcnt btree entry wherever
  50. * the amount of overlap changes, i.e. the emission stimulus is
  51. * level-triggered:
  52. *
  53. * - ---
  54. * -- ----- ---- --- ------
  55. * -- ---- ----------- ---- ---------
  56. * -------------------------------- -----------
  57. * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
  58. * 2 1 23 21 3 43 234 2123 1 01 2 3 0
  59. *
  60. * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
  61. *
  62. * Note that in the actual refcnt btree we don't store the refcount < 2
  63. * cases because the bnobt tells us which blocks are free; single-use
  64. * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
  65. * supports storing multiple entries covering a given block we could
  66. * theoretically dispense with the refcntbt and simply count rmaps, but
  67. * that's inefficient in the (hot) write path, so we'll take the cost of
  68. * the extra tree to save time. Also there's no guarantee that rmap
  69. * will be enabled.
  70. *
  71. * Given an array of rmaps sorted by physical block number, a starting
  72. * physical block (sp), a bag to hold rmaps that cover sp, and the next
  73. * physical block where the level changes (np), we can reconstruct the
  74. * refcount btree as follows:
  75. *
  76. * While there are still unprocessed rmaps in the array,
  77. * - Set sp to the physical block (pblk) of the next unprocessed rmap.
  78. * - Add to the bag all rmaps in the array where startblock == sp.
  79. * - Set np to the physical block where the bag size will change. This
  80. * is the minimum of (the pblk of the next unprocessed rmap) and
  81. * (startblock + len of each rmap in the bag).
  82. * - Record the bag size as old_bag_size.
  83. *
  84. * - While the bag isn't empty,
  85. * - Remove from the bag all rmaps where startblock + len == np.
  86. * - Add to the bag all rmaps in the array where startblock == np.
  87. * - If the bag size isn't old_bag_size, store the refcount entry
  88. * (sp, np - sp, bag_size) in the refcnt btree.
  89. * - If the bag is empty, break out of the inner loop.
  90. * - Set old_bag_size to the bag size
  91. * - Set sp = np.
  92. * - Set np to the physical block where the bag size will change.
  93. * This is the minimum of (the pblk of the next unprocessed rmap)
  94. * and (startblock + len of each rmap in the bag).
  95. *
  96. * Like all the other repairers, we make a list of all the refcount
  97. * records we need, then reinitialize the refcount btree root and
  98. * insert all the records.
  99. */
/* In-memory state for one refcount btree repair pass. */
struct xrep_refc {
	/* refcount extents staged for the new btree */
	struct xfarray		*refcount_records;

	/* new refcountbt information */
	struct xrep_newbt	new_btree;

	/* old refcountbt blocks, to be reaped after the new tree is live */
	struct xagb_bitmap	old_refcountbt_blocks;

	/* scrub context that owns this repair */
	struct xfs_scrub	*sc;

	/* get_records()'s position in the refcount record array. */
	xfarray_idx_t		array_cur;

	/* # of refcountbt blocks */
	xfs_extlen_t		btblocks;
};
  113. /* Set us up to repair refcount btrees. */
  114. int
  115. xrep_setup_ag_refcountbt(
  116. struct xfs_scrub *sc)
  117. {
  118. char *descr;
  119. int error;
  120. descr = xchk_xfile_ag_descr(sc, "rmap record bag");
  121. error = xrep_setup_xfbtree(sc, descr);
  122. kfree(descr);
  123. return error;
  124. }
  125. /* Check for any obvious conflicts with this shared/CoW staging extent. */
  126. STATIC int
  127. xrep_refc_check_ext(
  128. struct xfs_scrub *sc,
  129. const struct xfs_refcount_irec *rec)
  130. {
  131. enum xbtree_recpacking outcome;
  132. int error;
  133. if (xfs_refcount_check_irec(sc->sa.pag, rec) != NULL)
  134. return -EFSCORRUPTED;
  135. /* Make sure this isn't free space. */
  136. error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rc_startblock,
  137. rec->rc_blockcount, &outcome);
  138. if (error)
  139. return error;
  140. if (outcome != XBTREE_RECPACKING_EMPTY)
  141. return -EFSCORRUPTED;
  142. /* Must not be an inode chunk. */
  143. error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
  144. rec->rc_startblock, rec->rc_blockcount, &outcome);
  145. if (error)
  146. return error;
  147. if (outcome != XBTREE_RECPACKING_EMPTY)
  148. return -EFSCORRUPTED;
  149. return 0;
  150. }
  151. /* Record a reference count extent. */
  152. STATIC int
  153. xrep_refc_stash(
  154. struct xrep_refc *rr,
  155. enum xfs_refc_domain domain,
  156. xfs_agblock_t agbno,
  157. xfs_extlen_t len,
  158. uint64_t refcount)
  159. {
  160. struct xfs_refcount_irec irec = {
  161. .rc_startblock = agbno,
  162. .rc_blockcount = len,
  163. .rc_domain = domain,
  164. };
  165. struct xfs_scrub *sc = rr->sc;
  166. int error = 0;
  167. if (xchk_should_terminate(sc, &error))
  168. return error;
  169. irec.rc_refcount = min_t(uint64_t, MAXREFCOUNT, refcount);
  170. error = xrep_refc_check_ext(rr->sc, &irec);
  171. if (error)
  172. return error;
  173. trace_xrep_refc_found(sc->sa.pag, &irec);
  174. return xfarray_append(rr->refcount_records, &irec);
  175. }
/*
 * Record a CoW staging extent.  CoW staging extents always have a
 * reference count of 1 and live in the CoW record domain.
 */
STATIC int
xrep_refc_stash_cow(
	struct xrep_refc	*rr,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	return xrep_refc_stash(rr, XFS_REFC_DOMAIN_COW, agbno, len, 1);
}
  185. /* Decide if an rmap could describe a shared extent. */
  186. static inline bool
  187. xrep_refc_rmap_shareable(
  188. struct xfs_mount *mp,
  189. const struct xfs_rmap_irec *rmap)
  190. {
  191. /* AG metadata are never sharable */
  192. if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
  193. return false;
  194. /* Metadata in files are never shareable */
  195. if (xfs_internal_inum(mp, rmap->rm_owner))
  196. return false;
  197. /* Metadata and unwritten file blocks are not shareable. */
  198. if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
  199. XFS_RMAP_UNWRITTEN))
  200. return false;
  201. return true;
  202. }
/*
 * Walk along the reverse mapping records until we find one that could describe
 * a shared extent.  On success, *have_rec is true and @rmap holds the record;
 * *have_rec stays false if the cursor runs off the end of the rmapbt.
 */
STATIC int
xrep_refc_walk_rmaps(
	struct xrep_refc	*rr,
	struct xfs_rmap_irec	*rmap,
	bool			*have_rec)
{
	struct xfs_btree_cur	*cur = rr->sc->sa.rmap_cur;
	struct xfs_mount	*mp = cur->bc_mp;
	int			have_gt;
	int			error = 0;

	*have_rec = false;

	/*
	 * Loop through the remaining rmaps.  Remember CoW staging
	 * extents and the refcountbt blocks from the old tree for later
	 * disposal.  We can only share written data fork extents, so
	 * keep looping until we find an rmap for one.
	 */
	do {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		/* Step to the next rmapbt record, if there is one. */
		error = xfs_btree_increment(cur, 0, &have_gt);
		if (error)
			return error;
		if (!have_gt)
			return 0;

		error = xfs_rmap_get_rec(cur, rmap, &have_gt);
		if (error)
			return error;
		if (XFS_IS_CORRUPT(mp, !have_gt)) {
			xfs_btree_mark_sick(cur);
			return -EFSCORRUPTED;
		}

		if (rmap->rm_owner == XFS_RMAP_OWN_COW) {
			/* Stash CoW staging extents (refcount 1) for later. */
			error = xrep_refc_stash_cow(rr, rmap->rm_startblock,
					rmap->rm_blockcount);
			if (error)
				return error;
		} else if (rmap->rm_owner == XFS_RMAP_OWN_REFC) {
			/* refcountbt block, dump it when we're done. */
			rr->btblocks += rmap->rm_blockcount;
			error = xagb_bitmap_set(&rr->old_refcountbt_blocks,
					rmap->rm_startblock,
					rmap->rm_blockcount);
			if (error)
				return error;
		}
	} while (!xrep_refc_rmap_shareable(mp, rmap));

	*have_rec = true;
	return 0;
}
  257. static inline uint32_t
  258. xrep_refc_encode_startblock(
  259. const struct xfs_refcount_irec *irec)
  260. {
  261. uint32_t start;
  262. start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
  263. if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
  264. start |= XFS_REFC_COWFLAG;
  265. return start;
  266. }
  267. /* Sort in the same order as the ondisk records. */
  268. static int
  269. xrep_refc_extent_cmp(
  270. const void *a,
  271. const void *b)
  272. {
  273. const struct xfs_refcount_irec *ap = a;
  274. const struct xfs_refcount_irec *bp = b;
  275. uint32_t sa, sb;
  276. sa = xrep_refc_encode_startblock(ap);
  277. sb = xrep_refc_encode_startblock(bp);
  278. if (sa > sb)
  279. return 1;
  280. if (sa < sb)
  281. return -1;
  282. return 0;
  283. }
/*
 * Sort the refcount extents by startblock or else the btree records will be in
 * the wrong order.  Make sure the records do not overlap in physical space.
 */
STATIC int
xrep_refc_sort_records(
	struct xrep_refc		*rr)
{
	struct xfs_refcount_irec	irec;
	xfarray_idx_t			cur;
	enum xfs_refc_domain		dom = XFS_REFC_DOMAIN_SHARED;
	xfs_agblock_t			next_agbno = 0;
	int				error;

	error = xfarray_sort(rr->refcount_records, xrep_refc_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	/*
	 * A correctly sorted array is a run of shared-domain records followed
	 * by a run of CoW-domain records, each run in increasing agbno order
	 * with no overlaps.  Walk the array to verify that invariant.
	 */
	foreach_xfarray_idx(rr->refcount_records, cur) {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfarray_load(rr->refcount_records, cur, &irec);
		if (error)
			return error;

		/* First CoW record: switch domains and restart the agbno scan. */
		if (dom == XFS_REFC_DOMAIN_SHARED &&
		    irec.rc_domain == XFS_REFC_DOMAIN_COW) {
			dom = irec.rc_domain;
			next_agbno = 0;
		}

		/* Domains may only transition shared -> CoW, never back. */
		if (dom != irec.rc_domain)
			return -EFSCORRUPTED;
		/* Records within a domain must not overlap. */
		if (irec.rc_startblock < next_agbno)
			return -EFSCORRUPTED;

		next_agbno = irec.rc_startblock + irec.rc_blockcount;
	}

	return error;
}
/*
 * Walk forward through the rmap btree to collect all rmaps starting at
 * @bno in @rmap_bag.  These represent the file(s) that share ownership of
 * the current block.  Upon return, the rmap cursor points to the last record
 * satisfying the startblock constraint.
 */
static int
xrep_refc_push_rmaps_at(
	struct xrep_refc	*rr,
	struct rcbag		*rcstack,
	xfs_agblock_t		bno,
	struct xfs_rmap_irec	*rmap,
	bool			*have)
{
	struct xfs_scrub	*sc = rr->sc;
	int			have_gt;
	int			error;

	/* Bag every shareable rmap whose extent starts exactly at @bno. */
	while (*have && rmap->rm_startblock == bno) {
		error = rcbag_add(rcstack, rr->sc->tp, rmap);
		if (error)
			return error;

		error = xrep_refc_walk_rmaps(rr, rmap, have);
		if (error)
			return error;
	}

	/*
	 * The walk above advanced the cursor past the last matching record;
	 * back it up one so the next walk resumes in the right place.
	 */
	error = xfs_btree_decrement(sc->sa.rmap_cur, 0, &have_gt);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
		xfs_btree_mark_sick(sc->sa.rmap_cur);
		return -EFSCORRUPTED;
	}

	return 0;
}
/*
 * Iterate all the rmap records to generate reference count data.  This is
 * the sweep described in the big comment at the top of the file: rmaps are
 * pushed into a bag as their extents begin and popped as they end, and a
 * refcount record is emitted every time the bag size changes while > 1.
 */
STATIC int
xrep_refc_find_refcounts(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct rcbag		*rcstack;
	uint64_t		old_stack_height;
	xfs_agblock_t		sbno;	/* start of the current sweep segment */
	xfs_agblock_t		cbno;	/* agbno where the current refcount began */
	xfs_agblock_t		nbno;	/* next agbno where the refcount changes */
	bool			have;
	int			error;

	xrep_ag_btcur_init(sc, &sc->sa);

	/*
	 * Set up a bag to store all the rmap records that we're tracking to
	 * generate a reference count record.  If the size of the bag exceeds
	 * MAXREFCOUNT, we clamp rc_refcount.
	 */
	error = rcbag_init(sc->mp, sc->xmbtp, &rcstack);
	if (error)
		goto out_cur;

	/* Start the rmapbt cursor to the left of all records. */
	error = xfs_btree_goto_left_edge(sc->sa.rmap_cur);
	if (error)
		goto out_bag;

	/* Process reverse mappings into refcount data. */
	while (xfs_btree_has_more_records(sc->sa.rmap_cur)) {
		struct xfs_rmap_irec	rmap;

		/* Push all rmaps with pblk == sbno onto the stack */
		error = xrep_refc_walk_rmaps(rr, &rmap, &have);
		if (error)
			goto out_bag;
		if (!have)
			break;
		sbno = cbno = rmap.rm_startblock;
		error = xrep_refc_push_rmaps_at(rr, rcstack, sbno, &rmap,
				&have);
		if (error)
			goto out_bag;

		/* Set nbno to the bno of the next refcount change */
		error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, &nbno);
		if (error)
			goto out_bag;

		ASSERT(nbno > sbno);
		old_stack_height = rcbag_count(rcstack);

		/* While stack isn't empty... */
		while (rcbag_count(rcstack) > 0) {
			/* Pop all rmaps that end at nbno */
			error = rcbag_remove_ending_at(rcstack, sc->tp, nbno);
			if (error)
				goto out_bag;

			/* Push array items that start at nbno */
			error = xrep_refc_walk_rmaps(rr, &rmap, &have);
			if (error)
				goto out_bag;
			if (have) {
				error = xrep_refc_push_rmaps_at(rr, rcstack,
						nbno, &rmap, &have);
				if (error)
					goto out_bag;
			}

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (rcbag_count(rcstack) != old_stack_height) {
				/*
				 * Single-reference spans are not stored in
				 * the refcount btree; only emit records for
				 * genuinely shared (refcount > 1) spans.
				 */
				if (old_stack_height > 1) {
					error = xrep_refc_stash(rr,
							XFS_REFC_DOMAIN_SHARED,
							cbno, nbno - cbno,
							old_stack_height);
					if (error)
						goto out_bag;
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (rcbag_count(rcstack) == 0)
				break;
			old_stack_height = rcbag_count(rcstack);
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			error = rcbag_next_edge(rcstack, sc->tp, &rmap, have,
					&nbno);
			if (error)
				goto out_bag;

			ASSERT(nbno > sbno);
		}
	}

	ASSERT(rcbag_count(rcstack) == 0);
out_bag:
	rcbag_free(&rcstack);
out_cur:
	xchk_ag_btcur_free(&sc->sa);
	return error;
}
/*
 * Retrieve refcountbt data for bulk load.  Bulk-loader callback: copy up to
 * @nr_wanted staged records from the xfarray into @block, starting at record
 * slot @idx.  Returns the number of records loaded or a negative errno.
 */
STATIC int
xrep_refc_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_refcount_irec	*irec = &cur->bc_rec.rc;
	struct xrep_refc		*rr = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		/* Pull the next staged record into the cursor's record area. */
		error = xfarray_load(rr->refcount_records, rr->array_cur++,
				irec);
		if (error)
			return error;

		/* Format it into the ondisk block being constructed. */
		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
/*
 * Feed one of the new btree blocks to the bulk loader.  Bulk-loader callback
 * that hands out the next block from the space reserved in @rr->new_btree.
 */
STATIC int
xrep_refc_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_refc	*rr = priv;

	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
}
/* Update the AGF counters after committing the new refcount btree. */
STATIC int
xrep_refc_reset_counters(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem.  If the new
	 * tree is shorter than the old one, the refcountbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the updated
	 * AGF to capture all the new values.
	 */
	pag->pagf_repair_refcount_level = pag->pagf_refcount_level;

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}
/*
 * Use the collected refcount information to stage a new refcount btree.  If
 * this is successful we'll return with the new btree root information logged
 * to the repair transaction but not yet committed.
 */
STATIC int
xrep_refc_build_new_tree(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*refc_cur;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_fsblock_t		fsbno;
	int			error;

	/* Records must be in ondisk order before they can be bulk loaded. */
	error = xrep_refc_sort_records(rr);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, xfs_refc_block(sc->mp));
	xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_REFC, fsbno,
			XFS_AG_RESV_METADATA);
	rr->new_btree.bload.get_records = xrep_refc_get_records;
	rr->new_btree.bload.claim_block = xrep_refc_claim_block;

	/* Compute how many blocks we'll need. */
	refc_cur = xfs_refcountbt_init_cursor(sc->mp, NULL, NULL, pag);
	xfs_btree_stage_afakeroot(refc_cur, &rr->new_btree.afake);
	error = xfs_btree_bload_compute_geometry(refc_cur,
			&rr->new_btree.bload,
			xfarray_length(rr->refcount_records));
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree.  Update the incore btree height so
	 * that we don't trip the verifiers when writing the new btree blocks
	 * to disk.
	 */
	pag->pagf_repair_refcount_level = rr->new_btree.bload.btree_height;

	/* Add all observed refcount records. */
	rr->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_level;

	/*
	 * Install the new btree in the AG header.  After this point the old
	 * btree is no longer accessible and the new tree is live.
	 */
	xfs_refcountbt_commit_staged_btree(refc_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(refc_cur, 0);

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_refc_reset_counters(rr);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;
	return xrep_roll_ag_trans(sc);

err_level:
	/* The new tree never went live; drop the incore height override. */
	pag->pagf_repair_refcount_level = 0;
err_cur:
	xfs_btree_del_cursor(refc_cur, error);
err_newbt:
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}
/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_refc_remove_old_tree(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	int			error;

	/* Free the old refcountbt blocks if they're not in use. */
	error = xrep_reap_agblocks(sc, &rr->old_refcountbt_blocks,
			&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
	if (error)
		return error;

	/*
	 * Now that we've zapped all the old refcountbt blocks we can turn off
	 * the alternate height mechanism and reset the per-AG space
	 * reservations.
	 */
	pag->pagf_repair_refcount_level = 0;
	sc->flags |= XREP_RESET_PERAG_RESV;
	return 0;
}
  609. /* Rebuild the refcount btree. */
  610. int
  611. xrep_refcountbt(
  612. struct xfs_scrub *sc)
  613. {
  614. struct xrep_refc *rr;
  615. struct xfs_mount *mp = sc->mp;
  616. char *descr;
  617. int error;
  618. /* We require the rmapbt to rebuild anything. */
  619. if (!xfs_has_rmapbt(mp))
  620. return -EOPNOTSUPP;
  621. rr = kzalloc(sizeof(struct xrep_refc), XCHK_GFP_FLAGS);
  622. if (!rr)
  623. return -ENOMEM;
  624. rr->sc = sc;
  625. /* Set up enough storage to handle one refcount record per block. */
  626. descr = xchk_xfile_ag_descr(sc, "reference count records");
  627. error = xfarray_create(descr, mp->m_sb.sb_agblocks,
  628. sizeof(struct xfs_refcount_irec),
  629. &rr->refcount_records);
  630. kfree(descr);
  631. if (error)
  632. goto out_rr;
  633. /* Collect all reference counts. */
  634. xagb_bitmap_init(&rr->old_refcountbt_blocks);
  635. error = xrep_refc_find_refcounts(rr);
  636. if (error)
  637. goto out_bitmap;
  638. /* Rebuild the refcount information. */
  639. error = xrep_refc_build_new_tree(rr);
  640. if (error)
  641. goto out_bitmap;
  642. /* Kill the old tree. */
  643. error = xrep_refc_remove_old_tree(rr);
  644. if (error)
  645. goto out_bitmap;
  646. out_bitmap:
  647. xagb_bitmap_destroy(&rr->old_refcountbt_blocks);
  648. xfarray_destroy(rr->refcount_records);
  649. out_rr:
  650. kfree(rr);
  651. return error;
  652. }