  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_inode.h"
  13. #include "xfs_btree.h"
  14. #include "scrub/scrub.h"
  15. #include "scrub/common.h"
  16. #include "scrub/btree.h"
  17. #include "scrub/trace.h"
  18. /* btree scrubbing */
/*
 * Check for btree operation errors.  See the section about handling
 * operational errors in common.c.
 *
 * Returns true if the caller may keep going; false if the scrub of this
 * btree should stop.  On -EFSBADCRC/-EFSCORRUPTED the corruption is
 * recorded in @errflag and *error is cleared so the scrub can continue.
 */
static bool
__xchk_btree_process_error(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level,	/* btree level where the error occurred */
	int			*error,	/* in/out: errno; cleared for corruption */
	__u32			errflag,	/* XFS_SCRUB_OFLAG_* bit to record */
	void			*ret_ip)	/* caller address for tracepoints */
{
	if (*error == 0)
		return true;

	switch (*error) {
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;	/* corruption is recorded, not returned */
		fallthrough;
	default:
		/* Inode-rooted btrees have their own tracepoint. */
		if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
			trace_xchk_ifork_btree_op_error(sc, cur, level,
					*error, ret_ip);
		else
			trace_xchk_btree_op_error(sc, cur, level,
					*error, ret_ip);
		break;
	}
	return false;
}
/*
 * Process an operational error found while scrubbing a btree directly;
 * corruption is recorded as XFS_SCRUB_OFLAG_CORRUPT.  Returns true if
 * the caller may keep going.
 */
bool
xchk_btree_process_error(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level,
	int			*error)
{
	return __xchk_btree_process_error(sc, cur, level, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
/*
 * Process an operational error found while cross-referencing with another
 * btree; the failure is recorded as XFS_SCRUB_OFLAG_XFAIL.  Returns true
 * if the caller may keep going.
 */
bool
xchk_btree_xref_process_error(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level,
	int			*error)
{
	return __xchk_btree_process_error(sc, cur, level, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
/* Record btree block corruption. */
static void
__xchk_btree_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level,	/* btree level where the problem was found */
	__u32			errflag,	/* XFS_SCRUB_OFLAG_* bit to set */
	void			*ret_ip)	/* caller address for tracepoints */
{
	sc->sm->sm_flags |= errflag;

	/* Inode-rooted btrees have their own tracepoint. */
	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
		trace_xchk_ifork_btree_error(sc, cur, level,
				ret_ip);
	else
		trace_xchk_btree_error(sc, cur, level,
				ret_ip);
}
/* Record corruption found while scrubbing this btree directly. */
void
xchk_btree_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level)
{
	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
			__return_address);
}
/* Record corruption found while cross-referencing with another btree. */
void
xchk_btree_xref_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level)
{
	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
			__return_address);
}
/*
 * Note a btree that isn't corrupt but could be optimized; this sets the
 * "preen" flag rather than a corruption flag.
 */
void
xchk_btree_set_preen(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level)
{
	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
			__return_address);
}
/*
 * Make sure this record is in order and doesn't stray outside of the parent
 * keys.
 */
STATIC void
xchk_btree_rec(
	struct xchk_btree	*bs)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_rec	*rec;
	union xfs_btree_key	key;
	union xfs_btree_key	hkey;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	/* Grab the record the level-0 cursor currently points at. */
	block = xfs_btree_get_block(cur, 0, &bp);
	rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);

	trace_xchk_btree_rec(bs->sc, cur, 0);

	/* Are all records across all record blocks in order? */
	if (bs->lastrec_valid &&
	    !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
		xchk_btree_set_corrupt(bs->sc, cur, 0);
	/* Remember this record for the next ordering comparison. */
	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
	bs->lastrec_valid = true;

	/* A single-level btree has no parent keys to check against. */
	if (cur->bc_nlevels == 1)
		return;

	/* Is low_key(rec) at least as large as the parent low key? */
	cur->bc_ops->init_key_from_rec(&key, rec);
	keyblock = xfs_btree_get_block(cur, 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
	if (xfs_btree_keycmp_lt(cur, &key, keyp))
		xchk_btree_set_corrupt(bs->sc, cur, 1);

	/* Only overlapping btrees store high keys in parent blocks. */
	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
		return;

	/* Is high_key(rec) no larger than the parent high key? */
	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
	if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
		xchk_btree_set_corrupt(bs->sc, cur, 1);
}
/*
 * Make sure this key is in order and doesn't stray outside of the parent
 * keys.
 */
STATIC void
xchk_btree_key(
	struct xchk_btree	*bs,
	int			level)	/* node level being checked; level >= 1 */
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*key;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	/* Grab the key the cursor currently points at on this level. */
	block = xfs_btree_get_block(cur, level, &bp);
	key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);

	trace_xchk_btree_key(bs->sc, cur, level);

	/* Are all low keys across all node blocks in order? */
	if (bs->lastkey[level - 1].valid &&
	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
		xchk_btree_set_corrupt(bs->sc, cur, level);
	/* Remember this key for the next ordering comparison. */
	memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
	bs->lastkey[level - 1].valid = true;

	/* The root level has no parent to compare against. */
	if (level + 1 >= cur->bc_nlevels)
		return;

	/* Is this block's low key at least as large as the parent low key? */
	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
	if (xfs_btree_keycmp_lt(cur, key, keyp))
		xchk_btree_set_corrupt(bs->sc, cur, level);

	/* Only overlapping btrees store high keys in parent blocks. */
	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
		return;

	/* Is this block's high key no larger than the parent high key? */
	key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
			keyblock);
	if (xfs_btree_keycmp_lt(cur, keyp, key))
		xchk_btree_set_corrupt(bs->sc, cur, level);
}
  202. /*
  203. * Check a btree pointer. Returns true if it's ok to use this pointer.
  204. * Callers do not need to set the corrupt flag.
  205. */
  206. static bool
  207. xchk_btree_ptr_ok(
  208. struct xchk_btree *bs,
  209. int level,
  210. union xfs_btree_ptr *ptr)
  211. {
  212. /* A btree rooted in an inode has no block pointer to the root. */
  213. if (bs->cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
  214. level == bs->cur->bc_nlevels)
  215. return true;
  216. /* Otherwise, check the pointers. */
  217. if (__xfs_btree_check_ptr(bs->cur, ptr, 0, level)) {
  218. xchk_btree_set_corrupt(bs->sc, bs->cur, level);
  219. return false;
  220. }
  221. return true;
  222. }
/*
 * Check that a btree block's sibling matches what we expect it.
 *
 * Duplicates the scrub cursor, moves the copy one slot left or right at
 * the parent level, and compares the parent's keyptr there against the
 * block's on-disk sibling pointer.  Mismatches are recorded as corruption.
 */
STATIC int
xchk_btree_block_check_sibling(
	struct xchk_btree	*bs,
	int			level,		/* level of the block being checked */
	int			direction,	/* >0 = right sibling, else left */
	union xfs_btree_ptr	*sibling)	/* on-disk sibling pointer */
{
	struct xfs_btree_cur	*cur = bs->cur;
	struct xfs_btree_block	*pblock;
	struct xfs_buf		*pbp;
	struct xfs_btree_cur	*ncur = NULL;
	union xfs_btree_ptr	*pp;
	int			success;
	int			error;

	/* Duplicate the cursor so we can move it without losing our place. */
	error = xfs_btree_dup_cursor(cur, &ncur);
	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
	    !ncur)
		return error;

	/*
	 * If the pointer is null, we shouldn't be able to move the upper
	 * level pointer anywhere.
	 */
	if (xfs_btree_ptr_is_null(cur, sibling)) {
		if (direction > 0)
			error = xfs_btree_increment(ncur, level + 1, &success);
		else
			error = xfs_btree_decrement(ncur, level + 1, &success);
		if (error == 0 && success)
			xchk_btree_set_corrupt(bs->sc, cur, level);
		/* A movement failure here just confirms the null sibling. */
		error = 0;
		goto out;
	}

	/* Increment upper level pointer. */
	if (direction > 0)
		error = xfs_btree_increment(ncur, level + 1, &success);
	else
		error = xfs_btree_decrement(ncur, level + 1, &success);
	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
		goto out;
	if (!success) {
		/* Sibling is non-null but the parent has no neighbor. */
		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
		goto out;
	}

	/* Compare upper level pointer to sibling pointer. */
	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
	pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
		goto out;
	if (pbp)
		xchk_buffer_recheck(bs->sc, pbp);

	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
		xchk_btree_set_corrupt(bs->sc, cur, level);
out:
	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
	return error;
}
  280. /* Check the siblings of a btree block. */
  281. STATIC int
  282. xchk_btree_block_check_siblings(
  283. struct xchk_btree *bs,
  284. struct xfs_btree_block *block)
  285. {
  286. struct xfs_btree_cur *cur = bs->cur;
  287. union xfs_btree_ptr leftsib;
  288. union xfs_btree_ptr rightsib;
  289. int level;
  290. int error = 0;
  291. xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
  292. xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
  293. level = xfs_btree_get_level(block);
  294. /* Root block should never have siblings. */
  295. if (level == cur->bc_nlevels - 1) {
  296. if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
  297. !xfs_btree_ptr_is_null(cur, &rightsib))
  298. xchk_btree_set_corrupt(bs->sc, cur, level);
  299. goto out;
  300. }
  301. /*
  302. * Does the left & right sibling pointers match the adjacent
  303. * parent level pointers?
  304. * (These function absorbs error codes for us.)
  305. */
  306. error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
  307. if (error)
  308. return error;
  309. error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
  310. if (error)
  311. return error;
  312. out:
  313. return error;
  314. }
/*
 * Deferred btree block owner check: remembers a block's disk address so the
 * bnobt/rmapbt cross-reference can run after the main btree walk finishes.
 */
struct check_owner {
	struct list_head	list;	/* link in xchk_btree to_check list */
	xfs_daddr_t		daddr;	/* disk address of the btree block */
	int			level;	/* btree level the block was found at */
};
  320. /*
  321. * Make sure this btree block isn't in the free list and that there's
  322. * an rmap record for it.
  323. */
  324. STATIC int
  325. xchk_btree_check_block_owner(
  326. struct xchk_btree *bs,
  327. int level,
  328. xfs_daddr_t daddr)
  329. {
  330. xfs_agnumber_t agno;
  331. xfs_agblock_t agbno;
  332. bool init_sa;
  333. int error = 0;
  334. if (!bs->cur)
  335. return 0;
  336. agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
  337. agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
  338. /*
  339. * If the btree being examined is not itself a per-AG btree, initialize
  340. * sc->sa so that we can check for the presence of an ownership record
  341. * in the rmap btree for the AG containing the block.
  342. */
  343. init_sa = bs->cur->bc_ops->type != XFS_BTREE_TYPE_AG;
  344. if (init_sa) {
  345. error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
  346. if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
  347. level, &error))
  348. goto out_free;
  349. }
  350. xchk_xref_is_used_space(bs->sc, agbno, 1);
  351. /*
  352. * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
  353. * have to nullify it (to shut down further block owner checks) if
  354. * self-xref encounters problems.
  355. */
  356. if (!bs->sc->sa.bno_cur && xfs_btree_is_bno(bs->cur->bc_ops))
  357. bs->cur = NULL;
  358. xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
  359. if (!bs->sc->sa.rmap_cur && xfs_btree_is_rmap(bs->cur->bc_ops))
  360. bs->cur = NULL;
  361. out_free:
  362. if (init_sa)
  363. xchk_ag_free(bs->sc, &bs->sc->sa);
  364. return error;
  365. }
  366. /* Check the owner of a btree block. */
  367. STATIC int
  368. xchk_btree_check_owner(
  369. struct xchk_btree *bs,
  370. int level,
  371. struct xfs_buf *bp)
  372. {
  373. struct xfs_btree_cur *cur = bs->cur;
  374. /*
  375. * In theory, xfs_btree_get_block should only give us a null buffer
  376. * pointer for the root of a root-in-inode btree type, but we need
  377. * to check defensively here in case the cursor state is also screwed
  378. * up.
  379. */
  380. if (bp == NULL) {
  381. if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE)
  382. xchk_btree_set_corrupt(bs->sc, bs->cur, level);
  383. return 0;
  384. }
  385. /*
  386. * We want to cross-reference each btree block with the bnobt
  387. * and the rmapbt. We cannot cross-reference the bnobt or
  388. * rmapbt while scanning the bnobt or rmapbt, respectively,
  389. * because we cannot alter the cursor and we'd prefer not to
  390. * duplicate cursors. Therefore, save the buffer daddr for
  391. * later scanning.
  392. */
  393. if (xfs_btree_is_bno(cur->bc_ops) || xfs_btree_is_rmap(cur->bc_ops)) {
  394. struct check_owner *co;
  395. co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
  396. if (!co)
  397. return -ENOMEM;
  398. INIT_LIST_HEAD(&co->list);
  399. co->level = level;
  400. co->daddr = xfs_buf_daddr(bp);
  401. list_add_tail(&co->list, &bs->to_check);
  402. return 0;
  403. }
  404. return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
  405. }
  406. /* Decide if we want to check minrecs of a btree block in the inode root. */
  407. static inline bool
  408. xchk_btree_check_iroot_minrecs(
  409. struct xchk_btree *bs)
  410. {
  411. /*
  412. * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
  413. * would miscalculate the space required for the data fork bmbt root
  414. * when adding an attr fork, and promote the iroot contents to an
  415. * external block unnecessarily. This went unnoticed for many years
  416. * until scrub found filesystems in this state. Inode rooted btrees are
  417. * not supposed to have immediate child blocks that are small enough
  418. * that the contents could fit in the inode root, but we can't fail
  419. * existing filesystems, so instead we disable the check for data fork
  420. * bmap btrees when there's an attr fork.
  421. */
  422. if (xfs_btree_is_bmap(bs->cur->bc_ops) &&
  423. bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
  424. xfs_inode_has_attr_fork(bs->sc->ip))
  425. return false;
  426. return true;
  427. }
/*
 * Check that this btree block has at least minrecs records or is one of the
 * special blocks that don't require that.
 */
STATIC void
xchk_btree_check_minrecs(
	struct xchk_btree	*bs,
	int			level,	/* level of @block in the btree */
	struct xfs_btree_block	*block)
{
	struct xfs_btree_cur	*cur = bs->cur;
	unsigned int		root_level = cur->bc_nlevels - 1;
	unsigned int		numrecs = be16_to_cpu(block->bb_numrecs);

	/* More records than minrecs means the block is ok. */
	if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
		return;

	/*
	 * For btrees rooted in the inode, it's possible that the root block
	 * contents spilled into a regular ondisk block because there wasn't
	 * enough space in the inode root.  The number of records in that
	 * child block might be less than the standard minrecs, but that's ok
	 * provided that there's only one direct child of the root.
	 */
	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
	    level == cur->bc_nlevels - 2) {
		struct xfs_btree_block	*root_block;
		struct xfs_buf		*root_bp;
		int			root_maxrecs;

		root_block = xfs_btree_get_block(cur, root_level, &root_bp);
		root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
		/*
		 * The sole-child exemption only applies when the records
		 * could not have fit in the inode root in the first place.
		 */
		if (xchk_btree_check_iroot_minrecs(bs) &&
		    (be16_to_cpu(root_block->bb_numrecs) != 1 ||
		     numrecs <= root_maxrecs))
			xchk_btree_set_corrupt(bs->sc, cur, level);
		return;
	}

	/*
	 * Otherwise, only the root level is allowed to have fewer than minrecs
	 * records or keyptrs.
	 */
	if (level < root_level)
		xchk_btree_set_corrupt(bs->sc, cur, level);
}
/*
 * If this btree block has a parent, make sure that the parent's keys capture
 * the keyspace contained in this block.
 */
STATIC void
xchk_btree_block_check_keys(
	struct xchk_btree	*bs,
	int			level,	/* level of @block in the btree */
	struct xfs_btree_block	*block)
{
	union xfs_btree_key	block_key;
	union xfs_btree_key	*block_high_key;
	union xfs_btree_key	*parent_low_key, *parent_high_key;
	struct xfs_btree_cur	*cur = bs->cur;
	struct xfs_btree_block	*parent_block;
	struct xfs_buf		*bp;

	/* The root block has no parent to compare against. */
	if (level == cur->bc_nlevels - 1)
		return;

	xfs_btree_get_keys(cur, block, &block_key);

	/* Make sure the low key of this block matches the parent. */
	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
	parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
			parent_block);
	if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
		return;
	}

	/* Only overlapping btrees store high keys in parent blocks. */
	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
		return;

	/* Make sure the high key of this block matches the parent. */
	parent_high_key = xfs_btree_high_key_addr(cur,
			cur->bc_levels[level + 1].ptr, parent_block);
	block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
	if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
}
/*
 * Grab and scrub a btree block given a btree pointer.  Returns block
 * and buffer pointers (if applicable) if they're ok to use.
 *
 * Returns 0 even if the block is corrupt (the corruption is recorded in
 * sc->sm); a nonzero return means the walk itself must stop.
 */
STATIC int
xchk_btree_get_block(
	struct xchk_btree	*bs,
	int			level,	/* level to read the block into */
	union xfs_btree_ptr	*pp,	/* pointer to the block to read */
	struct xfs_btree_block	**pblock,	/* out: block header */
	struct xfs_buf		**pbp)	/* out: buffer, NULL for iroot */
{
	int			error;

	*pblock = NULL;
	*pbp = NULL;

	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
	    !*pblock)
		return error;

	xfs_btree_get_block(bs->cur, level, pbp);
	if (__xfs_btree_check_block(bs->cur, *pblock, level, *pbp)) {
		/* Bad block header: record it and skip deeper checks. */
		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
		return 0;
	}
	if (*pbp)
		xchk_buffer_recheck(bs->sc, *pbp);

	xchk_btree_check_minrecs(bs, level, *pblock);

	/*
	 * Check the block's owner; this function absorbs error codes
	 * for us.
	 */
	error = xchk_btree_check_owner(bs, level, *pbp);
	if (error)
		return error;

	/*
	 * Check the block's siblings; this function absorbs error codes
	 * for us.
	 */
	error = xchk_btree_block_check_siblings(bs, *pblock);
	if (error)
		return error;

	xchk_btree_block_check_keys(bs, level, *pblock);
	return 0;
}
/*
 * Check that the low and high keys of this block match the keys stored
 * in the parent block.
 */
STATIC void
xchk_btree_block_keys(
	struct xchk_btree	*bs,
	int			level,	/* level of @block in the btree */
	struct xfs_btree_block	*block)
{
	union xfs_btree_key	block_keys;
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*high_bk;
	union xfs_btree_key	*parent_keys;
	union xfs_btree_key	*high_pk;
	struct xfs_btree_block	*parent_block;
	struct xfs_buf		*bp;

	/* The root has no parent block. */
	if (level >= cur->bc_nlevels - 1)
		return;

	/* Calculate the keys for this block. */
	xfs_btree_get_keys(cur, block, &block_keys);

	/* Obtain the parent's copy of the keys for this block. */
	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
	parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
			parent_block);

	/*
	 * NOTE(review): corruption is reported at level 1 here regardless of
	 * the actual @level; confirm whether these should pass @level.
	 */
	if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
		xchk_btree_set_corrupt(bs->sc, cur, 1);

	/* Only overlapping btrees store high keys in parent blocks. */
	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
		return;

	/* Get high keys */
	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
	high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
			parent_block);

	if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
		xchk_btree_set_corrupt(bs->sc, cur, 1);
}
/*
 * Visit all nodes and leaves of a btree.  Check that all pointers and
 * records are in order, that the keys reflect the records, and use a callback
 * so that the caller can verify individual records.
 *
 * The walk is an iterative depth-first traversal driven by the per-level
 * ptr fields in cur->bc_levels[].
 */
int
xchk_btree(
	struct xfs_scrub		*sc,
	struct xfs_btree_cur		*cur,
	xchk_btree_rec_fn		scrub_fn,	/* per-record callback */
	const struct xfs_owner_info	*oinfo,		/* expected block owner */
	void				*private)	/* opaque callback data */
{
	union xfs_btree_ptr		ptr;
	struct xchk_btree		*bs;
	union xfs_btree_ptr		*pp;
	union xfs_btree_rec		*recp;
	struct xfs_btree_block		*block;
	struct xfs_buf			*bp;
	struct check_owner		*co;
	struct check_owner		*n;
	size_t				cur_sz;
	int				level;
	int				error = 0;

	/*
	 * Allocate the btree scrub context from the heap, because this
	 * structure can get rather large.  Don't let a caller feed us a
	 * totally absurd size.
	 */
	cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
	if (cur_sz > PAGE_SIZE) {
		xchk_btree_set_corrupt(sc, cur, 0);
		return 0;
	}
	bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
	if (!bs)
		return -ENOMEM;
	bs->cur = cur;
	bs->scrub_rec = scrub_fn;
	bs->oinfo = oinfo;
	bs->private = private;
	bs->sc = sc;

	/* Initialize scrub state */
	INIT_LIST_HEAD(&bs->to_check);

	/*
	 * Load the root of the btree.  The helper function absorbs
	 * error codes for us.
	 */
	level = cur->bc_nlevels - 1;
	xfs_btree_init_ptr_from_cur(cur, &ptr);
	if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
		goto out;
	error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
	if (error || !block)
		goto out;

	cur->bc_levels[level].ptr = 1;

	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		if (level == 0) {
			/* End of leaf, pop back towards the root. */
			if (cur->bc_levels[level].ptr >
			    be16_to_cpu(block->bb_numrecs)) {
				xchk_btree_block_keys(bs, level, block);
				if (level < cur->bc_nlevels - 1)
					cur->bc_levels[level + 1].ptr++;
				level++;
				continue;
			}

			/* Records in order for scrub? */
			xchk_btree_rec(bs);

			/* Call out to the record checker. */
			recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
					block);
			error = bs->scrub_rec(bs, recp);
			if (error)
				break;
			/* Stop early on fatal signal or known corruption. */
			if (xchk_should_terminate(sc, &error) ||
			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
				break;

			cur->bc_levels[level].ptr++;
			continue;
		}

		/* End of node, pop back towards the root. */
		if (cur->bc_levels[level].ptr >
		    be16_to_cpu(block->bb_numrecs)) {
			xchk_btree_block_keys(bs, level, block);
			if (level < cur->bc_nlevels - 1)
				cur->bc_levels[level + 1].ptr++;
			level++;
			continue;
		}

		/* Keys in order for scrub? */
		xchk_btree_key(bs, level);

		/* Drill another level deeper. */
		pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
		if (!xchk_btree_ptr_ok(bs, level, pp)) {
			/* Bad keyptr; skip this subtree. */
			cur->bc_levels[level].ptr++;
			continue;
		}
		level--;
		error = xchk_btree_get_block(bs, level, pp, &block, &bp);
		if (error || !block)
			goto out;

		cur->bc_levels[level].ptr = 1;
	}

out:
	/* Process deferred owner checks on btree blocks. */
	list_for_each_entry_safe(co, n, &bs->to_check, list) {
		if (!error && bs->cur)
			error = xchk_btree_check_block_owner(bs, co->level,
					co->daddr);
		list_del(&co->list);
		kfree(co);
	}
	kfree(bs);
	return error;
}