xfs_health.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2019 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_log_format.h"
  11. #include "xfs_trans_resv.h"
  12. #include "xfs_mount.h"
  13. #include "xfs_inode.h"
  14. #include "xfs_trace.h"
  15. #include "xfs_health.h"
  16. #include "xfs_ag.h"
  17. #include "xfs_btree.h"
  18. #include "xfs_da_format.h"
  19. #include "xfs_da_btree.h"
  20. #include "xfs_quota_defs.h"
  21. /*
  22. * Warn about metadata corruption that we detected but haven't fixed, and
  23. * make sure we're not sitting on anything that would get in the way of
  24. * recovery.
  25. */
  26. void
  27. xfs_health_unmount(
  28. struct xfs_mount *mp)
  29. {
  30. struct xfs_perag *pag;
  31. xfs_agnumber_t agno;
  32. unsigned int sick = 0;
  33. unsigned int checked = 0;
  34. bool warn = false;
  35. if (xfs_is_shutdown(mp))
  36. return;
  37. /* Measure AG corruption levels. */
  38. for_each_perag(mp, agno, pag) {
  39. xfs_ag_measure_sickness(pag, &sick, &checked);
  40. if (sick) {
  41. trace_xfs_ag_unfixed_corruption(mp, agno, sick);
  42. warn = true;
  43. }
  44. }
  45. /* Measure realtime volume corruption levels. */
  46. xfs_rt_measure_sickness(mp, &sick, &checked);
  47. if (sick) {
  48. trace_xfs_rt_unfixed_corruption(mp, sick);
  49. warn = true;
  50. }
  51. /*
  52. * Measure fs corruption and keep the sample around for the warning.
  53. * See the note below for why we exempt FS_COUNTERS.
  54. */
  55. xfs_fs_measure_sickness(mp, &sick, &checked);
  56. if (sick & ~XFS_SICK_FS_COUNTERS) {
  57. trace_xfs_fs_unfixed_corruption(mp, sick);
  58. warn = true;
  59. }
  60. if (warn) {
  61. xfs_warn(mp,
  62. "Uncorrected metadata errors detected; please run xfs_repair.");
  63. /*
  64. * We discovered uncorrected metadata problems at some point
  65. * during this filesystem mount and have advised the
  66. * administrator to run repair once the unmount completes.
  67. *
  68. * However, we must be careful -- when FSCOUNTERS are flagged
  69. * unhealthy, the unmount procedure omits writing the clean
  70. * unmount record to the log so that the next mount will run
  71. * recovery and recompute the summary counters. In other
  72. * words, we leave a dirty log to get the counters fixed.
  73. *
  74. * Unfortunately, xfs_repair cannot recover dirty logs, so if
  75. * there were filesystem problems, FSCOUNTERS was flagged, and
  76. * the administrator takes our advice to run xfs_repair,
  77. * they'll have to zap the log before repairing structures.
  78. * We don't really want to encourage this, so we mark the
  79. * FSCOUNTERS healthy so that a subsequent repair run won't see
  80. * a dirty log.
  81. */
  82. if (sick & XFS_SICK_FS_COUNTERS)
  83. xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
  84. }
  85. }
  86. /* Mark unhealthy per-fs metadata. */
  87. void
  88. xfs_fs_mark_sick(
  89. struct xfs_mount *mp,
  90. unsigned int mask)
  91. {
  92. ASSERT(!(mask & ~XFS_SICK_FS_ALL));
  93. trace_xfs_fs_mark_sick(mp, mask);
  94. spin_lock(&mp->m_sb_lock);
  95. mp->m_fs_sick |= mask;
  96. spin_unlock(&mp->m_sb_lock);
  97. }
  98. /* Mark per-fs metadata as having been checked and found unhealthy by fsck. */
  99. void
  100. xfs_fs_mark_corrupt(
  101. struct xfs_mount *mp,
  102. unsigned int mask)
  103. {
  104. ASSERT(!(mask & ~XFS_SICK_FS_ALL));
  105. trace_xfs_fs_mark_corrupt(mp, mask);
  106. spin_lock(&mp->m_sb_lock);
  107. mp->m_fs_sick |= mask;
  108. mp->m_fs_checked |= mask;
  109. spin_unlock(&mp->m_sb_lock);
  110. }
  111. /* Mark a per-fs metadata healed. */
  112. void
  113. xfs_fs_mark_healthy(
  114. struct xfs_mount *mp,
  115. unsigned int mask)
  116. {
  117. ASSERT(!(mask & ~XFS_SICK_FS_ALL));
  118. trace_xfs_fs_mark_healthy(mp, mask);
  119. spin_lock(&mp->m_sb_lock);
  120. mp->m_fs_sick &= ~mask;
  121. if (!(mp->m_fs_sick & XFS_SICK_FS_PRIMARY))
  122. mp->m_fs_sick &= ~XFS_SICK_FS_SECONDARY;
  123. mp->m_fs_checked |= mask;
  124. spin_unlock(&mp->m_sb_lock);
  125. }
  126. /* Sample which per-fs metadata are unhealthy. */
  127. void
  128. xfs_fs_measure_sickness(
  129. struct xfs_mount *mp,
  130. unsigned int *sick,
  131. unsigned int *checked)
  132. {
  133. spin_lock(&mp->m_sb_lock);
  134. *sick = mp->m_fs_sick;
  135. *checked = mp->m_fs_checked;
  136. spin_unlock(&mp->m_sb_lock);
  137. }
  138. /* Mark unhealthy realtime metadata. */
  139. void
  140. xfs_rt_mark_sick(
  141. struct xfs_mount *mp,
  142. unsigned int mask)
  143. {
  144. ASSERT(!(mask & ~XFS_SICK_RT_ALL));
  145. trace_xfs_rt_mark_sick(mp, mask);
  146. spin_lock(&mp->m_sb_lock);
  147. mp->m_rt_sick |= mask;
  148. spin_unlock(&mp->m_sb_lock);
  149. }
  150. /* Mark realtime metadata as having been checked and found unhealthy by fsck. */
  151. void
  152. xfs_rt_mark_corrupt(
  153. struct xfs_mount *mp,
  154. unsigned int mask)
  155. {
  156. ASSERT(!(mask & ~XFS_SICK_RT_ALL));
  157. trace_xfs_rt_mark_corrupt(mp, mask);
  158. spin_lock(&mp->m_sb_lock);
  159. mp->m_rt_sick |= mask;
  160. mp->m_rt_checked |= mask;
  161. spin_unlock(&mp->m_sb_lock);
  162. }
  163. /* Mark a realtime metadata healed. */
  164. void
  165. xfs_rt_mark_healthy(
  166. struct xfs_mount *mp,
  167. unsigned int mask)
  168. {
  169. ASSERT(!(mask & ~XFS_SICK_RT_ALL));
  170. trace_xfs_rt_mark_healthy(mp, mask);
  171. spin_lock(&mp->m_sb_lock);
  172. mp->m_rt_sick &= ~mask;
  173. if (!(mp->m_rt_sick & XFS_SICK_RT_PRIMARY))
  174. mp->m_rt_sick &= ~XFS_SICK_RT_SECONDARY;
  175. mp->m_rt_checked |= mask;
  176. spin_unlock(&mp->m_sb_lock);
  177. }
  178. /* Sample which realtime metadata are unhealthy. */
  179. void
  180. xfs_rt_measure_sickness(
  181. struct xfs_mount *mp,
  182. unsigned int *sick,
  183. unsigned int *checked)
  184. {
  185. spin_lock(&mp->m_sb_lock);
  186. *sick = mp->m_rt_sick;
  187. *checked = mp->m_rt_checked;
  188. spin_unlock(&mp->m_sb_lock);
  189. }
  190. /* Mark unhealthy per-ag metadata given a raw AG number. */
  191. void
  192. xfs_agno_mark_sick(
  193. struct xfs_mount *mp,
  194. xfs_agnumber_t agno,
  195. unsigned int mask)
  196. {
  197. struct xfs_perag *pag = xfs_perag_get(mp, agno);
  198. /* per-ag structure not set up yet? */
  199. if (!pag)
  200. return;
  201. xfs_ag_mark_sick(pag, mask);
  202. xfs_perag_put(pag);
  203. }
  204. /* Mark unhealthy per-ag metadata. */
  205. void
  206. xfs_ag_mark_sick(
  207. struct xfs_perag *pag,
  208. unsigned int mask)
  209. {
  210. ASSERT(!(mask & ~XFS_SICK_AG_ALL));
  211. trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask);
  212. spin_lock(&pag->pag_state_lock);
  213. pag->pag_sick |= mask;
  214. spin_unlock(&pag->pag_state_lock);
  215. }
  216. /* Mark per-ag metadata as having been checked and found unhealthy by fsck. */
  217. void
  218. xfs_ag_mark_corrupt(
  219. struct xfs_perag *pag,
  220. unsigned int mask)
  221. {
  222. ASSERT(!(mask & ~XFS_SICK_AG_ALL));
  223. trace_xfs_ag_mark_corrupt(pag->pag_mount, pag->pag_agno, mask);
  224. spin_lock(&pag->pag_state_lock);
  225. pag->pag_sick |= mask;
  226. pag->pag_checked |= mask;
  227. spin_unlock(&pag->pag_state_lock);
  228. }
  229. /* Mark per-ag metadata ok. */
  230. void
  231. xfs_ag_mark_healthy(
  232. struct xfs_perag *pag,
  233. unsigned int mask)
  234. {
  235. ASSERT(!(mask & ~XFS_SICK_AG_ALL));
  236. trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);
  237. spin_lock(&pag->pag_state_lock);
  238. pag->pag_sick &= ~mask;
  239. if (!(pag->pag_sick & XFS_SICK_AG_PRIMARY))
  240. pag->pag_sick &= ~XFS_SICK_AG_SECONDARY;
  241. pag->pag_checked |= mask;
  242. spin_unlock(&pag->pag_state_lock);
  243. }
  244. /* Sample which per-ag metadata are unhealthy. */
  245. void
  246. xfs_ag_measure_sickness(
  247. struct xfs_perag *pag,
  248. unsigned int *sick,
  249. unsigned int *checked)
  250. {
  251. spin_lock(&pag->pag_state_lock);
  252. *sick = pag->pag_sick;
  253. *checked = pag->pag_checked;
  254. spin_unlock(&pag->pag_state_lock);
  255. }
  256. /* Mark the unhealthy parts of an inode. */
  257. void
  258. xfs_inode_mark_sick(
  259. struct xfs_inode *ip,
  260. unsigned int mask)
  261. {
  262. ASSERT(!(mask & ~XFS_SICK_INO_ALL));
  263. trace_xfs_inode_mark_sick(ip, mask);
  264. spin_lock(&ip->i_flags_lock);
  265. ip->i_sick |= mask;
  266. spin_unlock(&ip->i_flags_lock);
  267. /*
  268. * Keep this inode around so we don't lose the sickness report. Scrub
  269. * grabs inodes with DONTCACHE assuming that most inode are ok, which
  270. * is not the case here.
  271. */
  272. spin_lock(&VFS_I(ip)->i_lock);
  273. VFS_I(ip)->i_state &= ~I_DONTCACHE;
  274. spin_unlock(&VFS_I(ip)->i_lock);
  275. }
  276. /* Mark inode metadata as having been checked and found unhealthy by fsck. */
  277. void
  278. xfs_inode_mark_corrupt(
  279. struct xfs_inode *ip,
  280. unsigned int mask)
  281. {
  282. ASSERT(!(mask & ~XFS_SICK_INO_ALL));
  283. trace_xfs_inode_mark_corrupt(ip, mask);
  284. spin_lock(&ip->i_flags_lock);
  285. ip->i_sick |= mask;
  286. ip->i_checked |= mask;
  287. spin_unlock(&ip->i_flags_lock);
  288. /*
  289. * Keep this inode around so we don't lose the sickness report. Scrub
  290. * grabs inodes with DONTCACHE assuming that most inode are ok, which
  291. * is not the case here.
  292. */
  293. spin_lock(&VFS_I(ip)->i_lock);
  294. VFS_I(ip)->i_state &= ~I_DONTCACHE;
  295. spin_unlock(&VFS_I(ip)->i_lock);
  296. }
  297. /* Mark parts of an inode healed. */
  298. void
  299. xfs_inode_mark_healthy(
  300. struct xfs_inode *ip,
  301. unsigned int mask)
  302. {
  303. ASSERT(!(mask & ~XFS_SICK_INO_ALL));
  304. trace_xfs_inode_mark_healthy(ip, mask);
  305. spin_lock(&ip->i_flags_lock);
  306. ip->i_sick &= ~mask;
  307. if (!(ip->i_sick & XFS_SICK_INO_PRIMARY))
  308. ip->i_sick &= ~XFS_SICK_INO_SECONDARY;
  309. ip->i_checked |= mask;
  310. spin_unlock(&ip->i_flags_lock);
  311. }
  312. /* Sample which parts of an inode are unhealthy. */
  313. void
  314. xfs_inode_measure_sickness(
  315. struct xfs_inode *ip,
  316. unsigned int *sick,
  317. unsigned int *checked)
  318. {
  319. spin_lock(&ip->i_flags_lock);
  320. *sick = ip->i_sick;
  321. *checked = ip->i_checked;
  322. spin_unlock(&ip->i_flags_lock);
  323. }
  324. /* Mappings between internal sick masks and ioctl sick masks. */
  325. struct ioctl_sick_map {
  326. unsigned int sick_mask;
  327. unsigned int ioctl_mask;
  328. };
  329. static const struct ioctl_sick_map fs_map[] = {
  330. { XFS_SICK_FS_COUNTERS, XFS_FSOP_GEOM_SICK_COUNTERS},
  331. { XFS_SICK_FS_UQUOTA, XFS_FSOP_GEOM_SICK_UQUOTA },
  332. { XFS_SICK_FS_GQUOTA, XFS_FSOP_GEOM_SICK_GQUOTA },
  333. { XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA },
  334. { XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK },
  335. { XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS },
  336. { 0, 0 },
  337. };
  338. static const struct ioctl_sick_map rt_map[] = {
  339. { XFS_SICK_RT_BITMAP, XFS_FSOP_GEOM_SICK_RT_BITMAP },
  340. { XFS_SICK_RT_SUMMARY, XFS_FSOP_GEOM_SICK_RT_SUMMARY },
  341. { 0, 0 },
  342. };
  343. static inline void
  344. xfgeo_health_tick(
  345. struct xfs_fsop_geom *geo,
  346. unsigned int sick,
  347. unsigned int checked,
  348. const struct ioctl_sick_map *m)
  349. {
  350. if (checked & m->sick_mask)
  351. geo->checked |= m->ioctl_mask;
  352. if (sick & m->sick_mask)
  353. geo->sick |= m->ioctl_mask;
  354. }
  355. /* Fill out fs geometry health info. */
  356. void
  357. xfs_fsop_geom_health(
  358. struct xfs_mount *mp,
  359. struct xfs_fsop_geom *geo)
  360. {
  361. const struct ioctl_sick_map *m;
  362. unsigned int sick;
  363. unsigned int checked;
  364. geo->sick = 0;
  365. geo->checked = 0;
  366. xfs_fs_measure_sickness(mp, &sick, &checked);
  367. for (m = fs_map; m->sick_mask; m++)
  368. xfgeo_health_tick(geo, sick, checked, m);
  369. xfs_rt_measure_sickness(mp, &sick, &checked);
  370. for (m = rt_map; m->sick_mask; m++)
  371. xfgeo_health_tick(geo, sick, checked, m);
  372. }
  373. static const struct ioctl_sick_map ag_map[] = {
  374. { XFS_SICK_AG_SB, XFS_AG_GEOM_SICK_SB },
  375. { XFS_SICK_AG_AGF, XFS_AG_GEOM_SICK_AGF },
  376. { XFS_SICK_AG_AGFL, XFS_AG_GEOM_SICK_AGFL },
  377. { XFS_SICK_AG_AGI, XFS_AG_GEOM_SICK_AGI },
  378. { XFS_SICK_AG_BNOBT, XFS_AG_GEOM_SICK_BNOBT },
  379. { XFS_SICK_AG_CNTBT, XFS_AG_GEOM_SICK_CNTBT },
  380. { XFS_SICK_AG_INOBT, XFS_AG_GEOM_SICK_INOBT },
  381. { XFS_SICK_AG_FINOBT, XFS_AG_GEOM_SICK_FINOBT },
  382. { XFS_SICK_AG_RMAPBT, XFS_AG_GEOM_SICK_RMAPBT },
  383. { XFS_SICK_AG_REFCNTBT, XFS_AG_GEOM_SICK_REFCNTBT },
  384. { XFS_SICK_AG_INODES, XFS_AG_GEOM_SICK_INODES },
  385. { 0, 0 },
  386. };
  387. /* Fill out ag geometry health info. */
  388. void
  389. xfs_ag_geom_health(
  390. struct xfs_perag *pag,
  391. struct xfs_ag_geometry *ageo)
  392. {
  393. const struct ioctl_sick_map *m;
  394. unsigned int sick;
  395. unsigned int checked;
  396. ageo->ag_sick = 0;
  397. ageo->ag_checked = 0;
  398. xfs_ag_measure_sickness(pag, &sick, &checked);
  399. for (m = ag_map; m->sick_mask; m++) {
  400. if (checked & m->sick_mask)
  401. ageo->ag_checked |= m->ioctl_mask;
  402. if (sick & m->sick_mask)
  403. ageo->ag_sick |= m->ioctl_mask;
  404. }
  405. }
  406. static const struct ioctl_sick_map ino_map[] = {
  407. { XFS_SICK_INO_CORE, XFS_BS_SICK_INODE },
  408. { XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD },
  409. { XFS_SICK_INO_BMBTA, XFS_BS_SICK_BMBTA },
  410. { XFS_SICK_INO_BMBTC, XFS_BS_SICK_BMBTC },
  411. { XFS_SICK_INO_DIR, XFS_BS_SICK_DIR },
  412. { XFS_SICK_INO_XATTR, XFS_BS_SICK_XATTR },
  413. { XFS_SICK_INO_SYMLINK, XFS_BS_SICK_SYMLINK },
  414. { XFS_SICK_INO_PARENT, XFS_BS_SICK_PARENT },
  415. { XFS_SICK_INO_BMBTD_ZAPPED, XFS_BS_SICK_BMBTD },
  416. { XFS_SICK_INO_BMBTA_ZAPPED, XFS_BS_SICK_BMBTA },
  417. { XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR },
  418. { XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK },
  419. { XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE },
  420. { 0, 0 },
  421. };
  422. /* Fill out bulkstat health info. */
  423. void
  424. xfs_bulkstat_health(
  425. struct xfs_inode *ip,
  426. struct xfs_bulkstat *bs)
  427. {
  428. const struct ioctl_sick_map *m;
  429. unsigned int sick;
  430. unsigned int checked;
  431. bs->bs_sick = 0;
  432. bs->bs_checked = 0;
  433. xfs_inode_measure_sickness(ip, &sick, &checked);
  434. for (m = ino_map; m->sick_mask; m++) {
  435. if (checked & m->sick_mask)
  436. bs->bs_checked |= m->ioctl_mask;
  437. if (sick & m->sick_mask)
  438. bs->bs_sick |= m->ioctl_mask;
  439. }
  440. }
  441. /* Mark a block mapping sick. */
  442. void
  443. xfs_bmap_mark_sick(
  444. struct xfs_inode *ip,
  445. int whichfork)
  446. {
  447. unsigned int mask;
  448. switch (whichfork) {
  449. case XFS_DATA_FORK:
  450. mask = XFS_SICK_INO_BMBTD;
  451. break;
  452. case XFS_ATTR_FORK:
  453. mask = XFS_SICK_INO_BMBTA;
  454. break;
  455. case XFS_COW_FORK:
  456. mask = XFS_SICK_INO_BMBTC;
  457. break;
  458. default:
  459. ASSERT(0);
  460. return;
  461. }
  462. xfs_inode_mark_sick(ip, mask);
  463. }
  464. /* Record observations of btree corruption with the health tracking system. */
  465. void
  466. xfs_btree_mark_sick(
  467. struct xfs_btree_cur *cur)
  468. {
  469. switch (cur->bc_ops->type) {
  470. case XFS_BTREE_TYPE_MEM:
  471. /* no health state tracking for ephemeral btrees */
  472. return;
  473. case XFS_BTREE_TYPE_AG:
  474. ASSERT(cur->bc_ops->sick_mask);
  475. xfs_ag_mark_sick(cur->bc_ag.pag, cur->bc_ops->sick_mask);
  476. return;
  477. case XFS_BTREE_TYPE_INODE:
  478. if (xfs_btree_is_bmap(cur->bc_ops)) {
  479. xfs_bmap_mark_sick(cur->bc_ino.ip,
  480. cur->bc_ino.whichfork);
  481. return;
  482. }
  483. fallthrough;
  484. default:
  485. ASSERT(0);
  486. return;
  487. }
  488. }
  489. /*
  490. * Record observations of dir/attr btree corruption with the health tracking
  491. * system.
  492. */
  493. void
  494. xfs_dirattr_mark_sick(
  495. struct xfs_inode *ip,
  496. int whichfork)
  497. {
  498. unsigned int mask;
  499. switch (whichfork) {
  500. case XFS_DATA_FORK:
  501. mask = XFS_SICK_INO_DIR;
  502. break;
  503. case XFS_ATTR_FORK:
  504. mask = XFS_SICK_INO_XATTR;
  505. break;
  506. default:
  507. ASSERT(0);
  508. return;
  509. }
  510. xfs_inode_mark_sick(ip, mask);
  511. }
  512. /*
  513. * Record observations of dir/attr btree corruption with the health tracking
  514. * system.
  515. */
  516. void
  517. xfs_da_mark_sick(
  518. struct xfs_da_args *args)
  519. {
  520. xfs_dirattr_mark_sick(args->dp, args->whichfork);
  521. }