inode_repair.c 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_defer.h"
  13. #include "xfs_btree.h"
  14. #include "xfs_bit.h"
  15. #include "xfs_log_format.h"
  16. #include "xfs_trans.h"
  17. #include "xfs_sb.h"
  18. #include "xfs_inode.h"
  19. #include "xfs_icache.h"
  20. #include "xfs_inode_buf.h"
  21. #include "xfs_inode_fork.h"
  22. #include "xfs_ialloc.h"
  23. #include "xfs_da_format.h"
  24. #include "xfs_reflink.h"
  25. #include "xfs_alloc.h"
  26. #include "xfs_rmap.h"
  27. #include "xfs_rmap_btree.h"
  28. #include "xfs_bmap.h"
  29. #include "xfs_bmap_btree.h"
  30. #include "xfs_bmap_util.h"
  31. #include "xfs_dir2.h"
  32. #include "xfs_dir2_priv.h"
  33. #include "xfs_quota_defs.h"
  34. #include "xfs_quota.h"
  35. #include "xfs_ag.h"
  36. #include "xfs_rtbitmap.h"
  37. #include "xfs_attr_leaf.h"
  38. #include "xfs_log_priv.h"
  39. #include "xfs_health.h"
  40. #include "xfs_symlink_remote.h"
  41. #include "scrub/xfs_scrub.h"
  42. #include "scrub/scrub.h"
  43. #include "scrub/common.h"
  44. #include "scrub/btree.h"
  45. #include "scrub/trace.h"
  46. #include "scrub/repair.h"
  47. #include "scrub/iscan.h"
  48. #include "scrub/readdir.h"
  49. #include "scrub/tempfile.h"
  50. /*
  51. * Inode Record Repair
  52. * ===================
  53. *
  54. * Roughly speaking, inode problems can be classified based on whether or not
  55. * they trip the dinode verifiers. If those trip, then we won't be able to
  56. * xfs_iget ourselves the inode.
  57. *
  58. * Therefore, the xrep_dinode_* functions fix anything that will cause the
  59. * inode buffer verifier or the dinode verifier. The xrep_inode_* functions
  60. * fix things on live incore inodes. The inode repair functions make decisions
  61. * with security and usability implications when reviving a file:
  62. *
 * - Files with zero di_mode or a garbage di_mode are converted to a regular
 *   file that only root can read. This file may not actually contain user data,
  65. * if the file was not previously a regular file. Setuid and setgid bits
  66. * are cleared.
  67. *
  68. * - Zero-size directories can be truncated to look empty. It is necessary to
  69. * run the bmapbtd and directory repair functions to fully rebuild the
  70. * directory.
  71. *
  72. * - Zero-size symbolic link targets can be truncated to '?'. It is necessary
  73. * to run the bmapbtd and symlink repair functions to salvage the symlink.
  74. *
  75. * - Invalid extent size hints will be removed.
  76. *
  77. * - Quotacheck will be scheduled if we repaired an inode that was so badly
  78. * damaged that the ondisk inode had to be rebuilt.
  79. *
  80. * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
  81. * Setuid and setgid bits are cleared.
  82. *
  83. * - Data and attr forks are reset to extents format with zero extents if the
  84. * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta
  85. * repair functions to recover the space mapping.
  86. *
  87. * - ACLs will not be recovered if the attr fork is zapped or the extended
  88. * attribute structure itself requires salvaging.
  89. *
  90. * - If the attr fork is zapped, the user and group ids are reset to root and
  91. * the setuid and setgid bits are removed.
  92. */
/*
 * All the information we need to repair the ondisk inode if we can't iget the
 * incore inode.  We don't allocate this buffer unless we're going to perform
 * a repair to the ondisk inode cluster buffer.
 */
struct xrep_inode {
	/* Inode mapping that we saved from the initial lookup attempt. */
	struct xfs_imap		imap;

	/* Scrub context that owns this repair state. */
	struct xfs_scrub	*sc;

	/* Blocks in use on the data device by data extents or bmbt blocks. */
	xfs_rfsblock_t		data_blocks;

	/* Blocks in use on the rt device. */
	xfs_rfsblock_t		rt_blocks;

	/* Blocks in use by the attr fork. */
	xfs_rfsblock_t		attr_blocks;

	/* Number of data device extents for the data fork. */
	xfs_extnum_t		data_extents;

	/*
	 * Number of realtime device extents for the data fork.  If
	 * data_extents and rt_extents indicate that the data fork has extents
	 * on both devices, we'll just back away slowly.
	 */
	xfs_extnum_t		rt_extents;

	/* Number of (data device) extents for the attr fork. */
	xfs_aextnum_t		attr_extents;

	/* Sick state to set after zapping parts of the inode. */
	unsigned int		ino_sick_mask;

	/* Must we remove all access from this file? */
	bool			zap_acls;

	/* Inode scanner to see if we can find the ftype from dirents */
	struct xchk_iscan	ftype_iscan;

	/* Dirent ftype observed for the scrub target during the scan. */
	uint8_t			alleged_ftype;
};
  126. /*
  127. * Setup function for inode repair. @imap contains the ondisk inode mapping
  128. * information so that we can correct the ondisk inode cluster buffer if
  129. * necessary to make iget work.
  130. */
  131. int
  132. xrep_setup_inode(
  133. struct xfs_scrub *sc,
  134. const struct xfs_imap *imap)
  135. {
  136. struct xrep_inode *ri;
  137. sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
  138. if (!sc->buf)
  139. return -ENOMEM;
  140. ri = sc->buf;
  141. memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
  142. ri->sc = sc;
  143. return 0;
  144. }
  145. /*
  146. * Make sure this ondisk inode can pass the inode buffer verifier. This is
  147. * not the same as the dinode verifier.
  148. */
  149. STATIC void
  150. xrep_dinode_buf_core(
  151. struct xfs_scrub *sc,
  152. struct xfs_buf *bp,
  153. unsigned int ioffset)
  154. {
  155. struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset);
  156. struct xfs_trans *tp = sc->tp;
  157. struct xfs_mount *mp = sc->mp;
  158. xfs_agino_t agino;
  159. bool crc_ok = false;
  160. bool magic_ok = false;
  161. bool unlinked_ok = false;
  162. agino = be32_to_cpu(dip->di_next_unlinked);
  163. if (xfs_verify_agino_or_null(bp->b_pag, agino))
  164. unlinked_ok = true;
  165. if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
  166. xfs_dinode_good_version(mp, dip->di_version))
  167. magic_ok = true;
  168. if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
  169. XFS_DINODE_CRC_OFF))
  170. crc_ok = true;
  171. if (magic_ok && unlinked_ok && crc_ok)
  172. return;
  173. if (!magic_ok) {
  174. dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
  175. dip->di_version = 3;
  176. }
  177. if (!unlinked_ok)
  178. dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
  179. xfs_dinode_calc_crc(mp, dip);
  180. xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
  181. xfs_trans_log_buf(tp, bp, ioffset,
  182. ioffset + sizeof(struct xfs_dinode) - 1);
  183. }
  184. /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
  185. STATIC void
  186. xrep_dinode_buf(
  187. struct xfs_scrub *sc,
  188. struct xfs_buf *bp)
  189. {
  190. struct xfs_mount *mp = sc->mp;
  191. int i;
  192. int ni;
  193. ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  194. for (i = 0; i < ni; i++)
  195. xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
  196. }
  197. /* Reinitialize things that never change in an inode. */
  198. STATIC void
  199. xrep_dinode_header(
  200. struct xfs_scrub *sc,
  201. struct xfs_dinode *dip)
  202. {
  203. trace_xrep_dinode_header(sc, dip);
  204. dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
  205. if (!xfs_dinode_good_version(sc->mp, dip->di_version))
  206. dip->di_version = 3;
  207. dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
  208. uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
  209. dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
  210. }
  211. /*
  212. * If this directory entry points to the scrub target inode, then the directory
  213. * we're scanning is the parent of the scrub target inode.
  214. */
  215. STATIC int
  216. xrep_dinode_findmode_dirent(
  217. struct xfs_scrub *sc,
  218. struct xfs_inode *dp,
  219. xfs_dir2_dataptr_t dapos,
  220. const struct xfs_name *name,
  221. xfs_ino_t ino,
  222. void *priv)
  223. {
  224. struct xrep_inode *ri = priv;
  225. int error = 0;
  226. if (xchk_should_terminate(ri->sc, &error))
  227. return error;
  228. if (ino != sc->sm->sm_ino)
  229. return 0;
  230. /* Ignore garbage directory entry names. */
  231. if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
  232. return -EFSCORRUPTED;
  233. /* Don't pick up dot or dotdot entries; we only want child dirents. */
  234. if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
  235. xfs_dir2_samename(name, &xfs_name_dot))
  236. return 0;
  237. /*
  238. * Uhoh, more than one parent for this inode and they don't agree on
  239. * the file type?
  240. */
  241. if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
  242. ri->alleged_ftype != name->type) {
  243. trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
  244. ri->alleged_ftype);
  245. return -EFSCORRUPTED;
  246. }
  247. /* We found a potential parent; remember the ftype. */
  248. trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
  249. ri->alleged_ftype = name->type;
  250. return 0;
  251. }
  252. /* Try to lock a directory, or wait a jiffy. */
  253. static inline int
  254. xrep_dinode_ilock_nowait(
  255. struct xfs_inode *dp,
  256. unsigned int lock_mode)
  257. {
  258. if (xfs_ilock_nowait(dp, lock_mode))
  259. return true;
  260. schedule_timeout_killable(1);
  261. return false;
  262. }
  263. /*
  264. * Try to lock a directory to look for ftype hints. Since we already hold the
  265. * AGI buffer, we cannot block waiting for the ILOCK because rename can take
  266. * the ILOCK and then try to lock AGIs.
  267. */
  268. STATIC int
  269. xrep_dinode_trylock_directory(
  270. struct xrep_inode *ri,
  271. struct xfs_inode *dp,
  272. unsigned int *lock_modep)
  273. {
  274. unsigned long deadline = jiffies + msecs_to_jiffies(30000);
  275. unsigned int lock_mode;
  276. int error = 0;
  277. do {
  278. if (xchk_should_terminate(ri->sc, &error))
  279. return error;
  280. if (xfs_need_iread_extents(&dp->i_df))
  281. lock_mode = XFS_ILOCK_EXCL;
  282. else
  283. lock_mode = XFS_ILOCK_SHARED;
  284. if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
  285. *lock_modep = lock_mode;
  286. return 0;
  287. }
  288. } while (!time_is_before_jiffies(deadline));
  289. return -EBUSY;
  290. }
  291. /*
  292. * If this is a directory, walk the dirents looking for any that point to the
  293. * scrub target inode.
  294. */
  295. STATIC int
  296. xrep_dinode_findmode_walk_directory(
  297. struct xrep_inode *ri,
  298. struct xfs_inode *dp)
  299. {
  300. struct xfs_scrub *sc = ri->sc;
  301. unsigned int lock_mode;
  302. int error = 0;
  303. /* Ignore temporary repair directories. */
  304. if (xrep_is_tempfile(dp))
  305. return 0;
  306. /*
  307. * Scan the directory to see if there it contains an entry pointing to
  308. * the directory that we are repairing.
  309. */
  310. error = xrep_dinode_trylock_directory(ri, dp, &lock_mode);
  311. if (error)
  312. return error;
  313. /*
  314. * If this directory is known to be sick, we cannot scan it reliably
  315. * and must abort.
  316. */
  317. if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
  318. XFS_SICK_INO_BMBTD |
  319. XFS_SICK_INO_DIR)) {
  320. error = -EFSCORRUPTED;
  321. goto out_unlock;
  322. }
  323. /*
  324. * We cannot complete our parent pointer scan if a directory looks as
  325. * though it has been zapped by the inode record repair code.
  326. */
  327. if (xchk_dir_looks_zapped(dp)) {
  328. error = -EBUSY;
  329. goto out_unlock;
  330. }
  331. error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
  332. if (error)
  333. goto out_unlock;
  334. out_unlock:
  335. xfs_iunlock(dp, lock_mode);
  336. return error;
  337. }
/*
 * Try to find the mode of the inode being repaired by looking for directories
 * that point down to this file.  Returns 0 and sets *mode on success, or a
 * negative errno (-EBUSY and -EDEADLOCK are retryable).
 */
STATIC int
xrep_dinode_find_mode(
	struct xrep_inode	*ri,
	uint16_t		*mode)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_inode	*dp;
	int			error;

	/* No ftype means we have no other metadata to consult. */
	if (!xfs_has_ftype(sc->mp)) {
		*mode = S_IFREG;
		return 0;
	}

	/*
	 * Scan all directories for parents that might point down to this
	 * inode.  Skip the inode being repaired during the scan since it
	 * cannot be its own parent.  Note that we still hold the AGI locked
	 * so there's a real possibility that _iscan_iter can return EBUSY.
	 */
	xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
	xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
	ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
	ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
	while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
		/* Only directories can carry dirents pointing at us. */
		if (S_ISDIR(VFS_I(dp)->i_mode))
			error = xrep_dinode_findmode_walk_directory(ri, dp);
		xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
		xchk_irele(sc, dp);
		if (error < 0)
			break;
		if (xchk_should_terminate(sc, &error))
			break;
	}
	/* Tear down the scan state even if we bailed out early. */
	xchk_iscan_iter_finish(&ri->ftype_iscan);
	xchk_iscan_teardown(&ri->ftype_iscan);

	if (error == -EBUSY) {
		if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
			/*
			 * If we got an EBUSY after finding at least one
			 * dirent, that means the scan found an inode on the
			 * inactivation list and could not open it.  Accept
			 * the alleged ftype and install a new mode below.
			 */
			error = 0;
		} else if (!(sc->flags & XCHK_TRY_HARDER)) {
			/*
			 * Otherwise, retry the operation one time to see if
			 * the reason for the delay is an inode from the same
			 * cluster buffer waiting on the inactivation list.
			 */
			error = -EDEADLOCK;
		}
	}
	if (error)
		return error;

	/*
	 * Convert the discovered ftype into the file mode.  If all else
	 * fails, return S_IFREG.
	 */
	switch (ri->alleged_ftype) {
	case XFS_DIR3_FT_DIR:
		*mode = S_IFDIR;
		break;
	case XFS_DIR3_FT_WHT:
	case XFS_DIR3_FT_CHRDEV:
		*mode = S_IFCHR;
		break;
	case XFS_DIR3_FT_BLKDEV:
		*mode = S_IFBLK;
		break;
	case XFS_DIR3_FT_FIFO:
		*mode = S_IFIFO;
		break;
	case XFS_DIR3_FT_SOCK:
		*mode = S_IFSOCK;
		break;
	case XFS_DIR3_FT_SYMLINK:
		*mode = S_IFLNK;
		break;
	default:
		*mode = S_IFREG;
		break;
	}
	return 0;
}
/*
 * Turn di_mode into /something/ recognizable.  Returns 0 for success or a
 * negative errno; -EINTR/-EBUSY/-EDEADLOCK mean the caller should retry.
 * (The old comment claimed a true/false return, which was wrong.)
 */
STATIC int
xrep_dinode_mode(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip)
{
	struct xfs_scrub	*sc = ri->sc;
	uint16_t		mode = be16_to_cpu(dip->di_mode);
	int			error;

	trace_xrep_dinode_mode(sc, dip);

	/* Mode is zero or already maps to a known ftype; nothing to fix. */
	if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
		return 0;

	/* Try to fix the mode.  If we cannot, then leave everything alone. */
	error = xrep_dinode_find_mode(ri, &mode);
	switch (error) {
	case -EINTR:
	case -EBUSY:
	case -EDEADLOCK:
		/* temporary failure or fatal signal */
		return error;
	case 0:
		/* found mode */
		break;
	default:
		/* some other error, assume S_IFREG */
		mode = S_IFREG;
		break;
	}

	/* bad mode, so we set it to a file that only root can read */
	dip->di_mode = cpu_to_be16(mode);
	dip->di_uid = 0;
	dip->di_gid = 0;
	ri->zap_acls = true;
	return 0;
}
  462. /* Fix unused link count fields having nonzero values. */
  463. STATIC void
  464. xrep_dinode_nlinks(
  465. struct xfs_dinode *dip)
  466. {
  467. if (dip->di_version > 1)
  468. dip->di_onlink = 0;
  469. else
  470. dip->di_nlink = 0;
  471. }
/* Fix any conflicting flags that the verifiers complain about. */
STATIC void
xrep_dinode_flags(
	struct xfs_scrub	*sc,
	struct xfs_dinode	*dip,
	bool			isrt)
{
	struct xfs_mount	*mp = sc->mp;
	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
	uint16_t		flags = be16_to_cpu(dip->di_flags);
	uint16_t		mode = be16_to_cpu(dip->di_mode);

	trace_xrep_dinode_flags(sc, dip);

	/* Make the REALTIME flag match the caller-supplied placement. */
	if (isrt)
		flags |= XFS_DIFLAG_REALTIME;
	else
		flags &= ~XFS_DIFLAG_REALTIME;

	/*
	 * For regular files on a reflink filesystem, set the REFLINK flag to
	 * protect shared extents.  A later stage will actually check those
	 * extents and clear the flag if possible.
	 */
	if (xfs_has_reflink(mp) && S_ISREG(mode))
		flags2 |= XFS_DIFLAG2_REFLINK;
	else
		flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);

	/*
	 * Realtime and reflink are mutually exclusive; this must come after
	 * the REFLINK bit may have been set above.
	 */
	if (flags & XFS_DIFLAG_REALTIME)
		flags2 &= ~XFS_DIFLAG2_REFLINK;

	/* Strip flags2 bits for features this filesystem doesn't have. */
	if (!xfs_has_bigtime(mp))
		flags2 &= ~XFS_DIFLAG2_BIGTIME;
	if (!xfs_has_large_extent_counts(mp))
		flags2 &= ~XFS_DIFLAG2_NREXT64;

	/* Zero the pad field appropriate to the final flag/version layout. */
	if (flags2 & XFS_DIFLAG2_NREXT64)
		dip->di_nrext64_pad = 0;
	else if (dip->di_version >= 3)
		dip->di_v3_pad = 0;

	dip->di_flags = cpu_to_be16(flags);
	dip->di_flags2 = cpu_to_be64(flags2);
}
  510. /*
  511. * Blow out symlink; now it points nowhere. We don't have to worry about
  512. * incore state because this inode is failing the verifiers.
  513. */
  514. STATIC void
  515. xrep_dinode_zap_symlink(
  516. struct xrep_inode *ri,
  517. struct xfs_dinode *dip)
  518. {
  519. struct xfs_scrub *sc = ri->sc;
  520. char *p;
  521. trace_xrep_dinode_zap_symlink(sc, dip);
  522. dip->di_format = XFS_DINODE_FMT_LOCAL;
  523. dip->di_size = cpu_to_be64(1);
  524. p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
  525. *p = '?';
  526. ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
  527. }
  528. /*
  529. * Blow out dir, make the parent point to the root. In the future repair will
  530. * reconstruct this directory for us. Note that there's no in-core directory
  531. * inode because the sf verifier tripped, so we don't have to worry about the
  532. * dentry cache.
  533. */
  534. STATIC void
  535. xrep_dinode_zap_dir(
  536. struct xrep_inode *ri,
  537. struct xfs_dinode *dip)
  538. {
  539. struct xfs_scrub *sc = ri->sc;
  540. struct xfs_mount *mp = sc->mp;
  541. struct xfs_dir2_sf_hdr *sfp;
  542. int i8count;
  543. trace_xrep_dinode_zap_dir(sc, dip);
  544. dip->di_format = XFS_DINODE_FMT_LOCAL;
  545. i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
  546. sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
  547. sfp->count = 0;
  548. sfp->i8count = i8count;
  549. xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
  550. dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
  551. ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
  552. }
  553. /* Make sure we don't have a garbage file size. */
  554. STATIC void
  555. xrep_dinode_size(
  556. struct xrep_inode *ri,
  557. struct xfs_dinode *dip)
  558. {
  559. struct xfs_scrub *sc = ri->sc;
  560. uint64_t size = be64_to_cpu(dip->di_size);
  561. uint16_t mode = be16_to_cpu(dip->di_mode);
  562. trace_xrep_dinode_size(sc, dip);
  563. switch (mode & S_IFMT) {
  564. case S_IFIFO:
  565. case S_IFCHR:
  566. case S_IFBLK:
  567. case S_IFSOCK:
  568. /* di_size can't be nonzero for special files */
  569. dip->di_size = 0;
  570. break;
  571. case S_IFREG:
  572. /* Regular files can't be larger than 2^63-1 bytes. */
  573. dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
  574. break;
  575. case S_IFLNK:
  576. /*
  577. * Truncate ridiculously oversized symlinks. If the size is
  578. * zero, reset it to point to the current directory. Both of
  579. * these conditions trigger dinode verifier errors, so there
  580. * is no in-core state to reset.
  581. */
  582. if (size > XFS_SYMLINK_MAXLEN)
  583. dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
  584. else if (size == 0)
  585. xrep_dinode_zap_symlink(ri, dip);
  586. break;
  587. case S_IFDIR:
  588. /*
  589. * Directories can't have a size larger than 32G. If the size
  590. * is zero, reset it to an empty directory. Both of these
  591. * conditions trigger dinode verifier errors, so there is no
  592. * in-core state to reset.
  593. */
  594. if (size > XFS_DIR2_SPACE_SIZE)
  595. dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
  596. else if (size == 0)
  597. xrep_dinode_zap_dir(ri, dip);
  598. break;
  599. }
  600. }
  601. /* Fix extent size hints. */
  602. STATIC void
  603. xrep_dinode_extsize_hints(
  604. struct xfs_scrub *sc,
  605. struct xfs_dinode *dip)
  606. {
  607. struct xfs_mount *mp = sc->mp;
  608. uint64_t flags2 = be64_to_cpu(dip->di_flags2);
  609. uint16_t flags = be16_to_cpu(dip->di_flags);
  610. uint16_t mode = be16_to_cpu(dip->di_mode);
  611. xfs_failaddr_t fa;
  612. trace_xrep_dinode_extsize_hints(sc, dip);
  613. fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
  614. mode, flags);
  615. if (fa) {
  616. dip->di_extsize = 0;
  617. dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
  618. XFS_DIFLAG_EXTSZINHERIT);
  619. }
  620. if (dip->di_version < 3)
  621. return;
  622. fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
  623. mode, flags, flags2);
  624. if (fa) {
  625. dip->di_cowextsize = 0;
  626. dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
  627. }
  628. }
  629. /* Count extents and blocks for an inode given an rmap. */
  630. STATIC int
  631. xrep_dinode_walk_rmap(
  632. struct xfs_btree_cur *cur,
  633. const struct xfs_rmap_irec *rec,
  634. void *priv)
  635. {
  636. struct xrep_inode *ri = priv;
  637. int error = 0;
  638. if (xchk_should_terminate(ri->sc, &error))
  639. return error;
  640. /* We only care about this inode. */
  641. if (rec->rm_owner != ri->sc->sm->sm_ino)
  642. return 0;
  643. if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
  644. ri->attr_blocks += rec->rm_blockcount;
  645. if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
  646. ri->attr_extents++;
  647. return 0;
  648. }
  649. ri->data_blocks += rec->rm_blockcount;
  650. if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
  651. ri->data_extents++;
  652. return 0;
  653. }
  654. /* Count extents and blocks for an inode from all AG rmap data. */
  655. STATIC int
  656. xrep_dinode_count_ag_rmaps(
  657. struct xrep_inode *ri,
  658. struct xfs_perag *pag)
  659. {
  660. struct xfs_btree_cur *cur;
  661. struct xfs_buf *agf;
  662. int error;
  663. error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
  664. if (error)
  665. return error;
  666. cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
  667. error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
  668. xfs_btree_del_cursor(cur, error);
  669. xfs_trans_brelse(ri->sc->tp, agf);
  670. return error;
  671. }
/* Count extents and blocks for a given inode from all rmap data. */
STATIC int
xrep_dinode_count_rmaps(
	struct xrep_inode	*ri)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error;

	/*
	 * Without the reverse-mapping btree we cannot reconstruct per-fork
	 * counts, and this scan does not cover the realtime device.
	 */
	if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
		return -EOPNOTSUPP;

	for_each_perag(ri->sc->mp, agno, pag) {
		error = xrep_dinode_count_ag_rmaps(ri, pag);
		if (error) {
			/* Release the perag ref held by the iterator. */
			xfs_perag_rele(pag);
			return error;
		}
	}

	/* Can't have extents on both the rt and the data device. */
	if (ri->data_extents && ri->rt_extents)
		return -EFSCORRUPTED;

	trace_xrep_dinode_count_rmaps(ri->sc,
			ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
			ri->data_extents, ri->rt_extents, ri->attr_extents);
	return 0;
}
  697. /* Return true if this extents-format ifork looks like garbage. */
  698. STATIC bool
  699. xrep_dinode_bad_extents_fork(
  700. struct xfs_scrub *sc,
  701. struct xfs_dinode *dip,
  702. unsigned int dfork_size,
  703. int whichfork)
  704. {
  705. struct xfs_bmbt_irec new;
  706. struct xfs_bmbt_rec *dp;
  707. xfs_extnum_t nex;
  708. bool isrt;
  709. unsigned int i;
  710. nex = xfs_dfork_nextents(dip, whichfork);
  711. if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
  712. return true;
  713. dp = XFS_DFORK_PTR(dip, whichfork);
  714. isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
  715. for (i = 0; i < nex; i++, dp++) {
  716. xfs_failaddr_t fa;
  717. xfs_bmbt_disk_get_all(dp, &new);
  718. fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
  719. &new);
  720. if (fa)
  721. return true;
  722. }
  723. return false;
  724. }
/* Return true if this btree-format ifork looks like garbage. */
STATIC bool
xrep_dinode_bad_bmbt_fork(
	struct xfs_scrub	*sc,
	struct xfs_dinode	*dip,
	unsigned int		dfork_size,
	int			whichfork)
{
	struct xfs_bmdr_block	*dfp;
	xfs_extnum_t		nex;
	unsigned int		i;
	unsigned int		dmxr;
	unsigned int		nrecs;
	unsigned int		level;

	/*
	 * Btree format is only justified when the extent count exceeds what
	 * extents format could hold in the fork area; anything smaller means
	 * the format byte is wrong.
	 */
	nex = xfs_dfork_nextents(dip, whichfork);
	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
		return true;

	/* The fork area must at least hold the bmbt root block header. */
	if (dfork_size < sizeof(struct xfs_bmdr_block))
		return true;

	dfp = XFS_DFORK_PTR(dip, whichfork);
	nrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	/* Root must be non-empty and its records must fit in the fork. */
	if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
		return true;
	/* Level 0 or a level beyond the maximum is invalid for a root. */
	if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
		return true;

	/* Sanity-check each key/pointer pair in the root block. */
	dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
	for (i = 1; i <= nrecs; i++) {
		struct xfs_bmbt_key	*fkp;
		xfs_bmbt_ptr_t		*fpp;
		xfs_fileoff_t		fileoff;
		xfs_fsblock_t		fsbno;

		fkp = xfs_bmdr_key_addr(dfp, i);
		fileoff = be64_to_cpu(fkp->br_startoff);
		if (!xfs_verify_fileoff(sc->mp, fileoff))
			return true;

		fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr);
		fsbno = be64_to_cpu(*fpp);
		if (!xfs_verify_fsbno(sc->mp, fsbno))
			return true;
	}

	return false;
}
  768. /*
  769. * Check the data fork for things that will fail the ifork verifiers or the
  770. * ifork formatters.
  771. */
  772. STATIC bool
  773. xrep_dinode_check_dfork(
  774. struct xfs_scrub *sc,
  775. struct xfs_dinode *dip,
  776. uint16_t mode)
  777. {
  778. void *dfork_ptr;
  779. int64_t data_size;
  780. unsigned int fmt;
  781. unsigned int dfork_size;
  782. /*
  783. * Verifier functions take signed int64_t, so check for bogus negative
  784. * values first.
  785. */
  786. data_size = be64_to_cpu(dip->di_size);
  787. if (data_size < 0)
  788. return true;
  789. fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
  790. switch (mode & S_IFMT) {
  791. case S_IFIFO:
  792. case S_IFCHR:
  793. case S_IFBLK:
  794. case S_IFSOCK:
  795. if (fmt != XFS_DINODE_FMT_DEV)
  796. return true;
  797. break;
  798. case S_IFREG:
  799. if (fmt == XFS_DINODE_FMT_LOCAL)
  800. return true;
  801. fallthrough;
  802. case S_IFLNK:
  803. case S_IFDIR:
  804. switch (fmt) {
  805. case XFS_DINODE_FMT_LOCAL:
  806. case XFS_DINODE_FMT_EXTENTS:
  807. case XFS_DINODE_FMT_BTREE:
  808. break;
  809. default:
  810. return true;
  811. }
  812. break;
  813. default:
  814. return true;
  815. }
  816. dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
  817. dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
  818. switch (fmt) {
  819. case XFS_DINODE_FMT_DEV:
  820. break;
  821. case XFS_DINODE_FMT_LOCAL:
  822. /* dir/symlink structure cannot be larger than the fork */
  823. if (data_size > dfork_size)
  824. return true;
  825. /* directory structure must pass verification. */
  826. if (S_ISDIR(mode) &&
  827. xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
  828. return true;
  829. /* symlink structure must pass verification. */
  830. if (S_ISLNK(mode) &&
  831. xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
  832. return true;
  833. break;
  834. case XFS_DINODE_FMT_EXTENTS:
  835. if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
  836. XFS_DATA_FORK))
  837. return true;
  838. break;
  839. case XFS_DINODE_FMT_BTREE:
  840. if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
  841. XFS_DATA_FORK))
  842. return true;
  843. break;
  844. default:
  845. return true;
  846. }
  847. return false;
  848. }
  849. static void
  850. xrep_dinode_set_data_nextents(
  851. struct xfs_dinode *dip,
  852. xfs_extnum_t nextents)
  853. {
  854. if (xfs_dinode_has_large_extent_counts(dip))
  855. dip->di_big_nextents = cpu_to_be64(nextents);
  856. else
  857. dip->di_nextents = cpu_to_be32(nextents);
  858. }
  859. static void
  860. xrep_dinode_set_attr_nextents(
  861. struct xfs_dinode *dip,
  862. xfs_extnum_t nextents)
  863. {
  864. if (xfs_dinode_has_large_extent_counts(dip))
  865. dip->di_big_anextents = cpu_to_be32(nextents);
  866. else
  867. dip->di_anextents = cpu_to_be16(nextents);
  868. }
/* Reset the data fork to something sane. */
STATIC void
xrep_dinode_zap_dfork(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip,
	uint16_t		mode)
{
	struct xfs_scrub	*sc = ri->sc;

	trace_xrep_dinode_zap_dfork(sc, dip);

	/* Record the zap so it can be reported in the inode health state. */
	ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;

	xrep_dinode_set_data_nextents(dip, 0);
	ri->data_blocks = 0;
	ri->rt_blocks = 0;

	/* Special files always get reset to DEV */
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		dip->di_format = XFS_DINODE_FMT_DEV;
		dip->di_size = 0;
		return;
	}

	/*
	 * If we have data extents, reset to an empty map and hope the user
	 * will run the bmapbtd checker next.
	 */
	if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
		dip->di_format = XFS_DINODE_FMT_EXTENTS;
		return;
	}

	/* Otherwise, reset the local format to the minimum. */
	switch (mode & S_IFMT) {
	case S_IFLNK:
		xrep_dinode_zap_symlink(ri, dip);
		break;
	case S_IFDIR:
		xrep_dinode_zap_dir(ri, dip);
		break;
	}
}
  910. /*
  911. * Check the attr fork for things that will fail the ifork verifiers or the
  912. * ifork formatters.
  913. */
  914. STATIC bool
  915. xrep_dinode_check_afork(
  916. struct xfs_scrub *sc,
  917. struct xfs_dinode *dip)
  918. {
  919. struct xfs_attr_sf_hdr *afork_ptr;
  920. size_t attr_size;
  921. unsigned int afork_size;
  922. if (XFS_DFORK_BOFF(dip) == 0)
  923. return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
  924. xfs_dfork_attr_extents(dip) != 0;
  925. afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
  926. afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
  927. switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
  928. case XFS_DINODE_FMT_LOCAL:
  929. /* Fork has to be large enough to extract the xattr size. */
  930. if (afork_size < sizeof(struct xfs_attr_sf_hdr))
  931. return true;
  932. /* xattr structure cannot be larger than the fork */
  933. attr_size = be16_to_cpu(afork_ptr->totsize);
  934. if (attr_size > afork_size)
  935. return true;
  936. /* xattr structure must pass verification. */
  937. return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
  938. case XFS_DINODE_FMT_EXTENTS:
  939. if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
  940. XFS_ATTR_FORK))
  941. return true;
  942. break;
  943. case XFS_DINODE_FMT_BTREE:
  944. if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
  945. XFS_ATTR_FORK))
  946. return true;
  947. break;
  948. default:
  949. return true;
  950. }
  951. return false;
  952. }
  953. /*
  954. * Reset the attr fork to empty. Since the attr fork could have contained
  955. * ACLs, make the file readable only by root.
  956. */
  957. STATIC void
  958. xrep_dinode_zap_afork(
  959. struct xrep_inode *ri,
  960. struct xfs_dinode *dip,
  961. uint16_t mode)
  962. {
  963. struct xfs_scrub *sc = ri->sc;
  964. trace_xrep_dinode_zap_afork(sc, dip);
  965. ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
  966. dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
  967. xrep_dinode_set_attr_nextents(dip, 0);
  968. ri->attr_blocks = 0;
  969. /*
  970. * If the data fork is in btree format, removing the attr fork entirely
  971. * might cause verifier failures if the next level down in the bmbt
  972. * could now fit in the data fork area.
  973. */
  974. if (dip->di_format != XFS_DINODE_FMT_BTREE)
  975. dip->di_forkoff = 0;
  976. dip->di_mode = cpu_to_be16(mode & ~0777);
  977. dip->di_uid = 0;
  978. dip->di_gid = 0;
  979. }
/* Make sure the fork offset is a sensible value. */
STATIC void
xrep_dinode_ensure_forkoff(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip,
	uint16_t		mode)
{
	struct xfs_bmdr_block	*bmdr;
	struct xfs_scrub	*sc = ri->sc;
	xfs_extnum_t		attr_extents, data_extents;
	size_t			bmdr_minsz = xfs_bmdr_space_calc(1);
	unsigned int		lit_sz = XFS_LITINO(sc->mp);
	unsigned int		afork_min, dfork_min;

	trace_xrep_dinode_ensure_forkoff(sc, dip);

	/*
	 * Before calling this function, xrep_dinode_core ensured that both
	 * forks actually fit inside their respective literal areas.  If this
	 * was not the case, the fork was reset to FMT_EXTENTS with zero
	 * records.  If the rmapbt scan found attr or data fork blocks, this
	 * will be noted in the dinode_stats, and we must leave enough room
	 * for the bmap repair code to reconstruct the mapping structure.
	 *
	 * First, compute the minimum space required for the attr fork.
	 */
	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform xattr structure at all, that
		 * means the attr fork area was exactly large enough to fit
		 * the sf structure.
		 */
		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		attr_extents = xfs_dfork_attr_extents(dip);
		if (attr_extents) {
			/*
			 * We must maintain sufficient space to hold the
			 * entire extent map array in the data fork.  Note
			 * that we previously zapped the fork if it had no
			 * chance of fitting in the inode.
			 */
			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
		} else if (ri->attr_extents > 0) {
			/*
			 * The attr fork thinks it has zero extents, but we
			 * found some xattr extents.  We need to leave enough
			 * empty space here so that the incore attr fork will
			 * get created (and hence trigger the attr fork bmap
			 * repairer).
			 */
			afork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			afork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
		afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
		break;
	default:
		/* We should never see any other formats. */
		afork_min = 0;
		break;
	}

	/* Compute the minimum space required for the data fork. */
	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		dfork_min = sizeof(__be32);
		break;
	case XFS_DINODE_FMT_UUID:
		dfork_min = sizeof(uuid_t);
		break;
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform data fork at all, that means
		 * the data fork area was large enough to fit whatever was in
		 * there.
		 */
		dfork_min = be64_to_cpu(dip->di_size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		data_extents = xfs_dfork_data_extents(dip);
		if (data_extents) {
			/*
			 * We must maintain sufficient space to hold the
			 * entire extent map array in the data fork.  Note
			 * that we previously zapped the fork if it had no
			 * chance of fitting in the inode.
			 */
			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
			/*
			 * The data fork thinks it has zero extents, but we
			 * found some data extents.  We need to leave enough
			 * empty space here so that the data fork bmap repair
			 * will recover the mappings.
			 */
			dfork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			dfork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
		dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
		break;
	default:
		dfork_min = 0;
		break;
	}

	/*
	 * Round all values up to the nearest 8 bytes, because that is the
	 * precision of di_forkoff.
	 */
	afork_min = roundup(afork_min, 8);
	dfork_min = roundup(dfork_min, 8);
	bmdr_minsz = roundup(bmdr_minsz, 8);

	ASSERT(dfork_min <= lit_sz);
	ASSERT(afork_min <= lit_sz);

	/*
	 * If the data fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork up.
	 */
	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_data_extents(dip) == 0 &&
	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
		if (bmdr_minsz + afork_min > lit_sz) {
			/*
			 * The attr fork and the stub fork we need to recover
			 * the data fork won't both fit.  Zap the attr fork.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
			afork_min = bmdr_minsz;
		} else {
			void	*before, *after;

			/* Otherwise, just slide the attr fork up. */
			before = XFS_DFORK_APTR(dip);
			dip->di_forkoff = bmdr_minsz >> 3;
			after = XFS_DFORK_APTR(dip);
			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
		}
	}

	/*
	 * If the attr fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork down.
	 */
	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_attr_extents(dip) == 0 &&
	    ri->attr_extents > 0 &&
	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
			/*
			 * If the data fork is in btree format then we can't
			 * adjust forkoff because that runs the risk of
			 * violating the extents/btree format transition rules.
			 */
		} else if (bmdr_minsz + dfork_min > lit_sz) {
			/*
			 * If we can't move the attr fork, too bad, we lose the
			 * attr fork and leak its blocks.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
		} else {
			/*
			 * Otherwise, just slide the attr fork down.  The attr
			 * fork is empty, so we don't have any old contents to
			 * move here.
			 */
			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
		}
	}
}
  1158. /*
  1159. * Zap the data/attr forks if we spot anything that isn't going to pass the
  1160. * ifork verifiers or the ifork formatters, because we need to get the inode
  1161. * into good enough shape that the higher level repair functions can run.
  1162. */
  1163. STATIC void
  1164. xrep_dinode_zap_forks(
  1165. struct xrep_inode *ri,
  1166. struct xfs_dinode *dip)
  1167. {
  1168. struct xfs_scrub *sc = ri->sc;
  1169. xfs_extnum_t data_extents;
  1170. xfs_extnum_t attr_extents;
  1171. xfs_filblks_t nblocks;
  1172. uint16_t mode;
  1173. bool zap_datafork = false;
  1174. bool zap_attrfork = ri->zap_acls;
  1175. trace_xrep_dinode_zap_forks(sc, dip);
  1176. mode = be16_to_cpu(dip->di_mode);
  1177. data_extents = xfs_dfork_data_extents(dip);
  1178. attr_extents = xfs_dfork_attr_extents(dip);
  1179. nblocks = be64_to_cpu(dip->di_nblocks);
  1180. /* Inode counters don't make sense? */
  1181. if (data_extents > nblocks)
  1182. zap_datafork = true;
  1183. if (attr_extents > nblocks)
  1184. zap_attrfork = true;
  1185. if (data_extents + attr_extents > nblocks)
  1186. zap_datafork = zap_attrfork = true;
  1187. if (!zap_datafork)
  1188. zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
  1189. if (!zap_attrfork)
  1190. zap_attrfork = xrep_dinode_check_afork(sc, dip);
  1191. /* Zap whatever's bad. */
  1192. if (zap_attrfork)
  1193. xrep_dinode_zap_afork(ri, dip, mode);
  1194. if (zap_datafork)
  1195. xrep_dinode_zap_dfork(ri, dip, mode);
  1196. xrep_dinode_ensure_forkoff(ri, dip, mode);
  1197. /*
  1198. * Zero di_nblocks if we don't have any extents at all to satisfy the
  1199. * buffer verifier.
  1200. */
  1201. data_extents = xfs_dfork_data_extents(dip);
  1202. attr_extents = xfs_dfork_attr_extents(dip);
  1203. if (data_extents + attr_extents == 0)
  1204. dip->di_nblocks = 0;
  1205. }
/* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
STATIC int
xrep_dinode_core(
	struct xrep_inode	*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	xfs_ino_t		ino = sc->sm->sm_ino;
	int			error;
	int			iget_error;

	/* Figure out what this inode had mapped in both forks. */
	error = xrep_dinode_count_rmaps(ri);
	if (error)
		return error;

	/* Read the inode cluster buffer. */
	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
			NULL);
	if (error)
		return error;

	/* Make sure we can pass the inode buffer verifier. */
	xrep_dinode_buf(sc, bp);
	bp->b_ops = &xfs_inode_buf_ops;

	/* Fix everything the verifier will complain about. */
	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
	xrep_dinode_header(sc, dip);
	iget_error = xrep_dinode_mode(ri, dip);
	if (iget_error)
		goto write;
	xrep_dinode_nlinks(dip);
	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
	xrep_dinode_size(ri, dip);
	xrep_dinode_extsize_hints(sc, dip);
	xrep_dinode_zap_forks(ri, dip);

write:
	/* Write out the inode. */
	trace_xrep_dinode_fixed(sc, dip);
	xfs_dinode_calc_crc(sc->mp, dip);
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);

	/*
	 * In theory, we've fixed the ondisk inode record enough that we should
	 * be able to load the inode into the cache.  Try to iget that inode
	 * now while we hold the AGI and the inode cluster buffer and take the
	 * IOLOCK so that we can continue with repairs without anyone else
	 * accessing the inode.  If iget fails, we still need to commit the
	 * changes.
	 */
	if (!iget_error)
		iget_error = xchk_iget(sc, ino, &sc->ip);
	if (!iget_error)
		xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * Commit the inode cluster buffer updates and drop the AGI buffer that
	 * we've been holding since scrub setup.  From here on out, repairs
	 * deal only with the cached inode.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;

	if (iget_error)
		return iget_error;

	/* Reacquire a transaction and attach dquots for the rest of repair. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;

	error = xrep_ino_dqattach(sc);
	if (error)
		return error;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
	if (ri->ino_sick_mask)
		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
	return 0;
}
/* Fix everything xfs_dinode_verify cares about. */
STATIC int
xrep_dinode_problems(
	struct xrep_inode	*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	error = xrep_dinode_core(ri);
	if (error)
		return error;

	/* We had to fix a totally busted inode, schedule quotacheck. */
	if (XFS_IS_UQUOTA_ON(sc->mp))
		xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
	if (XFS_IS_GQUOTA_ON(sc->mp))
		xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
	if (XFS_IS_PQUOTA_ON(sc->mp))
		xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);

	return 0;
}
  1300. /*
  1301. * Fix problems that the verifiers don't care about. In general these are
  1302. * errors that don't cause problems elsewhere in the kernel that we can easily
  1303. * detect, so we don't check them all that rigorously.
  1304. */
  1305. /* Make sure block and extent counts are ok. */
  1306. STATIC int
  1307. xrep_inode_blockcounts(
  1308. struct xfs_scrub *sc)
  1309. {
  1310. struct xfs_ifork *ifp;
  1311. xfs_filblks_t count;
  1312. xfs_filblks_t acount;
  1313. xfs_extnum_t nextents;
  1314. int error;
  1315. trace_xrep_inode_blockcounts(sc);
  1316. /* Set data fork counters from the data fork mappings. */
  1317. error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
  1318. &nextents, &count);
  1319. if (error)
  1320. return error;
  1321. if (xfs_is_reflink_inode(sc->ip)) {
  1322. /*
  1323. * data fork blockcount can exceed physical storage if a user
  1324. * reflinks the same block over and over again.
  1325. */
  1326. ;
  1327. } else if (XFS_IS_REALTIME_INODE(sc->ip)) {
  1328. if (count >= sc->mp->m_sb.sb_rblocks)
  1329. return -EFSCORRUPTED;
  1330. } else {
  1331. if (count >= sc->mp->m_sb.sb_dblocks)
  1332. return -EFSCORRUPTED;
  1333. }
  1334. error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
  1335. if (error)
  1336. return error;
  1337. sc->ip->i_df.if_nextents = nextents;
  1338. /* Set attr fork counters from the attr fork mappings. */
  1339. ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
  1340. if (ifp) {
  1341. error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
  1342. &nextents, &acount);
  1343. if (error)
  1344. return error;
  1345. if (count >= sc->mp->m_sb.sb_dblocks)
  1346. return -EFSCORRUPTED;
  1347. error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
  1348. nextents);
  1349. if (error)
  1350. return error;
  1351. ifp->if_nextents = nextents;
  1352. } else {
  1353. acount = 0;
  1354. }
  1355. sc->ip->i_nblocks = count + acount;
  1356. return 0;
  1357. }
/* Check for invalid uid/gid/prid. */
STATIC void
xrep_inode_ids(
	struct xfs_scrub	*sc)
{
	bool			dirty = false;

	trace_xrep_inode_ids(sc);

	/* Reset any invalid id to root and force a quotacheck for its type. */
	if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
		i_uid_write(VFS_I(sc->ip), 0);
		dirty = true;
		if (XFS_IS_UQUOTA_ON(sc->mp))
			xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
	}

	if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
		i_gid_write(VFS_I(sc->ip), 0);
		dirty = true;
		if (XFS_IS_GQUOTA_ON(sc->mp))
			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
	}

	if (sc->ip->i_projid == -1U) {
		sc->ip->i_projid = 0;
		dirty = true;
		if (XFS_IS_PQUOTA_ON(sc->mp))
			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
	}

	/* strip setuid/setgid if we touched any of the ids */
	if (dirty)
		VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
}
  1387. static inline void
  1388. xrep_clamp_timestamp(
  1389. struct xfs_inode *ip,
  1390. struct timespec64 *ts)
  1391. {
  1392. ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
  1393. *ts = timestamp_truncate(*ts, VFS_I(ip));
  1394. }
/* Nanosecond counters can't have more than 1 billion. */
STATIC void
xrep_inode_timestamps(
	struct xfs_inode	*ip)
{
	struct timespec64	tstamp;
	struct inode		*inode = VFS_I(ip);

	/* Clamp each of the three VFS timestamps in turn. */
	tstamp = inode_get_atime(inode);
	xrep_clamp_timestamp(ip, &tstamp);
	inode_set_atime_to_ts(inode, tstamp);

	tstamp = inode_get_mtime(inode);
	xrep_clamp_timestamp(ip, &tstamp);
	inode_set_mtime_to_ts(inode, tstamp);

	tstamp = inode_get_ctime(inode);
	xrep_clamp_timestamp(ip, &tstamp);
	inode_set_ctime_to_ts(inode, tstamp);

	/* The XFS creation time is clamped in place. */
	xrep_clamp_timestamp(ip, &ip->i_crtime);
}
/* Fix inode flags that don't make sense together. */
STATIC void
xrep_inode_flags(
	struct xfs_scrub	*sc)
{
	uint16_t		mode;

	trace_xrep_inode_flags(sc);

	mode = VFS_I(sc->ip)->i_mode;

	/* Clear junk flags */
	if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
		sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;

	/* NEWRTBM only applies to realtime bitmaps */
	if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
		sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
	else
		sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;

	/* These only make sense for directories. */
	if (!S_ISDIR(mode))
		sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
				       XFS_DIFLAG_EXTSZINHERIT |
				       XFS_DIFLAG_PROJINHERIT |
				       XFS_DIFLAG_NOSYMLINKS);

	/* These only make sense for files. */
	if (!S_ISREG(mode))
		sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
				       XFS_DIFLAG_EXTSIZE);

	/* These only make sense for non-rt files. */
	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
		sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;

	/* Immutable and append only?  Drop the append. */
	if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
	    (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
		sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;

	/* Clear junk flags. */
	if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;

	/* No reflink flag unless we support it and it's a file. */
	if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;

	/* DAX only applies to files and dirs. */
	if (!(S_ISREG(mode) || S_ISDIR(mode)))
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;

	/* No reflink files on the realtime device. */
	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
}
  1459. /*
  1460. * Fix size problems with block/node format directories. If we fail to find
  1461. * the extent list, just bail out and let the bmapbtd repair functions clean
  1462. * up that mess.
  1463. */
  1464. STATIC void
  1465. xrep_inode_blockdir_size(
  1466. struct xfs_scrub *sc)
  1467. {
  1468. struct xfs_iext_cursor icur;
  1469. struct xfs_bmbt_irec got;
  1470. struct xfs_ifork *ifp;
  1471. xfs_fileoff_t off;
  1472. int error;
  1473. trace_xrep_inode_blockdir_size(sc);
  1474. error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
  1475. if (error)
  1476. return;
  1477. /* Find the last block before 32G; this is the dir size. */
  1478. ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
  1479. off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
  1480. if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
  1481. /* zero-extents directory? */
  1482. return;
  1483. }
  1484. off = got.br_startoff + got.br_blockcount;
  1485. sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
  1486. XFS_FSB_TO_B(sc->mp, off));
  1487. }
  1488. /* Fix size problems with short format directories. */
  1489. STATIC void
  1490. xrep_inode_sfdir_size(
  1491. struct xfs_scrub *sc)
  1492. {
  1493. struct xfs_ifork *ifp;
  1494. trace_xrep_inode_sfdir_size(sc);
  1495. ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
  1496. sc->ip->i_disk_size = ifp->if_bytes;
  1497. }
  1498. /*
  1499. * Fix any irregularities in a directory inode's size now that we can iterate
  1500. * extent maps and access other regular inode data.
  1501. */
  1502. STATIC void
  1503. xrep_inode_dir_size(
  1504. struct xfs_scrub *sc)
  1505. {
  1506. trace_xrep_inode_dir_size(sc);
  1507. switch (sc->ip->i_df.if_format) {
  1508. case XFS_DINODE_FMT_EXTENTS:
  1509. case XFS_DINODE_FMT_BTREE:
  1510. xrep_inode_blockdir_size(sc);
  1511. break;
  1512. case XFS_DINODE_FMT_LOCAL:
  1513. xrep_inode_sfdir_size(sc);
  1514. break;
  1515. }
  1516. }
/* Fix extent size hint problems. */
STATIC void
xrep_inode_extsize(
	struct xfs_scrub	*sc)
{
	/* Fix misaligned extent size hints on a directory. */
	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
	    (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
	    xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
		/* Hint is not a multiple of the rt extent size; drop it. */
		sc->ip->i_extsize = 0;
		sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
	}
}
/* Ensure this file has an attr fork if it needs to hold a parent pointer. */
STATIC int
xrep_inode_pptr(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	struct inode		*inode = VFS_I(ip);

	if (!xfs_has_parent(mp))
		return 0;

	/*
	 * Unlinked inodes that cannot be added to the directory tree will not
	 * have a parent pointer.
	 */
	if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
		return 0;

	/* The root directory doesn't have a parent pointer. */
	if (ip == mp->m_rootip)
		return 0;

	/*
	 * Metadata inodes are rooted in the superblock and do not have any
	 * parents.
	 */
	if (xfs_is_metadata_inode(ip))
		return 0;

	/* Inode already has an attr fork; no further work possible here. */
	if (xfs_inode_has_attr_fork(ip))
		return 0;

	/* Add an attr fork big enough to hold a shortform xattr header. */
	return xfs_bmap_add_attrfork(sc->tp, ip,
			sizeof(struct xfs_attr_sf_hdr), true);
}
/* Fix any irregularities in an inode that the verifiers don't catch. */
STATIC int
xrep_inode_problems(
	struct xfs_scrub	*sc)
{
	int			error;

	error = xrep_inode_blockcounts(sc);
	if (error)
		return error;
	error = xrep_inode_pptr(sc);
	if (error)
		return error;
	xrep_inode_timestamps(sc->ip);
	xrep_inode_flags(sc);
	xrep_inode_ids(sc);
	/*
	 * We can now do a better job fixing the size of a directory now that
	 * we can scan the data fork extents than we could in xrep_dinode_size.
	 */
	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
		xrep_inode_dir_size(sc);
	xrep_inode_extsize(sc);

	trace_xrep_inode_fixed(sc);
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
	return xrep_roll_trans(sc);
}
  1587. /*
  1588. * Make sure this inode's unlinked list pointers are consistent with its
  1589. * link count.
  1590. */
  1591. STATIC int
  1592. xrep_inode_unlinked(
  1593. struct xfs_scrub *sc)
  1594. {
  1595. unsigned int nlink = VFS_I(sc->ip)->i_nlink;
  1596. int error;
  1597. /*
  1598. * If this inode is linked from the directory tree and on the unlinked
  1599. * list, remove it from the unlinked list.
  1600. */
  1601. if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
  1602. struct xfs_perag *pag;
  1603. int error;
  1604. pag = xfs_perag_get(sc->mp,
  1605. XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
  1606. error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
  1607. xfs_perag_put(pag);
  1608. if (error)
  1609. return error;
  1610. }
  1611. /*
  1612. * If this inode is not linked from the directory tree yet not on the
  1613. * unlinked list, put it on the unlinked list.
  1614. */
  1615. if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
  1616. error = xfs_iunlink(sc->tp, sc->ip);
  1617. if (error)
  1618. return error;
  1619. }
  1620. return 0;
  1621. }
/* Repair an inode's fields. */
int
xrep_inode(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	/*
	 * No inode?  That means we failed the _iget verifiers.  Repair all
	 * the things that the inode verifiers care about, then retry _iget.
	 */
	if (!sc->ip) {
		struct xrep_inode	*ri = sc->buf;

		ASSERT(ri != NULL);

		error = xrep_dinode_problems(ri);
		if (error == -EBUSY) {
			/*
			 * Directory scan to recover inode mode encountered a
			 * busy inode, so we did not continue repairing things.
			 */
			return 0;
		}
		if (error)
			return error;

		/* By this point we had better have a working incore inode. */
		if (!sc->ip)
			return -EFSCORRUPTED;
	}

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* If we found corruption of any kind, try to fix it. */
	if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
		error = xrep_inode_problems(sc);
		if (error)
			return error;
	}

	/* See if we can clear the reflink flag. */
	if (xfs_is_reflink_inode(sc->ip)) {
		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
		if (error)
			return error;
	}

	/* Reconnect incore unlinked list */
	error = xrep_inode_unlinked(sc);
	if (error)
		return error;

	return xrep_defer_finish(sc);
}