xfs_inode_buf.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  4. * All Rights Reserved.
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_log_format.h"
  11. #include "xfs_trans_resv.h"
  12. #include "xfs_mount.h"
  13. #include "xfs_defer.h"
  14. #include "xfs_inode.h"
  15. #include "xfs_errortag.h"
  16. #include "xfs_error.h"
  17. #include "xfs_cksum.h"
  18. #include "xfs_icache.h"
  19. #include "xfs_trans.h"
  20. #include "xfs_ialloc.h"
  21. #include "xfs_dir2.h"
  22. #include <linux/iversion.h>
  23. /*
  24. * Check that none of the inode's in the buffer have a next
  25. * unlinked field of 0.
  26. */
  27. #if defined(DEBUG)
  28. void
  29. xfs_inobp_check(
  30. xfs_mount_t *mp,
  31. xfs_buf_t *bp)
  32. {
  33. int i;
  34. int j;
  35. xfs_dinode_t *dip;
  36. j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
  37. for (i = 0; i < j; i++) {
  38. dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
  39. if (!dip->di_next_unlinked) {
  40. xfs_alert(mp,
  41. "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
  42. i, (long long)bp->b_bn);
  43. }
  44. }
  45. }
  46. #endif
  47. bool
  48. xfs_dinode_good_version(
  49. struct xfs_mount *mp,
  50. __u8 version)
  51. {
  52. if (xfs_sb_version_hascrc(&mp->m_sb))
  53. return version == 3;
  54. return version == 1 || version == 2;
  55. }
  56. /*
  57. * If we are doing readahead on an inode buffer, we might be in log recovery
  58. * reading an inode allocation buffer that hasn't yet been replayed, and hence
  59. * has not had the inode cores stamped into it. Hence for readahead, the buffer
  60. * may be potentially invalid.
  61. *
  62. * If the readahead buffer is invalid, we need to mark it with an error and
  63. * clear the DONE status of the buffer so that a followup read will re-read it
  64. * from disk. We don't report the error otherwise to avoid warnings during log
  65. * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
  66. * because all we want to do is say readahead failed; there is no-one to report
  67. * the error to, so this will distinguish it from a non-ra verifier failure.
  68. * Changes to this readahead error behavour also need to be reflected in
  69. * xfs_dquot_buf_readahead_verify().
  70. */
  71. static void
  72. xfs_inode_buf_verify(
  73. struct xfs_buf *bp,
  74. bool readahead)
  75. {
  76. struct xfs_mount *mp = bp->b_target->bt_mount;
  77. xfs_agnumber_t agno;
  78. int i;
  79. int ni;
  80. /*
  81. * Validate the magic number and version of every inode in the buffer
  82. */
  83. agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
  84. ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  85. for (i = 0; i < ni; i++) {
  86. int di_ok;
  87. xfs_dinode_t *dip;
  88. xfs_agino_t unlinked_ino;
  89. dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
  90. unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
  91. di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
  92. xfs_dinode_good_version(mp, dip->di_version) &&
  93. (unlinked_ino == NULLAGINO ||
  94. xfs_verify_agino(mp, agno, unlinked_ino));
  95. if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
  96. XFS_ERRTAG_ITOBP_INOTOBP))) {
  97. if (readahead) {
  98. bp->b_flags &= ~XBF_DONE;
  99. xfs_buf_ioerror(bp, -EIO);
  100. return;
  101. }
  102. #ifdef DEBUG
  103. xfs_alert(mp,
  104. "bad inode magic/vsn daddr %lld #%d (magic=%x)",
  105. (unsigned long long)bp->b_bn, i,
  106. be16_to_cpu(dip->di_magic));
  107. #endif
  108. xfs_buf_verifier_error(bp, -EFSCORRUPTED,
  109. __func__, dip, sizeof(*dip),
  110. NULL);
  111. return;
  112. }
  113. }
  114. }
  115. static void
  116. xfs_inode_buf_read_verify(
  117. struct xfs_buf *bp)
  118. {
  119. xfs_inode_buf_verify(bp, false);
  120. }
  121. static void
  122. xfs_inode_buf_readahead_verify(
  123. struct xfs_buf *bp)
  124. {
  125. xfs_inode_buf_verify(bp, true);
  126. }
  127. static void
  128. xfs_inode_buf_write_verify(
  129. struct xfs_buf *bp)
  130. {
  131. xfs_inode_buf_verify(bp, false);
  132. }
  133. const struct xfs_buf_ops xfs_inode_buf_ops = {
  134. .name = "xfs_inode",
  135. .verify_read = xfs_inode_buf_read_verify,
  136. .verify_write = xfs_inode_buf_write_verify,
  137. };
  138. const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
  139. .name = "xxfs_inode_ra",
  140. .verify_read = xfs_inode_buf_readahead_verify,
  141. .verify_write = xfs_inode_buf_write_verify,
  142. };
  143. /*
  144. * This routine is called to map an inode to the buffer containing the on-disk
  145. * version of the inode. It returns a pointer to the buffer containing the
  146. * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
  147. * pointer to the on-disk inode within that buffer.
  148. *
  149. * If a non-zero error is returned, then the contents of bpp and dipp are
  150. * undefined.
  151. */
  152. int
  153. xfs_imap_to_bp(
  154. struct xfs_mount *mp,
  155. struct xfs_trans *tp,
  156. struct xfs_imap *imap,
  157. struct xfs_dinode **dipp,
  158. struct xfs_buf **bpp,
  159. uint buf_flags,
  160. uint iget_flags)
  161. {
  162. struct xfs_buf *bp;
  163. int error;
  164. buf_flags |= XBF_UNMAPPED;
  165. error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
  166. (int)imap->im_len, buf_flags, &bp,
  167. &xfs_inode_buf_ops);
  168. if (error) {
  169. if (error == -EAGAIN) {
  170. ASSERT(buf_flags & XBF_TRYLOCK);
  171. return error;
  172. }
  173. xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
  174. __func__, error);
  175. return error;
  176. }
  177. *bpp = bp;
  178. *dipp = xfs_buf_offset(bp, imap->im_boffset);
  179. return 0;
  180. }
  181. void
  182. xfs_inode_from_disk(
  183. struct xfs_inode *ip,
  184. struct xfs_dinode *from)
  185. {
  186. struct xfs_icdinode *to = &ip->i_d;
  187. struct inode *inode = VFS_I(ip);
  188. /*
  189. * Convert v1 inodes immediately to v2 inode format as this is the
  190. * minimum inode version format we support in the rest of the code.
  191. */
  192. to->di_version = from->di_version;
  193. if (to->di_version == 1) {
  194. set_nlink(inode, be16_to_cpu(from->di_onlink));
  195. to->di_projid_lo = 0;
  196. to->di_projid_hi = 0;
  197. to->di_version = 2;
  198. } else {
  199. set_nlink(inode, be32_to_cpu(from->di_nlink));
  200. to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
  201. to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
  202. }
  203. to->di_format = from->di_format;
  204. to->di_uid = be32_to_cpu(from->di_uid);
  205. to->di_gid = be32_to_cpu(from->di_gid);
  206. to->di_flushiter = be16_to_cpu(from->di_flushiter);
  207. /*
  208. * Time is signed, so need to convert to signed 32 bit before
  209. * storing in inode timestamp which may be 64 bit. Otherwise
  210. * a time before epoch is converted to a time long after epoch
  211. * on 64 bit systems.
  212. */
  213. inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
  214. inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
  215. inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
  216. inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
  217. inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
  218. inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
  219. inode->i_generation = be32_to_cpu(from->di_gen);
  220. inode->i_mode = be16_to_cpu(from->di_mode);
  221. to->di_size = be64_to_cpu(from->di_size);
  222. to->di_nblocks = be64_to_cpu(from->di_nblocks);
  223. to->di_extsize = be32_to_cpu(from->di_extsize);
  224. to->di_nextents = be32_to_cpu(from->di_nextents);
  225. to->di_anextents = be16_to_cpu(from->di_anextents);
  226. to->di_forkoff = from->di_forkoff;
  227. to->di_aformat = from->di_aformat;
  228. to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
  229. to->di_dmstate = be16_to_cpu(from->di_dmstate);
  230. to->di_flags = be16_to_cpu(from->di_flags);
  231. if (to->di_version == 3) {
  232. inode_set_iversion_queried(inode,
  233. be64_to_cpu(from->di_changecount));
  234. to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
  235. to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
  236. to->di_flags2 = be64_to_cpu(from->di_flags2);
  237. to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
  238. }
  239. }
  240. void
  241. xfs_inode_to_disk(
  242. struct xfs_inode *ip,
  243. struct xfs_dinode *to,
  244. xfs_lsn_t lsn)
  245. {
  246. struct xfs_icdinode *from = &ip->i_d;
  247. struct inode *inode = VFS_I(ip);
  248. to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
  249. to->di_onlink = 0;
  250. to->di_version = from->di_version;
  251. to->di_format = from->di_format;
  252. to->di_uid = cpu_to_be32(from->di_uid);
  253. to->di_gid = cpu_to_be32(from->di_gid);
  254. to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
  255. to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
  256. memset(to->di_pad, 0, sizeof(to->di_pad));
  257. to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
  258. to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
  259. to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
  260. to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
  261. to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
  262. to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
  263. to->di_nlink = cpu_to_be32(inode->i_nlink);
  264. to->di_gen = cpu_to_be32(inode->i_generation);
  265. to->di_mode = cpu_to_be16(inode->i_mode);
  266. to->di_size = cpu_to_be64(from->di_size);
  267. to->di_nblocks = cpu_to_be64(from->di_nblocks);
  268. to->di_extsize = cpu_to_be32(from->di_extsize);
  269. to->di_nextents = cpu_to_be32(from->di_nextents);
  270. to->di_anextents = cpu_to_be16(from->di_anextents);
  271. to->di_forkoff = from->di_forkoff;
  272. to->di_aformat = from->di_aformat;
  273. to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
  274. to->di_dmstate = cpu_to_be16(from->di_dmstate);
  275. to->di_flags = cpu_to_be16(from->di_flags);
  276. if (from->di_version == 3) {
  277. to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
  278. to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
  279. to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
  280. to->di_flags2 = cpu_to_be64(from->di_flags2);
  281. to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
  282. to->di_ino = cpu_to_be64(ip->i_ino);
  283. to->di_lsn = cpu_to_be64(lsn);
  284. memset(to->di_pad2, 0, sizeof(to->di_pad2));
  285. uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
  286. to->di_flushiter = 0;
  287. } else {
  288. to->di_flushiter = cpu_to_be16(from->di_flushiter);
  289. }
  290. }
  291. void
  292. xfs_log_dinode_to_disk(
  293. struct xfs_log_dinode *from,
  294. struct xfs_dinode *to)
  295. {
  296. to->di_magic = cpu_to_be16(from->di_magic);
  297. to->di_mode = cpu_to_be16(from->di_mode);
  298. to->di_version = from->di_version;
  299. to->di_format = from->di_format;
  300. to->di_onlink = 0;
  301. to->di_uid = cpu_to_be32(from->di_uid);
  302. to->di_gid = cpu_to_be32(from->di_gid);
  303. to->di_nlink = cpu_to_be32(from->di_nlink);
  304. to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
  305. to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
  306. memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
  307. to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
  308. to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
  309. to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
  310. to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
  311. to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
  312. to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
  313. to->di_size = cpu_to_be64(from->di_size);
  314. to->di_nblocks = cpu_to_be64(from->di_nblocks);
  315. to->di_extsize = cpu_to_be32(from->di_extsize);
  316. to->di_nextents = cpu_to_be32(from->di_nextents);
  317. to->di_anextents = cpu_to_be16(from->di_anextents);
  318. to->di_forkoff = from->di_forkoff;
  319. to->di_aformat = from->di_aformat;
  320. to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
  321. to->di_dmstate = cpu_to_be16(from->di_dmstate);
  322. to->di_flags = cpu_to_be16(from->di_flags);
  323. to->di_gen = cpu_to_be32(from->di_gen);
  324. if (from->di_version == 3) {
  325. to->di_changecount = cpu_to_be64(from->di_changecount);
  326. to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
  327. to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
  328. to->di_flags2 = cpu_to_be64(from->di_flags2);
  329. to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
  330. to->di_ino = cpu_to_be64(from->di_ino);
  331. to->di_lsn = cpu_to_be64(from->di_lsn);
  332. memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
  333. uuid_copy(&to->di_uuid, &from->di_uuid);
  334. to->di_flushiter = 0;
  335. } else {
  336. to->di_flushiter = cpu_to_be16(from->di_flushiter);
  337. }
  338. }
  339. static xfs_failaddr_t
  340. xfs_dinode_verify_fork(
  341. struct xfs_dinode *dip,
  342. struct xfs_mount *mp,
  343. int whichfork)
  344. {
  345. uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
  346. switch (XFS_DFORK_FORMAT(dip, whichfork)) {
  347. case XFS_DINODE_FMT_LOCAL:
  348. /*
  349. * no local regular files yet
  350. */
  351. if (whichfork == XFS_DATA_FORK) {
  352. if (S_ISREG(be16_to_cpu(dip->di_mode)))
  353. return __this_address;
  354. if (be64_to_cpu(dip->di_size) >
  355. XFS_DFORK_SIZE(dip, mp, whichfork))
  356. return __this_address;
  357. }
  358. if (di_nextents)
  359. return __this_address;
  360. break;
  361. case XFS_DINODE_FMT_EXTENTS:
  362. if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
  363. return __this_address;
  364. break;
  365. case XFS_DINODE_FMT_BTREE:
  366. if (whichfork == XFS_ATTR_FORK) {
  367. if (di_nextents > MAXAEXTNUM)
  368. return __this_address;
  369. } else if (di_nextents > MAXEXTNUM) {
  370. return __this_address;
  371. }
  372. break;
  373. default:
  374. return __this_address;
  375. }
  376. return NULL;
  377. }
  378. static xfs_failaddr_t
  379. xfs_dinode_verify_forkoff(
  380. struct xfs_dinode *dip,
  381. struct xfs_mount *mp)
  382. {
  383. if (!XFS_DFORK_Q(dip))
  384. return NULL;
  385. switch (dip->di_format) {
  386. case XFS_DINODE_FMT_DEV:
  387. if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
  388. return __this_address;
  389. break;
  390. case XFS_DINODE_FMT_LOCAL: /* fall through ... */
  391. case XFS_DINODE_FMT_EXTENTS: /* fall through ... */
  392. case XFS_DINODE_FMT_BTREE:
  393. if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3))
  394. return __this_address;
  395. break;
  396. default:
  397. return __this_address;
  398. }
  399. return NULL;
  400. }
  401. xfs_failaddr_t
  402. xfs_dinode_verify(
  403. struct xfs_mount *mp,
  404. xfs_ino_t ino,
  405. struct xfs_dinode *dip)
  406. {
  407. xfs_failaddr_t fa;
  408. uint16_t mode;
  409. uint16_t flags;
  410. uint64_t flags2;
  411. uint64_t di_size;
  412. if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
  413. return __this_address;
  414. /* Verify v3 integrity information first */
  415. if (dip->di_version >= 3) {
  416. if (!xfs_sb_version_hascrc(&mp->m_sb))
  417. return __this_address;
  418. if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
  419. XFS_DINODE_CRC_OFF))
  420. return __this_address;
  421. if (be64_to_cpu(dip->di_ino) != ino)
  422. return __this_address;
  423. if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
  424. return __this_address;
  425. }
  426. /* don't allow invalid i_size */
  427. di_size = be64_to_cpu(dip->di_size);
  428. if (di_size & (1ULL << 63))
  429. return __this_address;
  430. mode = be16_to_cpu(dip->di_mode);
  431. if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
  432. return __this_address;
  433. /* No zero-length symlinks/dirs. */
  434. if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
  435. return __this_address;
  436. /* Fork checks carried over from xfs_iformat_fork */
  437. if (mode &&
  438. be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
  439. be64_to_cpu(dip->di_nblocks))
  440. return __this_address;
  441. if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
  442. return __this_address;
  443. flags = be16_to_cpu(dip->di_flags);
  444. if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
  445. return __this_address;
  446. /* check for illegal values of forkoff */
  447. fa = xfs_dinode_verify_forkoff(dip, mp);
  448. if (fa)
  449. return fa;
  450. /* Do we have appropriate data fork formats for the mode? */
  451. switch (mode & S_IFMT) {
  452. case S_IFIFO:
  453. case S_IFCHR:
  454. case S_IFBLK:
  455. case S_IFSOCK:
  456. if (dip->di_format != XFS_DINODE_FMT_DEV)
  457. return __this_address;
  458. break;
  459. case S_IFREG:
  460. case S_IFLNK:
  461. case S_IFDIR:
  462. fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
  463. if (fa)
  464. return fa;
  465. break;
  466. case 0:
  467. /* Uninitialized inode ok. */
  468. break;
  469. default:
  470. return __this_address;
  471. }
  472. if (XFS_DFORK_Q(dip)) {
  473. fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
  474. if (fa)
  475. return fa;
  476. } else {
  477. /*
  478. * If there is no fork offset, this may be a freshly-made inode
  479. * in a new disk cluster, in which case di_aformat is zeroed.
  480. * Otherwise, such an inode must be in EXTENTS format; this goes
  481. * for freed inodes as well.
  482. */
  483. switch (dip->di_aformat) {
  484. case 0:
  485. case XFS_DINODE_FMT_EXTENTS:
  486. break;
  487. default:
  488. return __this_address;
  489. }
  490. if (dip->di_anextents)
  491. return __this_address;
  492. }
  493. /* extent size hint validation */
  494. fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
  495. mode, flags);
  496. if (fa)
  497. return fa;
  498. /* only version 3 or greater inodes are extensively verified here */
  499. if (dip->di_version < 3)
  500. return NULL;
  501. flags2 = be64_to_cpu(dip->di_flags2);
  502. /* don't allow reflink/cowextsize if we don't have reflink */
  503. if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
  504. !xfs_sb_version_hasreflink(&mp->m_sb))
  505. return __this_address;
  506. /* only regular files get reflink */
  507. if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
  508. return __this_address;
  509. /* don't let reflink and realtime mix */
  510. if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
  511. return __this_address;
  512. /* don't let reflink and dax mix */
  513. if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
  514. return __this_address;
  515. /* COW extent size hint validation */
  516. fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
  517. mode, flags, flags2);
  518. if (fa)
  519. return fa;
  520. return NULL;
  521. }
  522. void
  523. xfs_dinode_calc_crc(
  524. struct xfs_mount *mp,
  525. struct xfs_dinode *dip)
  526. {
  527. uint32_t crc;
  528. if (dip->di_version < 3)
  529. return;
  530. ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
  531. crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
  532. XFS_DINODE_CRC_OFF);
  533. dip->di_crc = xfs_end_cksum(crc);
  534. }
  535. /*
  536. * Read the disk inode attributes into the in-core inode structure.
  537. *
  538. * For version 5 superblocks, if we are initialising a new inode and we are not
  539. * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
  540. * inode core with a random generation number. If we are keeping inodes around,
  541. * we need to read the inode cluster to get the existing generation number off
  542. * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
  543. * format) then log recovery is dependent on the di_flushiter field being
  544. * initialised from the current on-disk value and hence we must also read the
  545. * inode off disk.
  546. */
  547. int
  548. xfs_iread(
  549. xfs_mount_t *mp,
  550. xfs_trans_t *tp,
  551. xfs_inode_t *ip,
  552. uint iget_flags)
  553. {
  554. xfs_buf_t *bp;
  555. xfs_dinode_t *dip;
  556. xfs_failaddr_t fa;
  557. int error;
  558. /*
  559. * Fill in the location information in the in-core inode.
  560. */
  561. error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
  562. if (error)
  563. return error;
  564. /* shortcut IO on inode allocation if possible */
  565. if ((iget_flags & XFS_IGET_CREATE) &&
  566. xfs_sb_version_hascrc(&mp->m_sb) &&
  567. !(mp->m_flags & XFS_MOUNT_IKEEP)) {
  568. /* initialise the on-disk inode core */
  569. memset(&ip->i_d, 0, sizeof(ip->i_d));
  570. VFS_I(ip)->i_generation = prandom_u32();
  571. ip->i_d.di_version = 3;
  572. return 0;
  573. }
  574. /*
  575. * Get pointers to the on-disk inode and the buffer containing it.
  576. */
  577. error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
  578. if (error)
  579. return error;
  580. /* even unallocated inodes are verified */
  581. fa = xfs_dinode_verify(mp, ip->i_ino, dip);
  582. if (fa) {
  583. xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
  584. sizeof(*dip), fa);
  585. error = -EFSCORRUPTED;
  586. goto out_brelse;
  587. }
  588. /*
  589. * If the on-disk inode is already linked to a directory
  590. * entry, copy all of the inode into the in-core inode.
  591. * xfs_iformat_fork() handles copying in the inode format
  592. * specific information.
  593. * Otherwise, just get the truly permanent information.
  594. */
  595. if (dip->di_mode) {
  596. xfs_inode_from_disk(ip, dip);
  597. error = xfs_iformat_fork(ip, dip);
  598. if (error) {
  599. #ifdef DEBUG
  600. xfs_alert(mp, "%s: xfs_iformat() returned error %d",
  601. __func__, error);
  602. #endif /* DEBUG */
  603. goto out_brelse;
  604. }
  605. } else {
  606. /*
  607. * Partial initialisation of the in-core inode. Just the bits
  608. * that xfs_ialloc won't overwrite or relies on being correct.
  609. */
  610. ip->i_d.di_version = dip->di_version;
  611. VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
  612. ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
  613. /*
  614. * Make sure to pull in the mode here as well in
  615. * case the inode is released without being used.
  616. * This ensures that xfs_inactive() will see that
  617. * the inode is already free and not try to mess
  618. * with the uninitialized part of it.
  619. */
  620. VFS_I(ip)->i_mode = 0;
  621. }
  622. ASSERT(ip->i_d.di_version >= 2);
  623. ip->i_delayed_blks = 0;
  624. /*
  625. * Mark the buffer containing the inode as something to keep
  626. * around for a while. This helps to keep recently accessed
  627. * meta-data in-core longer.
  628. */
  629. xfs_buf_set_ref(bp, XFS_INO_REF);
  630. /*
  631. * Use xfs_trans_brelse() to release the buffer containing the on-disk
  632. * inode, because it was acquired with xfs_trans_read_buf() in
  633. * xfs_imap_to_bp() above. If tp is NULL, this is just a normal
  634. * brelse(). If we're within a transaction, then xfs_trans_brelse()
  635. * will only release the buffer if it is not dirty within the
  636. * transaction. It will be OK to release the buffer in this case,
  637. * because inodes on disk are never destroyed and we will be locking the
  638. * new in-core inode before putting it in the cache where other
  639. * processes can find it. Thus we don't have to worry about the inode
  640. * being changed just because we released the buffer.
  641. */
  642. out_brelse:
  643. xfs_trans_brelse(tp, bp);
  644. return error;
  645. }
  646. /*
  647. * Validate di_extsize hint.
  648. *
  649. * The rules are documented at xfs_ioctl_setattr_check_extsize().
  650. * These functions must be kept in sync with each other.
  651. */
  652. xfs_failaddr_t
  653. xfs_inode_validate_extsize(
  654. struct xfs_mount *mp,
  655. uint32_t extsize,
  656. uint16_t mode,
  657. uint16_t flags)
  658. {
  659. bool rt_flag;
  660. bool hint_flag;
  661. bool inherit_flag;
  662. uint32_t extsize_bytes;
  663. uint32_t blocksize_bytes;
  664. rt_flag = (flags & XFS_DIFLAG_REALTIME);
  665. hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
  666. inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
  667. extsize_bytes = XFS_FSB_TO_B(mp, extsize);
  668. if (rt_flag)
  669. blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
  670. else
  671. blocksize_bytes = mp->m_sb.sb_blocksize;
  672. if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
  673. return __this_address;
  674. if (hint_flag && !S_ISREG(mode))
  675. return __this_address;
  676. if (inherit_flag && !S_ISDIR(mode))
  677. return __this_address;
  678. if ((hint_flag || inherit_flag) && extsize == 0)
  679. return __this_address;
  680. /* free inodes get flags set to zero but extsize remains */
  681. if (mode && !(hint_flag || inherit_flag) && extsize != 0)
  682. return __this_address;
  683. if (extsize_bytes % blocksize_bytes)
  684. return __this_address;
  685. if (extsize > MAXEXTLEN)
  686. return __this_address;
  687. if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
  688. return __this_address;
  689. return NULL;
  690. }
  691. /*
  692. * Validate di_cowextsize hint.
  693. *
  694. * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
  695. * These functions must be kept in sync with each other.
  696. */
  697. xfs_failaddr_t
  698. xfs_inode_validate_cowextsize(
  699. struct xfs_mount *mp,
  700. uint32_t cowextsize,
  701. uint16_t mode,
  702. uint16_t flags,
  703. uint64_t flags2)
  704. {
  705. bool rt_flag;
  706. bool hint_flag;
  707. uint32_t cowextsize_bytes;
  708. rt_flag = (flags & XFS_DIFLAG_REALTIME);
  709. hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
  710. cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
  711. if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
  712. return __this_address;
  713. if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
  714. return __this_address;
  715. if (hint_flag && cowextsize == 0)
  716. return __this_address;
  717. /* free inodes get flags set to zero but cowextsize remains */
  718. if (mode && !hint_flag && cowextsize != 0)
  719. return __this_address;
  720. if (hint_flag && rt_flag)
  721. return __this_address;
  722. if (cowextsize_bytes % mp->m_sb.sb_blocksize)
  723. return __this_address;
  724. if (cowextsize > MAXEXTLEN)
  725. return __this_address;
  726. if (cowextsize > mp->m_sb.sb_agblocks / 2)
  727. return __this_address;
  728. return NULL;
  729. }