xfs_inode_util.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  4. * All Rights Reserved.
  5. */
  6. #include <linux/iversion.h>
  7. #include "xfs.h"
  8. #include "xfs_fs.h"
  9. #include "xfs_shared.h"
  10. #include "xfs_format.h"
  11. #include "xfs_log_format.h"
  12. #include "xfs_trans_resv.h"
  13. #include "xfs_sb.h"
  14. #include "xfs_mount.h"
  15. #include "xfs_inode.h"
  16. #include "xfs_inode_util.h"
  17. #include "xfs_trans.h"
  18. #include "xfs_ialloc.h"
  19. #include "xfs_health.h"
  20. #include "xfs_bmap.h"
  21. #include "xfs_error.h"
  22. #include "xfs_trace.h"
  23. #include "xfs_ag.h"
  24. #include "xfs_iunlink_item.h"
  25. #include "xfs_inode_item.h"
  26. uint16_t
  27. xfs_flags2diflags(
  28. struct xfs_inode *ip,
  29. unsigned int xflags)
  30. {
  31. /* can't set PREALLOC this way, just preserve it */
  32. uint16_t di_flags =
  33. (ip->i_diflags & XFS_DIFLAG_PREALLOC);
  34. if (xflags & FS_XFLAG_IMMUTABLE)
  35. di_flags |= XFS_DIFLAG_IMMUTABLE;
  36. if (xflags & FS_XFLAG_APPEND)
  37. di_flags |= XFS_DIFLAG_APPEND;
  38. if (xflags & FS_XFLAG_SYNC)
  39. di_flags |= XFS_DIFLAG_SYNC;
  40. if (xflags & FS_XFLAG_NOATIME)
  41. di_flags |= XFS_DIFLAG_NOATIME;
  42. if (xflags & FS_XFLAG_NODUMP)
  43. di_flags |= XFS_DIFLAG_NODUMP;
  44. if (xflags & FS_XFLAG_NODEFRAG)
  45. di_flags |= XFS_DIFLAG_NODEFRAG;
  46. if (xflags & FS_XFLAG_FILESTREAM)
  47. di_flags |= XFS_DIFLAG_FILESTREAM;
  48. if (S_ISDIR(VFS_I(ip)->i_mode)) {
  49. if (xflags & FS_XFLAG_RTINHERIT)
  50. di_flags |= XFS_DIFLAG_RTINHERIT;
  51. if (xflags & FS_XFLAG_NOSYMLINKS)
  52. di_flags |= XFS_DIFLAG_NOSYMLINKS;
  53. if (xflags & FS_XFLAG_EXTSZINHERIT)
  54. di_flags |= XFS_DIFLAG_EXTSZINHERIT;
  55. if (xflags & FS_XFLAG_PROJINHERIT)
  56. di_flags |= XFS_DIFLAG_PROJINHERIT;
  57. } else if (S_ISREG(VFS_I(ip)->i_mode)) {
  58. if (xflags & FS_XFLAG_REALTIME)
  59. di_flags |= XFS_DIFLAG_REALTIME;
  60. if (xflags & FS_XFLAG_EXTSIZE)
  61. di_flags |= XFS_DIFLAG_EXTSIZE;
  62. }
  63. return di_flags;
  64. }
  65. uint64_t
  66. xfs_flags2diflags2(
  67. struct xfs_inode *ip,
  68. unsigned int xflags)
  69. {
  70. uint64_t di_flags2 =
  71. (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
  72. XFS_DIFLAG2_BIGTIME |
  73. XFS_DIFLAG2_NREXT64));
  74. if (xflags & FS_XFLAG_DAX)
  75. di_flags2 |= XFS_DIFLAG2_DAX;
  76. if (xflags & FS_XFLAG_COWEXTSIZE)
  77. di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
  78. return di_flags2;
  79. }
  80. uint32_t
  81. xfs_ip2xflags(
  82. struct xfs_inode *ip)
  83. {
  84. uint32_t flags = 0;
  85. if (ip->i_diflags & XFS_DIFLAG_ANY) {
  86. if (ip->i_diflags & XFS_DIFLAG_REALTIME)
  87. flags |= FS_XFLAG_REALTIME;
  88. if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
  89. flags |= FS_XFLAG_PREALLOC;
  90. if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
  91. flags |= FS_XFLAG_IMMUTABLE;
  92. if (ip->i_diflags & XFS_DIFLAG_APPEND)
  93. flags |= FS_XFLAG_APPEND;
  94. if (ip->i_diflags & XFS_DIFLAG_SYNC)
  95. flags |= FS_XFLAG_SYNC;
  96. if (ip->i_diflags & XFS_DIFLAG_NOATIME)
  97. flags |= FS_XFLAG_NOATIME;
  98. if (ip->i_diflags & XFS_DIFLAG_NODUMP)
  99. flags |= FS_XFLAG_NODUMP;
  100. if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
  101. flags |= FS_XFLAG_RTINHERIT;
  102. if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
  103. flags |= FS_XFLAG_PROJINHERIT;
  104. if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
  105. flags |= FS_XFLAG_NOSYMLINKS;
  106. if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
  107. flags |= FS_XFLAG_EXTSIZE;
  108. if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
  109. flags |= FS_XFLAG_EXTSZINHERIT;
  110. if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
  111. flags |= FS_XFLAG_NODEFRAG;
  112. if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
  113. flags |= FS_XFLAG_FILESTREAM;
  114. }
  115. if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
  116. if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
  117. flags |= FS_XFLAG_DAX;
  118. if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
  119. flags |= FS_XFLAG_COWEXTSIZE;
  120. }
  121. if (xfs_inode_has_attr_fork(ip))
  122. flags |= FS_XFLAG_HASATTR;
  123. return flags;
  124. }
  125. prid_t
  126. xfs_get_initial_prid(struct xfs_inode *dp)
  127. {
  128. if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
  129. return dp->i_projid;
  130. /* Assign to the root project by default. */
  131. return 0;
  132. }
  133. /* Propagate di_flags from a parent inode to a child inode. */
  134. static inline void
  135. xfs_inode_inherit_flags(
  136. struct xfs_inode *ip,
  137. const struct xfs_inode *pip)
  138. {
  139. unsigned int di_flags = 0;
  140. xfs_failaddr_t failaddr;
  141. umode_t mode = VFS_I(ip)->i_mode;
  142. if (S_ISDIR(mode)) {
  143. if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
  144. di_flags |= XFS_DIFLAG_RTINHERIT;
  145. if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
  146. di_flags |= XFS_DIFLAG_EXTSZINHERIT;
  147. ip->i_extsize = pip->i_extsize;
  148. }
  149. if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
  150. di_flags |= XFS_DIFLAG_PROJINHERIT;
  151. } else if (S_ISREG(mode)) {
  152. if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
  153. xfs_has_realtime(ip->i_mount))
  154. di_flags |= XFS_DIFLAG_REALTIME;
  155. if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
  156. di_flags |= XFS_DIFLAG_EXTSIZE;
  157. ip->i_extsize = pip->i_extsize;
  158. }
  159. }
  160. if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
  161. xfs_inherit_noatime)
  162. di_flags |= XFS_DIFLAG_NOATIME;
  163. if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
  164. xfs_inherit_nodump)
  165. di_flags |= XFS_DIFLAG_NODUMP;
  166. if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
  167. xfs_inherit_sync)
  168. di_flags |= XFS_DIFLAG_SYNC;
  169. if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
  170. xfs_inherit_nosymlinks)
  171. di_flags |= XFS_DIFLAG_NOSYMLINKS;
  172. if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
  173. xfs_inherit_nodefrag)
  174. di_flags |= XFS_DIFLAG_NODEFRAG;
  175. if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
  176. di_flags |= XFS_DIFLAG_FILESTREAM;
  177. ip->i_diflags |= di_flags;
  178. /*
  179. * Inode verifiers on older kernels only check that the extent size
  180. * hint is an integer multiple of the rt extent size on realtime files.
  181. * They did not check the hint alignment on a directory with both
  182. * rtinherit and extszinherit flags set. If the misaligned hint is
  183. * propagated from a directory into a new realtime file, new file
  184. * allocations will fail due to math errors in the rt allocator and/or
  185. * trip the verifiers. Validate the hint settings in the new file so
  186. * that we don't let broken hints propagate.
  187. */
  188. failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
  189. VFS_I(ip)->i_mode, ip->i_diflags);
  190. if (failaddr) {
  191. ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
  192. XFS_DIFLAG_EXTSZINHERIT);
  193. ip->i_extsize = 0;
  194. }
  195. }
  196. /* Propagate di_flags2 from a parent inode to a child inode. */
  197. static inline void
  198. xfs_inode_inherit_flags2(
  199. struct xfs_inode *ip,
  200. const struct xfs_inode *pip)
  201. {
  202. xfs_failaddr_t failaddr;
  203. if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
  204. ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
  205. ip->i_cowextsize = pip->i_cowextsize;
  206. }
  207. if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
  208. ip->i_diflags2 |= XFS_DIFLAG2_DAX;
  209. /* Don't let invalid cowextsize hints propagate. */
  210. failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
  211. VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
  212. if (failaddr) {
  213. ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
  214. ip->i_cowextsize = 0;
  215. }
  216. }
  217. /*
  218. * If we need to create attributes immediately after allocating the inode,
  219. * initialise an empty attribute fork right now. We use the default fork offset
  220. * for attributes here as we don't know exactly what size or how many
  221. * attributes we might be adding. We can do this safely here because we know
  222. * the data fork is completely empty and this saves us from needing to run a
  223. * separate transaction to set the fork offset in the immediate future.
  224. *
  225. * If we have parent pointers and the caller hasn't told us that the file will
  226. * never be linked into a directory tree, we /must/ create the attr fork.
  227. */
  228. static inline bool
  229. xfs_icreate_want_attrfork(
  230. struct xfs_mount *mp,
  231. const struct xfs_icreate_args *args)
  232. {
  233. if (args->flags & XFS_ICREATE_INIT_XATTRS)
  234. return true;
  235. if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp))
  236. return true;
  237. return false;
  238. }
  239. /* Initialise an inode's attributes. */
  240. void
  241. xfs_inode_init(
  242. struct xfs_trans *tp,
  243. const struct xfs_icreate_args *args,
  244. struct xfs_inode *ip)
  245. {
  246. struct xfs_inode *pip = args->pip;
  247. struct inode *dir = pip ? VFS_I(pip) : NULL;
  248. struct xfs_mount *mp = tp->t_mountp;
  249. struct inode *inode = VFS_I(ip);
  250. unsigned int flags;
  251. int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG |
  252. XFS_ICHGTIME_ACCESS;
  253. if (args->flags & XFS_ICREATE_TMPFILE)
  254. set_nlink(inode, 0);
  255. else if (S_ISDIR(args->mode))
  256. set_nlink(inode, 2);
  257. else
  258. set_nlink(inode, 1);
  259. inode->i_rdev = args->rdev;
  260. if (!args->idmap || pip == NULL) {
  261. /* creating a tree root, sb rooted, or detached file */
  262. inode->i_uid = GLOBAL_ROOT_UID;
  263. inode->i_gid = GLOBAL_ROOT_GID;
  264. ip->i_projid = 0;
  265. inode->i_mode = args->mode;
  266. } else {
  267. /* creating a child in the directory tree */
  268. if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
  269. inode_fsuid_set(inode, args->idmap);
  270. inode->i_gid = dir->i_gid;
  271. inode->i_mode = args->mode;
  272. } else {
  273. inode_init_owner(args->idmap, inode, dir, args->mode);
  274. }
  275. /*
  276. * If the group ID of the new file does not match the effective
  277. * group ID or one of the supplementary group IDs, the S_ISGID
  278. * bit is cleared (and only if the irix_sgid_inherit
  279. * compatibility variable is set).
  280. */
  281. if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
  282. !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
  283. inode->i_mode &= ~S_ISGID;
  284. ip->i_projid = xfs_get_initial_prid(pip);
  285. }
  286. ip->i_disk_size = 0;
  287. ip->i_df.if_nextents = 0;
  288. ASSERT(ip->i_nblocks == 0);
  289. ip->i_extsize = 0;
  290. ip->i_diflags = 0;
  291. if (xfs_has_v3inodes(mp)) {
  292. inode_set_iversion(inode, 1);
  293. ip->i_cowextsize = 0;
  294. times |= XFS_ICHGTIME_CREATE;
  295. }
  296. xfs_trans_ichgtime(tp, ip, times);
  297. flags = XFS_ILOG_CORE;
  298. switch (args->mode & S_IFMT) {
  299. case S_IFIFO:
  300. case S_IFCHR:
  301. case S_IFBLK:
  302. case S_IFSOCK:
  303. ip->i_df.if_format = XFS_DINODE_FMT_DEV;
  304. flags |= XFS_ILOG_DEV;
  305. break;
  306. case S_IFREG:
  307. case S_IFDIR:
  308. if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
  309. xfs_inode_inherit_flags(ip, pip);
  310. if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
  311. xfs_inode_inherit_flags2(ip, pip);
  312. fallthrough;
  313. case S_IFLNK:
  314. ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
  315. ip->i_df.if_bytes = 0;
  316. ip->i_df.if_data = NULL;
  317. break;
  318. default:
  319. ASSERT(0);
  320. }
  321. if (xfs_icreate_want_attrfork(mp, args)) {
  322. ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
  323. xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
  324. if (!xfs_has_attr(mp)) {
  325. spin_lock(&mp->m_sb_lock);
  326. xfs_add_attr(mp);
  327. spin_unlock(&mp->m_sb_lock);
  328. xfs_log_sb(tp);
  329. }
  330. }
  331. xfs_trans_log_inode(tp, ip, flags);
  332. }
  333. /*
  334. * In-Core Unlinked List Lookups
  335. * =============================
  336. *
  337. * Every inode is supposed to be reachable from some other piece of metadata
  338. * with the exception of the root directory. Inodes with a connection to a
  339. * file descriptor but not linked from anywhere in the on-disk directory tree
  340. * are collectively known as unlinked inodes, though the filesystem itself
  341. * maintains links to these inodes so that on-disk metadata are consistent.
  342. *
  343. * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
  344. * header contains a number of buckets that point to an inode, and each inode
  345. * record has a pointer to the next inode in the hash chain. This
  346. * singly-linked list causes scaling problems in the iunlink remove function
  347. * because we must walk that list to find the inode that points to the inode
  348. * being removed from the unlinked hash bucket list.
  349. *
  350. * Hence we keep an in-memory double linked list to link each inode on an
  351. * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
  352. * based lists would require having 64 list heads in the perag, one for each
  353. * list. This is expensive in terms of memory (think millions of AGs) and cache
  354. * misses on lookups. Instead, use the fact that inodes on the unlinked list
  355. * must be referenced at the VFS level to keep them on the list and hence we
  356. * have an existence guarantee for inodes on the unlinked list.
  357. *
  358. * Given we have an existence guarantee, we can use lockless inode cache lookups
  359. * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
  360. * for the double linked unlinked list, and we don't need any extra locking to
  361. * keep the list safe as all manipulations are done under the AGI buffer lock.
  362. * Keeping the list up to date does not require memory allocation, just finding
  363. * the XFS inode and updating the next/prev unlinked list aginos.
  364. */
  365. /*
  366. * Update the prev pointer of the next agino. Returns -ENOLINK if the inode
  367. * is not in cache.
  368. */
  369. static int
  370. xfs_iunlink_update_backref(
  371. struct xfs_perag *pag,
  372. xfs_agino_t prev_agino,
  373. xfs_agino_t next_agino)
  374. {
  375. struct xfs_inode *ip;
  376. /* No update necessary if we are at the end of the list. */
  377. if (next_agino == NULLAGINO)
  378. return 0;
  379. ip = xfs_iunlink_lookup(pag, next_agino);
  380. if (!ip)
  381. return -ENOLINK;
  382. ip->i_prev_unlinked = prev_agino;
  383. return 0;
  384. }
  385. /*
  386. * Point the AGI unlinked bucket at an inode and log the results. The caller
  387. * is responsible for validating the old value.
  388. */
  389. STATIC int
  390. xfs_iunlink_update_bucket(
  391. struct xfs_trans *tp,
  392. struct xfs_perag *pag,
  393. struct xfs_buf *agibp,
  394. unsigned int bucket_index,
  395. xfs_agino_t new_agino)
  396. {
  397. struct xfs_agi *agi = agibp->b_addr;
  398. xfs_agino_t old_value;
  399. int offset;
  400. ASSERT(xfs_verify_agino_or_null(pag, new_agino));
  401. old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
  402. trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
  403. old_value, new_agino);
  404. /*
  405. * We should never find the head of the list already set to the value
  406. * passed in because either we're adding or removing ourselves from the
  407. * head of the list.
  408. */
  409. if (old_value == new_agino) {
  410. xfs_buf_mark_corrupt(agibp);
  411. xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
  412. return -EFSCORRUPTED;
  413. }
  414. agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
  415. offset = offsetof(struct xfs_agi, agi_unlinked) +
  416. (sizeof(xfs_agino_t) * bucket_index);
  417. xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
  418. return 0;
  419. }
  420. static int
  421. xfs_iunlink_insert_inode(
  422. struct xfs_trans *tp,
  423. struct xfs_perag *pag,
  424. struct xfs_buf *agibp,
  425. struct xfs_inode *ip)
  426. {
  427. struct xfs_mount *mp = tp->t_mountp;
  428. struct xfs_agi *agi = agibp->b_addr;
  429. xfs_agino_t next_agino;
  430. xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
  431. short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
  432. int error;
  433. /*
  434. * Get the index into the agi hash table for the list this inode will
  435. * go on. Make sure the pointer isn't garbage and that this inode
  436. * isn't already on the list.
  437. */
  438. next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
  439. if (next_agino == agino ||
  440. !xfs_verify_agino_or_null(pag, next_agino)) {
  441. xfs_buf_mark_corrupt(agibp);
  442. xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
  443. return -EFSCORRUPTED;
  444. }
  445. /*
  446. * Update the prev pointer in the next inode to point back to this
  447. * inode.
  448. */
  449. error = xfs_iunlink_update_backref(pag, agino, next_agino);
  450. if (error == -ENOLINK)
  451. error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
  452. if (error)
  453. return error;
  454. if (next_agino != NULLAGINO) {
  455. /*
  456. * There is already another inode in the bucket, so point this
  457. * inode to the current head of the list.
  458. */
  459. error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
  460. if (error)
  461. return error;
  462. ip->i_next_unlinked = next_agino;
  463. }
  464. /* Point the head of the list to point to this inode. */
  465. ip->i_prev_unlinked = NULLAGINO;
  466. return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
  467. }
  468. /*
  469. * This is called when the inode's link count has gone to 0 or we are creating
  470. * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
  471. *
  472. * We place the on-disk inode on a list in the AGI. It will be pulled from this
  473. * list when the inode is freed.
  474. */
  475. int
  476. xfs_iunlink(
  477. struct xfs_trans *tp,
  478. struct xfs_inode *ip)
  479. {
  480. struct xfs_mount *mp = tp->t_mountp;
  481. struct xfs_perag *pag;
  482. struct xfs_buf *agibp;
  483. int error;
  484. ASSERT(VFS_I(ip)->i_nlink == 0);
  485. ASSERT(VFS_I(ip)->i_mode != 0);
  486. trace_xfs_iunlink(ip);
  487. pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
  488. /* Get the agi buffer first. It ensures lock ordering on the list. */
  489. error = xfs_read_agi(pag, tp, 0, &agibp);
  490. if (error)
  491. goto out;
  492. error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
  493. out:
  494. xfs_perag_put(pag);
  495. return error;
  496. }
  497. static int
  498. xfs_iunlink_remove_inode(
  499. struct xfs_trans *tp,
  500. struct xfs_perag *pag,
  501. struct xfs_buf *agibp,
  502. struct xfs_inode *ip)
  503. {
  504. struct xfs_mount *mp = tp->t_mountp;
  505. struct xfs_agi *agi = agibp->b_addr;
  506. xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
  507. xfs_agino_t head_agino;
  508. short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
  509. int error;
  510. trace_xfs_iunlink_remove(ip);
  511. /*
  512. * Get the index into the agi hash table for the list this inode will
  513. * go on. Make sure the head pointer isn't garbage.
  514. */
  515. head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
  516. if (!xfs_verify_agino(pag, head_agino)) {
  517. XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
  518. agi, sizeof(*agi));
  519. xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
  520. return -EFSCORRUPTED;
  521. }
  522. /*
  523. * Set our inode's next_unlinked pointer to NULL and then return
  524. * the old pointer value so that we can update whatever was previous
  525. * to us in the list to point to whatever was next in the list.
  526. */
  527. error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
  528. if (error)
  529. return error;
  530. /*
  531. * Update the prev pointer in the next inode to point back to previous
  532. * inode in the chain.
  533. */
  534. error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
  535. ip->i_next_unlinked);
  536. if (error == -ENOLINK)
  537. error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
  538. ip->i_next_unlinked);
  539. if (error)
  540. return error;
  541. if (head_agino != agino) {
  542. struct xfs_inode *prev_ip;
  543. prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
  544. if (!prev_ip) {
  545. xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
  546. return -EFSCORRUPTED;
  547. }
  548. error = xfs_iunlink_log_inode(tp, prev_ip, pag,
  549. ip->i_next_unlinked);
  550. prev_ip->i_next_unlinked = ip->i_next_unlinked;
  551. } else {
  552. /* Point the head of the list to the next unlinked inode. */
  553. error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
  554. ip->i_next_unlinked);
  555. }
  556. ip->i_next_unlinked = NULLAGINO;
  557. ip->i_prev_unlinked = 0;
  558. return error;
  559. }
  /*
   * Pull the on-disk inode from the AGI unlinked list.
   *
   * Reads the AGI buffer for @pag and then removes @ip from its unlinked
   * bucket.  Returns 0 or a negative errno.
   */
  int
  xfs_iunlink_remove(
      struct xfs_trans    *tp,
      struct xfs_perag    *pag,
      struct xfs_inode    *ip)
  {
      struct xfs_buf      *agibp;
      int                 error;

      trace_xfs_iunlink_remove(ip);

      /* Get the agi buffer first.  It ensures lock ordering on the list. */
      error = xfs_read_agi(pag, tp, 0, &agibp);
      if (!error)
          error = xfs_iunlink_remove_inode(tp, pag, agibp, ip);

      return error;
  }
  578. /*
  579. * Decrement the link count on an inode & log the change. If this causes the
  580. * link count to go to zero, move the inode to AGI unlinked list so that it can
  581. * be freed when the last active reference goes away via xfs_inactive().
  582. */
  583. int
  584. xfs_droplink(
  585. struct xfs_trans *tp,
  586. struct xfs_inode *ip)
  587. {
  588. struct inode *inode = VFS_I(ip);
  589. xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
  590. if (inode->i_nlink == 0) {
  591. xfs_info_ratelimited(tp->t_mountp,
  592. "Inode 0x%llx link count dropped below zero. Pinning link count.",
  593. ip->i_ino);
  594. set_nlink(inode, XFS_NLINK_PINNED);
  595. }
  596. if (inode->i_nlink != XFS_NLINK_PINNED)
  597. drop_nlink(inode);
  598. xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  599. if (inode->i_nlink)
  600. return 0;
  601. return xfs_iunlink(tp, ip);
  602. }
  603. /*
  604. * Increment the link count on an inode & log the change.
  605. */
  606. void
  607. xfs_bumplink(
  608. struct xfs_trans *tp,
  609. struct xfs_inode *ip)
  610. {
  611. struct inode *inode = VFS_I(ip);
  612. xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
  613. if (inode->i_nlink == XFS_NLINK_PINNED - 1)
  614. xfs_info_ratelimited(tp->t_mountp,
  615. "Inode 0x%llx link count exceeded maximum. Pinning link count.",
  616. ip->i_ino);
  617. if (inode->i_nlink != XFS_NLINK_PINNED)
  618. inc_nlink(inode);
  619. xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  620. }
  621. /* Free an inode in the ondisk index and zero it out. */
  622. int
  623. xfs_inode_uninit(
  624. struct xfs_trans *tp,
  625. struct xfs_perag *pag,
  626. struct xfs_inode *ip,
  627. struct xfs_icluster *xic)
  628. {
  629. struct xfs_mount *mp = ip->i_mount;
  630. int error;
  631. /*
  632. * Free the inode first so that we guarantee that the AGI lock is going
  633. * to be taken before we remove the inode from the unlinked list. This
  634. * makes the AGI lock -> unlinked list modification order the same as
  635. * used in O_TMPFILE creation.
  636. */
  637. error = xfs_difree(tp, pag, ip->i_ino, xic);
  638. if (error)
  639. return error;
  640. error = xfs_iunlink_remove(tp, pag, ip);
  641. if (error)
  642. return error;
  643. /*
  644. * Free any local-format data sitting around before we reset the
  645. * data fork to extents format. Note that the attr fork data has
  646. * already been freed by xfs_attr_inactive.
  647. */
  648. if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
  649. kfree(ip->i_df.if_data);
  650. ip->i_df.if_data = NULL;
  651. ip->i_df.if_bytes = 0;
  652. }
  653. VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
  654. ip->i_diflags = 0;
  655. ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
  656. ip->i_forkoff = 0; /* mark the attr fork not in use */
  657. ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
  658. /*
  659. * Bump the generation count so no one will be confused
  660. * by reincarnations of this inode.
  661. */
  662. VFS_I(ip)->i_generation++;
  663. xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  664. return 0;
  665. }