xfs_inode_item.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"

#include <linux/iversion.h>

kmem_zone_t *xfs_ili_zone;              /* inode log item zone */

static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
{
        return container_of(lip, struct xfs_inode_log_item, ili_item);
}
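
/*
 * Work out how many log iovecs and bytes are needed to log the data fork,
 * based on the fork format and which inode fields are dirty. The sizes are
 * worst-case estimates; the format routines below may log less.
 */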
STATIC void
xfs_inode_item_data_fork_size(
        struct xfs_inode_log_item *iip,
        int             *nvecs,
        int             *nbytes)
{
        struct xfs_inode *ip = iip->ili_inode;

        switch (ip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
                if ((iip->ili_fields & XFS_ILOG_DEXT) &&
                    ip->i_d.di_nextents > 0 &&
                    ip->i_df.if_bytes > 0) {
                        /* worst case, doesn't subtract delalloc extents */
                        *nbytes += XFS_IFORK_DSIZE(ip);
                        *nvecs += 1;
                }
                break;
        case XFS_DINODE_FMT_BTREE:
                if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
                    ip->i_df.if_broot_bytes > 0) {
                        *nbytes += ip->i_df.if_broot_bytes;
                        *nvecs += 1;
                }
                break;
        case XFS_DINODE_FMT_LOCAL:
                if ((iip->ili_fields & XFS_ILOG_DDATA) &&
                    ip->i_df.if_bytes > 0) {
                        *nbytes += roundup(ip->i_df.if_bytes, 4);
                        *nvecs += 1;
                }
                break;
        case XFS_DINODE_FMT_DEV:
                break;
        default:
                ASSERT(0);
                break;
        }
}
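
/*
 * As above, but for the attribute fork: work out how many log iovecs and
 * bytes are needed to log the attribute data, extents or b-tree root.
 */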
STATIC void
xfs_inode_item_attr_fork_size(
        struct xfs_inode_log_item *iip,
        int             *nvecs,
        int             *nbytes)
{
        struct xfs_inode *ip = iip->ili_inode;

        switch (ip->i_d.di_aformat) {
        case XFS_DINODE_FMT_EXTENTS:
                if ((iip->ili_fields & XFS_ILOG_AEXT) &&
                    ip->i_d.di_anextents > 0 &&
                    ip->i_afp->if_bytes > 0) {
                        /* worst case, doesn't subtract unused space */
                        *nbytes += XFS_IFORK_ASIZE(ip);
                        *nvecs += 1;
                }
                break;
        case XFS_DINODE_FMT_BTREE:
                if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
                    ip->i_afp->if_broot_bytes > 0) {
                        *nbytes += ip->i_afp->if_broot_bytes;
                        *nvecs += 1;
                }
                break;
        case XFS_DINODE_FMT_LOCAL:
                if ((iip->ili_fields & XFS_ILOG_ADATA) &&
                    ip->i_afp->if_bytes > 0) {
                        *nbytes += roundup(ip->i_afp->if_bytes, 4);
                        *nvecs += 1;
                }
                break;
        default:
                ASSERT(0);
                break;
        }
}

/*
 * This returns the number of iovecs needed to log the given inode item.
 *
 * We need one iovec for the inode log format structure, one for the
 * inode core, and possibly one for the inode data/extents/b-tree root
 * and one for the inode attribute data/extents/b-tree root.
 */
STATIC void
xfs_inode_item_size(
        struct xfs_log_item *lip,
        int             *nvecs,
        int             *nbytes)
{
        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
        struct xfs_inode *ip = iip->ili_inode;

        *nvecs += 2;
        *nbytes += sizeof(struct xfs_inode_log_format) +
                   xfs_log_dinode_size(ip->i_d.di_version);

        xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
        if (XFS_IFORK_Q(ip))
                xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
}
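
/*
 * Format the data fork into the log vector. Depending on the fork format,
 * this copies the extent list, the b-tree root or the inline data into the
 * next iovec, and clears any ili_fields flags that do not apply to the
 * current fork format so stale bits are never written to the log.
 */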
STATIC void
xfs_inode_item_format_data_fork(
        struct xfs_inode_log_item *iip,
        struct xfs_inode_log_format *ilf,
        struct xfs_log_vec *lv,
        struct xfs_log_iovec **vecp)
{
        struct xfs_inode *ip = iip->ili_inode;
        size_t          data_bytes;

        switch (ip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
                iip->ili_fields &=
                        ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);

                if ((iip->ili_fields & XFS_ILOG_DEXT) &&
                    ip->i_d.di_nextents > 0 &&
                    ip->i_df.if_bytes > 0) {
                        struct xfs_bmbt_rec *p;

                        ASSERT(xfs_iext_count(&ip->i_df) > 0);

                        p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
                        data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
                        xlog_finish_iovec(lv, *vecp, data_bytes);

                        ASSERT(data_bytes <= ip->i_df.if_bytes);

                        ilf->ilf_dsize = data_bytes;
                        ilf->ilf_size++;
                } else {
                        iip->ili_fields &= ~XFS_ILOG_DEXT;
                }
                break;
        case XFS_DINODE_FMT_BTREE:
                iip->ili_fields &=
                        ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);

                if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
                    ip->i_df.if_broot_bytes > 0) {
                        ASSERT(ip->i_df.if_broot != NULL);
                        xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
                                        ip->i_df.if_broot,
                                        ip->i_df.if_broot_bytes);
                        ilf->ilf_dsize = ip->i_df.if_broot_bytes;
                        ilf->ilf_size++;
                } else {
                        ASSERT(!(iip->ili_fields &
                                 XFS_ILOG_DBROOT));
                        iip->ili_fields &= ~XFS_ILOG_DBROOT;
                }
                break;
        case XFS_DINODE_FMT_LOCAL:
                iip->ili_fields &=
                        ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
                if ((iip->ili_fields & XFS_ILOG_DDATA) &&
                    ip->i_df.if_bytes > 0) {
                        /*
                         * Round if_bytes up to a word boundary.
                         * The underlying memory is guaranteed to
                         * be there by xfs_idata_realloc().
                         */
                        data_bytes = roundup(ip->i_df.if_bytes, 4);
                        ASSERT(ip->i_df.if_u1.if_data != NULL);
                        ASSERT(ip->i_d.di_size > 0);
                        xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
                                        ip->i_df.if_u1.if_data, data_bytes);
                        ilf->ilf_dsize = (unsigned)data_bytes;
                        ilf->ilf_size++;
                } else {
                        iip->ili_fields &= ~XFS_ILOG_DDATA;
                }
                break;
        case XFS_DINODE_FMT_DEV:
                iip->ili_fields &=
                        ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT);
                if (iip->ili_fields & XFS_ILOG_DEV)
                        ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev);
                break;
        default:
                ASSERT(0);
                break;
        }
}
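
/*
 * As above, but for the attribute fork: copy the attribute extents, b-tree
 * root or inline data into the log vector and drop any ili_fields flags that
 * no longer match the attribute fork format.
 */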
STATIC void
xfs_inode_item_format_attr_fork(
        struct xfs_inode_log_item *iip,
        struct xfs_inode_log_format *ilf,
        struct xfs_log_vec *lv,
        struct xfs_log_iovec **vecp)
{
        struct xfs_inode *ip = iip->ili_inode;
        size_t          data_bytes;

        switch (ip->i_d.di_aformat) {
        case XFS_DINODE_FMT_EXTENTS:
                iip->ili_fields &=
                        ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);

                if ((iip->ili_fields & XFS_ILOG_AEXT) &&
                    ip->i_d.di_anextents > 0 &&
                    ip->i_afp->if_bytes > 0) {
                        struct xfs_bmbt_rec *p;

                        ASSERT(xfs_iext_count(ip->i_afp) ==
                               ip->i_d.di_anextents);

                        p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
                        data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
                        xlog_finish_iovec(lv, *vecp, data_bytes);

                        ilf->ilf_asize = data_bytes;
                        ilf->ilf_size++;
                } else {
                        iip->ili_fields &= ~XFS_ILOG_AEXT;
                }
                break;
        case XFS_DINODE_FMT_BTREE:
                iip->ili_fields &=
                        ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);

                if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
                    ip->i_afp->if_broot_bytes > 0) {
                        ASSERT(ip->i_afp->if_broot != NULL);

                        xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
                                        ip->i_afp->if_broot,
                                        ip->i_afp->if_broot_bytes);
                        ilf->ilf_asize = ip->i_afp->if_broot_bytes;
                        ilf->ilf_size++;
                } else {
                        iip->ili_fields &= ~XFS_ILOG_ABROOT;
                }
                break;
        case XFS_DINODE_FMT_LOCAL:
                iip->ili_fields &=
                        ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);

                if ((iip->ili_fields & XFS_ILOG_ADATA) &&
                    ip->i_afp->if_bytes > 0) {
                        /*
                         * Round if_bytes up to a word boundary.
                         * The underlying memory is guaranteed to
                         * be there by xfs_idata_realloc().
                         */
                        data_bytes = roundup(ip->i_afp->if_bytes, 4);
                        ASSERT(ip->i_afp->if_u1.if_data != NULL);
                        xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
                                        ip->i_afp->if_u1.if_data,
                                        data_bytes);
                        ilf->ilf_asize = (unsigned)data_bytes;
                        ilf->ilf_size++;
                } else {
                        iip->ili_fields &= ~XFS_ILOG_ADATA;
                }
                break;
        default:
                ASSERT(0);
                break;
        }
}
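
/*
 * Copy the in-core inode into the log dinode that gets written to the log.
 * Timestamps, link count, generation and mode live in the VFS inode, so they
 * are pulled from there; everything else comes from the XFS incore dinode.
 */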
static void
xfs_inode_to_log_dinode(
        struct xfs_inode *ip,
        struct xfs_log_dinode *to,
        xfs_lsn_t       lsn)
{
        struct xfs_icdinode *from = &ip->i_d;
        struct inode    *inode = VFS_I(ip);

        to->di_magic = XFS_DINODE_MAGIC;

        to->di_version = from->di_version;
        to->di_format = from->di_format;
        to->di_uid = from->di_uid;
        to->di_gid = from->di_gid;
        to->di_projid_lo = from->di_projid_lo;
        to->di_projid_hi = from->di_projid_hi;

        memset(to->di_pad, 0, sizeof(to->di_pad));
        memset(to->di_pad3, 0, sizeof(to->di_pad3));
        to->di_atime.t_sec = inode->i_atime.tv_sec;
        to->di_atime.t_nsec = inode->i_atime.tv_nsec;
        to->di_mtime.t_sec = inode->i_mtime.tv_sec;
        to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
        to->di_ctime.t_sec = inode->i_ctime.tv_sec;
        to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
        to->di_nlink = inode->i_nlink;
        to->di_gen = inode->i_generation;
        to->di_mode = inode->i_mode;

        to->di_size = from->di_size;
        to->di_nblocks = from->di_nblocks;
        to->di_extsize = from->di_extsize;
        to->di_nextents = from->di_nextents;
        to->di_anextents = from->di_anextents;
        to->di_forkoff = from->di_forkoff;
        to->di_aformat = from->di_aformat;
        to->di_dmevmask = from->di_dmevmask;
        to->di_dmstate = from->di_dmstate;
        to->di_flags = from->di_flags;

        /* log a dummy value to ensure log structure is fully initialised */
        to->di_next_unlinked = NULLAGINO;

        if (from->di_version == 3) {
                to->di_changecount = inode_peek_iversion(inode);
                to->di_crtime.t_sec = from->di_crtime.t_sec;
                to->di_crtime.t_nsec = from->di_crtime.t_nsec;
                to->di_flags2 = from->di_flags2;
                to->di_cowextsize = from->di_cowextsize;
                to->di_ino = ip->i_ino;
                to->di_lsn = lsn;
                memset(to->di_pad2, 0, sizeof(to->di_pad2));
                uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
                to->di_flushiter = 0;
        } else {
                to->di_flushiter = from->di_flushiter;
        }
}

/*
 * Format the inode core. Current timestamp data is only in the VFS inode
 * fields, so we need to grab them from there. Hence rather than just copying
 * the XFS inode core structure, format the fields directly into the iovec.
 */
static void
xfs_inode_item_format_core(
        struct xfs_inode *ip,
        struct xfs_log_vec *lv,
        struct xfs_log_iovec **vecp)
{
        struct xfs_log_dinode *dic;

        dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
        xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
        xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
}

/*
 * This is called to fill in the vector of log iovecs for the given inode
 * log item. It fills the first item with an inode log format structure,
 * the second with the on-disk inode structure, and a possible third and/or
 * fourth with the inode data/extents/b-tree root and inode attributes
 * data/extents/b-tree root.
 *
 * Note: Always use the 64 bit inode log format structure so we don't
 * leave an uninitialised hole in the format item on 64 bit systems. Log
 * recovery on 32 bit systems handles this just fine, so there's no reason
 * not to use and initialise the properly padded structure all the time.
 */
STATIC void
xfs_inode_item_format(
        struct xfs_log_item *lip,
        struct xfs_log_vec *lv)
{
        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
        struct xfs_inode *ip = iip->ili_inode;
        struct xfs_log_iovec *vecp = NULL;
        struct xfs_inode_log_format *ilf;

        ASSERT(ip->i_d.di_version > 1);

        ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
        ilf->ilf_type = XFS_LI_INODE;
        ilf->ilf_ino = ip->i_ino;
        ilf->ilf_blkno = ip->i_imap.im_blkno;
        ilf->ilf_len = ip->i_imap.im_len;
        ilf->ilf_boffset = ip->i_imap.im_boffset;
        ilf->ilf_fields = XFS_ILOG_CORE;
        ilf->ilf_size = 2; /* format + core */

        /*
         * make sure we don't leak uninitialised data into the log in the case
         * when we don't log every field in the inode.
         */
        ilf->ilf_dsize = 0;
        ilf->ilf_asize = 0;
        ilf->ilf_pad = 0;
        memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));

        xlog_finish_iovec(lv, vecp, sizeof(*ilf));

        xfs_inode_item_format_core(ip, lv, &vecp);
        xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
        if (XFS_IFORK_Q(ip)) {
                xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
        } else {
                iip->ili_fields &=
                        ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
        }

        /* update the format with the exact fields we actually logged */
        ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
}

/*
 * This is called to pin the inode associated with the inode log
 * item in memory so it cannot be written out.
 */
STATIC void
xfs_inode_item_pin(
        struct xfs_log_item *lip)
{
        struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;

        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

        trace_xfs_inode_pin(ip, _RET_IP_);
        atomic_inc(&ip->i_pincount);
}

/*
 * This is called to unpin the inode associated with the inode log
 * item which was previously pinned with a call to xfs_inode_item_pin().
 *
 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
 */
STATIC void
xfs_inode_item_unpin(
        struct xfs_log_item *lip,
        int             remove)
{
        struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;

        trace_xfs_inode_unpin(ip, _RET_IP_);
        ASSERT(atomic_read(&ip->i_pincount) > 0);
        if (atomic_dec_and_test(&ip->i_pincount))
                wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
}

/*
 * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
 * have failed to be written back.
 *
 * This informs the AIL that the inode is already flush locked on the next
 * push, and acquires a hold on the buffer to ensure that it isn't reclaimed
 * before dirty data makes it to disk.
 */
STATIC void
xfs_inode_item_error(
        struct xfs_log_item *lip,
        struct xfs_buf  *bp)
{
        ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
        xfs_set_li_failed(lip, bp);
}
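
/*
 * AIL push handler for inode log items: try to flush the dirty inode to its
 * backing cluster buffer and queue that buffer for write. Returns
 * XFS_ITEM_PINNED if the inode still needs a log force, XFS_ITEM_LOCKED if
 * the required locks could not be taken without blocking, XFS_ITEM_FLUSHING
 * if the inode is already under flush, and XFS_ITEM_SUCCESS otherwise.
 */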
STATIC uint
xfs_inode_item_push(
        struct xfs_log_item *lip,
        struct list_head *buffer_list)
                __releases(&lip->li_ailp->ail_lock)
                __acquires(&lip->li_ailp->ail_lock)
{
        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
        struct xfs_inode *ip = iip->ili_inode;
        struct xfs_buf  *bp = lip->li_buf;
        uint            rval = XFS_ITEM_SUCCESS;
        int             error;

        if (xfs_ipincount(ip) > 0)
                return XFS_ITEM_PINNED;

        /*
         * The buffer containing this item failed to be written back
         * previously. Resubmit the buffer for IO.
         */
        if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
                if (!xfs_buf_trylock(bp))
                        return XFS_ITEM_LOCKED;

                if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
                        rval = XFS_ITEM_FLUSHING;

                xfs_buf_unlock(bp);
                return rval;
        }

        if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
                return XFS_ITEM_LOCKED;

        /*
         * Re-check the pincount now that we stabilized the value by
         * taking the ilock.
         */
        if (xfs_ipincount(ip) > 0) {
                rval = XFS_ITEM_PINNED;
                goto out_unlock;
        }

        /*
         * Stale inode items should force out the iclog.
         */
        if (ip->i_flags & XFS_ISTALE) {
                rval = XFS_ITEM_PINNED;
                goto out_unlock;
        }

        /*
         * Someone else is already flushing the inode. Nothing we can do
         * here but wait for the flush to finish and remove the item from
         * the AIL.
         */
        if (!xfs_iflock_nowait(ip)) {
                rval = XFS_ITEM_FLUSHING;
                goto out_unlock;
        }

        ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
        ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));

        spin_unlock(&lip->li_ailp->ail_lock);

        error = xfs_iflush(ip, &bp);
        if (!error) {
                if (!xfs_buf_delwri_queue(bp, buffer_list))
                        rval = XFS_ITEM_FLUSHING;
                xfs_buf_relse(bp);
        }

        spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        return rval;
}

/*
 * Unlock the inode associated with the inode log item.
 */
STATIC void
xfs_inode_item_unlock(
        struct xfs_log_item *lip)
{
        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
        struct xfs_inode *ip = iip->ili_inode;
        unsigned short  lock_flags;

        ASSERT(ip->i_itemp != NULL);
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

        lock_flags = iip->ili_lock_flags;
        iip->ili_lock_flags = 0;
        if (lock_flags)
                xfs_iunlock(ip, lock_flags);
}

/*
 * This is called to find out where the oldest active copy of the inode log
 * item in the on disk log resides now that the last log write of it completed
 * at the given lsn. Since we always re-log all dirty data in an inode, the
 * latest copy in the on disk log is the only one that matters. Therefore,
 * simply return the given lsn.
 *
 * If the inode has been marked stale because the cluster is being freed, we
 * don't want to (re-)insert this inode into the AIL. There is a race condition
 * where the cluster buffer may be unpinned before the inode is inserted into
 * the AIL during transaction committed processing. If the buffer is unpinned
 * before the inode item has been committed and inserted, then it is possible
 * for the buffer to be written and IO completes before the inode is inserted
 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
 * AIL which will never get removed. It will, however, get reclaimed, which
 * triggers an assert in xfs_inode_free() complaining about freeing an inode
 * still in the AIL.
 *
 * To avoid this, just unpin the inode directly and return an LSN of -1 so the
 * transaction committed code knows that it does not need to do any further
 * processing on the item.
 */
STATIC xfs_lsn_t
xfs_inode_item_committed(
        struct xfs_log_item *lip,
        xfs_lsn_t       lsn)
{
        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
        struct xfs_inode *ip = iip->ili_inode;

        if (xfs_iflags_test(ip, XFS_ISTALE)) {
                xfs_inode_item_unpin(lip, 0);
                return -1;
        }
        return lsn;
}
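
/*
 * Remember the lsn of the transaction that is committing this inode; this is
 * the point in the log that fsync-style callers may later need to force to.
 */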
STATIC void
xfs_inode_item_committing(
        struct xfs_log_item *lip,
        xfs_lsn_t       lsn)
{
        INODE_ITEM(lip)->ili_last_lsn = lsn;
}

/*
 * This is the ops vector shared by all inode log items.
 */
static const struct xfs_item_ops xfs_inode_item_ops = {
        .iop_size       = xfs_inode_item_size,
        .iop_format     = xfs_inode_item_format,
        .iop_pin        = xfs_inode_item_pin,
        .iop_unpin      = xfs_inode_item_unpin,
        .iop_unlock     = xfs_inode_item_unlock,
        .iop_committed  = xfs_inode_item_committed,
        .iop_push       = xfs_inode_item_push,
        .iop_committing = xfs_inode_item_committing,
        .iop_error      = xfs_inode_item_error
};

/*
 * Initialize the inode log item for a newly allocated (in-core) inode.
 */
void
xfs_inode_item_init(
        struct xfs_inode *ip,
        struct xfs_mount *mp)
{
        struct xfs_inode_log_item *iip;

        ASSERT(ip->i_itemp == NULL);
        iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);

        iip->ili_inode = ip;
        xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
                          &xfs_inode_item_ops);
}

/*
 * Free the inode log item and any memory hanging off of it.
 */
void
xfs_inode_item_destroy(
        xfs_inode_t     *ip)
{
        kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
        kmem_zone_free(xfs_ili_zone, ip->i_itemp);
}

/*
 * This is the inode flushing I/O completion routine. It is called
 * from interrupt level when the buffer containing the inode is
 * flushed to disk. It is responsible for removing the inode item
 * from the AIL if it has not been re-logged, and unlocking the inode's
 * flush lock.
 *
 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
 * list for other inodes that will run this function. We remove them from the
 * buffer list so we can process all the inode IO completions in one AIL lock
 * traversal.
 */
void
xfs_iflush_done(
        struct xfs_buf  *bp,
        struct xfs_log_item *lip)
{
        struct xfs_inode_log_item *iip;
        struct xfs_log_item *blip, *n;
        struct xfs_ail  *ailp = lip->li_ailp;
        int             need_ail = 0;
        LIST_HEAD(tmp);

        /*
         * Scan the buffer IO completions for other inodes being completed and
         * attach them to the current inode log item.
         */
        list_add_tail(&lip->li_bio_list, &tmp);
        list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) {
                if (blip->li_cb != xfs_iflush_done)
                        continue;

                list_move_tail(&blip->li_bio_list, &tmp);

                /*
                 * while we have the item, do the unlocked check for needing
                 * the AIL lock.
                 */
                iip = INODE_ITEM(blip);
                if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
                    test_bit(XFS_LI_FAILED, &blip->li_flags))
                        need_ail++;
        }
        /* make sure we capture the state of the initial inode. */
        iip = INODE_ITEM(lip);
        if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
            test_bit(XFS_LI_FAILED, &lip->li_flags))
                need_ail++;

        /*
         * We only want to pull the item from the AIL if it is
         * actually there and its location in the log has not
         * changed since we started the flush. Thus, we only bother
         * if the ili_logged flag is set and the inode's lsn has not
         * changed. First we check the lsn outside
         * the lock since it's cheaper, and then we recheck while
         * holding the lock before removing the inode from the AIL.
         */
        if (need_ail) {
                bool mlip_changed = false;

                /* this is an opencoded batch version of xfs_trans_ail_delete */
                spin_lock(&ailp->ail_lock);
                list_for_each_entry(blip, &tmp, li_bio_list) {
                        if (INODE_ITEM(blip)->ili_logged &&
                            blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
                                mlip_changed |= xfs_ail_delete_one(ailp, blip);
                        else {
                                xfs_clear_li_failed(blip);
                        }
                }

                if (mlip_changed) {
                        if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
                                xlog_assign_tail_lsn_locked(ailp->ail_mount);
                        if (list_empty(&ailp->ail_head))
                                wake_up_all(&ailp->ail_empty);
                }
                spin_unlock(&ailp->ail_lock);

                if (mlip_changed)
                        xfs_log_space_wake(ailp->ail_mount);
        }

        /*
         * clean up and unlock the flush lock now we are done. We can clear the
         * ili_last_fields bits now that we know that the data corresponding to
         * them is safely on disk.
         */
        list_for_each_entry_safe(blip, n, &tmp, li_bio_list) {
                list_del_init(&blip->li_bio_list);
                iip = INODE_ITEM(blip);

                iip->ili_logged = 0;
                iip->ili_last_fields = 0;
                xfs_ifunlock(iip->ili_inode);
        }
        list_del(&tmp);
}

/*
 * This is the inode flushing abort routine. It is called from xfs_iflush when
 * the filesystem is shutting down to clean up the inode state. It is
 * responsible for removing the inode item from the AIL if it has not been
 * re-logged, and unlocking the inode's flush lock.
 */
void
xfs_iflush_abort(
        xfs_inode_t     *ip,
        bool            stale)
{
        xfs_inode_log_item_t *iip = ip->i_itemp;

        if (iip) {
                if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) {
                        xfs_trans_ail_remove(&iip->ili_item,
                                             stale ? SHUTDOWN_LOG_IO_ERROR :
                                                     SHUTDOWN_CORRUPT_INCORE);
                }
                iip->ili_logged = 0;
                /*
                 * Clear the ili_last_fields bits now that we know that the
                 * data corresponding to them is safely on disk.
                 */
                iip->ili_last_fields = 0;
                /*
                 * Clear the inode logging fields so no more flushes are
                 * attempted.
                 */
                iip->ili_fields = 0;
                iip->ili_fsync_fields = 0;
        }
        /*
         * Release the inode's flush lock since we're done with it.
         */
        xfs_ifunlock(ip);
}
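
/*
 * Buffer IO completion callback for stale inodes: the inode will never be
 * flushed again, so just abort the flush and clean up the log item state.
 */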
void
xfs_istale_done(
        struct xfs_buf  *bp,
        struct xfs_log_item *lip)
{
        xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
}

/*
 * convert an xfs_inode_log_format struct from the old 32 bit version
 * (which can have different field alignments) to the native 64 bit version
 */
int
xfs_inode_item_format_convert(
        struct xfs_log_iovec *buf,
        struct xfs_inode_log_format *in_f)
{
        struct xfs_inode_log_format_32 *in_f32 = buf->i_addr;

        if (buf->i_len != sizeof(*in_f32))
                return -EFSCORRUPTED;

        in_f->ilf_type = in_f32->ilf_type;
        in_f->ilf_size = in_f32->ilf_size;
        in_f->ilf_fields = in_f32->ilf_fields;
        in_f->ilf_asize = in_f32->ilf_asize;
        in_f->ilf_dsize = in_f32->ilf_dsize;
        in_f->ilf_ino = in_f32->ilf_ino;
        memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u));
        in_f->ilf_blkno = in_f32->ilf_blkno;
        in_f->ilf_len = in_f32->ilf_len;
        in_f->ilf_boffset = in_f32->ilf_boffset;
        return 0;
}