xfs_refcount_item.c

// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"
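
/*
 * Slab zones backing the CUI and CUD log items; these are assumed to be
 * created during XFS module initialization alongside the other log item
 * zones.
 */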
kmem_zone_t	*xfs_cui_zone;
kmem_zone_t	*xfs_cud_zone;
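
/* Convert a generic log item back to the CUI that embeds it. */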
static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cui_log_item, cui_item);
}
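
/*
 * Free a CUI. Items with more extents than fit in the inline array were
 * allocated from the heap and are freed the same way; everything else goes
 * back to the CUI zone.
 */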
void
xfs_cui_item_free(
	struct xfs_cui_log_item	*cuip)
{
	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
		kmem_free(cuip);
	else
		kmem_zone_free(xfs_cui_zone, cuip);
}

/*
 * Freeing the CUI requires that we remove it from the AIL if it has already
 * been placed there. However, the CUI may not yet have been placed in the AIL
 * when called by xfs_cui_release() from CUD processing due to the ordering of
 * committed vs unpin operations in bulk insert operations. Hence the reference
 * count to ensure only the last caller frees the CUI.
 */
void
xfs_cui_release(
	struct xfs_cui_log_item	*cuip)
{
	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
	if (atomic_dec_and_test(&cuip->cui_refcount)) {
		xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
		xfs_cui_item_free(cuip);
	}
}
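
/*
 * Report the number of iovecs and the number of bytes that logging this CUI
 * will require; the format structure grows with the extent count.
 */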
STATIC void
xfs_cui_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	*nvecs += 1;
	*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
}

/*
 * This is called to fill in the vector of log iovecs for the given cui log
 * item. We use only 1 iovec, and we point that at the cui_log_format
 * structure embedded in the cui item. It is at this point that we assert
 * that all of the extent slots in the cui item have been filled.
 */
STATIC void
xfs_cui_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	ASSERT(atomic_read(&cuip->cui_next_extent) ==
			cuip->cui_format.cui_nextents);

	cuip->cui_format.cui_type = XFS_LI_CUI;
	cuip->cui_format.cui_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
			xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
}

/*
 * Pinning has no meaning for a CUI item, so just return.
 */
STATIC void
xfs_cui_item_pin(
	struct xfs_log_item	*lip)
{
}

/*
 * The unpin operation is the last place a CUI is manipulated in the log. It
 * is either inserted in the AIL or aborted in the event of a log I/O error.
 * In either case, the CUI transaction has been successfully committed to make
 * it this far. Therefore, we expect whoever committed the CUI to either
 * construct and commit the CUD or drop the CUD's reference in the event of
 * error. Simply drop the log's CUI reference now that the log is done with
 * it.
 */
STATIC void
xfs_cui_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	xfs_cui_release(cuip);
}

/*
 * CUI items have no locking or pushing. However, since CUIs are pulled from
 * the AIL when their corresponding CUDs are committed to disk, their situation
 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
 * will eventually flush the log. This should help in getting the CUI out of
 * the AIL.
 */
STATIC uint
xfs_cui_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	return XFS_ITEM_PINNED;
}

/*
 * The CUI has been either committed or aborted if the transaction has been
 * cancelled. If the transaction was cancelled, a CUD isn't going to be
 * constructed, so we free the CUI here directly.
 */
STATIC void
xfs_cui_item_unlock(
	struct xfs_log_item	*lip)
{
	if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
		xfs_cui_release(CUI_ITEM(lip));
}

/*
 * The CUI is logged only once and cannot be moved in the log, so simply
 * return the lsn at which it's been logged.
 */
STATIC xfs_lsn_t
xfs_cui_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	return lsn;
}

/*
 * The CUI dependency tracking op doesn't do squat. It can't because it
 * doesn't know where the refcounted extent is coming from. The dependency
 * tracking has to be handled by the "enclosing" metadata object. For
 * example, for inodes, the inode is locked throughout the refcount update,
 * so the dependency should be recorded there.
 */
STATIC void
xfs_cui_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
}

/*
 * This is the ops vector shared by all cui log items.
 */
static const struct xfs_item_ops xfs_cui_item_ops = {
	.iop_size	= xfs_cui_item_size,
	.iop_format	= xfs_cui_item_format,
	.iop_pin	= xfs_cui_item_pin,
	.iop_unpin	= xfs_cui_item_unpin,
	.iop_unlock	= xfs_cui_item_unlock,
	.iop_committed	= xfs_cui_item_committed,
	.iop_push	= xfs_cui_item_push,
	.iop_committing = xfs_cui_item_committing,
};

/*
 * Allocate and initialize a CUI item with the given number of extents.
 */
struct xfs_cui_log_item *
xfs_cui_init(
	struct xfs_mount		*mp,
	uint				nextents)
{
	struct xfs_cui_log_item		*cuip;

	ASSERT(nextents > 0);
	if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
		cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
				KM_SLEEP);
	else
		cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP);

	xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
	cuip->cui_format.cui_nextents = nextents;
	cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
	atomic_set(&cuip->cui_next_extent, 0);
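	/*
	 * The CUI starts with two references: one dropped when the intent
	 * is unpinned after the log commit, and one dropped when the CUD
	 * commits (or when either item is aborted).
	 */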
	atomic_set(&cuip->cui_refcount, 2);
	return cuip;
}
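
/* Convert a generic log item back to the CUD that embeds it. */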
static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cud_log_item, cud_item);
}
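
/*
 * Report the number of iovecs and the number of bytes that logging this CUD
 * will require; the CUD format structure has a fixed size.
 */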
STATIC void
xfs_cud_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_cud_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the given cud log
 * item. We use only 1 iovec, and we point that at the cud_log_format
 * structure embedded in the cud item.
 */
STATIC void
xfs_cud_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	cudp->cud_format.cud_type = XFS_LI_CUD;
	cudp->cud_format.cud_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
			sizeof(struct xfs_cud_log_format));
}

/*
 * Pinning has no meaning for a CUD item, so just return.
 */
STATIC void
xfs_cud_item_pin(
	struct xfs_log_item	*lip)
{
}

/*
 * Since pinning has no meaning for a CUD item, unpinning does not either.
 */
STATIC void
xfs_cud_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
}

/*
 * There isn't much you can do to push on a CUD item. It is simply stuck
 * waiting for the log to be flushed to disk.
 */
STATIC uint
xfs_cud_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	return XFS_ITEM_PINNED;
}

/*
 * The CUD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the CUI and free the
 * CUD.
 */
STATIC void
xfs_cud_item_unlock(
	struct xfs_log_item	*lip)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);

	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
		xfs_cui_release(cudp->cud_cuip);
		kmem_zone_free(xfs_cud_zone, cudp);
	}
}

/*
 * When the cud item is committed to disk, all we need to do is delete our
 * reference to our partner cui item and then free ourselves. Since we're
 * freeing ourselves we must return -1 to keep the transaction code from
 * further referencing this item.
 */
STATIC xfs_lsn_t
xfs_cud_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);

	/*
	 * Drop the CUI reference regardless of whether the CUD has been
	 * aborted. Once the CUD transaction is constructed, it is the sole
	 * responsibility of the CUD to release the CUI (even if the CUI is
	 * aborted due to log I/O error).
	 */
	xfs_cui_release(cudp->cud_cuip);
	kmem_zone_free(xfs_cud_zone, cudp);

	return (xfs_lsn_t)-1;
}

/*
 * The CUD dependency tracking op doesn't do squat. It can't because it
 * doesn't know where the refcounted extent is coming from. The dependency
 * tracking has to be handled by the "enclosing" metadata object. For
 * example, for inodes, the inode is locked throughout the refcount update,
 * so the dependency should be recorded there.
 */
STATIC void
xfs_cud_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
}

/*
 * This is the ops vector shared by all cud log items.
 */
static const struct xfs_item_ops xfs_cud_item_ops = {
	.iop_size	= xfs_cud_item_size,
	.iop_format	= xfs_cud_item_format,
	.iop_pin	= xfs_cud_item_pin,
	.iop_unpin	= xfs_cud_item_unpin,
	.iop_unlock	= xfs_cud_item_unlock,
	.iop_committed	= xfs_cud_item_committed,
	.iop_push	= xfs_cud_item_push,
	.iop_committing = xfs_cud_item_committing,
};

/*
 * Allocate and initialize a CUD item to pair with the given CUI.
 */
struct xfs_cud_log_item *
xfs_cud_init(
	struct xfs_mount		*mp,
	struct xfs_cui_log_item		*cuip)
{
	struct xfs_cud_log_item		*cudp;

	cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
	xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
	cudp->cud_cuip = cuip;
	cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;

	return cudp;
}

/*
 * Process a refcount update intent item that was recovered from the log.
 * We need to update the refcountbt.
 */
int
xfs_cui_recover(
	struct xfs_trans		*parent_tp,
	struct xfs_cui_log_item		*cuip)
{
	int				i;
	int				error = 0;
	unsigned int			refc_type;
	struct xfs_phys_extent		*refc;
	xfs_fsblock_t			startblock_fsb;
	bool				op_ok;
	struct xfs_cud_log_item		*cudp;
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*rcur = NULL;
	enum xfs_refcount_intent_type	type;
	xfs_fsblock_t			new_fsb;
	xfs_extlen_t			new_len;
	struct xfs_bmbt_irec		irec;
	bool				requeue_only = false;
	struct xfs_mount		*mp = parent_tp->t_mountp;

	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));

	/*
	 * First check the validity of the extents described by the
	 * CUI. If any are bad, then assume that all are bad and
	 * just toss the CUI.
	 */
	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
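		/*
		 * Round-tripping the start block through the daddr
		 * conversion should catch block numbers that don't land
		 * inside the filesystem.
		 */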
		startblock_fsb = XFS_BB_TO_FSB(mp,
				XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
		switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			op_ok = true;
			break;
		default:
			op_ok = false;
			break;
		}
		if (!op_ok || startblock_fsb == 0 ||
		    refc->pe_len == 0 ||
		    startblock_fsb >= mp->m_sb.sb_dblocks ||
		    refc->pe_len >= mp->m_sb.sb_agblocks ||
		    (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
			/*
			 * This will pull the CUI from the AIL and
			 * free the memory associated with it.
			 */
			set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
			xfs_cui_release(cuip);
			return -EIO;
		}
	}

	/*
	 * Under normal operation, refcount updates are deferred, so we
	 * wouldn't be adding them directly to a transaction. All
	 * refcount updates manage reservation usage internally and
	 * dynamically by deferring work that won't fit in the
	 * transaction. Normally, any work that needs to be deferred
	 * gets attached to the same defer_ops that scheduled the
	 * refcount update. However, we're in log recovery here, so we
	 * use the passed-in defer_ops to finish up any work that
	 * doesn't fit. We need to reserve enough blocks to handle a
	 * full btree split on either end of the refcount range.
	 */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	/*
	 * Recovery stashes all deferred ops during intent processing and
	 * finishes them on completion. Transfer current dfops state to this
	 * transaction and transfer the result back before we return.
	 */
	xfs_defer_move(tp, parent_tp);
	cudp = xfs_trans_get_cud(tp, cuip);
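
	/*
	 * Finish each refcount update in order. Once one update leaves
	 * unfinished work behind, stop touching the refcountbt directly and
	 * requeue everything that remains as fresh deferred intents.
	 */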
	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
		refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
		switch (refc_type) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			type = refc_type;
			break;
		default:
			error = -EFSCORRUPTED;
			goto abort_error;
		}
		if (requeue_only) {
			new_fsb = refc->pe_startblock;
			new_len = refc->pe_len;
		} else {
			error = xfs_trans_log_finish_refcount_update(tp, cudp,
					type, refc->pe_startblock, refc->pe_len,
					&new_fsb, &new_len, &rcur);
		}
		if (error)
			goto abort_error;

		/* Requeue what we didn't finish. */
		if (new_len > 0) {
			irec.br_startblock = new_fsb;
			irec.br_blockcount = new_len;
			switch (type) {
			case XFS_REFCOUNT_INCREASE:
				error = xfs_refcount_increase_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_DECREASE:
				error = xfs_refcount_decrease_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_ALLOC_COW:
				error = xfs_refcount_alloc_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			case XFS_REFCOUNT_FREE_COW:
				error = xfs_refcount_free_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			default:
				ASSERT(0);
			}
			if (error)
				goto abort_error;
			requeue_only = true;
		}
	}

	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
	xfs_defer_move(parent_tp, tp);
	error = xfs_trans_commit(tp);
	return error;

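/*
 * On error, tear down the refcountbt cursor, hand the stashed deferred
 * work back to the parent transaction, and cancel what we built here.
 */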
abort_error:
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	xfs_defer_move(parent_tp, tp);
	xfs_trans_cancel(tp);
	return error;
}