xfs_ag_resv.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * Copyright (C) 2016 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <darrick.wong@oracle.com>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_log_format.h"
  11. #include "xfs_trans_resv.h"
  12. #include "xfs_sb.h"
  13. #include "xfs_mount.h"
  14. #include "xfs_defer.h"
  15. #include "xfs_alloc.h"
  16. #include "xfs_errortag.h"
  17. #include "xfs_error.h"
  18. #include "xfs_trace.h"
  19. #include "xfs_cksum.h"
  20. #include "xfs_trans.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_bmap.h"
  23. #include "xfs_bmap_btree.h"
  24. #include "xfs_ag_resv.h"
  25. #include "xfs_trans_space.h"
  26. #include "xfs_rmap_btree.h"
  27. #include "xfs_btree.h"
  28. #include "xfs_refcount_btree.h"
  29. #include "xfs_ialloc_btree.h"
  30. /*
  31. * Per-AG Block Reservations
  32. *
  33. * For some kinds of allocation group metadata structures, it is advantageous
  34. * to reserve a small number of blocks in each AG so that future expansions of
  35. * that data structure do not encounter ENOSPC because errors during a btree
  36. * split cause the filesystem to go offline.
  37. *
  38. * Prior to the introduction of reflink, this wasn't an issue because the free
  39. * space btrees maintain a reserve of space (the AGFL) to handle any expansion
  40. * that may be necessary; and allocations of other metadata (inodes, BMBT,
  41. * dir/attr) aren't restricted to a single AG. However, with reflink it is
  42. * possible to allocate all the space in an AG, have subsequent reflink/CoW
  43. * activity expand the refcount btree, and discover that there's no space left
  44. * to handle that expansion. Since we can calculate the maximum size of the
  45. * refcount btree, we can reserve space for it and avoid ENOSPC.
  46. *
  47. * Handling per-AG reservations consists of four changes to the allocator's
  48. * behavior: First, because these reservations are always needed, we decrease
  49. * the ag_max_usable counter to reflect the size of the AG after the reserved
  50. * blocks are taken. Second, the reservations must be reflected in the
  51. * fdblocks count to maintain proper accounting. Third, each AG must maintain
  52. * its own reserved block counter so that we can calculate the amount of space
  53. * that must remain free to maintain the reservations. Fourth, the "remaining
  54. * reserved blocks" count must be used when calculating the length of the
  55. * longest free extent in an AG and to clamp maxlen in the per-AG allocation
  56. * functions. In other words, we maintain a virtual allocation via in-core
  57. * accounting tricks so that we don't have to clean up after a crash. :)
  58. *
  59. * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
  60. * values via struct xfs_alloc_arg or directly to the xfs_free_extent
  61. * function. It might seem a little funny to maintain a reservoir of blocks
  62. * to feed another reservoir, but the AGFL only holds enough blocks to get
  63. * through the next transaction. The per-AG reservation is to ensure (we
  64. * hope) that each AG never runs out of blocks. Each data structure wanting
  65. * to use the reservation system should update ask/used in xfs_ag_resv_init.
  66. */
  67. /*
  68. * Are we critically low on blocks? For now we'll define that as the number
  69. * of blocks we can get our hands on being less than 10% of what we reserved
  70. * or less than some arbitrary number (maximum btree height).
  71. */
  72. bool
  73. xfs_ag_resv_critical(
  74. struct xfs_perag *pag,
  75. enum xfs_ag_resv_type type)
  76. {
  77. xfs_extlen_t avail;
  78. xfs_extlen_t orig;
  79. switch (type) {
  80. case XFS_AG_RESV_METADATA:
  81. avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
  82. orig = pag->pag_meta_resv.ar_asked;
  83. break;
  84. case XFS_AG_RESV_RMAPBT:
  85. avail = pag->pagf_freeblks + pag->pagf_flcount -
  86. pag->pag_meta_resv.ar_reserved;
  87. orig = pag->pag_rmapbt_resv.ar_asked;
  88. break;
  89. default:
  90. ASSERT(0);
  91. return false;
  92. }
  93. trace_xfs_ag_resv_critical(pag, type, avail);
  94. /* Critically low if less than 10% or max btree height remains. */
  95. return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
  96. pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
  97. }
  98. /*
  99. * How many blocks are reserved but not used, and therefore must not be
  100. * allocated away?
  101. */
  102. xfs_extlen_t
  103. xfs_ag_resv_needed(
  104. struct xfs_perag *pag,
  105. enum xfs_ag_resv_type type)
  106. {
  107. xfs_extlen_t len;
  108. len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
  109. switch (type) {
  110. case XFS_AG_RESV_METADATA:
  111. case XFS_AG_RESV_RMAPBT:
  112. len -= xfs_perag_resv(pag, type)->ar_reserved;
  113. break;
  114. case XFS_AG_RESV_NONE:
  115. /* empty */
  116. break;
  117. default:
  118. ASSERT(0);
  119. }
  120. trace_xfs_ag_resv_needed(pag, type, len);
  121. return len;
  122. }
  123. /* Clean out a reservation */
  124. static int
  125. __xfs_ag_resv_free(
  126. struct xfs_perag *pag,
  127. enum xfs_ag_resv_type type)
  128. {
  129. struct xfs_ag_resv *resv;
  130. xfs_extlen_t oldresv;
  131. int error;
  132. trace_xfs_ag_resv_free(pag, type, 0);
  133. resv = xfs_perag_resv(pag, type);
  134. if (pag->pag_agno == 0)
  135. pag->pag_mount->m_ag_max_usable += resv->ar_asked;
  136. /*
  137. * RMAPBT blocks come from the AGFL and AGFL blocks are always
  138. * considered "free", so whatever was reserved at mount time must be
  139. * given back at umount.
  140. */
  141. if (type == XFS_AG_RESV_RMAPBT)
  142. oldresv = resv->ar_orig_reserved;
  143. else
  144. oldresv = resv->ar_reserved;
  145. error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
  146. resv->ar_reserved = 0;
  147. resv->ar_asked = 0;
  148. resv->ar_orig_reserved = 0;
  149. if (error)
  150. trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
  151. error, _RET_IP_);
  152. return error;
  153. }
  154. /* Free a per-AG reservation. */
  155. int
  156. xfs_ag_resv_free(
  157. struct xfs_perag *pag)
  158. {
  159. int error;
  160. int err2;
  161. error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
  162. err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
  163. if (err2 && !error)
  164. error = err2;
  165. return error;
  166. }
  167. static int
  168. __xfs_ag_resv_init(
  169. struct xfs_perag *pag,
  170. enum xfs_ag_resv_type type,
  171. xfs_extlen_t ask,
  172. xfs_extlen_t used)
  173. {
  174. struct xfs_mount *mp = pag->pag_mount;
  175. struct xfs_ag_resv *resv;
  176. int error;
  177. xfs_extlen_t hidden_space;
  178. if (used > ask)
  179. ask = used;
  180. switch (type) {
  181. case XFS_AG_RESV_RMAPBT:
  182. /*
  183. * Space taken by the rmapbt is not subtracted from fdblocks
  184. * because the rmapbt lives in the free space. Here we must
  185. * subtract the entire reservation from fdblocks so that we
  186. * always have blocks available for rmapbt expansion.
  187. */
  188. hidden_space = ask;
  189. break;
  190. case XFS_AG_RESV_METADATA:
  191. /*
  192. * Space taken by all other metadata btrees are accounted
  193. * on-disk as used space. We therefore only hide the space
  194. * that is reserved but not used by the trees.
  195. */
  196. hidden_space = ask - used;
  197. break;
  198. default:
  199. ASSERT(0);
  200. return -EINVAL;
  201. }
  202. error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
  203. if (error) {
  204. trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
  205. error, _RET_IP_);
  206. xfs_warn(mp,
  207. "Per-AG reservation for AG %u failed. Filesystem may run out of space.",
  208. pag->pag_agno);
  209. return error;
  210. }
  211. /*
  212. * Reduce the maximum per-AG allocation length by however much we're
  213. * trying to reserve for an AG. Since this is a filesystem-wide
  214. * counter, we only make the adjustment for AG 0. This assumes that
  215. * there aren't any AGs hungrier for per-AG reservation than AG 0.
  216. */
  217. if (pag->pag_agno == 0)
  218. mp->m_ag_max_usable -= ask;
  219. resv = xfs_perag_resv(pag, type);
  220. resv->ar_asked = ask;
  221. resv->ar_orig_reserved = hidden_space;
  222. resv->ar_reserved = ask - used;
  223. trace_xfs_ag_resv_init(pag, type, ask);
  224. return 0;
  225. }
  226. /* Create a per-AG block reservation. */
  227. int
  228. xfs_ag_resv_init(
  229. struct xfs_perag *pag,
  230. struct xfs_trans *tp)
  231. {
  232. struct xfs_mount *mp = pag->pag_mount;
  233. xfs_agnumber_t agno = pag->pag_agno;
  234. xfs_extlen_t ask;
  235. xfs_extlen_t used;
  236. int error = 0;
  237. /* Create the metadata reservation. */
  238. if (pag->pag_meta_resv.ar_asked == 0) {
  239. ask = used = 0;
  240. error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, &used);
  241. if (error)
  242. goto out;
  243. error = xfs_finobt_calc_reserves(mp, tp, agno, &ask, &used);
  244. if (error)
  245. goto out;
  246. error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
  247. ask, used);
  248. if (error) {
  249. /*
  250. * Because we didn't have per-AG reservations when the
  251. * finobt feature was added we might not be able to
  252. * reserve all needed blocks. Warn and fall back to the
  253. * old and potentially buggy code in that case, but
  254. * ensure we do have the reservation for the refcountbt.
  255. */
  256. ask = used = 0;
  257. mp->m_finobt_nores = true;
  258. error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask,
  259. &used);
  260. if (error)
  261. goto out;
  262. error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
  263. ask, used);
  264. if (error)
  265. goto out;
  266. }
  267. }
  268. /* Create the RMAPBT metadata reservation */
  269. if (pag->pag_rmapbt_resv.ar_asked == 0) {
  270. ask = used = 0;
  271. error = xfs_rmapbt_calc_reserves(mp, tp, agno, &ask, &used);
  272. if (error)
  273. goto out;
  274. error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
  275. if (error)
  276. goto out;
  277. }
  278. #ifdef DEBUG
  279. /* need to read in the AGF for the ASSERT below to work */
  280. error = xfs_alloc_pagf_init(pag->pag_mount, tp, pag->pag_agno, 0);
  281. if (error)
  282. return error;
  283. ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
  284. xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
  285. pag->pagf_freeblks + pag->pagf_flcount);
  286. #endif
  287. out:
  288. return error;
  289. }
  290. /* Allocate a block from the reservation. */
  291. void
  292. xfs_ag_resv_alloc_extent(
  293. struct xfs_perag *pag,
  294. enum xfs_ag_resv_type type,
  295. struct xfs_alloc_arg *args)
  296. {
  297. struct xfs_ag_resv *resv;
  298. xfs_extlen_t len;
  299. uint field;
  300. trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
  301. switch (type) {
  302. case XFS_AG_RESV_AGFL:
  303. return;
  304. case XFS_AG_RESV_METADATA:
  305. case XFS_AG_RESV_RMAPBT:
  306. resv = xfs_perag_resv(pag, type);
  307. break;
  308. default:
  309. ASSERT(0);
  310. /* fall through */
  311. case XFS_AG_RESV_NONE:
  312. field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
  313. XFS_TRANS_SB_FDBLOCKS;
  314. xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
  315. return;
  316. }
  317. len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
  318. resv->ar_reserved -= len;
  319. if (type == XFS_AG_RESV_RMAPBT)
  320. return;
  321. /* Allocations of reserved blocks only need on-disk sb updates... */
  322. xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
  323. /* ...but non-reserved blocks need in-core and on-disk updates. */
  324. if (args->len > len)
  325. xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
  326. -((int64_t)args->len - len));
  327. }
  328. /* Free a block to the reservation. */
  329. void
  330. xfs_ag_resv_free_extent(
  331. struct xfs_perag *pag,
  332. enum xfs_ag_resv_type type,
  333. struct xfs_trans *tp,
  334. xfs_extlen_t len)
  335. {
  336. xfs_extlen_t leftover;
  337. struct xfs_ag_resv *resv;
  338. trace_xfs_ag_resv_free_extent(pag, type, len);
  339. switch (type) {
  340. case XFS_AG_RESV_AGFL:
  341. return;
  342. case XFS_AG_RESV_METADATA:
  343. case XFS_AG_RESV_RMAPBT:
  344. resv = xfs_perag_resv(pag, type);
  345. break;
  346. default:
  347. ASSERT(0);
  348. /* fall through */
  349. case XFS_AG_RESV_NONE:
  350. xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
  351. return;
  352. }
  353. leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
  354. resv->ar_reserved += leftover;
  355. if (type == XFS_AG_RESV_RMAPBT)
  356. return;
  357. /* Freeing into the reserved pool only requires on-disk update... */
  358. xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
  359. /* ...but freeing beyond that requires in-core and on-disk update. */
  360. if (len > leftover)
  361. xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
  362. }