balloc.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * linux/fs/ext4/balloc.c
  4. *
  5. * Copyright (C) 1992, 1993, 1994, 1995
  6. * Remy Card (card@masi.ibp.fr)
  7. * Laboratoire MASI - Institut Blaise Pascal
  8. * Universite Pierre et Marie Curie (Paris VI)
  9. *
  10. * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
  11. * Big-endian to little-endian byte-swapping/bitmaps by
  12. * David S. Miller (davem@caip.rutgers.edu), 1995
  13. */
  14. #include <linux/time.h>
  15. #include <linux/capability.h>
  16. #include <linux/fs.h>
  17. #include <linux/quotaops.h>
  18. #include <linux/buffer_head.h>
  19. #include "ext4.h"
  20. #include "ext4_jbd2.h"
  21. #include "mballoc.h"
  22. #include <trace/events/ext4.h>
  23. #include <kunit/static_stub.h>
  24. static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
  25. ext4_group_t block_group);
  26. /*
  27. * balloc.c contains the blocks allocation and deallocation routines
  28. */
  29. /*
  30. * Calculate block group number for a given block number
  31. */
  32. ext4_group_t ext4_get_group_number(struct super_block *sb,
  33. ext4_fsblk_t block)
  34. {
  35. ext4_group_t group;
  36. if (test_opt2(sb, STD_GROUP_SIZE))
  37. group = (block -
  38. le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >>
  39. (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
  40. else
  41. ext4_get_group_no_and_offset(sb, block, &group, NULL);
  42. return group;
  43. }
/*
 * Calculate the block group number and the offset (in clusters) into
 * the block/cluster allocation bitmap for a given block number.
 * Either output pointer may be NULL if the caller does not need that
 * value.
 */
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
		ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	ext4_grpblk_t offset;

	/* Make the block number relative to the first data block. */
	blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
	/*
	 * do_div() stores the quotient (the group number) back into
	 * blocknr and returns the remainder (the block offset within
	 * the group), which is then shifted down to a cluster offset.
	 */
	offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
		EXT4_SB(sb)->s_cluster_bits;
	if (offsetp)
		*offsetp = offset;
	if (blockgrpp)
		*blockgrpp = blocknr;
}
  61. /*
  62. * Check whether the 'block' lives within the 'block_group'. Returns 1 if so
  63. * and 0 otherwise.
  64. */
  65. static inline int ext4_block_in_group(struct super_block *sb,
  66. ext4_fsblk_t block,
  67. ext4_group_t block_group)
  68. {
  69. ext4_group_t actual_group;
  70. actual_group = ext4_get_group_number(sb, block);
  71. return (actual_group == block_group) ? 1 : 0;
  72. }
/*
 * Return the number of clusters used for file system metadata; this
 * represents the overhead needed by the file system.  Counts the base
 * metadata (superblock + descriptors), the inode table, and the two
 * allocation bitmaps, being careful not to double-count clusters that
 * several of these share.
 */
static unsigned ext4_num_overhead_clusters(struct super_block *sb,
					   ext4_group_t block_group,
					   struct ext4_group_desc *gdp)
{
	unsigned base_clusters, num_clusters;
	int block_cluster = -1, inode_cluster;
	int itbl_cluster_start = -1, itbl_cluster_end = -1;
	ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
	ext4_fsblk_t end = start + EXT4_BLOCKS_PER_GROUP(sb) - 1;
	ext4_fsblk_t itbl_blk_start, itbl_blk_end;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* This is the number of clusters used by the superblock,
	 * block group descriptors, and reserved block group
	 * descriptor blocks */
	base_clusters = ext4_num_base_meta_clusters(sb, block_group);
	num_clusters = base_clusters;

	/*
	 * Account and record inode table clusters if any cluster
	 * is in the block group, or inode table cluster range is
	 * [-1, -1] and won't overlap with block/inode bitmap cluster
	 * accounted below.
	 */
	itbl_blk_start = ext4_inode_table(sb, gdp);
	itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1;
	if (itbl_blk_start <= end && itbl_blk_end >= start) {
		/* Clamp the inode table range to this group's blocks. */
		itbl_blk_start = max(itbl_blk_start, start);
		itbl_blk_end = min(itbl_blk_end, end);

		itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start);
		itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start);

		num_clusters += itbl_cluster_end - itbl_cluster_start + 1;
		/* check if border cluster is overlapped */
		if (itbl_cluster_start == base_clusters - 1)
			num_clusters--;
	}

	/*
	 * For the allocation bitmaps, we first need to check to see
	 * if the block is in the block group.  If it is, then check
	 * to see if the cluster is already accounted for in the clusters
	 * used for the base metadata cluster and inode tables cluster.
	 * Normally all of these blocks are contiguous, so the special
	 * case handling shouldn't be necessary except for *very*
	 * unusual file system layouts.
	 */
	if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
		block_cluster = EXT4_B2C(sbi,
					 ext4_block_bitmap(sb, gdp) - start);
		if (block_cluster >= base_clusters &&
		    (block_cluster < itbl_cluster_start ||
		     block_cluster > itbl_cluster_end))
			num_clusters++;
	}

	if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
		inode_cluster = EXT4_B2C(sbi,
					 ext4_inode_bitmap(sb, gdp) - start);
		/*
		 * Additional check if inode bitmap is in just accounted
		 * block_cluster
		 */
		if (inode_cluster != block_cluster &&
		    inode_cluster >= base_clusters &&
		    (inode_cluster < itbl_cluster_start ||
		     inode_cluster > itbl_cluster_end))
			num_clusters++;
	}

	return num_clusters;
}
  143. static unsigned int num_clusters_in_group(struct super_block *sb,
  144. ext4_group_t block_group)
  145. {
  146. unsigned int blocks;
  147. if (block_group == ext4_get_groups_count(sb) - 1) {
  148. /*
  149. * Even though mke2fs always initializes the first and
  150. * last group, just in case some other tool was used,
  151. * we need to make sure we calculate the right free
  152. * blocks.
  153. */
  154. blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
  155. ext4_group_first_block_no(sb, block_group);
  156. } else
  157. blocks = EXT4_BLOCKS_PER_GROUP(sb);
  158. return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
  159. }
/*
 * Initializes an uninitialized block bitmap: marks the base metadata
 * clusters, the block/inode bitmaps, the inode table, and the padding
 * past the end of the group as in-use.  Caller must hold bh locked.
 *
 * Returns 0 on success, -EFSBADCRC if the descriptor checksum is bad,
 * or -EFSCORRUPTED if the metadata would not fit in the bitmap block.
 */
static int ext4_init_block_bitmap(struct super_block *sb,
				  struct buffer_head *bh,
				  ext4_group_t block_group,
				  struct ext4_group_desc *gdp)
{
	unsigned int bit, bit_max;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_fsblk_t start, tmp;

	ASSERT(buffer_locked(bh));

	/* Bad descriptor checksum: flag both bitmaps corrupted and bail. */
	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT |
					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
		return -EFSBADCRC;
	}
	memset(bh->b_data, 0, sb->s_blocksize);

	bit_max = ext4_num_base_meta_clusters(sb, block_group);
	/* Sanity check: the base metadata bits must fit in the bitmap. */
	if ((bit_max >> 3) >= bh->b_size)
		return -EFSCORRUPTED;

	/* Mark superblock / group descriptor clusters in use. */
	for (bit = 0; bit < bit_max; bit++)
		ext4_set_bit(bit, bh->b_data);

	start = ext4_group_first_block_no(sb, block_group);

	/* Set bits for block and inode bitmaps, and inode table */
	tmp = ext4_block_bitmap(sb, gdp);
	if (ext4_block_in_group(sb, tmp, block_group))
		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);

	tmp = ext4_inode_bitmap(sb, gdp);
	if (ext4_block_in_group(sb, tmp, block_group))
		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);

	tmp = ext4_inode_table(sb, gdp);
	for (; tmp < ext4_inode_table(sb, gdp) +
		     sbi->s_itb_per_group; tmp++) {
		if (ext4_block_in_group(sb, tmp, block_group))
			ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
	}

	/*
	 * Also if the number of blocks within the group is less than
	 * the blocksize * 8 ( which is the size of bitmap ), set rest
	 * of the block bitmap to 1
	 */
	ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
			     sb->s_blocksize * 8, bh->b_data);
	return 0;
}
  205. /* Return the number of free blocks in a block group. It is used when
  206. * the block bitmap is uninitialized, so we can't just count the bits
  207. * in the bitmap. */
  208. unsigned ext4_free_clusters_after_init(struct super_block *sb,
  209. ext4_group_t block_group,
  210. struct ext4_group_desc *gdp)
  211. {
  212. return num_clusters_in_group(sb, block_group) -
  213. ext4_num_overhead_clusters(sb, block_group, gdp);
  214. }
  215. /*
  216. * The free blocks are managed by bitmaps. A file system contains several
  217. * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
  218. * block for inodes, N blocks for the inode table and data blocks.
  219. *
  220. * The file system contains group descriptors which are located after the
  221. * super block. Each descriptor contains the number of the bitmap block and
  222. * the free blocks count in the block. The descriptors are loaded in memory
  223. * when a file system is mounted (see ext4_fill_super).
  224. */
/**
 * ext4_get_group_desc() -- load group descriptor from disk
 * @sb:			super block
 * @block_group:	given block group
 * @bh:			pointer to the buffer head to store the block
 *			group descriptor; may be NULL if not needed
 *
 * Return the descriptor for @block_group, or NULL if the group number
 * is out of range or the descriptor block has not been loaded.
 */
struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
					     ext4_group_t block_group,
					     struct buffer_head **bh)
{
	unsigned int group_desc;
	unsigned int offset;
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh_p;

	/* Allow KUnit tests to substitute a stubbed implementation. */
	KUNIT_STATIC_STUB_REDIRECT(ext4_get_group_desc,
				   sb, block_group, bh);

	if (block_group >= ngroups) {
		ext4_error(sb, "block_group >= groups_count - block_group = %u,"
			   " groups_count = %u", block_group, ngroups);

		return NULL;
	}

	/* Split the group number into descriptor block and slot within it. */
	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
	bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
	/*
	 * sbi_array_rcu_deref returns with rcu unlocked, this is ok since
	 * the pointer being dereferenced won't be dereferenced again. By
	 * looking at the usage in add_new_gdb() the value isn't modified,
	 * just the pointer, and so it remains valid.
	 */
	if (!bh_p) {
		ext4_error(sb, "Group descriptor not loaded - "
			   "block_group = %u, group_desc = %u, desc = %u",
			   block_group, group_desc, offset);
		return NULL;
	}

	desc = (struct ext4_group_desc *)(
		(__u8 *)bh_p->b_data +
		offset * EXT4_DESC_SIZE(sb));
	if (bh)
		*bh = bh_p;
	return desc;
}
  271. static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
  272. ext4_group_t block_group,
  273. struct buffer_head *bh)
  274. {
  275. ext4_grpblk_t next_zero_bit;
  276. unsigned long bitmap_size = sb->s_blocksize * 8;
  277. unsigned int offset = num_clusters_in_group(sb, block_group);
  278. if (bitmap_size <= offset)
  279. return 0;
  280. next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
  281. return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
  282. }
  283. struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
  284. ext4_group_t group)
  285. {
  286. struct ext4_group_info **grp_info;
  287. long indexv, indexh;
  288. if (unlikely(group >= EXT4_SB(sb)->s_groups_count))
  289. return NULL;
  290. indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
  291. indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
  292. grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
  293. return grp_info[indexh];
  294. }
/*
 * Return the block number which was discovered to be invalid, or 0 if
 * the block bitmap is valid.  Checks that the bits for the group's own
 * block bitmap, inode bitmap and inode table are all set.
 */
static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
					    struct ext4_group_desc *desc,
					    ext4_group_t block_group,
					    struct buffer_head *bh)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t offset;
	ext4_grpblk_t next_zero_bit;
	ext4_grpblk_t max_bit = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_fsblk_t blk;
	ext4_fsblk_t group_first_block;

	if (ext4_has_feature_flex_bg(sb)) {
		/* with FLEX_BG, the inode/block bitmaps and itable
		 * blocks may not be in the group at all
		 * so the bitmap validation will be skipped for those groups
		 * or it has to also read the block group where the bitmaps
		 * are located to verify they are set.
		 */
		return 0;
	}
	group_first_block = ext4_group_first_block_no(sb, block_group);

	/* check whether block bitmap block number is set */
	blk = ext4_block_bitmap(sb, desc);
	offset = blk - group_first_block;
	if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
	    !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
		/* bad block bitmap */
		return blk;

	/* check whether the inode bitmap block number is set */
	blk = ext4_inode_bitmap(sb, desc);
	offset = blk - group_first_block;
	if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
	    !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
		/* bad block bitmap */
		return blk;

	/* check whether the inode table block number is set */
	blk = ext4_inode_table(sb, desc);
	offset = blk - group_first_block;
	if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) >= max_bit)
		return blk;
	/*
	 * Scan the inode table's cluster range; any zero bit means a
	 * table cluster was not marked in use.
	 */
	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
			EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1,
			EXT4_B2C(sbi, offset));
	if (next_zero_bit <
	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1)
		/* bad bitmap for inode tables */
		return blk;
	return 0;
}
/*
 * Validate a block bitmap buffer: verify its checksum, the bits for the
 * group's own metadata, and the padding at the end.  Marks the group
 * corrupted on failure.  Returns 0 if valid, -EFSBADCRC or
 * -EFSCORRUPTED otherwise.
 */
static int ext4_validate_block_bitmap(struct super_block *sb,
				      struct ext4_group_desc *desc,
				      ext4_group_t block_group,
				      struct buffer_head *bh)
{
	ext4_fsblk_t blk;
	struct ext4_group_info *grp;

	/* Skip validation while replaying the fast-commit journal. */
	if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
		return 0;

	grp = ext4_get_group_info(sb, block_group);

	/* Unlocked fast path: someone already verified this buffer. */
	if (buffer_verified(bh))
		return 0;
	if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
		return -EFSCORRUPTED;

	ext4_lock_group(sb, block_group);
	/* Re-check under the group lock in case we raced a verifier. */
	if (buffer_verified(bh))
		goto verified;
	if (unlikely(!ext4_block_bitmap_csum_verify(sb, desc, bh) ||
		     ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) {
		ext4_unlock_group(sb, block_group);
		ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		return -EFSBADCRC;
	}
	blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
	if (unlikely(blk != 0)) {
		ext4_unlock_group(sb, block_group);
		ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
			   block_group, blk);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		return -EFSCORRUPTED;
	}
	blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
	if (unlikely(blk != 0)) {
		ext4_unlock_group(sb, block_group);
		ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
			   block_group, blk);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		return -EFSCORRUPTED;
	}
	set_buffer_verified(bh);
verified:
	ext4_unlock_group(sb, block_group);
	return 0;
}
/**
 * ext4_read_block_bitmap_nowait()
 * @sb:			super block
 * @block_group:	given block group
 * @ignore_locked:	ignore locked buffers
 *
 * Read the bitmap for a given block_group,and validate the
 * bits for block/inode/inode tables are set in the bitmaps
 *
 * Return buffer_head on success or an ERR_PTR in case of failure.
 * May return NULL when @ignore_locked is true and the buffer is
 * already under IO (prefetch case).
 */
struct buffer_head *
ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
			      bool ignore_locked)
{
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh;
	ext4_fsblk_t bitmap_blk;
	int err;

	/* Allow KUnit tests to substitute a stubbed implementation. */
	KUNIT_STATIC_STUB_REDIRECT(ext4_read_block_bitmap_nowait,
				   sb, block_group, ignore_locked);

	desc = ext4_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return ERR_PTR(-EFSCORRUPTED);
	bitmap_blk = ext4_block_bitmap(sb, desc);
	/* The bitmap block must lie strictly inside the filesystem. */
	if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
	    (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
		ext4_error(sb, "Invalid block bitmap block %llu in "
			   "block_group %u", bitmap_blk, block_group);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		return ERR_PTR(-EFSCORRUPTED);
	}
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
		ext4_warning(sb, "Cannot get buffer for block bitmap - "
			     "block_group = %u, block_bitmap = %llu",
			     block_group, bitmap_blk);
		return ERR_PTR(-ENOMEM);
	}

	if (ignore_locked && buffer_locked(bh)) {
		/* buffer under IO already, return if called for prefetching */
		put_bh(bh);
		return NULL;
	}

	if (bitmap_uptodate(bh))
		goto verify;

	lock_buffer(bh);
	/* Re-check after taking the buffer lock: we may have raced. */
	if (bitmap_uptodate(bh)) {
		unlock_buffer(bh);
		goto verify;
	}
	ext4_lock_group(sb, block_group);
	if (ext4_has_group_desc_csum(sb) &&
	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
		/* bg 0 always has on-disk metadata; UNINIT there is bogus. */
		if (block_group == 0) {
			ext4_unlock_group(sb, block_group);
			unlock_buffer(bh);
			ext4_error(sb, "Block bitmap for bg 0 marked "
				   "uninitialized");
			err = -EFSCORRUPTED;
			goto out;
		}
		/* Construct the bitmap in memory instead of reading it. */
		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
		if (err) {
			ext4_unlock_group(sb, block_group);
			unlock_buffer(bh);
			ext4_error(sb, "Failed to init block bitmap for group "
				   "%u: %d", block_group, err);
			goto out;
		}
		set_bitmap_uptodate(bh);
		set_buffer_uptodate(bh);
		set_buffer_verified(bh);
		ext4_unlock_group(sb, block_group);
		unlock_buffer(bh);
		return bh;
	}
	ext4_unlock_group(sb, block_group);
	if (buffer_uptodate(bh)) {
		/*
		 * if not uninit if bh is uptodate,
		 * bitmap is also uptodate
		 */
		set_bitmap_uptodate(bh);
		unlock_buffer(bh);
		goto verify;
	}
	/*
	 * submit the buffer_head for reading
	 */
	set_buffer_new(bh);
	trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
	ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO |
			    (ignore_locked ? REQ_RAHEAD : 0),
			    ext4_end_bitmap_read,
			    ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_EIO));
	return bh;
verify:
	err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
	if (err)
		goto out;
	return bh;
out:
	put_bh(bh);
	return ERR_PTR(err);
}
/* Wait for a block bitmap read submitted by
 * ext4_read_block_bitmap_nowait() to complete, then validate it.
 * Returns 0 on success, -errno on error. */
int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
			   struct buffer_head *bh)
{
	struct ext4_group_desc *desc;

	/* Allow KUnit tests to substitute a stubbed implementation. */
	KUNIT_STATIC_STUB_REDIRECT(ext4_wait_block_bitmap,
				   sb, block_group, bh);

	/* No read was submitted for this buffer; nothing to wait for. */
	if (!buffer_new(bh))
		return 0;
	desc = ext4_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return -EFSCORRUPTED;
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh)) {
		ext4_error_err(sb, EIO, "Cannot read block bitmap - "
			       "block_group = %u, block_bitmap = %llu",
			       block_group, (unsigned long long) bh->b_blocknr);
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		return -EIO;
	}
	clear_buffer_new(bh);
	/* Panic or remount fs read-only if block bitmap is invalid */
	return ext4_validate_block_bitmap(sb, desc, block_group, bh);
}
  530. struct buffer_head *
  531. ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
  532. {
  533. struct buffer_head *bh;
  534. int err;
  535. bh = ext4_read_block_bitmap_nowait(sb, block_group, false);
  536. if (IS_ERR(bh))
  537. return bh;
  538. err = ext4_wait_block_bitmap(sb, block_group, bh);
  539. if (err) {
  540. put_bh(bh);
  541. return ERR_PTR(err);
  542. }
  543. return bh;
  544. }
/**
 * ext4_has_free_clusters()
 * @sbi:	in-core super block structure.
 * @nclusters:	number of needed blocks
 * @flags:	flags from ext4_mb_new_blocks()
 *
 * Check if filesystem has nclusters free & available for allocation.
 * On success return 1, return 0 on failure.
 */
static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
				  s64 nclusters, unsigned int flags)
{
	s64 free_clusters, dirty_clusters, rsv, resv_clusters;
	struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
	struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;

	/* Fast, approximate reads of the per-cpu counters. */
	free_clusters  = percpu_counter_read_positive(fcc);
	dirty_clusters = percpu_counter_read_positive(dcc);
	resv_clusters = atomic64_read(&sbi->s_resv_clusters);

	/*
	 * r_blocks_count should always be multiple of the cluster ratio so
	 * we are safe to do a plain bit shift only.
	 */
	rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
	      resv_clusters;

	/*
	 * Near the watermark the approximate per-cpu reads are not
	 * trustworthy; fall back to the exact (slower) sums.
	 */
	if (free_clusters - (nclusters + rsv + dirty_clusters) <
					EXT4_FREECLUSTERS_WATERMARK) {
		free_clusters  = percpu_counter_sum_positive(fcc);
		dirty_clusters = percpu_counter_sum_positive(dcc);
	}
	/* Check whether we have space after accounting for current
	 * dirty clusters & root reserved clusters.
	 */
	if (free_clusters >= (rsv + nclusters + dirty_clusters))
		return 1;

	/* Hm, nope.  Are (enough) root reserved clusters available? */
	if (uid_eq(sbi->s_resuid, current_fsuid()) ||
	    (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
	    (flags & EXT4_MB_USE_ROOT_BLOCKS) ||
	    capable(CAP_SYS_RESOURCE)) {

		if (free_clusters >= (nclusters + dirty_clusters +
				      resv_clusters))
			return 1;
	}
	/* No free blocks. Let's see if we can dip into reserved pool */
	if (flags & EXT4_MB_USE_RESERVED) {
		if (free_clusters >= (nclusters + dirty_clusters))
			return 1;
	}

	return 0;
}
  595. int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
  596. s64 nclusters, unsigned int flags)
  597. {
  598. if (ext4_has_free_clusters(sbi, nclusters, flags)) {
  599. percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
  600. return 0;
  601. } else
  602. return -ENOSPC;
  603. }
/**
 * ext4_should_retry_alloc() - check if a block allocation should be retried
 * @sb:		superblock
 * @retries:	number of retry attempts made so far
 *
 * ext4_should_retry_alloc() is called when ENOSPC is returned while
 * attempting to allocate blocks.  If there's an indication that a pending
 * journal transaction might free some space and allow another attempt to
 * succeed, this function will wait for the current or committing transaction
 * to complete and then return TRUE.
 */
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* Without a journal there is nothing that could free space. */
	if (!sbi->s_journal)
		return 0;

	if (++(*retries) > 3) {
		percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
		return 0;
	}

	/*
	 * if there's no indication that blocks are about to be freed it's
	 * possible we just missed a transaction commit that did so
	 */
	smp_mb();
	if (sbi->s_mb_free_pending == 0) {
		if (test_opt(sb, DISCARD)) {
			/*
			 * Flush any queued discard work; freed-but-not-yet-
			 * discarded blocks may become allocatable afterwards.
			 */
			atomic_inc(&sbi->s_retry_alloc_pending);
			flush_work(&sbi->s_discard_work);
			atomic_dec(&sbi->s_retry_alloc_pending);
		}
		return ext4_has_free_clusters(sbi, 1, 0);
	}

	/*
	 * it's possible we've just missed a transaction commit here,
	 * so ignore the returned status
	 */
	ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
	(void) jbd2_journal_force_commit_nested(sbi->s_journal);
	return 1;
}
/*
 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
 *
 * @handle:	handle to this transaction
 * @inode:	file inode
 * @goal:	given target block(filesystem wide)
 * @flags:	allocation flags passed on to ext4_mb_new_blocks()
 * @count:	pointer to total number of clusters needed; may be NULL,
 *		in which case a single cluster is requested
 * @errp:	error code
 *
 * Return 1st allocated block number on success, *count stores total account
 * error stores in errp pointer
 */
ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
				  ext4_fsblk_t goal, unsigned int flags,
				  unsigned long *count, int *errp)
{
	struct ext4_allocation_request ar;
	ext4_fsblk_t ret;

	memset(&ar, 0, sizeof(ar));
	/* Fill with neighbour allocated blocks */
	ar.inode = inode;
	ar.goal = goal;
	ar.len = count ? *count : 1;
	ar.flags = flags;

	ret = ext4_mb_new_blocks(handle, &ar, errp);
	if (count)
		*count = ar.len;
	/*
	 * Account for the allocated meta blocks.  We will never
	 * fail EDQUOT for metdata, but we do account for it.
	 */
	if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
		dquot_alloc_block_nofail(inode,
				EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
	}
	return ret;
}
/**
 * ext4_count_free_clusters() -- count filesystem free clusters
 * @sb:		superblock
 *
 * Adds up the number of free clusters from each block group,
 * skipping groups whose block bitmap is known to be corrupt.
 * With EXT4FS_DEBUG it also counts bits in every bitmap and
 * reports any mismatch with the descriptors.
 */
ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
{
	ext4_fsblk_t desc_count;
	struct ext4_group_desc *gdp;
	ext4_group_t i;
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	struct ext4_group_info *grp;
#ifdef EXT4FS_DEBUG
	struct ext4_super_block *es;
	ext4_fsblk_t bitmap_count;
	unsigned int x;
	struct buffer_head *bitmap_bh = NULL;

	es = EXT4_SB(sb)->s_es;
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;

	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		grp = NULL;
		if (EXT4_SB(sb)->s_group_info)
			grp = ext4_get_group_info(sb, i);
		/* Don't trust the free count of a corrupted group. */
		if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
			desc_count += ext4_free_group_clusters(sb, gdp);
		brelse(bitmap_bh);
		bitmap_bh = ext4_read_block_bitmap(sb, i);
		if (IS_ERR(bitmap_bh)) {
			bitmap_bh = NULL;
			continue;
		}

		/* Cross-check: count the actual zero bits in the bitmap. */
		x = ext4_count_free(bitmap_bh->b_data,
				    EXT4_CLUSTERS_PER_GROUP(sb) / 8);
		printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
			i, ext4_free_group_clusters(sb, gdp), x);
		bitmap_count += x;
	}
	brelse(bitmap_bh);
	printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
	       ", computed = %llu, %llu\n",
	       EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
	       desc_count, bitmap_count);
	return bitmap_count;
#else
	desc_count = 0;
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		grp = NULL;
		if (EXT4_SB(sb)->s_group_info)
			grp = ext4_get_group_info(sb, i);
		/* Don't trust the free count of a corrupted group. */
		if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
			desc_count += ext4_free_group_clusters(sb, gdp);
	}

	return desc_count;
#endif
}
  746. static inline int test_root(ext4_group_t a, int b)
  747. {
  748. while (1) {
  749. if (a < b)
  750. return 0;
  751. if (a == b)
  752. return 1;
  753. if ((a % b) != 0)
  754. return 0;
  755. a = a / b;
  756. }
  757. }
  758. /**
  759. * ext4_bg_has_super - number of blocks used by the superblock in group
  760. * @sb: superblock for filesystem
  761. * @group: group number to check
  762. *
  763. * Return the number of blocks used by the superblock (primary or backup)
  764. * in this group. Currently this will be only 0 or 1.
  765. */
  766. int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
  767. {
  768. struct ext4_super_block *es = EXT4_SB(sb)->s_es;
  769. if (group == 0)
  770. return 1;
  771. if (ext4_has_feature_sparse_super2(sb)) {
  772. if (group == le32_to_cpu(es->s_backup_bgs[0]) ||
  773. group == le32_to_cpu(es->s_backup_bgs[1]))
  774. return 1;
  775. return 0;
  776. }
  777. if ((group <= 1) || !ext4_has_feature_sparse_super(sb))
  778. return 1;
  779. if (!(group & 1))
  780. return 0;
  781. if (test_root(group, 3) || (test_root(group, 5)) ||
  782. test_root(group, 7))
  783. return 1;
  784. return 0;
  785. }
  786. static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
  787. ext4_group_t group)
  788. {
  789. unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
  790. ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
  791. ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
  792. if (group == first || group == first + 1 || group == last)
  793. return 1;
  794. return 0;
  795. }
  796. static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
  797. ext4_group_t group)
  798. {
  799. if (!ext4_bg_has_super(sb, group))
  800. return 0;
  801. if (ext4_has_feature_meta_bg(sb))
  802. return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
  803. else
  804. return EXT4_SB(sb)->s_gdb_count;
  805. }
  806. /**
  807. * ext4_bg_num_gdb - number of blocks used by the group table in group
  808. * @sb: superblock for filesystem
  809. * @group: group number to check
  810. *
  811. * Return the number of blocks used by the group descriptor table
  812. * (primary or backup) in this group. In the future there may be a
  813. * different number of descriptor blocks in each group.
  814. */
  815. unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
  816. {
  817. unsigned long first_meta_bg =
  818. le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
  819. unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
  820. if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg)
  821. return ext4_bg_num_gdb_nometa(sb, group);
  822. return ext4_bg_num_gdb_meta(sb,group);
  823. }
  824. /*
  825. * This function returns the number of file system metadata blocks at
  826. * the beginning of a block group, including the reserved gdt blocks.
  827. */
  828. unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
  829. ext4_group_t block_group)
  830. {
  831. struct ext4_sb_info *sbi = EXT4_SB(sb);
  832. unsigned num;
  833. /* Check for superblock and gdt backups in this group */
  834. num = ext4_bg_has_super(sb, block_group);
  835. if (!ext4_has_feature_meta_bg(sb) ||
  836. block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
  837. sbi->s_desc_per_block) {
  838. if (num) {
  839. num += ext4_bg_num_gdb_nometa(sb, block_group);
  840. num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
  841. }
  842. } else { /* For META_BG_BLOCK_GROUPS */
  843. num += ext4_bg_num_gdb_meta(sb, block_group);
  844. }
  845. return num;
  846. }
  847. static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
  848. ext4_group_t block_group)
  849. {
  850. return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
  851. }
  852. /**
  853. * ext4_inode_to_goal_block - return a hint for block allocation
  854. * @inode: inode for block allocation
  855. *
  856. * Return the ideal location to start allocating blocks for a
  857. * newly created inode.
  858. */
  859. ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
  860. {
  861. struct ext4_inode_info *ei = EXT4_I(inode);
  862. ext4_group_t block_group;
  863. ext4_grpblk_t colour;
  864. int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
  865. ext4_fsblk_t bg_start;
  866. ext4_fsblk_t last_block;
  867. block_group = ei->i_block_group;
  868. if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
  869. /*
  870. * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
  871. * block groups per flexgroup, reserve the first block
  872. * group for directories and special files. Regular
  873. * files will start at the second block group. This
  874. * tends to speed up directory access and improves
  875. * fsck times.
  876. */
  877. block_group &= ~(flex_size-1);
  878. if (S_ISREG(inode->i_mode))
  879. block_group++;
  880. }
  881. bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
  882. last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
  883. /*
  884. * If we are doing delayed allocation, we don't need take
  885. * colour into account.
  886. */
  887. if (test_opt(inode->i_sb, DELALLOC))
  888. return bg_start;
  889. if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
  890. colour = (task_pid_nr(current) % 16) *
  891. (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
  892. else
  893. colour = (task_pid_nr(current) % 16) *
  894. ((last_block - bg_start) / 16);
  895. return bg_start + colour;
  896. }