root-tree.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2007 Oracle. All rights reserved.
  4. */
  5. #include <linux/err.h>
  6. #include <linux/uuid.h>
  7. #include "ctree.h"
  8. #include "fs.h"
  9. #include "messages.h"
  10. #include "transaction.h"
  11. #include "disk-io.h"
  12. #include "qgroup.h"
  13. #include "space-info.h"
  14. #include "accessors.h"
  15. #include "root-tree.h"
  16. #include "orphan.h"
  17. /*
  18. * Read a root item from the tree. In case we detect a root item smaller then
  19. * sizeof(root_item), we know it's an old version of the root structure and
  20. * initialize all new fields to zero. The same happens if we detect mismatching
  21. * generation numbers as then we know the root was once mounted with an older
  22. * kernel that was not aware of the root item structure change.
  23. */
  24. static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
  25. struct btrfs_root_item *item)
  26. {
  27. u32 len;
  28. int need_reset = 0;
  29. len = btrfs_item_size(eb, slot);
  30. read_extent_buffer(eb, item, btrfs_item_ptr_offset(eb, slot),
  31. min_t(u32, len, sizeof(*item)));
  32. if (len < sizeof(*item))
  33. need_reset = 1;
  34. if (!need_reset && btrfs_root_generation(item)
  35. != btrfs_root_generation_v2(item)) {
  36. if (btrfs_root_generation_v2(item) != 0) {
  37. btrfs_warn(eb->fs_info,
  38. "mismatching generation and generation_v2 found in root item. This root was probably mounted with an older kernel. Resetting all new fields.");
  39. }
  40. need_reset = 1;
  41. }
  42. if (need_reset) {
  43. /* Clear all members from generation_v2 onwards. */
  44. memset_startat(item, 0, generation_v2);
  45. generate_random_guid(item->uuid);
  46. }
  47. }
  48. /*
  49. * Lookup the root by the key.
  50. *
  51. * root: the root of the root tree
  52. * search_key: the key to search
  53. * path: the path we search
  54. * root_item: the root item of the tree we look for
  55. * root_key: the root key of the tree we look for
  56. *
  57. * If ->offset of 'search_key' is -1ULL, it means we are not sure the offset
  58. * of the search key, just lookup the root with the highest offset for a
  59. * given objectid.
  60. *
  61. * If we find something return 0, otherwise > 0, < 0 on error.
  62. */
  63. int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
  64. struct btrfs_path *path, struct btrfs_root_item *root_item,
  65. struct btrfs_key *root_key)
  66. {
  67. struct btrfs_key found_key;
  68. struct extent_buffer *l;
  69. int ret;
  70. int slot;
  71. ret = btrfs_search_slot(NULL, root, search_key, path, 0, 0);
  72. if (ret < 0)
  73. return ret;
  74. if (search_key->offset != -1ULL) { /* the search key is exact */
  75. if (ret > 0)
  76. goto out;
  77. } else {
  78. /*
  79. * Key with offset -1 found, there would have to exist a root
  80. * with such id, but this is out of the valid range.
  81. */
  82. if (ret == 0) {
  83. ret = -EUCLEAN;
  84. goto out;
  85. }
  86. if (path->slots[0] == 0)
  87. goto out;
  88. path->slots[0]--;
  89. ret = 0;
  90. }
  91. l = path->nodes[0];
  92. slot = path->slots[0];
  93. btrfs_item_key_to_cpu(l, &found_key, slot);
  94. if (found_key.objectid != search_key->objectid ||
  95. found_key.type != BTRFS_ROOT_ITEM_KEY) {
  96. ret = 1;
  97. goto out;
  98. }
  99. if (root_item)
  100. btrfs_read_root_item(l, slot, root_item);
  101. if (root_key)
  102. memcpy(root_key, &found_key, sizeof(found_key));
  103. out:
  104. btrfs_release_path(path);
  105. return ret;
  106. }
  107. void btrfs_set_root_node(struct btrfs_root_item *item,
  108. struct extent_buffer *node)
  109. {
  110. btrfs_set_root_bytenr(item, node->start);
  111. btrfs_set_root_level(item, btrfs_header_level(node));
  112. btrfs_set_root_generation(item, btrfs_header_generation(node));
  113. }
  114. /*
  115. * copy the data in 'item' into the btree
  116. */
  117. int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
  118. *root, struct btrfs_key *key, struct btrfs_root_item
  119. *item)
  120. {
  121. struct btrfs_fs_info *fs_info = root->fs_info;
  122. struct btrfs_path *path;
  123. struct extent_buffer *l;
  124. int ret;
  125. int slot;
  126. unsigned long ptr;
  127. u32 old_len;
  128. path = btrfs_alloc_path();
  129. if (!path)
  130. return -ENOMEM;
  131. ret = btrfs_search_slot(trans, root, key, path, 0, 1);
  132. if (ret < 0)
  133. goto out;
  134. if (ret > 0) {
  135. btrfs_crit(fs_info,
  136. "unable to find root key (%llu %u %llu) in tree %llu",
  137. key->objectid, key->type, key->offset, btrfs_root_id(root));
  138. ret = -EUCLEAN;
  139. btrfs_abort_transaction(trans, ret);
  140. goto out;
  141. }
  142. l = path->nodes[0];
  143. slot = path->slots[0];
  144. ptr = btrfs_item_ptr_offset(l, slot);
  145. old_len = btrfs_item_size(l, slot);
  146. /*
  147. * If this is the first time we update the root item which originated
  148. * from an older kernel, we need to enlarge the item size to make room
  149. * for the added fields.
  150. */
  151. if (old_len < sizeof(*item)) {
  152. btrfs_release_path(path);
  153. ret = btrfs_search_slot(trans, root, key, path,
  154. -1, 1);
  155. if (ret < 0) {
  156. btrfs_abort_transaction(trans, ret);
  157. goto out;
  158. }
  159. ret = btrfs_del_item(trans, root, path);
  160. if (ret < 0) {
  161. btrfs_abort_transaction(trans, ret);
  162. goto out;
  163. }
  164. btrfs_release_path(path);
  165. ret = btrfs_insert_empty_item(trans, root, path,
  166. key, sizeof(*item));
  167. if (ret < 0) {
  168. btrfs_abort_transaction(trans, ret);
  169. goto out;
  170. }
  171. l = path->nodes[0];
  172. slot = path->slots[0];
  173. ptr = btrfs_item_ptr_offset(l, slot);
  174. }
  175. /*
  176. * Update generation_v2 so at the next mount we know the new root
  177. * fields are valid.
  178. */
  179. btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
  180. write_extent_buffer(l, item, ptr, sizeof(*item));
  181. btrfs_mark_buffer_dirty(trans, path->nodes[0]);
  182. out:
  183. btrfs_free_path(path);
  184. return ret;
  185. }
  186. int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  187. const struct btrfs_key *key, struct btrfs_root_item *item)
  188. {
  189. /*
  190. * Make sure generation v1 and v2 match. See update_root for details.
  191. */
  192. btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
  193. return btrfs_insert_item(trans, root, key, item, sizeof(*item));
  194. }
  195. int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
  196. {
  197. struct btrfs_root *tree_root = fs_info->tree_root;
  198. struct extent_buffer *leaf;
  199. struct btrfs_path *path;
  200. struct btrfs_key key;
  201. struct btrfs_root *root;
  202. int err = 0;
  203. int ret;
  204. path = btrfs_alloc_path();
  205. if (!path)
  206. return -ENOMEM;
  207. key.objectid = BTRFS_ORPHAN_OBJECTID;
  208. key.type = BTRFS_ORPHAN_ITEM_KEY;
  209. key.offset = 0;
  210. while (1) {
  211. u64 root_objectid;
  212. ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
  213. if (ret < 0) {
  214. err = ret;
  215. break;
  216. }
  217. leaf = path->nodes[0];
  218. if (path->slots[0] >= btrfs_header_nritems(leaf)) {
  219. ret = btrfs_next_leaf(tree_root, path);
  220. if (ret < 0)
  221. err = ret;
  222. if (ret != 0)
  223. break;
  224. leaf = path->nodes[0];
  225. }
  226. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  227. btrfs_release_path(path);
  228. if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
  229. key.type != BTRFS_ORPHAN_ITEM_KEY)
  230. break;
  231. root_objectid = key.offset;
  232. key.offset++;
  233. root = btrfs_get_fs_root(fs_info, root_objectid, false);
  234. err = PTR_ERR_OR_ZERO(root);
  235. if (err && err != -ENOENT) {
  236. break;
  237. } else if (err == -ENOENT) {
  238. struct btrfs_trans_handle *trans;
  239. btrfs_release_path(path);
  240. trans = btrfs_join_transaction(tree_root);
  241. if (IS_ERR(trans)) {
  242. err = PTR_ERR(trans);
  243. btrfs_handle_fs_error(fs_info, err,
  244. "Failed to start trans to delete orphan item");
  245. break;
  246. }
  247. err = btrfs_del_orphan_item(trans, tree_root,
  248. root_objectid);
  249. btrfs_end_transaction(trans);
  250. if (err) {
  251. btrfs_handle_fs_error(fs_info, err,
  252. "Failed to delete root orphan item");
  253. break;
  254. }
  255. continue;
  256. }
  257. WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state));
  258. if (btrfs_root_refs(&root->root_item) == 0) {
  259. struct btrfs_key drop_key;
  260. btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
  261. /*
  262. * If we have a non-zero drop_progress then we know we
  263. * made it partly through deleting this snapshot, and
  264. * thus we need to make sure we block any balance from
  265. * happening until this snapshot is completely dropped.
  266. */
  267. if (drop_key.objectid != 0 || drop_key.type != 0 ||
  268. drop_key.offset != 0) {
  269. set_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
  270. set_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
  271. }
  272. set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
  273. btrfs_add_dead_root(root);
  274. }
  275. btrfs_put_root(root);
  276. }
  277. btrfs_free_path(path);
  278. return err;
  279. }
  280. /* drop the root item for 'key' from the tree root */
  281. int btrfs_del_root(struct btrfs_trans_handle *trans,
  282. const struct btrfs_key *key)
  283. {
  284. struct btrfs_root *root = trans->fs_info->tree_root;
  285. struct btrfs_path *path;
  286. int ret;
  287. path = btrfs_alloc_path();
  288. if (!path)
  289. return -ENOMEM;
  290. ret = btrfs_search_slot(trans, root, key, path, -1, 1);
  291. if (ret < 0)
  292. goto out;
  293. if (ret != 0) {
  294. /* The root must exist but we did not find it by the key. */
  295. ret = -EUCLEAN;
  296. goto out;
  297. }
  298. ret = btrfs_del_item(trans, root, path);
  299. out:
  300. btrfs_free_path(path);
  301. return ret;
  302. }
  303. int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
  304. u64 ref_id, u64 dirid, u64 *sequence,
  305. const struct fscrypt_str *name)
  306. {
  307. struct btrfs_root *tree_root = trans->fs_info->tree_root;
  308. struct btrfs_path *path;
  309. struct btrfs_root_ref *ref;
  310. struct extent_buffer *leaf;
  311. struct btrfs_key key;
  312. unsigned long ptr;
  313. int ret;
  314. path = btrfs_alloc_path();
  315. if (!path)
  316. return -ENOMEM;
  317. key.objectid = root_id;
  318. key.type = BTRFS_ROOT_BACKREF_KEY;
  319. key.offset = ref_id;
  320. again:
  321. ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
  322. if (ret < 0) {
  323. goto out;
  324. } else if (ret == 0) {
  325. leaf = path->nodes[0];
  326. ref = btrfs_item_ptr(leaf, path->slots[0],
  327. struct btrfs_root_ref);
  328. ptr = (unsigned long)(ref + 1);
  329. if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
  330. (btrfs_root_ref_name_len(leaf, ref) != name->len) ||
  331. memcmp_extent_buffer(leaf, name->name, ptr, name->len)) {
  332. ret = -ENOENT;
  333. goto out;
  334. }
  335. *sequence = btrfs_root_ref_sequence(leaf, ref);
  336. ret = btrfs_del_item(trans, tree_root, path);
  337. if (ret)
  338. goto out;
  339. } else {
  340. ret = -ENOENT;
  341. goto out;
  342. }
  343. if (key.type == BTRFS_ROOT_BACKREF_KEY) {
  344. btrfs_release_path(path);
  345. key.objectid = ref_id;
  346. key.type = BTRFS_ROOT_REF_KEY;
  347. key.offset = root_id;
  348. goto again;
  349. }
  350. out:
  351. btrfs_free_path(path);
  352. return ret;
  353. }
  354. /*
  355. * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
  356. * or BTRFS_ROOT_BACKREF_KEY.
  357. *
  358. * The dirid, sequence, name and name_len refer to the directory entry
  359. * that is referencing the root.
  360. *
  361. * For a forward ref, the root_id is the id of the tree referencing
  362. * the root and ref_id is the id of the subvol or snapshot.
  363. *
  364. * For a back ref the root_id is the id of the subvol or snapshot and
  365. * ref_id is the id of the tree referencing it.
  366. *
  367. * Will return 0, -ENOMEM, or anything from the CoW path
  368. */
  369. int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
  370. u64 ref_id, u64 dirid, u64 sequence,
  371. const struct fscrypt_str *name)
  372. {
  373. struct btrfs_root *tree_root = trans->fs_info->tree_root;
  374. struct btrfs_key key;
  375. int ret;
  376. struct btrfs_path *path;
  377. struct btrfs_root_ref *ref;
  378. struct extent_buffer *leaf;
  379. unsigned long ptr;
  380. path = btrfs_alloc_path();
  381. if (!path)
  382. return -ENOMEM;
  383. key.objectid = root_id;
  384. key.type = BTRFS_ROOT_BACKREF_KEY;
  385. key.offset = ref_id;
  386. again:
  387. ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
  388. sizeof(*ref) + name->len);
  389. if (ret) {
  390. btrfs_abort_transaction(trans, ret);
  391. btrfs_free_path(path);
  392. return ret;
  393. }
  394. leaf = path->nodes[0];
  395. ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
  396. btrfs_set_root_ref_dirid(leaf, ref, dirid);
  397. btrfs_set_root_ref_sequence(leaf, ref, sequence);
  398. btrfs_set_root_ref_name_len(leaf, ref, name->len);
  399. ptr = (unsigned long)(ref + 1);
  400. write_extent_buffer(leaf, name->name, ptr, name->len);
  401. btrfs_mark_buffer_dirty(trans, leaf);
  402. if (key.type == BTRFS_ROOT_BACKREF_KEY) {
  403. btrfs_release_path(path);
  404. key.objectid = ref_id;
  405. key.type = BTRFS_ROOT_REF_KEY;
  406. key.offset = root_id;
  407. goto again;
  408. }
  409. btrfs_free_path(path);
  410. return 0;
  411. }
  412. /*
  413. * Old btrfs forgets to init root_item->flags and root_item->byte_limit
  414. * for subvolumes. To work around this problem, we steal a bit from
  415. * root_item->inode_item->flags, and use it to indicate if those fields
  416. * have been properly initialized.
  417. */
  418. void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
  419. {
  420. u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode);
  421. if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
  422. inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
  423. btrfs_set_stack_inode_flags(&root_item->inode, inode_flags);
  424. btrfs_set_root_flags(root_item, 0);
  425. btrfs_set_root_limit(root_item, 0);
  426. }
  427. }
  428. void btrfs_update_root_times(struct btrfs_trans_handle *trans,
  429. struct btrfs_root *root)
  430. {
  431. struct btrfs_root_item *item = &root->root_item;
  432. struct timespec64 ct;
  433. ktime_get_real_ts64(&ct);
  434. spin_lock(&root->root_item_lock);
  435. btrfs_set_root_ctransid(item, trans->transid);
  436. btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
  437. btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
  438. spin_unlock(&root->root_item_lock);
  439. }
  440. /*
  441. * Reserve space for subvolume operation.
  442. *
  443. * root: the root of the parent directory
  444. * rsv: block reservation
  445. * items: the number of items that we need do reservation
  446. * use_global_rsv: allow fallback to the global block reservation
  447. *
  448. * This function is used to reserve the space for snapshot/subvolume
  449. * creation and deletion. Those operations are different with the
  450. * common file/directory operations, they change two fs/file trees
  451. * and root tree, the number of items that the qgroup reserves is
  452. * different with the free space reservation. So we can not use
  453. * the space reservation mechanism in start_transaction().
  454. */
  455. int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
  456. struct btrfs_block_rsv *rsv, int items,
  457. bool use_global_rsv)
  458. {
  459. u64 qgroup_num_bytes = 0;
  460. u64 num_bytes;
  461. int ret;
  462. struct btrfs_fs_info *fs_info = root->fs_info;
  463. struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
  464. if (btrfs_qgroup_enabled(fs_info)) {
  465. /* One for parent inode, two for dir entries */
  466. qgroup_num_bytes = 3 * fs_info->nodesize;
  467. ret = btrfs_qgroup_reserve_meta_prealloc(root,
  468. qgroup_num_bytes, true,
  469. false);
  470. if (ret)
  471. return ret;
  472. }
  473. num_bytes = btrfs_calc_insert_metadata_size(fs_info, items);
  474. rsv->space_info = btrfs_find_space_info(fs_info,
  475. BTRFS_BLOCK_GROUP_METADATA);
  476. ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes,
  477. BTRFS_RESERVE_FLUSH_ALL);
  478. if (ret == -ENOSPC && use_global_rsv)
  479. ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
  480. if (ret && qgroup_num_bytes)
  481. btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
  482. if (!ret) {
  483. spin_lock(&rsv->lock);
  484. rsv->qgroup_rsv_reserved += qgroup_num_bytes;
  485. spin_unlock(&rsv->lock);
  486. }
  487. return ret;
  488. }