  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Copyright (C) 2007 Oracle. All rights reserved.
  4. */
  5. #ifndef BTRFS_CTREE_H
  6. #define BTRFS_CTREE_H
  7. #include "linux/cleanup.h"
  8. #include <linux/pagemap.h>
  9. #include <linux/spinlock.h>
  10. #include <linux/rbtree.h>
  11. #include <linux/mutex.h>
  12. #include <linux/wait.h>
  13. #include <linux/list.h>
  14. #include <linux/atomic.h>
  15. #include <linux/xarray.h>
  16. #include <linux/refcount.h>
  17. #include <uapi/linux/btrfs_tree.h>
  18. #include "locking.h"
  19. #include "fs.h"
  20. #include "accessors.h"
  21. #include "extent-io-tree.h"
  22. struct extent_buffer;
  23. struct btrfs_block_rsv;
  24. struct btrfs_trans_handle;
  25. struct btrfs_block_group;
/*
 * Read ahead values for struct btrfs_path.reada: controls how aggressively
 * tree blocks are read ahead while walking the tree.
 */
enum {
	READA_NONE,
	READA_BACK,
	READA_FORWARD,
	/*
	 * Similar to READA_FORWARD but unlike it:
	 *
	 * 1) It will trigger readahead even for leaves that are not close to
	 *    each other on disk;
	 * 2) It also triggers readahead for nodes;
	 * 3) During a search, even when a node or leaf is already in memory, it
	 *    will still trigger readahead for other nodes and leaves that
	 *    follow it.
	 *
	 * This is meant to be used only when we know we are iterating over the
	 * entire tree or a very large part of it.
	 */
	READA_FORWARD_ALWAYS,
};
/*
 * btrfs_paths remember the path taken from the root down to the leaf.
 * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
 * to any other levels that are present.
 *
 * The slots array records the index of the item or block pointer
 * used while walking the tree.
 */
struct btrfs_path {
	/* nodes[i] is the extent buffer at tree level i (0 == leaf). */
	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
	/* slots[i] is the item / key-pointer index used within nodes[i]. */
	int slots[BTRFS_MAX_LEVEL];
	/* if there is real range locking, this locks field will change */
	u8 locks[BTRFS_MAX_LEVEL];
	/* One of the READA_* readahead modes above. */
	u8 reada;
	/* keep some upper locks as we walk down */
	u8 lowest_level;

	/*
	 * set by btrfs_split_item, tells search_slot to keep all locks
	 * and to force calls to keep space in the nodes
	 */
	unsigned int search_for_split:1;
	unsigned int keep_locks:1;
	unsigned int skip_locking:1;
	unsigned int search_commit_root:1;
	unsigned int need_commit_sem:1;
	unsigned int skip_release_on_error:1;
	/*
	 * Indicate that new item (btrfs_search_slot) is extending already
	 * existing item and ins_len contains only the data size and not item
	 * header (ie. sizeof(struct btrfs_item) is not included).
	 */
	unsigned int search_for_extension:1;
	/* Stop search if any locks need to be taken (for read) */
	unsigned int nowait:1;
};
/*
 * Declare a btrfs_path pointer that is automatically released with
 * btrfs_free_path() when it goes out of scope, via the cleanup.h __free()
 * attribute (see the DEFINE_FREE(btrfs_free_path, ...) further below).
 */
#define BTRFS_PATH_AUTO_FREE(path_name)					\
	struct btrfs_path *path_name __free(btrfs_free_path) = NULL
/*
 * The state of btrfs root: bit numbers for struct btrfs_root::state.
 */
enum {
	/*
	 * btrfs_record_root_in_trans is a multi-step process, and it can race
	 * with the balancing code. But the race is very small, and only the
	 * first time the root is added to each transaction. So IN_TRANS_SETUP
	 * is used to tell us when more checks are required
	 */
	BTRFS_ROOT_IN_TRANS_SETUP,
	/*
	 * Set if tree blocks of this root can be shared by other roots.
	 * Only subvolume trees and their reloc trees have this bit set.
	 * Conflicts with TRACK_DIRTY bit.
	 *
	 * This affects two things:
	 *
	 * - How balance works
	 *   For shareable roots, we need to use reloc tree and do path
	 *   replacement for balance, and need various pre/post hooks for
	 *   snapshot creation to handle them.
	 *
	 *   While for non-shareable trees, we just simply do a tree search
	 *   with COW.
	 *
	 * - How dirty roots are tracked
	 *   For shareable roots, btrfs_record_root_in_trans() is needed to
	 *   track them, while non-subvolume roots have TRACK_DIRTY bit, they
	 *   don't need to set this manually.
	 */
	BTRFS_ROOT_SHAREABLE,
	BTRFS_ROOT_TRACK_DIRTY,
	BTRFS_ROOT_IN_RADIX,
	BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
	BTRFS_ROOT_DEFRAG_RUNNING,
	BTRFS_ROOT_FORCE_COW,
	BTRFS_ROOT_MULTI_LOG_TASKS,
	BTRFS_ROOT_DIRTY,
	BTRFS_ROOT_DELETING,
	/*
	 * Reloc tree is orphan, only kept here for qgroup delayed subtree scan
	 *
	 * Set for the subvolume tree owning the reloc tree.
	 */
	BTRFS_ROOT_DEAD_RELOC_TREE,
	/* Mark dead root stored on device whose cleanup needs to be resumed */
	BTRFS_ROOT_DEAD_TREE,
	/* The root has a log tree. Used for subvolume roots and the tree root. */
	BTRFS_ROOT_HAS_LOG_TREE,
	/* Qgroup flushing is in progress */
	BTRFS_ROOT_QGROUP_FLUSHING,
	/* We started the orphan cleanup for this root. */
	BTRFS_ROOT_ORPHAN_CLEANUP,
	/* This root has a drop operation that was started previously. */
	BTRFS_ROOT_UNFINISHED_DROP,
	/* This reloc root needs to have its buffers lockdep class reset. */
	BTRFS_ROOT_RESET_LOCKDEP_CLASS,
};
/*
 * Record swapped tree blocks of a subvolume tree for delayed subtree trace
 * code. For detail check comment in fs/btrfs/qgroup.c.
 */
struct btrfs_qgroup_swapped_blocks {
	/* NOTE(review): presumably protects @swapped and @blocks — confirm in qgroup.c. */
	spinlock_t lock;
	/* RB_EMPTY_ROOT() of above blocks[] */
	bool swapped;
	struct rb_root blocks[BTRFS_MAX_LEVEL];
};
/*
 * In-memory representation of a tree root. extent_root is used for all
 * allocations and for the extent tree extent_root root.
 */
struct btrfs_root {
	struct rb_node rb_node;

	struct extent_buffer *node;
	struct extent_buffer *commit_root;
	struct btrfs_root *log_root;
	struct btrfs_root *reloc_root;

	/* BTRFS_ROOT_* bits, see the state enum above. */
	unsigned long state;
	struct btrfs_root_item root_item;
	struct btrfs_key root_key;
	struct btrfs_fs_info *fs_info;
	struct extent_io_tree dirty_log_pages;

	struct mutex objectid_mutex;

	spinlock_t accounting_lock;
	struct btrfs_block_rsv *block_rsv;

	struct mutex log_mutex;
	wait_queue_head_t log_writer_wait;
	wait_queue_head_t log_commit_wait[2];
	struct list_head log_ctxs[2];
	/* Used only for log trees of subvolumes, not for the log root tree */
	atomic_t log_writers;
	atomic_t log_commit[2];
	/* Used only for log trees of subvolumes, not for the log root tree */
	atomic_t log_batch;

	/*
	 * Protected by the 'log_mutex' lock but can be read without holding
	 * that lock to avoid unnecessary lock contention, in which case it
	 * should be read using btrfs_get_root_log_transid() except if it's a
	 * log tree in which case it can be directly accessed. Updates to this
	 * field should always use btrfs_set_root_log_transid(), except for log
	 * trees where the field can be updated directly.
	 */
	int log_transid;
	/* Updated no matter whether the log commit succeeds or not. */
	int log_transid_committed;
	/*
	 * Only updated when the commit succeeds. Use
	 * btrfs_get_root_last_log_commit() and btrfs_set_root_last_log_commit()
	 * to access this field.
	 */
	int last_log_commit;
	pid_t log_start_pid;

	/*
	 * Access through btrfs_get_root_last_trans() and
	 * btrfs_set_root_last_trans() (READ_ONCE/WRITE_ONCE wrappers below).
	 */
	u64 last_trans;

	u64 free_objectid;

	struct btrfs_key defrag_progress;
	struct btrfs_key defrag_max;

	/* The dirty list is only used by non-shareable roots */
	struct list_head dirty_list;

	struct list_head root_list;

	/*
	 * Xarray that keeps track of in-memory inodes, protected by the lock
	 * @inode_lock.
	 */
	struct xarray inodes;

	/*
	 * Xarray that keeps track of delayed nodes of every inode, protected
	 * by @inode_lock.
	 */
	struct xarray delayed_nodes;
	/*
	 * right now this just gets used so that a root has its own devid
	 * for stat. It may be used for more later
	 */
	dev_t anon_dev;

	spinlock_t root_item_lock;
	refcount_t refs;

	struct mutex delalloc_mutex;
	spinlock_t delalloc_lock;
	/*
	 * all of the inodes that have delalloc bytes. It is possible for
	 * this list to be empty even when there is still dirty data=ordered
	 * extents waiting to finish IO.
	 */
	struct list_head delalloc_inodes;
	struct list_head delalloc_root;
	u64 nr_delalloc_inodes;

	struct mutex ordered_extent_mutex;
	/*
	 * this is used by the balancing code to wait for all the pending
	 * ordered extents
	 */
	spinlock_t ordered_extent_lock;

	/*
	 * all of the data=ordered extents pending writeback
	 * these can span multiple transactions and basically include
	 * every dirty data page that isn't from nodatacow
	 */
	struct list_head ordered_extents;
	struct list_head ordered_root;
	u64 nr_ordered_extents;

	/*
	 * Not empty if this subvolume root has gone through tree block swap
	 * (relocation)
	 *
	 * Will be used by reloc_control::dirty_subvol_roots.
	 */
	struct list_head reloc_dirty_list;

	/*
	 * Number of currently running SEND ioctls to prevent
	 * manipulation with the read-only status via SUBVOL_SETFLAGS
	 */
	int send_in_progress;
	/*
	 * Number of currently running deduplication operations that have a
	 * destination inode belonging to this root. Protected by the lock
	 * root_item_lock.
	 */
	int dedupe_in_progress;
	/* For exclusion of snapshot creation and nocow writes */
	struct btrfs_drew_lock snapshot_lock;

	atomic_t snapshot_force_cow;

	/* For qgroup metadata reserved space */
	spinlock_t qgroup_meta_rsv_lock;
	u64 qgroup_meta_rsv_pertrans;
	u64 qgroup_meta_rsv_prealloc;
	wait_queue_head_t qgroup_flush_wait;

	/* Number of active swapfiles */
	atomic_t nr_swapfiles;

	/* Record pairs of swapped blocks for qgroup */
	struct btrfs_qgroup_swapped_blocks swapped_blocks;

	/* Used only by log trees, when logging csum items */
	struct extent_io_tree log_csum_range;

	/* Used in simple quotas, track root during relocation. */
	u64 relocation_src_root;

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	u64 alloc_bytenr;
#endif

#ifdef CONFIG_BTRFS_DEBUG
	struct list_head leak_list;
#endif
};
  286. static inline bool btrfs_root_readonly(const struct btrfs_root *root)
  287. {
  288. /* Byte-swap the constant at compile time, root_item::flags is LE */
  289. return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
  290. }
  291. static inline bool btrfs_root_dead(const struct btrfs_root *root)
  292. {
  293. /* Byte-swap the constant at compile time, root_item::flags is LE */
  294. return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
  295. }
/* Return the tree id of a root (stored in the objectid of its key). */
static inline u64 btrfs_root_id(const struct btrfs_root *root)
{
	return root->root_key.objectid;
}
/* Lockless read of root::log_transid (see the field comment in struct btrfs_root). */
static inline int btrfs_get_root_log_transid(const struct btrfs_root *root)
{
	return READ_ONCE(root->log_transid);
}
/* Update root::log_transid; pairs with btrfs_get_root_log_transid() for lockless readers. */
static inline void btrfs_set_root_log_transid(struct btrfs_root *root, int log_transid)
{
	WRITE_ONCE(root->log_transid, log_transid);
}
/* Lockless read of root::last_log_commit (see the field comment in struct btrfs_root). */
static inline int btrfs_get_root_last_log_commit(const struct btrfs_root *root)
{
	return READ_ONCE(root->last_log_commit);
}
/* Update root::last_log_commit; pairs with btrfs_get_root_last_log_commit(). */
static inline void btrfs_set_root_last_log_commit(struct btrfs_root *root, int commit_id)
{
	WRITE_ONCE(root->last_log_commit, commit_id);
}
/* Lockless read of root::last_trans. */
static inline u64 btrfs_get_root_last_trans(const struct btrfs_root *root)
{
	return READ_ONCE(root->last_trans);
}
/* Update root::last_trans; pairs with btrfs_get_root_last_trans() for lockless readers. */
static inline void btrfs_set_root_last_trans(struct btrfs_root *root, u64 transid)
{
	WRITE_ONCE(root->last_trans, transid);
}
/*
 * Return the generation this root started with.
 *
 * Every normal root is created with root->root_key.offset set to its
 * originating generation. If it is a snapshot it is the generation when the
 * snapshot was created.
 *
 * However for TREE_RELOC roots root_key.offset is the objectid of the owning
 * tree root. Thankfully we copy the root item of the owning tree root, which
 * has its last_snapshot set to what we would have root_key.offset set to, so
 * return that if this is a TREE_RELOC root.
 */
static inline u64 btrfs_root_origin_generation(const struct btrfs_root *root)
{
	if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
		return btrfs_root_last_snapshot(&root->root_item);
	return root->root_key.offset;
}
/*
 * Structure that conveys information about an extent that is going to replace
 * all the extents in a file range.
 */
struct btrfs_replace_extent_info {
	/* NOTE(review): presumably the extent's disk bytenr/length — confirm against file.c users. */
	u64 disk_offset;
	u64 disk_len;
	u64 data_offset;
	u64 data_len;
	u64 file_offset;
	/* Pointer to a file extent item of type regular or prealloc. */
	char *extent_buf;
	/*
	 * Set to true when attempting to replace a file range with a new extent
	 * described by this structure, set to false when attempting to clone an
	 * existing extent into a file range.
	 */
	bool is_new_extent;
	/* Indicate if we should update the inode's mtime and ctime. */
	bool update_times;
	/* Meaningful only if is_new_extent is true. */
	int qgroup_reserved;
	/*
	 * Meaningful only if is_new_extent is true.
	 * Used to track how many extent items we have already inserted in a
	 * subvolume tree that refer to the extent described by this structure,
	 * so that we know when to create a new delayed ref or update an existing
	 * one.
	 */
	int insertions;
};
/* Arguments for btrfs_drop_extents() */
struct btrfs_drop_extents_args {
	/* Input parameters */

	/*
	 * If NULL, btrfs_drop_extents() will allocate and free its own path.
	 * If 'replace_extent' is true, this must not be NULL. Also the path
	 * is always released except if 'replace_extent' is true and
	 * btrfs_drop_extents() sets 'extent_inserted' to true, in which case
	 * the path is kept locked.
	 */
	struct btrfs_path *path;
	/* Start offset of the range to drop extents from */
	u64 start;
	/* End (exclusive, last byte + 1) of the range to drop extents from */
	u64 end;
	/* If true drop all the extent maps in the range */
	bool drop_cache;
	/*
	 * If true it means we want to insert a new extent after dropping all
	 * the extents in the range. If this is true, the 'extent_item_size'
	 * parameter must be set as well and the 'extent_inserted' field will
	 * be set to true by btrfs_drop_extents() if it could insert the new
	 * extent.
	 * Note: when this is set to true the path must not be NULL.
	 */
	bool replace_extent;
	/*
	 * Used if 'replace_extent' is true. Size of the file extent item to
	 * insert after dropping all existing extents in the range
	 */
	u32 extent_item_size;

	/* Output parameters */

	/*
	 * Set to the minimum between the input parameter 'end' and the end
	 * (exclusive, last byte + 1) of the last dropped extent. This is always
	 * set even if btrfs_drop_extents() returns an error.
	 */
	u64 drop_end;
	/*
	 * The number of allocated bytes found in the range. This can be smaller
	 * than the range's length when there are holes in the range.
	 */
	u64 bytes_found;
	/*
	 * Only set if 'replace_extent' is true. Set to true if we were able
	 * to insert a replacement extent after dropping all extents in the
	 * range, otherwise set to false by btrfs_drop_extents().
	 * Also, if btrfs_drop_extents() has set this to true it means it
	 * returned with the path locked, otherwise if it has set this to
	 * false it has returned with the path released.
	 */
	bool extent_inserted;
};
/* Per-open-file private state hung off the VFS file. */
struct btrfs_file_private {
	/* NOTE(review): presumably a scratch buffer for readdir/filldir — confirm in inode.c. */
	void *filldir_buf;
	u64 last_index;
	/* Cached extent state to speed up repeated llseek calls. */
	struct extent_state *llseek_cached_state;
	/* Task that allocated this structure. */
	struct task_struct *owner_task;
};
/* Usable data bytes in a tree block: the node size minus the block header. */
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
{
	return info->nodesize - sizeof(struct btrfs_header);
}
/* Largest possible single item: the leaf data area minus one item header. */
static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
{
	return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
}
/* Number of key pointers that fit into one internal tree node. */
static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info)
{
	return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr);
}
  445. static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
  446. {
  447. return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
  448. }
/* Convert a byte count to a number of sector-sized blocks (shift by sectorsize_bits). */
#define BTRFS_BYTES_TO_BLKS(fs_info, bytes)				\
	((bytes) >> (fs_info)->sectorsize_bits)
/*
 * GFP mask for allocations done on behalf of writes to @mapping: the
 * mapping's gfp mask constrained to clear __GFP_FS (presumably so reclaim
 * cannot re-enter the filesystem — NOTE(review): confirm).
 */
static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
{
	return mapping_gfp_constraint(mapping, ~__GFP_FS);
}
  455. void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end);
  456. int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
  457. u64 num_bytes, u64 *actual_bytes);
  458. int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
  459. /* ctree.c */
  460. int __init btrfs_ctree_init(void);
  461. void __cold btrfs_ctree_exit(void);
  462. int btrfs_bin_search(struct extent_buffer *eb, int first_slot,
  463. const struct btrfs_key *key, int *slot);
  464. int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
#ifdef __LITTLE_ENDIAN
/*
 * Compare two keys, on little-endian the disk order is same as CPU order and
 * we can avoid the conversion.
 */
static inline int btrfs_comp_keys(const struct btrfs_disk_key *disk_key,
				  const struct btrfs_key *k2)
{
	const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;

	return btrfs_comp_cpu_keys(k1, k2);
}
#else
/* Compare two keys in a memcmp fashion. */
static inline int btrfs_comp_keys(const struct btrfs_disk_key *disk,
				  const struct btrfs_key *k2)
{
	/* Big-endian: convert the on-disk key to CPU byte order first. */
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);
	return btrfs_comp_cpu_keys(&k1, k2);
}
#endif
  486. int btrfs_previous_item(struct btrfs_root *root,
  487. struct btrfs_path *path, u64 min_objectid,
  488. int type);
  489. int btrfs_previous_extent_item(struct btrfs_root *root,
  490. struct btrfs_path *path, u64 min_objectid);
  491. void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
  492. const struct btrfs_path *path,
  493. const struct btrfs_key *new_key);
  494. struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
  495. int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
  496. struct btrfs_key *key, int lowest_level,
  497. u64 min_trans);
  498. int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
  499. struct btrfs_path *path,
  500. u64 min_trans);
  501. struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
  502. int slot);
  503. int btrfs_cow_block(struct btrfs_trans_handle *trans,
  504. struct btrfs_root *root, struct extent_buffer *buf,
  505. struct extent_buffer *parent, int parent_slot,
  506. struct extent_buffer **cow_ret,
  507. enum btrfs_lock_nesting nest);
  508. int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
  509. struct btrfs_root *root,
  510. struct extent_buffer *buf,
  511. struct extent_buffer *parent, int parent_slot,
  512. struct extent_buffer **cow_ret,
  513. u64 search_start, u64 empty_size,
  514. enum btrfs_lock_nesting nest);
  515. int btrfs_copy_root(struct btrfs_trans_handle *trans,
  516. struct btrfs_root *root,
  517. struct extent_buffer *buf,
  518. struct extent_buffer **cow_ret, u64 new_root_objectid);
  519. bool btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
  520. struct btrfs_root *root,
  521. struct extent_buffer *buf);
  522. int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  523. struct btrfs_path *path, int level, int slot);
  524. void btrfs_extend_item(struct btrfs_trans_handle *trans,
  525. const struct btrfs_path *path, u32 data_size);
  526. void btrfs_truncate_item(struct btrfs_trans_handle *trans,
  527. const struct btrfs_path *path, u32 new_size, int from_end);
  528. int btrfs_split_item(struct btrfs_trans_handle *trans,
  529. struct btrfs_root *root,
  530. struct btrfs_path *path,
  531. const struct btrfs_key *new_key,
  532. unsigned long split_offset);
  533. int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
  534. struct btrfs_root *root,
  535. struct btrfs_path *path,
  536. const struct btrfs_key *new_key);
  537. int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
  538. u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
  539. int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  540. const struct btrfs_key *key, struct btrfs_path *p,
  541. int ins_len, int cow);
  542. int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
  543. struct btrfs_path *p, u64 time_seq);
  544. int btrfs_search_slot_for_read(struct btrfs_root *root,
  545. const struct btrfs_key *key,
  546. struct btrfs_path *p, int find_higher,
  547. int return_any);
  548. void btrfs_release_path(struct btrfs_path *p);
  549. struct btrfs_path *btrfs_alloc_path(void);
  550. void btrfs_free_path(struct btrfs_path *p);
  551. DEFINE_FREE(btrfs_free_path, struct btrfs_path *, btrfs_free_path(_T))
  552. int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  553. struct btrfs_path *path, int slot, int nr);
/* Delete the single item at the path's current leaf slot (path->slots[0]). */
static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path)
{
	return btrfs_del_items(trans, root, path, path->slots[0], 1);
}
/*
 * Describes a batch of items to insert in a btree. This is used by
 * btrfs_insert_empty_items().
 */
struct btrfs_item_batch {
	/*
	 * Pointer to an array containing the keys of the items to insert (in
	 * sorted order).
	 */
	const struct btrfs_key *keys;
	/* Pointer to an array containing the data size for each item to insert. */
	const u32 *data_sizes;
	/*
	 * The sum of data sizes for all items. The caller can compute this while
	 * setting up the data_sizes array, so it ends up being more efficient
	 * than having btrfs_insert_empty_items() or setup_item_for_insert()
	 * doing it, as it would avoid an extra loop over a potentially large
	 * array, and in the case of setup_item_for_insert(), we would be doing
	 * it while holding a write lock on a leaf and often on upper level nodes
	 * too, unnecessarily increasing the size of a critical section.
	 */
	u32 total_data_size;
	/* Size of the keys and data_sizes arrays (number of items in the batch). */
	int nr;
};
  585. void btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans,
  586. struct btrfs_root *root,
  587. struct btrfs_path *path,
  588. const struct btrfs_key *key,
  589. u32 data_size);
  590. int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  591. const struct btrfs_key *key, void *data, u32 data_size);
  592. int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
  593. struct btrfs_root *root,
  594. struct btrfs_path *path,
  595. const struct btrfs_item_batch *batch);
  596. static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
  597. struct btrfs_root *root,
  598. struct btrfs_path *path,
  599. const struct btrfs_key *key,
  600. u32 data_size)
  601. {
  602. struct btrfs_item_batch batch;
  603. batch.keys = key;
  604. batch.data_sizes = &data_size;
  605. batch.total_data_size = data_size;
  606. batch.nr = 1;
  607. return btrfs_insert_empty_items(trans, root, path, &batch);
  608. }
  609. int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
  610. u64 time_seq);
  611. int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
  612. struct btrfs_path *path);
  613. int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
  614. struct btrfs_path *path);
/*
 * Search in @root for a given @key, and store the slot found in @found_key.
 *
 * @root:	The root node of the tree.
 * @key:	The key we are looking for.
 * @found_key:	Will hold the found item.
 * @path:	Holds the current slot/leaf.
 * @iter_ret:	Contains the value returned from btrfs_search_slot or
 *		btrfs_get_next_valid_item, whichever was executed last.
 *
 * The @iter_ret is an output variable that will contain the return value of
 * btrfs_search_slot, if it encountered an error, or the value returned from
 * btrfs_get_next_valid_item otherwise. That return value can be 0, if a valid
 * slot was found, 1 if there were no more leaves, and <0 if there was an error.
 *
 * It's recommended to use a separate variable for iter_ret and then use it to
 * set the function return value so there's no confusion of the 0/1/errno
 * values stemming from btrfs_search_slot.
 */
#define btrfs_for_each_slot(root, key, found_key, path, iter_ret)	\
	for (iter_ret = btrfs_search_slot(NULL, (root), (key), (path), 0, 0); \
	     (iter_ret) >= 0 &&						\
	     (iter_ret = btrfs_get_next_valid_item((root), (found_key), (path))) == 0; \
	     (path)->slots[0]++						\
	)
  640. int btrfs_next_old_item(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq);
/*
 * Search the tree again to find a leaf with greater keys.
 *
 * Returns 0 if it found something or 1 if there are no greater leaves.
 * Returns < 0 on error.
 */
static inline int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	/* time_seq == 0 means use the current version of the tree. */
	return btrfs_next_old_leaf(root, path, 0);
}
/* Advance the path to the next item, with time_seq 0 (current tree version). */
static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
	return btrfs_next_old_item(root, p, 0);
}
  655. int btrfs_leaf_free_space(const struct extent_buffer *leaf);
  656. static inline int is_fstree(u64 rootid)
  657. {
  658. if (rootid == BTRFS_FS_TREE_OBJECTID ||
  659. ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
  660. !btrfs_qgroup_level(rootid)))
  661. return 1;
  662. return 0;
  663. }
  664. static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
  665. {
  666. return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
  667. }
  668. u16 btrfs_csum_type_size(u16 type);
  669. int btrfs_super_csum_size(const struct btrfs_super_block *s);
  670. const char *btrfs_super_csum_name(u16 csum_type);
  671. const char *btrfs_super_csum_driver(u16 csum_type);
  672. size_t __attribute_const__ btrfs_get_num_csums(void);
/*
 * We use page status Private2 to indicate there is an ordered extent with
 * unfinished IO.
 *
 * Rename the Private2 accessors to Ordered, to improve readability.
 */
#define PageOrdered(page)		PagePrivate2(page)
#define SetPageOrdered(page)		SetPagePrivate2(page)
#define ClearPageOrdered(page)		ClearPagePrivate2(page)
#define folio_test_ordered(folio)	folio_test_private_2(folio)
#define folio_set_ordered(folio)	folio_set_private_2(folio)
#define folio_clear_ordered(folio)	folio_clear_private_2(folio)
  685. #endif