/* extent_io.h */
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef BTRFS_EXTENT_IO_H
  3. #define BTRFS_EXTENT_IO_H
  4. #include <linux/rbtree.h>
  5. #include <linux/refcount.h>
  6. #include <linux/fiemap.h>
  7. #include <linux/btrfs_tree.h>
  8. #include <linux/spinlock.h>
  9. #include <linux/atomic.h>
  10. #include <linux/rwsem.h>
  11. #include <linux/list.h>
  12. #include <linux/slab.h>
  13. #include "compression.h"
  14. #include "messages.h"
  15. #include "ulist.h"
  16. #include "misc.h"
  17. struct page;
  18. struct file;
  19. struct folio;
  20. struct inode;
  21. struct fiemap_extent_info;
  22. struct readahead_control;
  23. struct address_space;
  24. struct writeback_control;
  25. struct extent_io_tree;
  26. struct extent_map_tree;
  27. struct extent_state;
  28. struct btrfs_block_group;
  29. struct btrfs_fs_info;
  30. struct btrfs_inode;
  31. struct btrfs_root;
  32. struct btrfs_trans_handle;
  33. struct btrfs_tree_parent_check;
/* Bit numbers used in extent_buffer::bflags. */
enum {
	/* Contents are valid / fully read in */
	EXTENT_BUFFER_UPTODATE,
	/* Contents were modified and need writeback */
	EXTENT_BUFFER_DIRTY,
	/* Buffer failed validation, contents must not be trusted */
	EXTENT_BUFFER_CORRUPT,
	/* this got triggered by readahead */
	EXTENT_BUFFER_READAHEAD,
	/* The buffer tree holds a reference on this eb */
	EXTENT_BUFFER_TREE_REF,
	/* Buffer has been marked stale */
	EXTENT_BUFFER_STALE,
	/* Buffer is under writeback */
	EXTENT_BUFFER_WRITEBACK,
	/* read IO error */
	EXTENT_BUFFER_READ_ERR,
	/* Buffer is not tracked in the buffer lookup structure */
	EXTENT_BUFFER_UNMAPPED,
	/* Buffer is present in the lookup structure */
	EXTENT_BUFFER_IN_TREE,
	/* write IO error */
	EXTENT_BUFFER_WRITE_ERR,
	/* Indicate the extent buffer is written zeroed out (for zoned) */
	EXTENT_BUFFER_ZONED_ZEROOUT,
	/* Indicate that extent buffer pages are being read */
	EXTENT_BUFFER_READING,
};
/* these are flags for __process_pages_contig */
enum {
	/* Unlock the page */
	ENUM_BIT(PAGE_UNLOCK),
	/* Page starts writeback, clear dirty bit and set writeback bit */
	ENUM_BIT(PAGE_START_WRITEBACK),
	/* End writeback on the page */
	ENUM_BIT(PAGE_END_WRITEBACK),
	/* Set the ordered flag on the page */
	ENUM_BIT(PAGE_SET_ORDERED),
};
/*
 * Folio private values. Every page that is controlled by the extent map has
 * folio private set to this value.
 */
#define EXTENT_FOLIO_PRIVATE 1

/*
 * The extent buffer bitmap operations are done with byte granularity instead of
 * word granularity for two reasons:
 * 1. The bitmaps must be little-endian on disk.
 * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
 *    single word in a bitmap may straddle two pages in the extent buffer.
 */
/* Index of the byte that holds bit @nr */
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
/* All bits of a single byte set */
#define BYTE_MASK ((1U << BITS_PER_BYTE) - 1)
/* Mask of the valid bits in the first byte of a range starting at bit @start */
#define BITMAP_FIRST_BYTE_MASK(start) \
	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
/* Mask of the valid bits in the last byte of an @nbits long range */
#define BITMAP_LAST_BYTE_MASK(nbits) \
	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
/* Set up / tear down the kmem cache backing extent buffer allocations. */
int __init extent_buffer_init_cachep(void);
void __cold extent_buffer_free_cachep(void);

/* Maximum number of pages a single extent buffer can span. */
#define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
/* In-memory representation of one metadata block (tree node/leaf). */
struct extent_buffer {
	/* Start offset of the buffer */
	u64 start;
	/* Length of the buffer in bytes */
	u32 len;
	/* Size of the folios backing this buffer */
	u32 folio_size;
	/* EXTENT_BUFFER_* flag bits */
	unsigned long bflags;
	struct btrfs_fs_info *fs_info;

	/*
	 * The address where the eb can be accessed without any cross-page handling.
	 * This can be NULL if not possible.
	 */
	void *addr;

	/* Protects the reference count transitions */
	spinlock_t refs_lock;
	atomic_t refs;
	/* Mirror number associated with reads; semantics in extent_io.c */
	int read_mirror;
	/* >= 0 if eb belongs to a log tree, -1 otherwise */
	s8 log_index;
	/* Shift converting a byte offset to a folio index, see get_eb_folio_index() */
	u8 folio_shift;
	struct rcu_head rcu_head;
	/* Serializes access to the buffer contents */
	struct rw_semaphore lock;

	/*
	 * Pointers to all the folios of the extent buffer.
	 *
	 * For now the folio is always order 0 (aka, a single page).
	 */
	struct folio *folios[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
	/* Entry on the per-fs leak tracking list */
	struct list_head leak_list;
	/* PID of the task holding the lock, for debugging */
	pid_t lock_owner;
#endif
};
/* Context carried through extent buffer writeback. */
struct btrfs_eb_write_context {
	struct writeback_control *wbc;
	/* The extent buffer currently being written */
	struct extent_buffer *eb;
	/* Block group @eb resides in. Only used for zoned mode. */
	struct btrfs_block_group *zoned_bg;
};
  119. static inline unsigned long offset_in_eb_folio(const struct extent_buffer *eb,
  120. u64 start)
  121. {
  122. ASSERT(eb->folio_size);
  123. return start & (eb->folio_size - 1);
  124. }
/*
 * Get the correct offset inside the folio of an extent buffer.
 *
 * @eb:		target extent buffer
 * @offset:	offset inside the extent buffer
 *
 * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases.
 */
static inline size_t get_eb_offset_in_folio(const struct extent_buffer *eb,
					    unsigned long offset)
{
	/*
	 * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case
	 *    1.1) One large folio covering the whole eb
	 *	   The eb->start is aligned to folio size, thus adding it
	 *	   won't cause any difference.
	 *    1.2) Several page sized folios
	 *	   The eb->start is aligned to folio (page) size, thus
	 *	   adding it won't cause any difference.
	 *
	 * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case
	 *    In this case there would only be one page sized folio, and there
	 *    may be several different extent buffers in the page/folio.
	 *    We need to add eb->start to properly access the offset inside
	 *    that eb.
	 */
	return offset_in_folio(eb->folios[0], offset + eb->start);
}
/* Map a byte offset inside the extent buffer to an index into eb->folios[]. */
static inline unsigned long get_eb_folio_index(const struct extent_buffer *eb,
					       unsigned long offset)
{
	/*
	 * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case
	 *    1.1) One large folio covering the whole eb.
	 *	   The folio_shift would be large enough to always make us
	 *	   return 0 as index.
	 *    1.2) Several page sized folios
	 *	   The folio_shift would be PAGE_SHIFT, giving us the correct
	 *	   index.
	 *
	 * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case
	 *    The folio would only be page sized, and always give us 0 as index.
	 */
	return offset >> eb->folio_shift;
}
/*
 * Structure to record how many bytes and which ranges are set/cleared
 */
struct extent_changeset {
	/* How many bytes are set/cleared in this operation */
	u64 bytes_changed;
	/* Changed ranges */
	struct ulist range_changed;
};
  179. static inline void extent_changeset_init(struct extent_changeset *changeset)
  180. {
  181. changeset->bytes_changed = 0;
  182. ulist_init(&changeset->range_changed);
  183. }
  184. static inline struct extent_changeset *extent_changeset_alloc(void)
  185. {
  186. struct extent_changeset *ret;
  187. ret = kmalloc(sizeof(*ret), GFP_KERNEL);
  188. if (!ret)
  189. return NULL;
  190. extent_changeset_init(ret);
  191. return ret;
  192. }
/* Preallocate nodes for the changeset's range ulist with @gfp_mask. */
static inline void extent_changeset_prealloc(struct extent_changeset *changeset, gfp_t gfp_mask)
{
	ulist_prealloc(&changeset->range_changed, gfp_mask);
}
  197. static inline void extent_changeset_release(struct extent_changeset *changeset)
  198. {
  199. if (!changeset)
  200. return;
  201. changeset->bytes_changed = 0;
  202. ulist_release(&changeset->range_changed);
  203. }
  204. static inline void extent_changeset_free(struct extent_changeset *changeset)
  205. {
  206. if (!changeset)
  207. return;
  208. extent_changeset_release(changeset);
  209. kfree(changeset);
  210. }
/* Folio release, read, write and readahead entry points. */
bool try_release_extent_mapping(struct folio *folio, gfp_t mask);
int try_release_extent_buffer(struct folio *folio);
int btrfs_read_folio(struct file *file, struct folio *folio);
void extent_write_locked_range(struct inode *inode, const struct folio *locked_folio,
			       u64 start, u64 end, struct writeback_control *wbc,
			       bool pages_dirty);
int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
			    struct writeback_control *wbc);
void btrfs_readahead(struct readahead_control *rac);
int set_folio_extent_mapped(struct folio *folio);
int set_page_extent_mapped(struct page *page);
void clear_folio_extent_mapped(struct folio *folio);

/* Extent buffer allocation, lookup and lifetime management. */
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 start, u64 owner_root, int level);
struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						  u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
					 u64 start);
void free_extent_buffer(struct extent_buffer *eb);
void free_extent_buffer_stale(struct extent_buffer *eb);

/* Wait modes for read_extent_buffer_pages(). */
#define WAIT_NONE 0
#define WAIT_COMPLETE 1
#define WAIT_PAGE_LOCK 2
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
			     const struct btrfs_tree_parent_check *parent_check);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
				u64 bytenr, u64 owner_root, u64 gen, int level);
void btrfs_readahead_node_child(struct extent_buffer *node, int slot);
  244. static inline int num_extent_pages(const struct extent_buffer *eb)
  245. {
  246. /*
  247. * For sectorsize == PAGE_SIZE case, since nodesize is always aligned to
  248. * sectorsize, it's just eb->len >> PAGE_SHIFT.
  249. *
  250. * For sectorsize < PAGE_SIZE case, we could have nodesize < PAGE_SIZE,
  251. * thus have to ensure we get at least one page.
  252. */
  253. return (eb->len >> PAGE_SHIFT) ?: 1;
  254. }
  255. /*
  256. * This can only be determined at runtime by checking eb::folios[0].
  257. *
  258. * As we can have either one large folio covering the whole eb
  259. * (either nodesize <= PAGE_SIZE, or high order folio), or multiple
  260. * single-paged folios.
  261. */
  262. static inline int num_extent_folios(const struct extent_buffer *eb)
  263. {
  264. if (!eb->folios[0])
  265. return 0;
  266. if (folio_order(eb->folios[0]))
  267. return 1;
  268. return num_extent_pages(eb);
  269. }
/* Return non-zero iff EXTENT_BUFFER_UPTODATE is set in eb->bflags. */
static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
{
	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
/* Compare/copy between an extent buffer and ordinary memory. */
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len);
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
			unsigned long start,
			unsigned long len);
/* Like read_extent_buffer() but copies to user space and must not fault. */
int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
				       void __user *dst, unsigned long start,
				       unsigned long len);
void write_extent_buffer(const struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len);
/* Write @chunk_tree_uuid into the chunk_tree_uuid field of @eb's header. */
static inline void write_extent_buffer_chunk_tree_uuid(
		const struct extent_buffer *eb, const void *chunk_tree_uuid)
{
	write_extent_buffer(eb, chunk_tree_uuid,
			    offsetof(struct btrfs_header, chunk_tree_uuid),
			    BTRFS_FSID_SIZE);
}
/* Write @fsid into the fsid field of @eb's header. */
static inline void write_extent_buffer_fsid(const struct extent_buffer *eb,
					    const void *fsid)
{
	write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
			    BTRFS_FSID_SIZE);
}
/* Copy/move/zero operations entirely within extent buffers. */
void copy_extent_buffer_full(const struct extent_buffer *dst,
			     const struct extent_buffer *src);
void copy_extent_buffer(const struct extent_buffer *dst,
			const struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len);
void memcpy_extent_buffer(const struct extent_buffer *dst,
			  unsigned long dst_offset, unsigned long src_offset,
			  unsigned long len);
void memmove_extent_buffer(const struct extent_buffer *dst,
			   unsigned long dst_offset, unsigned long src_offset,
			   unsigned long len);
void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
			   unsigned long len);

/* Bit operations on a bitmap stored inside an extent buffer. */
int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
			   unsigned long pos);
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len);
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
				unsigned long start, unsigned long pos,
				unsigned long len);

/* Dirty/uptodate state transitions. */
void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);

void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
				  const struct folio *locked_folio,
				  struct extent_state **cached,
				  u32 bits_to_clear, unsigned long page_ops);
int extent_invalidate_folio(struct extent_io_tree *tree,
			    struct folio *folio, size_t offset);
void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
			      struct extent_buffer *buf);

/* Bulk page/folio allocation helpers. */
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
			   bool nofail);
int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
			      struct folio *locked_folio, u64 *start,
			      u64 *end);
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
					       u64 start);
#ifdef CONFIG_BTRFS_DEBUG
void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info);
#else
/* Compiles to nothing when debug support is disabled. */
#define btrfs_extent_buffer_leak_debug_check(fs_info) do {} while (0)
#endif
  344. #endif