  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Copyright (C) 2007 Oracle. All rights reserved.
  4. */
  5. #ifndef BTRFS_VOLUMES_H
  6. #define BTRFS_VOLUMES_H
  7. #include <linux/blk_types.h>
  8. #include <linux/sizes.h>
  9. #include <linux/atomic.h>
  10. #include <linux/sort.h>
  11. #include <linux/list.h>
  12. #include <linux/mutex.h>
  13. #include <linux/log2.h>
  14. #include <linux/kobject.h>
  15. #include <linux/refcount.h>
  16. #include <linux/completion.h>
  17. #include <linux/rbtree.h>
  18. #include <uapi/linux/btrfs.h>
  19. #include "messages.h"
  20. #include "rcu-string.h"
  21. struct block_device;
  22. struct bdev_handle;
  23. struct btrfs_fs_info;
  24. struct btrfs_block_group;
  25. struct btrfs_trans_handle;
  26. struct btrfs_zoned_device_info;
#define BTRFS_MAX_DATA_CHUNK_SIZE	(10ULL * SZ_1G)

/*
 * Arbitrary maximum size of one discard request to limit potentially long time
 * spent in blkdev_issue_discard().
 */
#define BTRFS_MAX_DISCARD_CHUNK_SIZE	(SZ_1G)

extern struct mutex uuid_mutex;

/* Fixed stripe unit used by all btrfs RAID profiles. */
#define BTRFS_STRIPE_LEN		SZ_64K
#define BTRFS_STRIPE_LEN_SHIFT		(16)
#define BTRFS_STRIPE_LEN_MASK		(BTRFS_STRIPE_LEN - 1)

static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);

/* Used by sanity check for btrfs_raid_types. */
#define const_ffs(n) (__builtin_ctzll(n) + 1)

/*
 * The conversion from BTRFS_BLOCK_GROUP_* bits to btrfs_raid_type requires
 * RAID0 always to be the lowest profile bit.
 * Although it's part of on-disk format and should never change, do extra
 * compile-time sanity checks.
 */
static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
	      const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
	      ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));

/* ilog2() can handle both constants and variables */
#define BTRFS_BG_FLAG_TO_INDEX(profile) \
	ilog2((profile) >> (ilog2(BTRFS_BLOCK_GROUP_RAID0) - 1))
/* In-memory RAID profile indices, derived from the on-disk profile bits. */
enum btrfs_raid_types {
	/* SINGLE is the special one as it doesn't have on-disk bit. */
	BTRFS_RAID_SINGLE = 0,

	/* All others map from their BTRFS_BLOCK_GROUP_* profile bit. */
	BTRFS_RAID_RAID0 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID0),
	BTRFS_RAID_RAID1 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1),
	BTRFS_RAID_DUP = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_DUP),
	BTRFS_RAID_RAID10 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID10),
	BTRFS_RAID_RAID5 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID5),
	BTRFS_RAID_RAID6 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID6),
	BTRFS_RAID_RAID1C3 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1C3),
	BTRFS_RAID_RAID1C4 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1C4),
	BTRFS_NR_RAID_TYPES
};
/*
 * Use sequence counter to get consistent device stat data on
 * 32-bit processors.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __BTRFS_NEED_DEVICE_DATA_ORDERED
#define btrfs_device_data_ordered_init(device)	\
	seqcount_init(&device->data_seqcount)
#else
/* 64-bit or UP: no seqcount needed, initializer is a no-op. */
#define btrfs_device_data_ordered_init(device) do { } while (0)
#endif

/* Bit numbers for btrfs_device::dev_state. */
#define BTRFS_DEV_STATE_WRITEABLE	(0)
#define BTRFS_DEV_STATE_IN_FS_METADATA	(1)
#define BTRFS_DEV_STATE_MISSING		(2)
#define BTRFS_DEV_STATE_REPLACE_TGT	(3)
#define BTRFS_DEV_STATE_FLUSH_SENT	(4)
#define BTRFS_DEV_STATE_NO_READA	(5)

/* Special value encoding failure to write primary super block. */
#define BTRFS_SUPER_PRIMARY_WRITE_ERROR	(INT_MAX / 2)
struct btrfs_fs_devices;

/* One physical (or replace-target) device of a filesystem. */
struct btrfs_device {
	/* Membership in btrfs_fs_devices::devices, under device_list_mutex. */
	struct list_head dev_list;
	/* Membership in btrfs_fs_devices::alloc_list, under chunk mutex. */
	struct list_head dev_alloc_list;
	/* Protected by chunk mutex. */
	struct list_head post_commit_list;
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_fs_info *fs_info;

	/* Device name/path, accessed via rcu_str_deref() (see btrfs_dev_name()). */
	struct rcu_string __rcu *name;

	u64 generation;

	struct file *bdev_file;
	struct block_device *bdev;

	/* Zone information, presumably non-NULL only on zoned devices. */
	struct btrfs_zoned_device_info *zone_info;

	/*
	 * Device's major-minor number. Must be set even if the device is not
	 * opened (bdev == NULL), unless the device is missing.
	 */
	dev_t devt;

	/* BTRFS_DEV_STATE_* bits. */
	unsigned long dev_state;
	blk_status_t last_flush_error;

#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
	/* Guards torn reads of the 64-bit size fields on 32-bit SMP. */
	seqcount_t data_seqcount;
#endif

	/* The internal btrfs device id. */
	u64 devid;

	/* Size of the device in memory. */
	u64 total_bytes;

	/* Size of the device on disk. */
	u64 disk_total_bytes;

	/* Bytes used. */
	u64 bytes_used;

	/* Optimal io alignment for this device. */
	u32 io_align;

	/* Optimal io width for this device. */
	u32 io_width;

	/* Type and info about this device. */
	u64 type;

	/*
	 * Counter of super block write errors, values larger than
	 * BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write failure.
	 */
	atomic_t sb_write_errors;

	/* Minimal io size for this device. */
	u32 sector_size;

	/* Physical drive uuid (or lvm uuid). */
	u8 uuid[BTRFS_UUID_SIZE];

	/*
	 * Size of the device on the current transaction.
	 *
	 * This variant is updated when committing the transaction,
	 * and protected by chunk mutex.
	 */
	u64 commit_total_bytes;

	/* Bytes used on the current transaction. */
	u64 commit_bytes_used;

	/* Bio used for flushing device barriers. */
	struct bio flush_bio;
	struct completion flush_wait;

	/* Per-device scrub information. */
	struct scrub_ctx *scrub_ctx;

	/*
	 * Disk I/O failure stats. For detailed description refer to
	 * enum btrfs_dev_stat_values in ioctl.h.
	 */
	int dev_stats_valid;

	/* Counter to record the change of device stats. */
	atomic_t dev_stats_ccnt;
	atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];

	struct extent_io_tree alloc_state;

	struct completion kobj_unregister;
	/* For sysfs/FSID/devinfo/devid/ */
	struct kobject devid_kobj;

	/* Bandwidth limit for scrub, in bytes. */
	u64 scrub_speed_max;
};
/*
 * Block group or device which contains an active swapfile. Used for preventing
 * unsafe operations while a swapfile is active.
 *
 * These are sorted on (ptr, inode) (note that a block group or device can
 * contain more than one swapfile). We compare the pointer values because we
 * don't actually care what the object is, we just need a quick check whether
 * the object exists in the rbtree.
 */
struct btrfs_swapfile_pin {
	struct rb_node node;
	void *ptr;
	struct inode *inode;
	/*
	 * If true, ptr points to a struct btrfs_block_group. Otherwise, ptr
	 * points to a struct btrfs_device.
	 */
	bool is_block_group;
	/*
	 * Only used when 'is_block_group' is true and it is the number of
	 * extents used by a swapfile for this block group ('ptr' field).
	 */
	int bg_extent_count;
};
/*
 * If we read those variants at the context of their own lock, we needn't
 * use the following helpers, reading them directly is safe.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
/*
 * 32-bit SMP: the getter retries around the seqcount so it never observes a
 * torn 64-bit value; the setter publishes inside the write-side seqcount with
 * preemption disabled.
 */
#define BTRFS_DEVICE_GETSET_FUNCS(name)					\
static inline u64							\
btrfs_device_get_##name(const struct btrfs_device *dev)			\
{									\
	u64 size;							\
	unsigned int seq;						\
									\
	do {								\
		seq = read_seqcount_begin(&dev->data_seqcount);		\
		size = dev->name;					\
	} while (read_seqcount_retry(&dev->data_seqcount, seq));	\
	return size;							\
}									\
									\
static inline void							\
btrfs_device_set_##name(struct btrfs_device *dev, u64 size)		\
{									\
	preempt_disable();						\
	write_seqcount_begin(&dev->data_seqcount);			\
	dev->name = size;						\
	write_seqcount_end(&dev->data_seqcount);			\
	preempt_enable();						\
}
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
/* 32-bit preemptible UP: disabling preemption is enough to avoid tearing. */
#define BTRFS_DEVICE_GETSET_FUNCS(name)					\
static inline u64							\
btrfs_device_get_##name(const struct btrfs_device *dev)			\
{									\
	u64 size;							\
									\
	preempt_disable();						\
	size = dev->name;						\
	preempt_enable();						\
	return size;							\
}									\
									\
static inline void							\
btrfs_device_set_##name(struct btrfs_device *dev, u64 size)		\
{									\
	preempt_disable();						\
	dev->name = size;						\
	preempt_enable();						\
}
#else
/* 64-bit (or 32-bit non-preempt UP): plain accesses need no extra ordering. */
#define BTRFS_DEVICE_GETSET_FUNCS(name)					\
static inline u64							\
btrfs_device_get_##name(const struct btrfs_device *dev)			\
{									\
	return dev->name;						\
}									\
									\
static inline void							\
btrfs_device_set_##name(struct btrfs_device *dev, u64 size)		\
{									\
	dev->name = size;						\
}
#endif

BTRFS_DEVICE_GETSET_FUNCS(total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
/* How dev extents are picked when allocating a new chunk. */
enum btrfs_chunk_allocation_policy {
	BTRFS_CHUNK_ALLOC_REGULAR,
	BTRFS_CHUNK_ALLOC_ZONED,
};

/*
 * Read policies for mirrored block group profiles, read picks the stripe based
 * on these policies.
 */
enum btrfs_read_policy {
	/* Use process PID to choose the stripe */
	BTRFS_READ_POLICY_PID,
	BTRFS_NR_READ_POLICY,
};

#ifdef CONFIG_BTRFS_DEBUG
/*
 * Checksum mode - offload it to workqueues or do it synchronously in
 * btrfs_submit_chunk().
 */
enum btrfs_offload_csum_mode {
	/*
	 * Choose offloading checksum or do it synchronously automatically.
	 * Do it synchronously if the checksum is fast, or offload to workqueues
	 * otherwise.
	 */
	BTRFS_OFFLOAD_CSUM_AUTO,
	/* Always offload checksum to workqueues. */
	BTRFS_OFFLOAD_CSUM_FORCE_ON,
	/* Never offload checksum to workqueues. */
	BTRFS_OFFLOAD_CSUM_FORCE_OFF,
};
#endif
/* All devices belonging to one filesystem (one fsid). */
struct btrfs_fs_devices {
	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */

	/*
	 * UUID written into the btree blocks:
	 *
	 * - If metadata_uuid != fsid then super block must have
	 *   BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag set.
	 *
	 * - Following shall be true at all times:
	 *   - metadata_uuid == btrfs_header::fsid
	 *   - metadata_uuid == btrfs_dev_item::fsid
	 *
	 * - Relations between fsid and metadata_uuid in sb and fs_devices:
	 *   - Normal:
	 *       fs_devices->fsid == fs_devices->metadata_uuid == sb->fsid
	 *       sb->metadata_uuid == 0
	 *
	 *   - When the BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag is set:
	 *       fs_devices->fsid == sb->fsid
	 *       fs_devices->metadata_uuid == sb->metadata_uuid
	 *
	 *   - When in-memory fs_devices->temp_fsid is true
	 *       fs_devices->fsid = random
	 *       fs_devices->metadata_uuid == sb->fsid
	 */
	u8 metadata_uuid[BTRFS_FSID_SIZE];

	struct list_head fs_list;

	/*
	 * Number of devices under this fsid including missing and
	 * replace-target device and excludes seed devices.
	 */
	u64 num_devices;

	/*
	 * The number of devices that successfully opened, including
	 * replace-target, excludes seed devices.
	 */
	u64 open_devices;

	/* The number of devices that are under the chunk allocation list. */
	u64 rw_devices;

	/* Count of missing devices under this fsid excluding seed device. */
	u64 missing_devices;
	u64 total_rw_bytes;

	/*
	 * Count of devices from btrfs_super_block::num_devices for this fsid,
	 * which includes the seed device, excludes the transient replace-target
	 * device.
	 */
	u64 total_devices;

	/* Highest generation number of seen devices */
	u64 latest_generation;

	/*
	 * The mount device or a device with highest generation after removal
	 * or replace.
	 */
	struct btrfs_device *latest_dev;

	/*
	 * All of the devices in the filesystem, protected by a mutex so we can
	 * safely walk it to write out the super blocks without worrying about
	 * adding/removing by the multi-device code. Scrubbing super block can
	 * kick off supers writing by holding this mutex lock.
	 */
	struct mutex device_list_mutex;

	/* List of all devices, protected by device_list_mutex */
	struct list_head devices;

	/* Devices which can satisfy space allocation. Protected by chunk_mutex. */
	struct list_head alloc_list;

	struct list_head seed_list;

	/* Count fs-devices opened. */
	int opened;

	/* Set when we find or add a device that doesn't have the nonrot flag set. */
	bool rotating;
	/* Devices support TRIM/discard commands. */
	bool discardable;
	/* The filesystem is a seed filesystem. */
	bool seeding;
	/* The mount needs to use a randomly generated fsid. */
	bool temp_fsid;

	struct btrfs_fs_info *fs_info;

	/* sysfs kobjects */
	struct kobject fsid_kobj;
	struct kobject *devices_kobj;
	struct kobject *devinfo_kobj;
	struct completion kobj_unregister;

	enum btrfs_chunk_allocation_policy chunk_alloc_policy;

	/* Policy used to read the mirrored stripes. */
	enum btrfs_read_policy read_policy;

#ifdef CONFIG_BTRFS_DEBUG
	/* Checksum mode - offload it or do it synchronously. */
	enum btrfs_offload_csum_mode offload_csum_mode;
#endif
};
/*
 * Maximum number of stripes that fit into one chunk item (leaf item size
 * minus the chunk header), and into the fixed-size system chunk array in
 * the super block.
 */
#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)	\
			- sizeof(struct btrfs_chunk))		\
			/ sizeof(struct btrfs_stripe) + 1)

#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE	\
				- 2 * sizeof(struct btrfs_disk_key)	\
				- 2 * sizeof(struct btrfs_chunk))	\
				/ sizeof(struct btrfs_stripe) + 1)

/* One physical stripe of a logical->physical mapping. */
struct btrfs_io_stripe {
	struct btrfs_device *dev;
	/* Block mapping. */
	u64 physical;
	u64 length;
	bool rst_search_commit_root;
	/* For the endio handler. */
	struct btrfs_io_context *bioc;
};

/* One physical range on one device, target of a discard request. */
struct btrfs_discard_stripe {
	struct btrfs_device *dev;
	u64 physical;
	u64 length;
};
/*
 * Context for IO submission for device stripe.
 *
 * - Track the unfinished mirrors for mirror based profiles
 *   Mirror based profiles are SINGLE/DUP/RAID1/RAID10.
 *
 * - Contain the logical -> physical mapping info
 *   Used by submit_stripe_bio() for mapping logical bio
 *   into physical device address.
 *
 * - Contain device replace info
 *   Used by handle_ops_on_dev_replace() to copy logical bios
 *   into the new device.
 *
 * - Contain RAID56 full stripe logical bytenrs
 */
struct btrfs_io_context {
	refcount_t refs;
	struct btrfs_fs_info *fs_info;
	/* Taken from struct btrfs_chunk_map::type. */
	u64 map_type;
	struct bio *orig_bio;
	atomic_t error;
	u16 max_errors;

	u64 logical;
	u64 size;
	/* Raid stripe tree ordered entry. */
	struct list_head rst_ordered_entry;

	/*
	 * The total number of stripes, including the extra duplicated
	 * stripe for replace.
	 */
	u16 num_stripes;

	/*
	 * The mirror_num of this bioc.
	 *
	 * This is for reads which use 0 as mirror_num, thus we should return a
	 * valid mirror_num (>0) for the reader.
	 */
	u16 mirror_num;

	/*
	 * The following two members are for dev-replace case only.
	 *
	 * @replace_nr_stripes:	Number of duplicated stripes which need to be
	 *			written to replace target.
	 *			Should be <= 2 (2 for DUP, otherwise <= 1).
	 * @replace_stripe_src:	The array indicates where the duplicated stripes
	 *			are from.
	 *
	 * The @replace_stripe_src[] array is mostly for RAID56 cases.
	 * As non-RAID56 stripes share the same contents of the mapped range,
	 * thus no need to bother where the duplicated ones are from.
	 *
	 * But for RAID56 case, all stripes contain different contents, thus
	 * we need a way to know the mapping.
	 *
	 * There is an example for the two members, using a RAID5 write:
	 *
	 *   num_stripes:	4 (3 + 1 duplicated write)
	 *   stripes[0]:	dev = devid 1, physical = X
	 *   stripes[1]:	dev = devid 2, physical = Y
	 *   stripes[2]:	dev = devid 3, physical = Z
	 *   stripes[3]:	dev = devid 0, physical = Y
	 *
	 *   replace_nr_stripes = 1
	 *   replace_stripe_src = 1	<- Means stripes[1] is involved in replace.
	 *				   The duplicated stripe index would be
	 *				   (@num_stripes - 1).
	 *
	 * Note, that we can still have cases replace_nr_stripes = 2 for DUP.
	 * In that case, all stripes share the same content, thus we don't
	 * need to bother @replace_stripe_src value at all.
	 */
	u16 replace_nr_stripes;
	s16 replace_stripe_src;

	/*
	 * Logical bytenr of the full stripe start, only for RAID56 cases.
	 *
	 * When this value is set to other than (u64)-1, the stripes[] should
	 * follow this pattern:
	 *
	 * (real_stripes = num_stripes - replace_nr_stripes)
	 * (data_stripes = (is_raid6) ? (real_stripes - 2) : (real_stripes - 1))
	 *
	 * stripes[0]:			The first data stripe
	 * stripes[1]:			The second data stripe
	 * ...
	 * stripes[data_stripes - 1]:	The last data stripe
	 * stripes[data_stripes]:	The P stripe
	 * stripes[data_stripes + 1]:	The Q stripe (only for RAID6).
	 */
	u64 full_stripe_logical;

	struct btrfs_io_stripe stripes[];
};
/* Per-device availability info collected when creating a new chunk. */
struct btrfs_device_info {
	struct btrfs_device *dev;
	u64 dev_offset;
	u64 max_avail;
	u64 total_avail;
};

/* Static attributes of one RAID profile, see btrfs_raid_array[]. */
struct btrfs_raid_attr {
	u8 sub_stripes;		/* sub_stripes info for map */
	u8 dev_stripes;		/* stripes per dev */
	u8 devs_max;		/* max devs to use */
	u8 devs_min;		/* min devs needed */
	u8 tolerated_failures;	/* max tolerated fail devs */
	u8 devs_increment;	/* ndevs has to be a multiple of this */
	u8 ncopies;		/* how many copies to data has */
	u8 nparity;		/* number of stripes worth of bytes to store
				 * parity information */
	u8 mindev_error;	/* error code if min devs requisite is unmet */
	const char raid_name[8]; /* name of the raid */
	u64 bg_flag;		/* block group flag of the raid */
};

extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];

/* In-memory description of one chunk, kept in an rbtree keyed by @start. */
struct btrfs_chunk_map {
	struct rb_node rb_node;
	/* For mount time dev extent verification. */
	int verified_stripes;
	refcount_t refs;
	u64 start;
	u64 chunk_len;
	u64 stripe_size;
	u64 type;
	int io_align;
	int io_width;
	int num_stripes;
	int sub_stripes;
	struct btrfs_io_stripe stripes[];
};

/* Allocation size of a map with @n trailing stripes. */
#define btrfs_chunk_map_size(n) (sizeof(struct btrfs_chunk_map) + \
				 (sizeof(struct btrfs_io_stripe) * (n)))
  522. static inline void btrfs_free_chunk_map(struct btrfs_chunk_map *map)
  523. {
  524. if (map && refcount_dec_and_test(&map->refs)) {
  525. ASSERT(RB_EMPTY_NODE(&map->rb_node));
  526. kfree(map);
  527. }
  528. }
/* Balance filters per chunk type, plus overall flags and progress. */
struct btrfs_balance_control {
	struct btrfs_balance_args data;
	struct btrfs_balance_args meta;
	struct btrfs_balance_args sys;

	u64 flags;

	struct btrfs_balance_progress stat;
};

/*
 * Search for a given device by the set parameters
 */
struct btrfs_dev_lookup_args {
	u64 devid;
	u8 *uuid;
	u8 *fsid;
	bool missing;
};

/* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */
#define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 }
#define BTRFS_DEV_LOOKUP_ARGS(name) \
	struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT

/* Direction of a block mapping request, see btrfs_map_block(). */
enum btrfs_map_op {
	BTRFS_MAP_READ,
	BTRFS_MAP_WRITE,
	BTRFS_MAP_GET_READ_MIRRORS,
};
  554. static inline enum btrfs_map_op btrfs_op(struct bio *bio)
  555. {
  556. switch (bio_op(bio)) {
  557. case REQ_OP_WRITE:
  558. case REQ_OP_ZONE_APPEND:
  559. return BTRFS_MAP_WRITE;
  560. default:
  561. WARN_ON_ONCE(1);
  562. fallthrough;
  563. case REQ_OP_READ:
  564. return BTRFS_MAP_READ;
  565. }
  566. }
  567. static inline unsigned long btrfs_chunk_item_size(int num_stripes)
  568. {
  569. ASSERT(num_stripes);
  570. return sizeof(struct btrfs_chunk) +
  571. sizeof(struct btrfs_stripe) * (num_stripes - 1);
  572. }
  573. /*
  574. * Do the type safe conversion from stripe_nr to offset inside the chunk.
  575. *
  576. * @stripe_nr is u32, with left shift it can overflow u32 for chunks larger
  577. * than 4G. This does the proper type cast to avoid overflow.
  578. */
  579. static inline u64 btrfs_stripe_nr_to_offset(u32 stripe_nr)
  580. {
  581. return (u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT;
  582. }
/* btrfs_io_context refcounting. */
void btrfs_get_bioc(struct btrfs_io_context *bioc);
void btrfs_put_bioc(struct btrfs_io_context *bioc);

/* Logical -> physical block mapping. */
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
		    u64 logical, u64 *length,
		    struct btrfs_io_context **bioc_ret,
		    struct btrfs_io_stripe *smap, int *mirror_num_ret);
int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
			   struct btrfs_io_stripe *smap, u64 logical,
			   u32 length, int mirror_num);
struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
					       u64 logical, u64 *length_ret,
					       u32 *num_stripes);

/* Chunk tree reading and chunk lifecycle. */
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
					     u64 type);
void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info);

/* Device scanning, opening and closing. */
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       blk_mode_t flags, void *holder);
struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
					   bool mount_arg_dev);
int btrfs_forget_devices(dev_t devt);
void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
void btrfs_assign_next_active_device(struct btrfs_device *device,
				     struct btrfs_device *this_dev);

/* Device lookup, allocation and removal. */
struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
						  u64 devid,
						  const char *devpath);
int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
				 struct btrfs_dev_lookup_args *args,
				 const char *path);
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
					const u64 *devid, const u8 *uuid,
					const char *path);
void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
		    struct btrfs_dev_lookup_args *args,
		    struct file **bdev_file);
void __exit btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
		      struct btrfs_device *device, u64 new_size);
struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices,
				       const struct btrfs_dev_lookup_args *args);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);

/* Balance operations. */
int btrfs_balance(struct btrfs_fs_info *fs_info,
		  struct btrfs_balance_control *bctl,
		  struct btrfs_ioctl_balance_args *bargs);
void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf);
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);

/* Device statistics. */
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
			struct btrfs_ioctl_get_dev_stats *stats);
int btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);

/* Device replace teardown. */
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);

/* RAID geometry helpers and chunk item manipulation. */
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
			   u64 logical, u64 len);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
				    u64 logical);
u64 btrfs_calc_stripe_length(const struct btrfs_chunk_map *map);
int btrfs_nr_parity_stripes(u64 type);
int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
				     struct btrfs_block_group *bg);
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_chunk_map *btrfs_alloc_chunk_map(int num_stripes, gfp_t gfp);
int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map);
#endif

/* Chunk map lookup. */
struct btrfs_chunk_map *btrfs_find_chunk_map(struct btrfs_fs_info *fs_info,
					     u64 logical, u64 length);
struct btrfs_chunk_map *btrfs_find_chunk_map_nolock(struct btrfs_fs_info *fs_info,
						    u64 logical, u64 length);
struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
					    u64 logical, u64 length);
void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map);
void btrfs_release_disk_super(struct btrfs_super_block *super);
/*
 * Increment one device stat counter and bump the change counter so the
 * update gets noticed and persisted by btrfs_run_dev_stats().
 */
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
				      int index)
{
	atomic_inc(dev->dev_stat_values + index);
	/*
	 * This memory barrier orders stores updating statistics before stores
	 * updating dev_stats_ccnt.
	 *
	 * It pairs with smp_rmb() in btrfs_run_dev_stats().
	 */
	smp_mb__before_atomic();
	atomic_inc(&dev->dev_stats_ccnt);
}
  683. static inline int btrfs_dev_stat_read(struct btrfs_device *dev,
  684. int index)
  685. {
  686. return atomic_read(dev->dev_stat_values + index);
  687. }
/*
 * Atomically read and zero one device stat counter, bumping the change
 * counter so the reset gets persisted by btrfs_run_dev_stats().
 */
static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
						int index)
{
	int ret;

	ret = atomic_xchg(dev->dev_stat_values + index, 0);
	/*
	 * atomic_xchg implies a full memory barrier as per atomic_t.txt:
	 * - RMW operations that have a return value are fully ordered;
	 *
	 * This implicit memory barrier is paired with the smp_rmb in
	 * btrfs_run_dev_stats().
	 */
	atomic_inc(&dev->dev_stats_ccnt);
	return ret;
}
/*
 * Overwrite one device stat counter with @val and bump the change counter
 * so the new value gets written out by btrfs_run_dev_stats().
 */
static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
				      int index, unsigned long val)
{
	atomic_set(dev->dev_stat_values + index, val);
	/*
	 * This memory barrier orders stores updating statistics before stores
	 * updating dev_stats_ccnt.
	 *
	 * It pairs with smp_rmb() in btrfs_run_dev_stats().
	 */
	smp_mb__before_atomic();
	atomic_inc(&dev->dev_stats_ccnt);
}
  716. static inline const char *btrfs_dev_name(const struct btrfs_device *device)
  717. {
  718. if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
  719. return "<missing disk>";
  720. else
  721. return rcu_str_deref(device->name);
  722. }
/* Commit per-device size changes at transaction commit time. */
void btrfs_commit_device_sizes(struct btrfs_transaction *trans);

struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
			       struct btrfs_device *failing_dev);
void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device);

/* Block group flag <-> RAID profile translation helpers. */
enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags);
int btrfs_bg_type_to_factor(u64 flags);
const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);

bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb);

#endif