disk-io.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100
  1. // SPDX-License-Identifier: GPL-2.0+
  2. #include <common.h>
  3. #include <fs_internal.h>
  4. #include <log.h>
  5. #include <uuid.h>
  6. #include <memalign.h>
  7. #include "kernel-shared/btrfs_tree.h"
  8. #include "common/rbtree-utils.h"
  9. #include "disk-io.h"
  10. #include "ctree.h"
  11. #include "btrfs.h"
  12. #include "volumes.h"
  13. #include "extent-io.h"
  14. #include "crypto/hash.h"
  15. /* specified errno for check_tree_block */
  16. #define BTRFS_BAD_BYTENR (-1)
  17. #define BTRFS_BAD_FSID (-2)
  18. #define BTRFS_BAD_LEVEL (-3)
  19. #define BTRFS_BAD_NRITEMS (-4)
  20. /* Calculate max possible nritems for a leaf/node */
  21. static u32 max_nritems(u8 level, u32 nodesize)
  22. {
  23. if (level == 0)
  24. return ((nodesize - sizeof(struct btrfs_header)) /
  25. sizeof(struct btrfs_item));
  26. return ((nodesize - sizeof(struct btrfs_header)) /
  27. sizeof(struct btrfs_key_ptr));
  28. }
  29. static int check_tree_block(struct btrfs_fs_info *fs_info,
  30. struct extent_buffer *buf)
  31. {
  32. struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
  33. u32 nodesize = fs_info->nodesize;
  34. bool fsid_match = false;
  35. int ret = BTRFS_BAD_FSID;
  36. if (buf->start != btrfs_header_bytenr(buf))
  37. return BTRFS_BAD_BYTENR;
  38. if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
  39. return BTRFS_BAD_LEVEL;
  40. if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
  41. nodesize))
  42. return BTRFS_BAD_NRITEMS;
  43. /* Only leaf can be empty */
  44. if (btrfs_header_nritems(buf) == 0 &&
  45. btrfs_header_level(buf) != 0)
  46. return BTRFS_BAD_NRITEMS;
  47. while (fs_devices) {
  48. /*
  49. * Checking the incompat flag is only valid for the current
  50. * fs. For seed devices it's forbidden to have their uuid
  51. * changed so reading ->fsid in this case is fine
  52. */
  53. if (fs_devices == fs_info->fs_devices &&
  54. btrfs_fs_incompat(fs_info, METADATA_UUID))
  55. fsid_match = !memcmp_extent_buffer(buf,
  56. fs_devices->metadata_uuid,
  57. btrfs_header_fsid(),
  58. BTRFS_FSID_SIZE);
  59. else
  60. fsid_match = !memcmp_extent_buffer(buf,
  61. fs_devices->fsid,
  62. btrfs_header_fsid(),
  63. BTRFS_FSID_SIZE);
  64. if (fsid_match) {
  65. ret = 0;
  66. break;
  67. }
  68. fs_devices = fs_devices->seed;
  69. }
  70. return ret;
  71. }
  72. static void print_tree_block_error(struct btrfs_fs_info *fs_info,
  73. struct extent_buffer *eb,
  74. int err)
  75. {
  76. char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
  77. char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
  78. u8 buf[BTRFS_UUID_SIZE];
  79. if (!err)
  80. return;
  81. fprintf(stderr, "bad tree block %llu, ", eb->start);
  82. switch (err) {
  83. case BTRFS_BAD_FSID:
  84. read_extent_buffer(eb, buf, btrfs_header_fsid(),
  85. BTRFS_UUID_SIZE);
  86. uuid_unparse(buf, found_uuid);
  87. uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
  88. fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
  89. fs_uuid, found_uuid);
  90. break;
  91. case BTRFS_BAD_BYTENR:
  92. fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
  93. eb->start, btrfs_header_bytenr(eb));
  94. break;
  95. case BTRFS_BAD_LEVEL:
  96. fprintf(stderr, "bad level, %u > %d\n",
  97. btrfs_header_level(eb), BTRFS_MAX_LEVEL);
  98. break;
  99. case BTRFS_BAD_NRITEMS:
  100. fprintf(stderr, "invalid nr_items: %u\n",
  101. btrfs_header_nritems(eb));
  102. break;
  103. }
  104. }
  105. int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
  106. {
  107. memset(out, 0, BTRFS_CSUM_SIZE);
  108. switch (csum_type) {
  109. case BTRFS_CSUM_TYPE_CRC32:
  110. return hash_crc32c(data, len, out);
  111. case BTRFS_CSUM_TYPE_XXHASH:
  112. return hash_xxhash(data, len, out);
  113. case BTRFS_CSUM_TYPE_SHA256:
  114. return hash_sha256(data, len, out);
  115. case BTRFS_CSUM_TYPE_BLAKE2:
  116. return hash_blake2(data, len, out);
  117. default:
  118. printf("Unknown csum type %d\n", csum_type);
  119. return -EINVAL;
  120. }
  121. }
  122. /*
  123. * Check if the super is valid:
  124. * - nodesize/sectorsize - minimum, maximum, alignment
  125. * - tree block starts - alignment
  126. * - number of devices - something sane
  127. * - sys array size - maximum
  128. */
  129. static int btrfs_check_super(struct btrfs_super_block *sb)
  130. {
  131. u8 result[BTRFS_CSUM_SIZE];
  132. u16 csum_type;
  133. int csum_size;
  134. u8 *metadata_uuid;
  135. if (btrfs_super_magic(sb) != BTRFS_MAGIC)
  136. return -EIO;
  137. csum_type = btrfs_super_csum_type(sb);
  138. if (csum_type >= btrfs_super_num_csums()) {
  139. error("unsupported checksum algorithm %u", csum_type);
  140. return -EIO;
  141. }
  142. csum_size = btrfs_super_csum_size(sb);
  143. btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
  144. result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
  145. if (memcmp(result, sb->csum, csum_size)) {
  146. error("superblock checksum mismatch");
  147. return -EIO;
  148. }
  149. if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
  150. error("tree_root level too big: %d >= %d",
  151. btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
  152. goto error_out;
  153. }
  154. if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
  155. error("chunk_root level too big: %d >= %d",
  156. btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
  157. goto error_out;
  158. }
  159. if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
  160. error("log_root level too big: %d >= %d",
  161. btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
  162. goto error_out;
  163. }
  164. if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
  165. error("tree_root block unaligned: %llu", btrfs_super_root(sb));
  166. goto error_out;
  167. }
  168. if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
  169. error("chunk_root block unaligned: %llu",
  170. btrfs_super_chunk_root(sb));
  171. goto error_out;
  172. }
  173. if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
  174. error("log_root block unaligned: %llu",
  175. btrfs_super_log_root(sb));
  176. goto error_out;
  177. }
  178. if (btrfs_super_nodesize(sb) < 4096) {
  179. error("nodesize too small: %u < 4096",
  180. btrfs_super_nodesize(sb));
  181. goto error_out;
  182. }
  183. if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
  184. error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
  185. goto error_out;
  186. }
  187. if (btrfs_super_sectorsize(sb) < 4096) {
  188. error("sectorsize too small: %u < 4096",
  189. btrfs_super_sectorsize(sb));
  190. goto error_out;
  191. }
  192. if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
  193. error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
  194. goto error_out;
  195. }
  196. if (btrfs_super_total_bytes(sb) == 0) {
  197. error("invalid total_bytes 0");
  198. goto error_out;
  199. }
  200. if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
  201. error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
  202. goto error_out;
  203. }
  204. if ((btrfs_super_stripesize(sb) != 4096)
  205. && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
  206. error("invalid stripesize %u", btrfs_super_stripesize(sb));
  207. goto error_out;
  208. }
  209. if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
  210. metadata_uuid = sb->metadata_uuid;
  211. else
  212. metadata_uuid = sb->fsid;
  213. if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
  214. char fsid[BTRFS_UUID_UNPARSED_SIZE];
  215. char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
  216. uuid_unparse(sb->metadata_uuid, fsid);
  217. uuid_unparse(sb->dev_item.fsid, dev_fsid);
  218. error("dev_item UUID does not match fsid: %s != %s",
  219. dev_fsid, fsid);
  220. goto error_out;
  221. }
  222. /*
  223. * Hint to catch really bogus numbers, bitflips or so
  224. */
  225. if (btrfs_super_num_devices(sb) > (1UL << 31)) {
  226. error("suspicious number of devices: %llu",
  227. btrfs_super_num_devices(sb));
  228. }
  229. if (btrfs_super_num_devices(sb) == 0) {
  230. error("number of devices is 0");
  231. goto error_out;
  232. }
  233. /*
  234. * Obvious sys_chunk_array corruptions, it must hold at least one key
  235. * and one chunk
  236. */
  237. if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
  238. error("system chunk array too big %u > %u",
  239. btrfs_super_sys_array_size(sb),
  240. BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
  241. goto error_out;
  242. }
  243. if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
  244. + sizeof(struct btrfs_chunk)) {
  245. error("system chunk array too small %u < %zu",
  246. btrfs_super_sys_array_size(sb),
  247. sizeof(struct btrfs_disk_key) +
  248. sizeof(struct btrfs_chunk));
  249. goto error_out;
  250. }
  251. return 0;
  252. error_out:
  253. error("superblock checksum matches but it has invalid members");
  254. return -EIO;
  255. }
  256. /*
  257. * btrfs_read_dev_super - read a valid primary superblock from a block device
  258. * @desc,@part: file descriptor of the device
  259. * @sb: buffer where the superblock is going to be read in
  260. *
  261. * Unlike the btrfs-progs/kernel version, here we ony care about the first
  262. * super block, thus it's much simpler.
  263. */
  264. int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
  265. struct btrfs_super_block *sb)
  266. {
  267. ALLOC_CACHE_ALIGN_BUFFER(char, tmp, BTRFS_SUPER_INFO_SIZE);
  268. struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
  269. int ret;
  270. ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
  271. BTRFS_SUPER_INFO_OFFSET);
  272. if (ret < BTRFS_SUPER_INFO_SIZE)
  273. return -EIO;
  274. if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
  275. return -EIO;
  276. if (btrfs_check_super(buf))
  277. return -EIO;
  278. memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
  279. return 0;
  280. }
  281. static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
  282. int verify, int silent, u16 csum_type)
  283. {
  284. u8 result[BTRFS_CSUM_SIZE];
  285. u32 len;
  286. len = buf->len - BTRFS_CSUM_SIZE;
  287. btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
  288. result, len);
  289. if (verify) {
  290. if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
  291. /* FIXME: format */
  292. if (!silent)
  293. printk("checksum verify failed on %llu found %08X wanted %08X\n",
  294. (unsigned long long)buf->start,
  295. result[0],
  296. buf->data[0]);
  297. return 1;
  298. }
  299. } else {
  300. write_extent_buffer(buf, result, 0, csum_size);
  301. }
  302. return 0;
  303. }
  304. int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
  305. u16 csum_type)
  306. {
  307. return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
  308. }
  309. static int csum_tree_block(struct btrfs_fs_info *fs_info,
  310. struct extent_buffer *buf, int verify)
  311. {
  312. u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
  313. u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
  314. return csum_tree_block_size(buf, csum_size, verify, csum_type);
  315. }
  316. struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
  317. u64 bytenr, u32 blocksize)
  318. {
  319. return find_extent_buffer(&fs_info->extent_cache,
  320. bytenr, blocksize);
  321. }
  322. struct extent_buffer* btrfs_find_create_tree_block(
  323. struct btrfs_fs_info *fs_info, u64 bytenr)
  324. {
  325. return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
  326. }
  327. static int verify_parent_transid(struct extent_io_tree *io_tree,
  328. struct extent_buffer *eb, u64 parent_transid,
  329. int ignore)
  330. {
  331. int ret;
  332. if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
  333. return 0;
  334. if (extent_buffer_uptodate(eb) &&
  335. btrfs_header_generation(eb) == parent_transid) {
  336. ret = 0;
  337. goto out;
  338. }
  339. printk("parent transid verify failed on %llu wanted %llu found %llu\n",
  340. (unsigned long long)eb->start,
  341. (unsigned long long)parent_transid,
  342. (unsigned long long)btrfs_header_generation(eb));
  343. if (ignore) {
  344. eb->flags |= EXTENT_BAD_TRANSID;
  345. printk("Ignoring transid failure\n");
  346. return 0;
  347. }
  348. ret = 1;
  349. out:
  350. clear_extent_buffer_uptodate(eb);
  351. return ret;
  352. }
  353. int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
  354. {
  355. unsigned long offset = 0;
  356. struct btrfs_multi_bio *multi = NULL;
  357. struct btrfs_device *device;
  358. int ret = 0;
  359. u64 read_len;
  360. unsigned long bytes_left = eb->len;
  361. while (bytes_left) {
  362. read_len = bytes_left;
  363. device = NULL;
  364. ret = btrfs_map_block(info, READ, eb->start + offset,
  365. &read_len, &multi, mirror, NULL);
  366. if (ret) {
  367. printk("Couldn't map the block %Lu\n", eb->start + offset);
  368. kfree(multi);
  369. return -EIO;
  370. }
  371. device = multi->stripes[0].dev;
  372. if (!device->desc || !device->part) {
  373. kfree(multi);
  374. return -EIO;
  375. }
  376. if (read_len > bytes_left)
  377. read_len = bytes_left;
  378. ret = read_extent_from_disk(device->desc, device->part,
  379. multi->stripes[0].physical, eb,
  380. offset, read_len);
  381. kfree(multi);
  382. multi = NULL;
  383. if (ret)
  384. return -EIO;
  385. offset += read_len;
  386. bytes_left -= read_len;
  387. }
  388. return 0;
  389. }
  390. struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
  391. u64 parent_transid)
  392. {
  393. int ret;
  394. struct extent_buffer *eb;
  395. u64 best_transid = 0;
  396. u32 sectorsize = fs_info->sectorsize;
  397. int mirror_num = 1;
  398. int good_mirror = 0;
  399. int candidate_mirror = 0;
  400. int num_copies;
  401. int ignore = 0;
  402. /*
  403. * Don't even try to create tree block for unaligned tree block
  404. * bytenr.
  405. * Such unaligned tree block will free overlapping extent buffer,
  406. * causing use-after-free bugs for fuzzed images.
  407. */
  408. if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
  409. error("tree block bytenr %llu is not aligned to sectorsize %u",
  410. bytenr, sectorsize);
  411. return ERR_PTR(-EIO);
  412. }
  413. eb = btrfs_find_create_tree_block(fs_info, bytenr);
  414. if (!eb)
  415. return ERR_PTR(-ENOMEM);
  416. if (btrfs_buffer_uptodate(eb, parent_transid))
  417. return eb;
  418. num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
  419. while (1) {
  420. ret = read_whole_eb(fs_info, eb, mirror_num);
  421. if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
  422. check_tree_block(fs_info, eb) == 0 &&
  423. verify_parent_transid(&fs_info->extent_cache, eb,
  424. parent_transid, ignore) == 0) {
  425. /*
  426. * check_tree_block() is less strict to allow btrfs
  427. * check to get raw eb with bad key order and fix it.
  428. * But we still need to try to get a good copy if
  429. * possible, or bad key order can go into tools like
  430. * btrfs ins dump-tree.
  431. */
  432. if (btrfs_header_level(eb))
  433. ret = btrfs_check_node(fs_info, NULL, eb);
  434. else
  435. ret = btrfs_check_leaf(fs_info, NULL, eb);
  436. if (!ret || candidate_mirror == mirror_num) {
  437. btrfs_set_buffer_uptodate(eb);
  438. return eb;
  439. }
  440. if (candidate_mirror <= 0)
  441. candidate_mirror = mirror_num;
  442. }
  443. if (ignore) {
  444. if (candidate_mirror > 0) {
  445. mirror_num = candidate_mirror;
  446. continue;
  447. }
  448. if (check_tree_block(fs_info, eb))
  449. print_tree_block_error(fs_info, eb,
  450. check_tree_block(fs_info, eb));
  451. else
  452. fprintf(stderr, "Csum didn't match\n");
  453. ret = -EIO;
  454. break;
  455. }
  456. if (num_copies == 1) {
  457. ignore = 1;
  458. continue;
  459. }
  460. if (btrfs_header_generation(eb) > best_transid) {
  461. best_transid = btrfs_header_generation(eb);
  462. good_mirror = mirror_num;
  463. }
  464. mirror_num++;
  465. if (mirror_num > num_copies) {
  466. if (candidate_mirror > 0)
  467. mirror_num = candidate_mirror;
  468. else
  469. mirror_num = good_mirror;
  470. ignore = 1;
  471. continue;
  472. }
  473. }
  474. /*
  475. * We failed to read this tree block, it be should deleted right now
  476. * to avoid stale cache populate the cache.
  477. */
  478. free_extent_buffer(eb);
  479. return ERR_PTR(ret);
  480. }
  481. int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
  482. u64 *len, int mirror)
  483. {
  484. u64 orig_len = *len;
  485. u64 cur = logical;
  486. struct btrfs_multi_bio *multi = NULL;
  487. struct btrfs_device *device;
  488. int ret = 0;
  489. while (cur < logical + orig_len) {
  490. u64 cur_len = logical + orig_len - cur;
  491. ret = btrfs_map_block(fs_info, READ, cur, &cur_len, &multi,
  492. mirror, NULL);
  493. if (ret) {
  494. error("Couldn't map the block %llu", cur);
  495. goto err;
  496. }
  497. device = multi->stripes[0].dev;
  498. if (!device->desc || !device->part) {
  499. error("devid %llu is missing", device->devid);
  500. ret = -EIO;
  501. goto err;
  502. }
  503. ret = __btrfs_devread(device->desc, device->part,
  504. data + (cur - logical), cur_len,
  505. multi->stripes[0].physical);
  506. if (ret != cur_len) {
  507. error("read failed on devid %llu physical %llu",
  508. device->devid, multi->stripes[0].physical);
  509. ret = -EIO;
  510. goto err;
  511. }
  512. cur += cur_len;
  513. ret = 0;
  514. }
  515. err:
  516. kfree(multi);
  517. return ret;
  518. }
  519. void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
  520. u64 objectid)
  521. {
  522. root->node = NULL;
  523. root->track_dirty = 0;
  524. root->fs_info = fs_info;
  525. root->objectid = objectid;
  526. root->last_trans = 0;
  527. root->last_inode_alloc = 0;
  528. memset(&root->root_key, 0, sizeof(root->root_key));
  529. memset(&root->root_item, 0, sizeof(root->root_item));
  530. root->root_key.objectid = objectid;
  531. }
  532. static int find_and_setup_root(struct btrfs_root *tree_root,
  533. struct btrfs_fs_info *fs_info,
  534. u64 objectid, struct btrfs_root *root)
  535. {
  536. int ret;
  537. u64 generation;
  538. btrfs_setup_root(root, fs_info, objectid);
  539. ret = btrfs_find_last_root(tree_root, objectid,
  540. &root->root_item, &root->root_key);
  541. if (ret)
  542. return ret;
  543. generation = btrfs_root_generation(&root->root_item);
  544. root->node = read_tree_block(fs_info,
  545. btrfs_root_bytenr(&root->root_item), generation);
  546. if (!extent_buffer_uptodate(root->node))
  547. return -EIO;
  548. return 0;
  549. }
  550. int btrfs_free_fs_root(struct btrfs_root *root)
  551. {
  552. if (root->node)
  553. free_extent_buffer(root->node);
  554. kfree(root);
  555. return 0;
  556. }
  557. static void __free_fs_root(struct rb_node *node)
  558. {
  559. struct btrfs_root *root;
  560. root = container_of(node, struct btrfs_root, rb_node);
  561. btrfs_free_fs_root(root);
  562. }
  563. FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
  564. struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
  565. struct btrfs_key *location)
  566. {
  567. struct btrfs_root *root;
  568. struct btrfs_root *tree_root = fs_info->tree_root;
  569. struct btrfs_path *path;
  570. struct extent_buffer *l;
  571. u64 generation;
  572. int ret = 0;
  573. root = calloc(1, sizeof(*root));
  574. if (!root)
  575. return ERR_PTR(-ENOMEM);
  576. if (location->offset == (u64)-1) {
  577. ret = find_and_setup_root(tree_root, fs_info,
  578. location->objectid, root);
  579. if (ret) {
  580. free(root);
  581. return ERR_PTR(ret);
  582. }
  583. goto insert;
  584. }
  585. btrfs_setup_root(root, fs_info,
  586. location->objectid);
  587. path = btrfs_alloc_path();
  588. if (!path) {
  589. free(root);
  590. return ERR_PTR(-ENOMEM);
  591. }
  592. ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
  593. if (ret != 0) {
  594. if (ret > 0)
  595. ret = -ENOENT;
  596. goto out;
  597. }
  598. l = path->nodes[0];
  599. read_extent_buffer(l, &root->root_item,
  600. btrfs_item_ptr_offset(l, path->slots[0]),
  601. sizeof(root->root_item));
  602. memcpy(&root->root_key, location, sizeof(*location));
  603. /* If this root is already an orphan, no need to read */
  604. if (btrfs_root_refs(&root->root_item) == 0) {
  605. ret = -ENOENT;
  606. goto out;
  607. }
  608. ret = 0;
  609. out:
  610. btrfs_free_path(path);
  611. if (ret) {
  612. free(root);
  613. return ERR_PTR(ret);
  614. }
  615. generation = btrfs_root_generation(&root->root_item);
  616. root->node = read_tree_block(fs_info,
  617. btrfs_root_bytenr(&root->root_item), generation);
  618. if (!extent_buffer_uptodate(root->node)) {
  619. free(root);
  620. return ERR_PTR(-EIO);
  621. }
  622. insert:
  623. root->ref_cows = 1;
  624. return root;
  625. }
  626. static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
  627. void *data)
  628. {
  629. u64 objectid = *((u64 *)data);
  630. struct btrfs_root *root;
  631. root = rb_entry(node, struct btrfs_root, rb_node);
  632. if (objectid > root->objectid)
  633. return 1;
  634. else if (objectid < root->objectid)
  635. return -1;
  636. else
  637. return 0;
  638. }
  639. int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
  640. {
  641. struct btrfs_root *root;
  642. root = rb_entry(node2, struct btrfs_root, rb_node);
  643. return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
  644. }
  645. struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
  646. struct btrfs_key *location)
  647. {
  648. struct btrfs_root *root;
  649. struct rb_node *node;
  650. int ret;
  651. u64 objectid = location->objectid;
  652. if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
  653. return fs_info->tree_root;
  654. if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
  655. return fs_info->chunk_root;
  656. if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
  657. return fs_info->csum_root;
  658. BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID);
  659. node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
  660. btrfs_fs_roots_compare_objectids, NULL);
  661. if (node)
  662. return container_of(node, struct btrfs_root, rb_node);
  663. root = btrfs_read_fs_root_no_cache(fs_info, location);
  664. if (IS_ERR(root))
  665. return root;
  666. ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
  667. btrfs_fs_roots_compare_roots);
  668. BUG_ON(ret);
  669. return root;
  670. }
  671. void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
  672. {
  673. free(fs_info->tree_root);
  674. free(fs_info->chunk_root);
  675. free(fs_info->csum_root);
  676. free(fs_info->super_copy);
  677. free(fs_info);
  678. }
  679. struct btrfs_fs_info *btrfs_new_fs_info(void)
  680. {
  681. struct btrfs_fs_info *fs_info;
  682. fs_info = calloc(1, sizeof(struct btrfs_fs_info));
  683. if (!fs_info)
  684. return NULL;
  685. fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
  686. fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
  687. fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
  688. fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
  689. if (!fs_info->tree_root || !fs_info->chunk_root ||
  690. !fs_info->csum_root || !fs_info->super_copy)
  691. goto free_all;
  692. extent_io_tree_init(&fs_info->extent_cache);
  693. fs_info->fs_root_tree = RB_ROOT;
  694. cache_tree_init(&fs_info->mapping_tree.cache_tree);
  695. return fs_info;
  696. free_all:
  697. btrfs_free_fs_info(fs_info);
  698. return NULL;
  699. }
  700. static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
  701. struct btrfs_root *info_root,
  702. u64 objectid, char *str)
  703. {
  704. struct btrfs_root *root = fs_info->tree_root;
  705. int ret;
  706. ret = find_and_setup_root(root, fs_info, objectid, info_root);
  707. if (ret) {
  708. error("could not setup %s tree", str);
  709. return -EIO;
  710. }
  711. return 0;
  712. }
  713. static int get_default_subvolume(struct btrfs_fs_info *fs_info,
  714. struct btrfs_key *key_ret)
  715. {
  716. struct btrfs_root *root = fs_info->tree_root;
  717. struct btrfs_dir_item *dir_item;
  718. struct btrfs_path path;
  719. int ret = 0;
  720. btrfs_init_path(&path);
  721. dir_item = btrfs_lookup_dir_item(NULL, root, &path,
  722. BTRFS_ROOT_TREE_DIR_OBJECTID,
  723. "default", 7, 0);
  724. if (IS_ERR(dir_item)) {
  725. ret = PTR_ERR(dir_item);
  726. goto out;
  727. }
  728. btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
  729. out:
  730. btrfs_release_path(&path);
  731. return ret;
  732. }
  733. int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
  734. {
  735. struct btrfs_super_block *sb = fs_info->super_copy;
  736. struct btrfs_root *root;
  737. struct btrfs_key key;
  738. u64 root_tree_bytenr;
  739. u64 generation;
  740. int ret;
  741. root = fs_info->tree_root;
  742. btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
  743. generation = btrfs_super_generation(sb);
  744. root_tree_bytenr = btrfs_super_root(sb);
  745. root->node = read_tree_block(fs_info, root_tree_bytenr, generation);
  746. if (!extent_buffer_uptodate(root->node)) {
  747. fprintf(stderr, "Couldn't read tree root\n");
  748. return -EIO;
  749. }
  750. ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
  751. BTRFS_CSUM_TREE_OBJECTID, "csum");
  752. if (ret)
  753. return ret;
  754. fs_info->csum_root->track_dirty = 1;
  755. fs_info->last_trans_committed = generation;
  756. ret = get_default_subvolume(fs_info, &key);
  757. if (ret) {
  758. /*
  759. * The default dir item isn't there. Linux kernel behaviour is
  760. * to silently use the top-level subvolume in this case.
  761. */
  762. key.objectid = BTRFS_FS_TREE_OBJECTID;
  763. key.type = BTRFS_ROOT_ITEM_KEY;
  764. key.offset = (u64)-1;
  765. }
  766. fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
  767. if (IS_ERR(fs_info->fs_root))
  768. return -EIO;
  769. return 0;
  770. }
  771. void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
  772. {
  773. if (fs_info->csum_root)
  774. free_extent_buffer(fs_info->csum_root->node);
  775. if (fs_info->tree_root)
  776. free_extent_buffer(fs_info->tree_root->node);
  777. if (fs_info->chunk_root)
  778. free_extent_buffer(fs_info->chunk_root->node);
  779. }
  780. static void free_map_lookup(struct cache_extent *ce)
  781. {
  782. struct map_lookup *map;
  783. map = container_of(ce, struct map_lookup, ce);
  784. kfree(map);
  785. }
  786. FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
  787. void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
  788. {
  789. free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
  790. extent_io_tree_cleanup(&fs_info->extent_cache);
  791. }
  792. static int btrfs_scan_fs_devices(struct blk_desc *desc,
  793. struct disk_partition *part,
  794. struct btrfs_fs_devices **fs_devices)
  795. {
  796. u64 total_devs;
  797. int ret;
  798. if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
  799. desc->blksz) > (part->size << desc->log2blksz)) {
  800. log_debug("superblock end %u is larger than device size " LBAFU,
  801. BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
  802. part->size << desc->log2blksz);
  803. return -EINVAL;
  804. }
  805. ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
  806. if (ret) {
  807. /*
  808. * Avoid showing this when probing for a possible Btrfs
  809. *
  810. * fprintf(stderr, "No valid Btrfs found\n");
  811. */
  812. return ret;
  813. }
  814. return 0;
  815. }
  816. int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
  817. {
  818. u64 features;
  819. features = btrfs_super_incompat_flags(sb) &
  820. ~BTRFS_FEATURE_INCOMPAT_SUPP;
  821. if (features) {
  822. printk("couldn't open because of unsupported "
  823. "option features (%llx).\n",
  824. (unsigned long long)features);
  825. return -ENOTSUPP;
  826. }
  827. features = btrfs_super_incompat_flags(sb);
  828. if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
  829. features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
  830. btrfs_set_super_incompat_flags(sb, features);
  831. }
  832. return 0;
  833. }
  834. static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
  835. {
  836. struct btrfs_super_block *sb = fs_info->super_copy;
  837. u64 chunk_root_bytenr;
  838. u64 generation;
  839. int ret;
  840. btrfs_setup_root(fs_info->chunk_root, fs_info,
  841. BTRFS_CHUNK_TREE_OBJECTID);
  842. ret = btrfs_read_sys_array(fs_info);
  843. if (ret)
  844. return ret;
  845. generation = btrfs_super_chunk_root_generation(sb);
  846. chunk_root_bytenr = btrfs_super_chunk_root(sb);
  847. fs_info->chunk_root->node = read_tree_block(fs_info,
  848. chunk_root_bytenr,
  849. generation);
  850. if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
  851. error("cannot read chunk root");
  852. return -EIO;
  853. }
  854. ret = btrfs_read_chunk_tree(fs_info);
  855. if (ret) {
  856. fprintf(stderr, "Couldn't read chunk tree\n");
  857. return ret;
  858. }
  859. return 0;
  860. }
  861. struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
  862. struct disk_partition *part)
  863. {
  864. struct btrfs_fs_info *fs_info;
  865. struct btrfs_super_block *disk_super;
  866. struct btrfs_fs_devices *fs_devices = NULL;
  867. struct extent_buffer *eb;
  868. int ret;
  869. fs_info = btrfs_new_fs_info();
  870. if (!fs_info) {
  871. fprintf(stderr, "Failed to allocate memory for fs_info\n");
  872. return NULL;
  873. }
  874. ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
  875. if (ret)
  876. goto out;
  877. fs_info->fs_devices = fs_devices;
  878. ret = btrfs_open_devices(fs_devices);
  879. if (ret)
  880. goto out;
  881. disk_super = fs_info->super_copy;
  882. ret = btrfs_read_dev_super(desc, part, disk_super);
  883. if (ret) {
  884. debug("No valid btrfs found\n");
  885. goto out_devices;
  886. }
  887. if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
  888. fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
  889. goto out_devices;
  890. }
  891. ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
  892. if (btrfs_fs_incompat(fs_info, METADATA_UUID))
  893. ASSERT(!memcmp(disk_super->metadata_uuid,
  894. fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
  895. fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
  896. fs_info->nodesize = btrfs_super_nodesize(disk_super);
  897. fs_info->stripesize = btrfs_super_stripesize(disk_super);
  898. ret = btrfs_check_fs_compatibility(fs_info->super_copy);
  899. if (ret)
  900. goto out_devices;
  901. ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
  902. if (ret)
  903. goto out_chunk;
  904. /* Chunk tree root is unable to read, return directly */
  905. if (!fs_info->chunk_root)
  906. return fs_info;
  907. eb = fs_info->chunk_root->node;
  908. read_extent_buffer(eb, fs_info->chunk_tree_uuid,
  909. btrfs_header_chunk_tree_uuid(eb),
  910. BTRFS_UUID_SIZE);
  911. ret = btrfs_setup_all_roots(fs_info);
  912. if (ret)
  913. goto out_chunk;
  914. return fs_info;
  915. out_chunk:
  916. btrfs_release_all_roots(fs_info);
  917. btrfs_cleanup_all_caches(fs_info);
  918. out_devices:
  919. btrfs_close_devices(fs_devices);
  920. out:
  921. btrfs_free_fs_info(fs_info);
  922. return NULL;
  923. }
  924. int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
  925. {
  926. int ret;
  927. free_fs_roots_tree(&fs_info->fs_root_tree);
  928. btrfs_release_all_roots(fs_info);
  929. ret = btrfs_close_devices(fs_info->fs_devices);
  930. btrfs_cleanup_all_caches(fs_info);
  931. btrfs_free_fs_info(fs_info);
  932. return ret;
  933. }
  934. int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
  935. {
  936. int ret;
  937. ret = extent_buffer_uptodate(buf);
  938. if (!ret)
  939. return ret;
  940. ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
  941. parent_transid, 1);
  942. return !ret;
  943. }
  /* Mark @eb up to date; returns set_extent_buffer_uptodate()'s result. */
  int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
  {
  	int ret = set_extent_buffer_uptodate(eb);

  	return ret;
  }