recovery.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * linux/fs/jbd2/recovery.c
  4. *
  5. * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
  6. *
  7. * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
  8. *
  9. * Journal recovery routines for the generic filesystem journaling code;
  10. * part of the ext2fs journaling system.
  11. */
  12. #ifndef __KERNEL__
  13. #include "jfs_user.h"
  14. #else
  15. #include <linux/time.h>
  16. #include <linux/fs.h>
  17. #include <linux/jbd2.h>
  18. #include <linux/errno.h>
  19. #include <linux/crc32.h>
  20. #include <linux/blkdev.h>
  21. #endif
  22. /*
  23. * Maintain information about the progress of the recovery job, so that
  24. * the different passes can carry information between them.
  25. */
  26. struct recovery_info
  27. {
  28. tid_t start_transaction;
  29. tid_t end_transaction;
  30. int nr_replays;
  31. int nr_revokes;
  32. int nr_revoke_hits;
  33. };
  34. enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
  35. static int do_one_pass(journal_t *journal,
  36. struct recovery_info *info, enum passtype pass);
  37. static int scan_revoke_records(journal_t *, struct buffer_head *,
  38. tid_t, struct recovery_info *);
  39. #ifdef __KERNEL__
  40. /* Release readahead buffers after use */
  41. static void journal_brelse_array(struct buffer_head *b[], int n)
  42. {
  43. while (--n >= 0)
  44. brelse (b[n]);
  45. }
  46. /*
  47. * When reading from the journal, we are going through the block device
  48. * layer directly and so there is no readahead being done for us. We
  49. * need to implement any readahead ourselves if we want it to happen at
  50. * all. Recovery is basically one long sequential read, so make sure we
  51. * do the IO in reasonably large chunks.
  52. *
  53. * This is not so critical that we need to be enormously clever about
  54. * the readahead size, though. 128K is a purely arbitrary, good-enough
  55. * fixed value.
  56. */
  57. #define MAXBUF 8
  58. static int do_readahead(journal_t *journal, unsigned int start)
  59. {
  60. int err;
  61. unsigned int max, nbufs, next;
  62. unsigned long long blocknr;
  63. struct buffer_head *bh;
  64. struct buffer_head * bufs[MAXBUF];
  65. /* Do up to 128K of readahead */
  66. max = start + (128 * 1024 / journal->j_blocksize);
  67. if (max > journal->j_maxlen)
  68. max = journal->j_maxlen;
  69. /* Do the readahead itself. We'll submit MAXBUF buffer_heads at
  70. * a time to the block device IO layer. */
  71. nbufs = 0;
  72. for (next = start; next < max; next++) {
  73. err = jbd2_journal_bmap(journal, next, &blocknr);
  74. if (err) {
  75. printk(KERN_ERR "JBD2: bad block at offset %u\n",
  76. next);
  77. goto failed;
  78. }
  79. bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
  80. if (!bh) {
  81. err = -ENOMEM;
  82. goto failed;
  83. }
  84. if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
  85. bufs[nbufs++] = bh;
  86. if (nbufs == MAXBUF) {
  87. ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
  88. journal_brelse_array(bufs, nbufs);
  89. nbufs = 0;
  90. }
  91. } else
  92. brelse(bh);
  93. }
  94. if (nbufs)
  95. ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
  96. err = 0;
  97. failed:
  98. if (nbufs)
  99. journal_brelse_array(bufs, nbufs);
  100. return err;
  101. }
  102. #endif /* __KERNEL__ */
  103. /*
  104. * Read a block from the journal
  105. */
  106. static int jread(struct buffer_head **bhp, journal_t *journal,
  107. unsigned int offset)
  108. {
  109. int err;
  110. unsigned long long blocknr;
  111. struct buffer_head *bh;
  112. *bhp = NULL;
  113. if (offset >= journal->j_maxlen) {
  114. printk(KERN_ERR "JBD2: corrupted journal superblock\n");
  115. return -EFSCORRUPTED;
  116. }
  117. err = jbd2_journal_bmap(journal, offset, &blocknr);
  118. if (err) {
  119. printk(KERN_ERR "JBD2: bad block at offset %u\n",
  120. offset);
  121. return err;
  122. }
  123. bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
  124. if (!bh)
  125. return -ENOMEM;
  126. if (!buffer_uptodate(bh)) {
  127. /* If this is a brand new buffer, start readahead.
  128. Otherwise, we assume we are already reading it. */
  129. if (!buffer_req(bh))
  130. do_readahead(journal, offset);
  131. wait_on_buffer(bh);
  132. }
  133. if (!buffer_uptodate(bh)) {
  134. printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
  135. offset);
  136. brelse(bh);
  137. return -EIO;
  138. }
  139. *bhp = bh;
  140. return 0;
  141. }
  142. static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
  143. {
  144. struct jbd2_journal_block_tail *tail;
  145. __be32 provided;
  146. __u32 calculated;
  147. if (!jbd2_journal_has_csum_v2or3(j))
  148. return 1;
  149. tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
  150. sizeof(struct jbd2_journal_block_tail));
  151. provided = tail->t_checksum;
  152. tail->t_checksum = 0;
  153. calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
  154. tail->t_checksum = provided;
  155. return provided == cpu_to_be32(calculated);
  156. }
  157. /*
  158. * Count the number of in-use tags in a journal descriptor block.
  159. */
  160. static int count_tags(journal_t *journal, struct buffer_head *bh)
  161. {
  162. char * tagp;
  163. journal_block_tag_t * tag;
  164. int nr = 0, size = journal->j_blocksize;
  165. int tag_bytes = journal_tag_bytes(journal);
  166. if (jbd2_journal_has_csum_v2or3(journal))
  167. size -= sizeof(struct jbd2_journal_block_tail);
  168. tagp = &bh->b_data[sizeof(journal_header_t)];
  169. while ((tagp - bh->b_data + tag_bytes) <= size) {
  170. tag = (journal_block_tag_t *) tagp;
  171. nr++;
  172. tagp += tag_bytes;
  173. if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
  174. tagp += 16;
  175. if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
  176. break;
  177. }
  178. return nr;
  179. }
  180. /* Make sure we wrap around the log correctly! */
  181. #define wrap(journal, var) \
  182. do { \
  183. if (var >= (journal)->j_last) \
  184. var -= ((journal)->j_last - (journal)->j_first); \
  185. } while (0)
  186. /**
  187. * jbd2_journal_recover - recovers a on-disk journal
  188. * @journal: the journal to recover
  189. *
  190. * The primary function for recovering the log contents when mounting a
  191. * journaled device.
  192. *
  193. * Recovery is done in three passes. In the first pass, we look for the
  194. * end of the log. In the second, we assemble the list of revoke
  195. * blocks. In the third and final pass, we replay any un-revoked blocks
  196. * in the log.
  197. */
  198. int jbd2_journal_recover(journal_t *journal)
  199. {
  200. int err, err2;
  201. journal_superblock_t * sb;
  202. struct recovery_info info;
  203. memset(&info, 0, sizeof(info));
  204. sb = journal->j_superblock;
  205. /*
  206. * The journal superblock's s_start field (the current log head)
  207. * is always zero if, and only if, the journal was cleanly
  208. * unmounted.
  209. */
  210. if (!sb->s_start) {
  211. jbd_debug(1, "No recovery required, last transaction %d\n",
  212. be32_to_cpu(sb->s_sequence));
  213. journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
  214. return 0;
  215. }
  216. err = do_one_pass(journal, &info, PASS_SCAN);
  217. if (!err)
  218. err = do_one_pass(journal, &info, PASS_REVOKE);
  219. if (!err)
  220. err = do_one_pass(journal, &info, PASS_REPLAY);
  221. jbd_debug(1, "JBD2: recovery, exit status %d, "
  222. "recovered transactions %u to %u\n",
  223. err, info.start_transaction, info.end_transaction);
  224. jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
  225. info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
  226. /* Restart the log at the next transaction ID, thus invalidating
  227. * any existing commit records in the log. */
  228. journal->j_transaction_sequence = ++info.end_transaction;
  229. jbd2_journal_clear_revoke(journal);
  230. err2 = sync_blockdev(journal->j_fs_dev);
  231. if (!err)
  232. err = err2;
  233. /* Make sure all replayed data is on permanent storage */
  234. if (journal->j_flags & JBD2_BARRIER) {
  235. err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
  236. if (!err)
  237. err = err2;
  238. }
  239. return err;
  240. }
  241. /**
  242. * jbd2_journal_skip_recovery - Start journal and wipe exiting records
  243. * @journal: journal to startup
  244. *
  245. * Locate any valid recovery information from the journal and set up the
  246. * journal structures in memory to ignore it (presumably because the
  247. * caller has evidence that it is out of date).
  248. * This function doesn't appear to be exported..
  249. *
  250. * We perform one pass over the journal to allow us to tell the user how
  251. * much recovery information is being erased, and to let us initialise
  252. * the journal transaction sequence numbers to the next unused ID.
  253. */
  254. int jbd2_journal_skip_recovery(journal_t *journal)
  255. {
  256. int err;
  257. struct recovery_info info;
  258. memset (&info, 0, sizeof(info));
  259. err = do_one_pass(journal, &info, PASS_SCAN);
  260. if (err) {
  261. printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
  262. ++journal->j_transaction_sequence;
  263. } else {
  264. #ifdef CONFIG_JBD2_DEBUG
  265. int dropped = info.end_transaction -
  266. be32_to_cpu(journal->j_superblock->s_sequence);
  267. jbd_debug(1,
  268. "JBD2: ignoring %d transaction%s from the journal.\n",
  269. dropped, (dropped == 1) ? "" : "s");
  270. #endif
  271. journal->j_transaction_sequence = ++info.end_transaction;
  272. }
  273. journal->j_tail = 0;
  274. return err;
  275. }
  276. static inline unsigned long long read_tag_block(journal_t *journal,
  277. journal_block_tag_t *tag)
  278. {
  279. unsigned long long block = be32_to_cpu(tag->t_blocknr);
  280. if (jbd2_has_feature_64bit(journal))
  281. block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
  282. return block;
  283. }
  284. /*
  285. * calc_chksums calculates the checksums for the blocks described in the
  286. * descriptor block.
  287. */
  288. static int calc_chksums(journal_t *journal, struct buffer_head *bh,
  289. unsigned long *next_log_block, __u32 *crc32_sum)
  290. {
  291. int i, num_blks, err;
  292. unsigned long io_block;
  293. struct buffer_head *obh;
  294. num_blks = count_tags(journal, bh);
  295. /* Calculate checksum of the descriptor block. */
  296. *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
  297. for (i = 0; i < num_blks; i++) {
  298. io_block = (*next_log_block)++;
  299. wrap(journal, *next_log_block);
  300. err = jread(&obh, journal, io_block);
  301. if (err) {
  302. printk(KERN_ERR "JBD2: IO error %d recovering block "
  303. "%lu in log\n", err, io_block);
  304. return 1;
  305. } else {
  306. *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
  307. obh->b_size);
  308. }
  309. put_bh(obh);
  310. }
  311. return 0;
  312. }
  313. static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
  314. {
  315. struct commit_header *h;
  316. __be32 provided;
  317. __u32 calculated;
  318. if (!jbd2_journal_has_csum_v2or3(j))
  319. return 1;
  320. h = buf;
  321. provided = h->h_chksum[0];
  322. h->h_chksum[0] = 0;
  323. calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
  324. h->h_chksum[0] = provided;
  325. return provided == cpu_to_be32(calculated);
  326. }
  327. static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
  328. void *buf, __u32 sequence)
  329. {
  330. journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
  331. __u32 csum32;
  332. __be32 seq;
  333. if (!jbd2_journal_has_csum_v2or3(j))
  334. return 1;
  335. seq = cpu_to_be32(sequence);
  336. csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
  337. csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
  338. if (jbd2_has_feature_csum3(j))
  339. return tag3->t_checksum == cpu_to_be32(csum32);
  340. else
  341. return tag->t_checksum == cpu_to_be16(csum32);
  342. }
  343. static int do_one_pass(journal_t *journal,
  344. struct recovery_info *info, enum passtype pass)
  345. {
  346. unsigned int first_commit_ID, next_commit_ID;
  347. unsigned long next_log_block;
  348. int err, success = 0;
  349. journal_superblock_t * sb;
  350. journal_header_t * tmp;
  351. struct buffer_head * bh;
  352. unsigned int sequence;
  353. int blocktype;
  354. int tag_bytes = journal_tag_bytes(journal);
  355. __u32 crc32_sum = ~0; /* Transactional Checksums */
  356. int descr_csum_size = 0;
  357. int block_error = 0;
  358. /*
  359. * First thing is to establish what we expect to find in the log
  360. * (in terms of transaction IDs), and where (in terms of log
  361. * block offsets): query the superblock.
  362. */
  363. sb = journal->j_superblock;
  364. next_commit_ID = be32_to_cpu(sb->s_sequence);
  365. next_log_block = be32_to_cpu(sb->s_start);
  366. first_commit_ID = next_commit_ID;
  367. if (pass == PASS_SCAN)
  368. info->start_transaction = first_commit_ID;
  369. jbd_debug(1, "Starting recovery pass %d\n", pass);
  370. /*
  371. * Now we walk through the log, transaction by transaction,
  372. * making sure that each transaction has a commit block in the
  373. * expected place. Each complete transaction gets replayed back
  374. * into the main filesystem.
  375. */
  376. while (1) {
  377. int flags;
  378. char * tagp;
  379. journal_block_tag_t * tag;
  380. struct buffer_head * obh;
  381. struct buffer_head * nbh;
  382. cond_resched();
  383. /* If we already know where to stop the log traversal,
  384. * check right now that we haven't gone past the end of
  385. * the log. */
  386. if (pass != PASS_SCAN)
  387. if (tid_geq(next_commit_ID, info->end_transaction))
  388. break;
  389. jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
  390. next_commit_ID, next_log_block, journal->j_last);
  391. /* Skip over each chunk of the transaction looking
  392. * either the next descriptor block or the final commit
  393. * record. */
  394. jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
  395. err = jread(&bh, journal, next_log_block);
  396. if (err)
  397. goto failed;
  398. next_log_block++;
  399. wrap(journal, next_log_block);
  400. /* What kind of buffer is it?
  401. *
  402. * If it is a descriptor block, check that it has the
  403. * expected sequence number. Otherwise, we're all done
  404. * here. */
  405. tmp = (journal_header_t *)bh->b_data;
  406. if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
  407. brelse(bh);
  408. break;
  409. }
  410. blocktype = be32_to_cpu(tmp->h_blocktype);
  411. sequence = be32_to_cpu(tmp->h_sequence);
  412. jbd_debug(3, "Found magic %d, sequence %d\n",
  413. blocktype, sequence);
  414. if (sequence != next_commit_ID) {
  415. brelse(bh);
  416. break;
  417. }
  418. /* OK, we have a valid descriptor block which matches
  419. * all of the sequence number checks. What are we going
  420. * to do with it? That depends on the pass... */
  421. switch(blocktype) {
  422. case JBD2_DESCRIPTOR_BLOCK:
  423. /* Verify checksum first */
  424. if (jbd2_journal_has_csum_v2or3(journal))
  425. descr_csum_size =
  426. sizeof(struct jbd2_journal_block_tail);
  427. if (descr_csum_size > 0 &&
  428. !jbd2_descriptor_block_csum_verify(journal,
  429. bh->b_data)) {
  430. printk(KERN_ERR "JBD2: Invalid checksum "
  431. "recovering block %lu in log\n",
  432. next_log_block);
  433. err = -EFSBADCRC;
  434. brelse(bh);
  435. goto failed;
  436. }
  437. /* If it is a valid descriptor block, replay it
  438. * in pass REPLAY; if journal_checksums enabled, then
  439. * calculate checksums in PASS_SCAN, otherwise,
  440. * just skip over the blocks it describes. */
  441. if (pass != PASS_REPLAY) {
  442. if (pass == PASS_SCAN &&
  443. jbd2_has_feature_checksum(journal) &&
  444. !info->end_transaction) {
  445. if (calc_chksums(journal, bh,
  446. &next_log_block,
  447. &crc32_sum)) {
  448. put_bh(bh);
  449. break;
  450. }
  451. put_bh(bh);
  452. continue;
  453. }
  454. next_log_block += count_tags(journal, bh);
  455. wrap(journal, next_log_block);
  456. put_bh(bh);
  457. continue;
  458. }
  459. /* A descriptor block: we can now write all of
  460. * the data blocks. Yay, useful work is finally
  461. * getting done here! */
  462. tagp = &bh->b_data[sizeof(journal_header_t)];
  463. while ((tagp - bh->b_data + tag_bytes)
  464. <= journal->j_blocksize - descr_csum_size) {
  465. unsigned long io_block;
  466. tag = (journal_block_tag_t *) tagp;
  467. flags = be16_to_cpu(tag->t_flags);
  468. io_block = next_log_block++;
  469. wrap(journal, next_log_block);
  470. err = jread(&obh, journal, io_block);
  471. if (err) {
  472. /* Recover what we can, but
  473. * report failure at the end. */
  474. success = err;
  475. printk(KERN_ERR
  476. "JBD2: IO error %d recovering "
  477. "block %ld in log\n",
  478. err, io_block);
  479. } else {
  480. unsigned long long blocknr;
  481. J_ASSERT(obh != NULL);
  482. blocknr = read_tag_block(journal,
  483. tag);
  484. /* If the block has been
  485. * revoked, then we're all done
  486. * here. */
  487. if (jbd2_journal_test_revoke
  488. (journal, blocknr,
  489. next_commit_ID)) {
  490. brelse(obh);
  491. ++info->nr_revoke_hits;
  492. goto skip_write;
  493. }
  494. /* Look for block corruption */
  495. if (!jbd2_block_tag_csum_verify(
  496. journal, tag, obh->b_data,
  497. be32_to_cpu(tmp->h_sequence))) {
  498. brelse(obh);
  499. success = -EFSBADCRC;
  500. printk(KERN_ERR "JBD2: Invalid "
  501. "checksum recovering "
  502. "data block %llu in "
  503. "log\n", blocknr);
  504. block_error = 1;
  505. goto skip_write;
  506. }
  507. /* Find a buffer for the new
  508. * data being restored */
  509. nbh = __getblk(journal->j_fs_dev,
  510. blocknr,
  511. journal->j_blocksize);
  512. if (nbh == NULL) {
  513. printk(KERN_ERR
  514. "JBD2: Out of memory "
  515. "during recovery.\n");
  516. err = -ENOMEM;
  517. brelse(bh);
  518. brelse(obh);
  519. goto failed;
  520. }
  521. lock_buffer(nbh);
  522. memcpy(nbh->b_data, obh->b_data,
  523. journal->j_blocksize);
  524. if (flags & JBD2_FLAG_ESCAPE) {
  525. *((__be32 *)nbh->b_data) =
  526. cpu_to_be32(JBD2_MAGIC_NUMBER);
  527. }
  528. BUFFER_TRACE(nbh, "marking dirty");
  529. set_buffer_uptodate(nbh);
  530. mark_buffer_dirty(nbh);
  531. BUFFER_TRACE(nbh, "marking uptodate");
  532. ++info->nr_replays;
  533. /* ll_rw_block(WRITE, 1, &nbh); */
  534. unlock_buffer(nbh);
  535. brelse(obh);
  536. brelse(nbh);
  537. }
  538. skip_write:
  539. tagp += tag_bytes;
  540. if (!(flags & JBD2_FLAG_SAME_UUID))
  541. tagp += 16;
  542. if (flags & JBD2_FLAG_LAST_TAG)
  543. break;
  544. }
  545. brelse(bh);
  546. continue;
  547. case JBD2_COMMIT_BLOCK:
  548. /* How to differentiate between interrupted commit
  549. * and journal corruption ?
  550. *
  551. * {nth transaction}
  552. * Checksum Verification Failed
  553. * |
  554. * ____________________
  555. * | |
  556. * async_commit sync_commit
  557. * | |
  558. * | GO TO NEXT "Journal Corruption"
  559. * | TRANSACTION
  560. * |
  561. * {(n+1)th transanction}
  562. * |
  563. * _______|______________
  564. * | |
  565. * Commit block found Commit block not found
  566. * | |
  567. * "Journal Corruption" |
  568. * _____________|_________
  569. * | |
  570. * nth trans corrupt OR nth trans
  571. * and (n+1)th interrupted interrupted
  572. * before commit block
  573. * could reach the disk.
  574. * (Cannot find the difference in above
  575. * mentioned conditions. Hence assume
  576. * "Interrupted Commit".)
  577. */
  578. /* Found an expected commit block: if checksums
  579. * are present verify them in PASS_SCAN; else not
  580. * much to do other than move on to the next sequence
  581. * number. */
  582. if (pass == PASS_SCAN &&
  583. jbd2_has_feature_checksum(journal)) {
  584. int chksum_err, chksum_seen;
  585. struct commit_header *cbh =
  586. (struct commit_header *)bh->b_data;
  587. unsigned found_chksum =
  588. be32_to_cpu(cbh->h_chksum[0]);
  589. chksum_err = chksum_seen = 0;
  590. if (info->end_transaction) {
  591. journal->j_failed_commit =
  592. info->end_transaction;
  593. brelse(bh);
  594. break;
  595. }
  596. if (crc32_sum == found_chksum &&
  597. cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
  598. cbh->h_chksum_size ==
  599. JBD2_CRC32_CHKSUM_SIZE)
  600. chksum_seen = 1;
  601. else if (!(cbh->h_chksum_type == 0 &&
  602. cbh->h_chksum_size == 0 &&
  603. found_chksum == 0 &&
  604. !chksum_seen))
  605. /*
  606. * If fs is mounted using an old kernel and then
  607. * kernel with journal_chksum is used then we
  608. * get a situation where the journal flag has
  609. * checksum flag set but checksums are not
  610. * present i.e chksum = 0, in the individual
  611. * commit blocks.
  612. * Hence to avoid checksum failures, in this
  613. * situation, this extra check is added.
  614. */
  615. chksum_err = 1;
  616. if (chksum_err) {
  617. info->end_transaction = next_commit_ID;
  618. if (!jbd2_has_feature_async_commit(journal)) {
  619. journal->j_failed_commit =
  620. next_commit_ID;
  621. brelse(bh);
  622. break;
  623. }
  624. }
  625. crc32_sum = ~0;
  626. }
  627. if (pass == PASS_SCAN &&
  628. !jbd2_commit_block_csum_verify(journal,
  629. bh->b_data)) {
  630. info->end_transaction = next_commit_ID;
  631. if (!jbd2_has_feature_async_commit(journal)) {
  632. journal->j_failed_commit =
  633. next_commit_ID;
  634. brelse(bh);
  635. break;
  636. }
  637. }
  638. brelse(bh);
  639. next_commit_ID++;
  640. continue;
  641. case JBD2_REVOKE_BLOCK:
  642. /* If we aren't in the REVOKE pass, then we can
  643. * just skip over this block. */
  644. if (pass != PASS_REVOKE) {
  645. brelse(bh);
  646. continue;
  647. }
  648. err = scan_revoke_records(journal, bh,
  649. next_commit_ID, info);
  650. brelse(bh);
  651. if (err)
  652. goto failed;
  653. continue;
  654. default:
  655. jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
  656. blocktype);
  657. brelse(bh);
  658. goto done;
  659. }
  660. }
  661. done:
  662. /*
  663. * We broke out of the log scan loop: either we came to the
  664. * known end of the log or we found an unexpected block in the
  665. * log. If the latter happened, then we know that the "current"
  666. * transaction marks the end of the valid log.
  667. */
  668. if (pass == PASS_SCAN) {
  669. if (!info->end_transaction)
  670. info->end_transaction = next_commit_ID;
  671. } else {
  672. /* It's really bad news if different passes end up at
  673. * different places (but possible due to IO errors). */
  674. if (info->end_transaction != next_commit_ID) {
  675. printk(KERN_ERR "JBD2: recovery pass %d ended at "
  676. "transaction %u, expected %u\n",
  677. pass, next_commit_ID, info->end_transaction);
  678. if (!success)
  679. success = -EIO;
  680. }
  681. }
  682. if (block_error && success == 0)
  683. success = -EIO;
  684. return success;
  685. failed:
  686. return err;
  687. }
  688. /* Scan a revoke record, marking all blocks mentioned as revoked. */
  689. static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
  690. tid_t sequence, struct recovery_info *info)
  691. {
  692. jbd2_journal_revoke_header_t *header;
  693. int offset, max;
  694. int csum_size = 0;
  695. __u32 rcount;
  696. int record_len = 4;
  697. header = (jbd2_journal_revoke_header_t *) bh->b_data;
  698. offset = sizeof(jbd2_journal_revoke_header_t);
  699. rcount = be32_to_cpu(header->r_count);
  700. if (!jbd2_descriptor_block_csum_verify(journal, header))
  701. return -EFSBADCRC;
  702. if (jbd2_journal_has_csum_v2or3(journal))
  703. csum_size = sizeof(struct jbd2_journal_block_tail);
  704. if (rcount > journal->j_blocksize - csum_size)
  705. return -EINVAL;
  706. max = rcount;
  707. if (jbd2_has_feature_64bit(journal))
  708. record_len = 8;
  709. while (offset + record_len <= max) {
  710. unsigned long long blocknr;
  711. int err;
  712. if (record_len == 4)
  713. blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
  714. else
  715. blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
  716. offset += record_len;
  717. err = jbd2_journal_set_revoke(journal, blocknr, sequence);
  718. if (err)
  719. return err;
  720. ++info->nr_revokes;
  721. }
  722. return 0;
  723. }