iscan.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include "xfs.h"
  7. #include "xfs_fs.h"
  8. #include "xfs_shared.h"
  9. #include "xfs_format.h"
  10. #include "xfs_trans_resv.h"
  11. #include "xfs_mount.h"
  12. #include "xfs_log_format.h"
  13. #include "xfs_trans.h"
  14. #include "xfs_inode.h"
  15. #include "xfs_btree.h"
  16. #include "xfs_ialloc.h"
  17. #include "xfs_ialloc_btree.h"
  18. #include "xfs_ag.h"
  19. #include "xfs_error.h"
  20. #include "xfs_bit.h"
  21. #include "xfs_icache.h"
  22. #include "scrub/scrub.h"
  23. #include "scrub/iscan.h"
  24. #include "scrub/common.h"
  25. #include "scrub/trace.h"
  26. /*
  27. * Live File Scan
  28. * ==============
  29. *
  30. * Live file scans walk every inode in a live filesystem. This is more or
  31. * less like a regular iwalk, except that when we're advancing the scan cursor,
  32. * we must ensure that inodes cannot be added or deleted anywhere between the
  33. * old cursor value and the new cursor value. If we're advancing the cursor
  34. * by one inode, the caller must hold that inode; if we're finding the next
  35. * inode to scan, we must grab the AGI and hold it until we've updated the
  36. * scan cursor.
  37. *
  38. * Callers are expected to use this code to scan all files in the filesystem to
  39. * construct a new metadata index of some kind. The scan races against other
  40. * live updates, which means there must be a provision to update the new index
  41. * when updates are made to inodes that have already been scanned. The iscan lock
  42. * can be used in live update hook code to stop the scan and protect this data
  43. * structure.
  44. *
  45. * To keep the new index up to date with other metadata updates being made to
  46. * the live filesystem, it is assumed that the caller will add hooks as needed
  47. * to be notified when a metadata update occurs. The inode scanner must tell
  48. * the hook code when an inode has been visited with xchk_iscan_mark_visit.
  49. * Hook functions can use xchk_iscan_want_live_update to decide if the
  50. * scanner's observations must be updated.
  51. */
  52. /*
  53. * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so
  54. * that the scan ignores that inode.
  55. */
  56. STATIC void
  57. xchk_iscan_mask_skipino(
  58. struct xchk_iscan *iscan,
  59. struct xfs_perag *pag,
  60. struct xfs_inobt_rec_incore *rec,
  61. xfs_agino_t lastrecino)
  62. {
  63. struct xfs_scrub *sc = iscan->sc;
  64. struct xfs_mount *mp = sc->mp;
  65. xfs_agnumber_t skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino);
  66. xfs_agnumber_t skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);
  67. if (pag->pag_agno != skip_agno)
  68. return;
  69. if (skip_agino < rec->ir_startino)
  70. return;
  71. if (skip_agino > lastrecino)
  72. return;
  73. rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1);
  74. }
/*
 * Set *cursor to the next allocated inode after whatever it's set to now.
 * If there are no more inodes in this AG, cursor is set to NULLAGINO.
 *
 * On success, *allocmaskp is the allocation bitmap of the inode chunk,
 * shifted so that bit 0 corresponds to *cursor, and *nr_inodesp is the
 * number of bitmap positions from *cursor to the end of the chunk.
 * Caller must hold the AGI buffer (@agi_bp) so records cannot change.
 */
STATIC int
xchk_iscan_find_next(
	struct xchk_iscan	*iscan,
	struct xfs_buf		*agi_bp,
	struct xfs_perag	*pag,
	xfs_inofree_t		*allocmaskp,
	xfs_agino_t		*cursor,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_inobt_rec_incore	rec;
	struct xfs_btree_cur	*cur;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	xfs_agnumber_t		agno = pag->pag_agno;
	xfs_agino_t		lastino = NULLAGINO;
	xfs_agino_t		first, last;
	xfs_agino_t		agino = *cursor;
	int			has_rec;
	int			error;

	/* If the cursor is beyond the end of this AG, move to the next one. */
	xfs_agino_range(mp, agno, &first, &last);
	if (agino > last) {
		*cursor = NULLAGINO;
		return 0;
	}

	/*
	 * Look up the inode chunk for the current cursor position.  If there
	 * is no chunk here, we want the next one.
	 */
	cur = xfs_inobt_init_cursor(pag, tp, agi_bp);
	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec);
	if (!error && !has_rec)
		error = xfs_btree_increment(cur, 0, &has_rec);
	for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) {
		xfs_inofree_t	allocmask;

		/*
		 * If we've run out of inobt records in this AG, move the
		 * cursor on to the next AG and exit.  The caller can try
		 * again with the next AG.
		 */
		if (!has_rec) {
			*cursor = NULLAGINO;
			break;
		}

		error = xfs_inobt_get_rec(cur, &rec, &has_rec);
		if (error)
			break;
		if (!has_rec) {
			/* Lookup said there was a record, but it's gone. */
			error = -EFSCORRUPTED;
			break;
		}

		/* Make sure that we always move forward. */
		if (lastino != NULLAGINO &&
		    XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) {
			error = -EFSCORRUPTED;
			break;
		}
		lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1;

		/*
		 * If this record only covers inodes that come before the
		 * cursor, advance to the next record.
		 */
		if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
			continue;

		/* Pretend the inode being skipped is free, if it's in here. */
		if (iscan->skip_ino)
			xchk_iscan_mask_skipino(iscan, pag, &rec, lastino);

		/*
		 * If the incoming lookup put us in the middle of an inobt
		 * record, mark it and the previous inodes "free" so that the
		 * search for allocated inodes will start at the cursor.
		 * We don't care about ir_freecount here.
		 */
		if (agino >= rec.ir_startino)
			rec.ir_free |= xfs_inobt_maskn(0,
						agino + 1 - rec.ir_startino);

		/*
		 * If there are allocated inodes in this chunk, find them
		 * and update the scan cursor.
		 */
		allocmask = ~rec.ir_free;
		if (hweight64(allocmask) > 0) {
			int	next = xfs_lowbit64(allocmask);

			ASSERT(next >= 0);
			*cursor = rec.ir_startino + next;
			/* Shift so that bit 0 of the mask is *cursor. */
			*allocmaskp = allocmask >> next;
			*nr_inodesp = XFS_INODES_PER_CHUNK - next;
			break;
		}
	}

	xfs_btree_del_cursor(cur, error);
	return error;
}
  172. /*
  173. * Advance both the scan and the visited cursors.
  174. *
  175. * The inumber address space for a given filesystem is sparse, which means that
  176. * the scan cursor can jump a long ways in a single iter() call. There are no
  177. * inodes in these sparse areas, so we must move the visited cursor forward at
  178. * the same time so that the scan user can receive live updates for inodes that
  179. * may get created once we release the AGI buffer.
  180. */
  181. static inline void
  182. xchk_iscan_move_cursor(
  183. struct xchk_iscan *iscan,
  184. xfs_agnumber_t agno,
  185. xfs_agino_t agino)
  186. {
  187. struct xfs_scrub *sc = iscan->sc;
  188. struct xfs_mount *mp = sc->mp;
  189. xfs_ino_t cursor, visited;
  190. BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);
  191. /*
  192. * Special-case ino == 0 here so that we never set visited_ino to
  193. * NULLFSINO when wrapping around EOFS, for that will let through all
  194. * live updates.
  195. */
  196. cursor = XFS_AGINO_TO_INO(mp, agno, agino);
  197. if (cursor == 0)
  198. visited = XFS_MAXINUMBER;
  199. else
  200. visited = cursor - 1;
  201. mutex_lock(&iscan->lock);
  202. iscan->cursor_ino = cursor;
  203. iscan->__visited_ino = visited;
  204. trace_xchk_iscan_move_cursor(iscan);
  205. mutex_unlock(&iscan->lock);
  206. }
  207. /*
  208. * Prepare to return agno/agino to the iscan caller by moving the lastino
  209. * cursor to the previous inode. Do this while we still hold the AGI so that
  210. * no other threads can create or delete inodes in this AG.
  211. */
  212. static inline void
  213. xchk_iscan_finish(
  214. struct xchk_iscan *iscan)
  215. {
  216. mutex_lock(&iscan->lock);
  217. iscan->cursor_ino = NULLFSINO;
  218. /* All live updates will be applied from now on */
  219. iscan->__visited_ino = NULLFSINO;
  220. mutex_unlock(&iscan->lock);
  221. }
  222. /* Mark an inode scan finished before we actually scan anything. */
  223. void
  224. xchk_iscan_finish_early(
  225. struct xchk_iscan *iscan)
  226. {
  227. ASSERT(iscan->cursor_ino == iscan->scan_start_ino);
  228. ASSERT(iscan->__visited_ino == iscan->scan_start_ino);
  229. xchk_iscan_finish(iscan);
  230. }
  231. /*
  232. * Grab the AGI to advance the inode scan. Returns 0 if *agi_bpp is now set,
  233. * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed,
  234. * or the usual negative errno.
  235. */
  236. STATIC int
  237. xchk_iscan_read_agi(
  238. struct xchk_iscan *iscan,
  239. struct xfs_perag *pag,
  240. struct xfs_buf **agi_bpp)
  241. {
  242. struct xfs_scrub *sc = iscan->sc;
  243. unsigned long relax;
  244. int ret;
  245. if (!xchk_iscan_agi_needs_trylock(iscan))
  246. return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp);
  247. relax = msecs_to_jiffies(iscan->iget_retry_delay);
  248. do {
  249. ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK,
  250. agi_bpp);
  251. if (ret != -EAGAIN)
  252. return ret;
  253. if (!iscan->iget_timeout ||
  254. time_is_before_jiffies(iscan->__iget_deadline))
  255. return -EBUSY;
  256. trace_xchk_iscan_agi_retry_wait(iscan);
  257. } while (!schedule_timeout_killable(relax) &&
  258. !xchk_iscan_aborted(iscan));
  259. return -ECANCELED;
  260. }
/*
 * Advance ino to the next inode that the inobt thinks is allocated, being
 * careful to jump to the next AG if we've reached the right end of this AG's
 * inode btree.  Advancing ino effectively means that we've pushed the inode
 * scan forward, so set the iscan cursor to (ino - 1) so that our live update
 * predicates will track inode allocations in that part of the inode number
 * key space once we release the AGI buffer.
 *
 * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
 * -ECANCELED if the live scan aborted, or the usual negative errno.
 *
 * On a return of 1, ownership of the AGI buffer (*agi_bpp) and a perag
 * reference (*pagp) passes to the caller; on any other return both have
 * been released.  *allocmaskp and *nr_inodesp are filled in by
 * xchk_iscan_find_next on success.
 */
STATIC int
xchk_iscan_advance(
	struct xchk_iscan	*iscan,
	struct xfs_perag	**pagp,
	struct xfs_buf		**agi_bpp,
	xfs_inofree_t		*allocmaskp,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*agi_bp;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	xfs_agino_t		agino;
	int			ret;

	/* The scan cursor may never fall behind the visited cursor. */
	ASSERT(iscan->cursor_ino >= iscan->__visited_ino);

	do {
		if (xchk_iscan_aborted(iscan))
			return -ECANCELED;

		agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino);
		/* No perag means the AG vanished, so end the scan. */
		pag = xfs_perag_get(mp, agno);
		if (!pag)
			return -ECANCELED;

		/* Hold the AGI so nobody can create or free inodes here. */
		ret = xchk_iscan_read_agi(iscan, pag, &agi_bp);
		if (ret)
			goto out_pag;

		agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
		ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp,
				&agino, nr_inodesp);
		if (ret)
			goto out_buf;

		if (agino != NULLAGINO) {
			/*
			 * Found the next inode in this AG, so return it along
			 * with the AGI buffer and the perag structure to
			 * ensure it cannot go away.
			 */
			xchk_iscan_move_cursor(iscan, agno, agino);
			*agi_bpp = agi_bp;
			*pagp = pag;
			return 1;
		}

		/*
		 * Did not find any more inodes in this AG, move on to the next
		 * AG.
		 */
		agno = (agno + 1) % mp->m_sb.sb_agcount;
		xchk_iscan_move_cursor(iscan, agno, 0);
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);
		trace_xchk_iscan_advance_ag(iscan);
	} while (iscan->cursor_ino != iscan->scan_start_ino);
	/* Wrapped all the way around to the start: scan is complete. */

	xchk_iscan_finish(iscan);
	return 0;

out_buf:
	xfs_trans_brelse(sc->tp, agi_bp);
out_pag:
	xfs_perag_put(pag);
	return ret;
}
  332. /*
  333. * Grabbing the inode failed, so we need to back up the scan and ask the caller
  334. * to try to _advance the scan again. Returns -EBUSY if we've run out of retry
  335. * opportunities, -ECANCELED if the process has a fatal signal pending, or
  336. * -EAGAIN if we should try again.
  337. */
  338. STATIC int
  339. xchk_iscan_iget_retry(
  340. struct xchk_iscan *iscan,
  341. bool wait)
  342. {
  343. ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1);
  344. if (!iscan->iget_timeout ||
  345. time_is_before_jiffies(iscan->__iget_deadline))
  346. return -EBUSY;
  347. if (wait) {
  348. unsigned long relax;
  349. /*
  350. * Sleep for a period of time to let the rest of the system
  351. * catch up. If we return early, someone sent a kill signal to
  352. * the calling process.
  353. */
  354. relax = msecs_to_jiffies(iscan->iget_retry_delay);
  355. trace_xchk_iscan_iget_retry_wait(iscan);
  356. if (schedule_timeout_killable(relax) ||
  357. xchk_iscan_aborted(iscan))
  358. return -ECANCELED;
  359. }
  360. iscan->cursor_ino--;
  361. return -EAGAIN;
  362. }
  363. /*
  364. * For an inode scan, we hold the AGI and want to try to grab a batch of
  365. * inodes. Holding the AGI prevents inodegc from clearing freed inodes,
  366. * so we must use noretry here. For every inode after the first one in the
  367. * batch, we don't want to wait, so we use retry there too. Finally, use
  368. * dontcache to avoid polluting the cache.
  369. */
  370. #define ISCAN_IGET_FLAGS (XFS_IGET_NORETRY | XFS_IGET_DONTCACHE)
/*
 * Grab an inode as part of an inode scan.  While scanning this inode, the
 * caller must ensure that no other threads can modify the inode until a call
 * to xchk_iscan_visit succeeds.
 *
 * Returns the number of incore inodes grabbed; -EAGAIN if the caller should
 * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode;
 * -ECANCELED if there's a fatal signal pending; or some other negative errno.
 *
 * The AGI buffer and the perag reference are always released before
 * returning.
 */
STATIC int
xchk_iscan_iget(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_buf		*agi_bp,
	xfs_inofree_t		allocmask,
	uint8_t			nr_inodes)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		ino = iscan->cursor_ino;
	unsigned int		idx = 0;
	unsigned int		i;
	int			error;

	/* The batch array must have been drained by a previous _iter call. */
	ASSERT(iscan->__inodes[0] == NULL);

	/* Fill the first slot in the inode array. */
	error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0,
			&iscan->__inodes[idx]);

	trace_xchk_iscan_iget(iscan, error);

	if (error == -ENOENT || error == -EAGAIN) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * It's possible that this inode has lost all of its links but
		 * hasn't yet been inactivated.  If we don't have a transaction
		 * or it's not writable, flush the inodegc workers and wait.
		 * If we have a non-empty transaction, we must not block on
		 * inodegc, which allocates its own transactions.
		 */
		if (sc->tp && !(sc->tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
			xfs_inodegc_push(mp);
		else
			xfs_inodegc_flush(mp);
		return xchk_iscan_iget_retry(iscan, true);
	}

	if (error == -EINVAL) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * We thought the inode was allocated, but the inode btree
		 * lookup failed, which means that it was freed since the last
		 * time we advanced the cursor.  Back up and try again.  This
		 * should never happen since we still hold the AGI buffer from
		 * the inobt check, but we need to be careful about infinite
		 * loops.
		 */
		return xchk_iscan_iget_retry(iscan, false);
	}

	if (error) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);
		return error;
	}

	idx++;
	ino++;
	allocmask >>= 1;

	/*
	 * Now that we've filled the first slot in __inodes, try to fill the
	 * rest of the batch with consecutively ordered inodes.  to reduce the
	 * number of _iter calls.  Make a bitmap of unallocated inodes from the
	 * zeroes in the inuse bitmap; these inodes will not be scanned, but
	 * the _want_live_update predicate will pass through all live updates.
	 *
	 * If we can't iget an allocated inode, stop and return what we have.
	 */
	mutex_lock(&iscan->lock);
	iscan->__batch_ino = ino - 1;
	iscan->__skipped_inomask = 0;
	mutex_unlock(&iscan->lock);

	for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) {
		if (!(allocmask & 1)) {
			/* Record skipped (unallocated) inodes in the mask. */
			ASSERT(!(iscan->__skipped_inomask & (1ULL << i)));

			mutex_lock(&iscan->lock);
			iscan->cursor_ino = ino;
			iscan->__skipped_inomask |= (1ULL << i);
			mutex_unlock(&iscan->lock);
			continue;
		}

		ASSERT(iscan->__inodes[idx] == NULL);

		error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0,
				&iscan->__inodes[idx]);
		if (error)
			break;

		mutex_lock(&iscan->lock);
		iscan->cursor_ino = ino;
		mutex_unlock(&iscan->lock);
		idx++;
	}

	trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx);
	xfs_trans_brelse(sc->tp, agi_bp);
	xfs_perag_put(pag);
	return idx;
}
  472. /*
  473. * Advance the visit cursor to reflect skipped inodes beyond whatever we
  474. * scanned.
  475. */
  476. STATIC void
  477. xchk_iscan_finish_batch(
  478. struct xchk_iscan *iscan)
  479. {
  480. xfs_ino_t highest_skipped;
  481. mutex_lock(&iscan->lock);
  482. if (iscan->__batch_ino != NULLFSINO) {
  483. highest_skipped = iscan->__batch_ino +
  484. xfs_highbit64(iscan->__skipped_inomask);
  485. iscan->__visited_ino = max(iscan->__visited_ino,
  486. highest_skipped);
  487. trace_xchk_iscan_skip(iscan);
  488. }
  489. iscan->__batch_ino = NULLFSINO;
  490. iscan->__skipped_inomask = 0;
  491. mutex_unlock(&iscan->lock);
  492. }
  493. /*
  494. * Advance the inode scan cursor to the next allocated inode and return up to
  495. * 64 consecutive allocated inodes starting with the cursor position.
  496. */
  497. STATIC int
  498. xchk_iscan_iter_batch(
  499. struct xchk_iscan *iscan)
  500. {
  501. struct xfs_scrub *sc = iscan->sc;
  502. int ret;
  503. xchk_iscan_finish_batch(iscan);
  504. if (iscan->iget_timeout)
  505. iscan->__iget_deadline = jiffies +
  506. msecs_to_jiffies(iscan->iget_timeout);
  507. do {
  508. struct xfs_buf *agi_bp = NULL;
  509. struct xfs_perag *pag = NULL;
  510. xfs_inofree_t allocmask = 0;
  511. uint8_t nr_inodes = 0;
  512. ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask,
  513. &nr_inodes);
  514. if (ret != 1)
  515. return ret;
  516. if (xchk_iscan_aborted(iscan)) {
  517. xfs_trans_brelse(sc->tp, agi_bp);
  518. xfs_perag_put(pag);
  519. ret = -ECANCELED;
  520. break;
  521. }
  522. ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes);
  523. } while (ret == -EAGAIN);
  524. return ret;
  525. }
  526. /*
  527. * Advance the inode scan cursor to the next allocated inode and return the
  528. * incore inode structure associated with it.
  529. *
  530. * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
  531. * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be
  532. * grabbed, or the usual negative errno.
  533. *
  534. * If the function returns -EBUSY and the caller can handle skipping an inode,
  535. * it may call this function again to continue the scan with the next allocated
  536. * inode.
  537. */
  538. int
  539. xchk_iscan_iter(
  540. struct xchk_iscan *iscan,
  541. struct xfs_inode **ipp)
  542. {
  543. unsigned int i;
  544. int error;
  545. /* Find a cached inode, or go get another batch. */
  546. for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
  547. if (iscan->__inodes[i])
  548. goto foundit;
  549. }
  550. error = xchk_iscan_iter_batch(iscan);
  551. if (error <= 0)
  552. return error;
  553. ASSERT(iscan->__inodes[0] != NULL);
  554. i = 0;
  555. foundit:
  556. /* Give the caller our reference. */
  557. *ipp = iscan->__inodes[i];
  558. iscan->__inodes[i] = NULL;
  559. return 1;
  560. }
  561. /* Clean up an xfs_iscan_iter call by dropping any inodes that we still hold. */
  562. void
  563. xchk_iscan_iter_finish(
  564. struct xchk_iscan *iscan)
  565. {
  566. struct xfs_scrub *sc = iscan->sc;
  567. unsigned int i;
  568. for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
  569. if (iscan->__inodes[i]) {
  570. xchk_irele(sc, iscan->__inodes[i]);
  571. iscan->__inodes[i] = NULL;
  572. }
  573. }
  574. }
/* Mark this inode scan finished and release resources. */
void
xchk_iscan_teardown(
	struct xchk_iscan	*iscan)
{
	/* Drop any batched inode references we still hold. */
	xchk_iscan_iter_finish(iscan);
	/* Mark the scan done so live update hooks pass everything through. */
	xchk_iscan_finish(iscan);
	mutex_destroy(&iscan->lock);
}
  584. /* Pick an AG from which to start a scan. */
  585. static inline xfs_ino_t
  586. xchk_iscan_rotor(
  587. struct xfs_mount *mp)
  588. {
  589. static atomic_t agi_rotor;
  590. unsigned int r = atomic_inc_return(&agi_rotor) - 1;
  591. /*
  592. * Rotoring *backwards* through the AGs, so we add one here before
  593. * subtracting from the agcount to arrive at an AG number.
  594. */
  595. r = (r % mp->m_sb.sb_agcount) + 1;
  596. return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0);
  597. }
  598. /*
  599. * Set ourselves up to start an inode scan. If the @iget_timeout and
  600. * @iget_retry_delay parameters are set, the scan will try to iget each inode
  601. * for @iget_timeout milliseconds. If an iget call indicates that the inode is
  602. * waiting to be inactivated, the CPU will relax for @iget_retry_delay
  603. * milliseconds after pushing the inactivation workers.
  604. */
  605. void
  606. xchk_iscan_start(
  607. struct xfs_scrub *sc,
  608. unsigned int iget_timeout,
  609. unsigned int iget_retry_delay,
  610. struct xchk_iscan *iscan)
  611. {
  612. xfs_ino_t start_ino;
  613. start_ino = xchk_iscan_rotor(sc->mp);
  614. iscan->__batch_ino = NULLFSINO;
  615. iscan->__skipped_inomask = 0;
  616. iscan->sc = sc;
  617. clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
  618. iscan->iget_timeout = iget_timeout;
  619. iscan->iget_retry_delay = iget_retry_delay;
  620. iscan->__visited_ino = start_ino;
  621. iscan->cursor_ino = start_ino;
  622. iscan->scan_start_ino = start_ino;
  623. mutex_init(&iscan->lock);
  624. memset(iscan->__inodes, 0, sizeof(iscan->__inodes));
  625. trace_xchk_iscan_start(iscan, start_ino);
  626. }
/*
 * Mark this inode as having been visited.  Callers must hold a sufficiently
 * exclusive lock on the inode to prevent concurrent modifications.
 */
void
xchk_iscan_mark_visited(
	struct xchk_iscan	*iscan,
	struct xfs_inode	*ip)
{
	/* Take the iscan lock so live update hooks see a consistent cursor. */
	mutex_lock(&iscan->lock);
	iscan->__visited_ino = ip->i_ino;
	trace_xchk_iscan_visit(iscan);
	mutex_unlock(&iscan->lock);
}
  641. /*
  642. * Did we skip this inode because it wasn't allocated when we loaded the batch?
  643. * If so, it is newly allocated and will not be scanned. All live updates to
  644. * this inode must be passed to the caller to maintain scan correctness.
  645. */
  646. static inline bool
  647. xchk_iscan_skipped(
  648. const struct xchk_iscan *iscan,
  649. xfs_ino_t ino)
  650. {
  651. if (iscan->__batch_ino == NULLFSINO)
  652. return false;
  653. if (ino < iscan->__batch_ino)
  654. return false;
  655. if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK)
  656. return false;
  657. return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino));
  658. }
/*
 * Do we need a live update for this inode?  This is true if the scanner thread
 * has visited this inode and the scan hasn't been aborted due to errors.
 * Callers must hold a sufficiently exclusive lock on the inode to prevent
 * scanners from reading any inode metadata.
 */
bool
xchk_iscan_want_live_update(
	struct xchk_iscan	*iscan,
	xfs_ino_t		ino)
{
	bool			ret = false;

	/* An aborted scan applies no updates at all. */
	if (xchk_iscan_aborted(iscan))
		return false;

	mutex_lock(&iscan->lock);

	trace_xchk_iscan_want_live_update(iscan, ino);

	/* Scan is finished, caller should receive all updates. */
	if (iscan->__visited_ino == NULLFSINO) {
		ret = true;
		goto unlock;
	}

	/*
	 * No inodes have been visited yet, so the visited cursor points at the
	 * start of the scan range.  The caller should not receive any updates.
	 */
	if (iscan->scan_start_ino == iscan->__visited_ino) {
		ret = false;
		goto unlock;
	}

	/*
	 * This inode was not allocated at the time of the iscan batch.
	 * The caller should receive all updates.
	 */
	if (xchk_iscan_skipped(iscan, ino)) {
		ret = true;
		goto unlock;
	}

	/*
	 * The visited cursor hasn't yet wrapped around the end of the FS.  If
	 * @ino is inside the starred range, the caller should receive updates:
	 *
	 * 0 ------------ S ************ V ------------ EOFS
	 */
	if (iscan->scan_start_ino <= iscan->__visited_ino) {
		if (ino >= iscan->scan_start_ino &&
		    ino <= iscan->__visited_ino)
			ret = true;
		goto unlock;
	}

	/*
	 * The visited cursor wrapped around the end of the FS.  If @ino is
	 * inside the starred range, the caller should receive updates:
	 *
	 * 0 ************ V ------------ S ************ EOFS
	 */
	if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
		ret = true;

unlock:
	mutex_unlock(&iscan->lock);
	return ret;
}