// SPDX-License-Identifier: LGPL-2.1
/*
 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
 *		Written by Takashi Sato <t-sato@yk.jp.nec.com>
 *			   Akira Fujita <a-fujita@rs.jp.nec.com>
 */

#include <linux/fs.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "ext4_extents.h"
  14. /**
  15. * get_ext_path() - Find an extent path for designated logical block number.
  16. * @inode: inode to be searched
  17. * @lblock: logical block number to find an extent path
  18. * @path: pointer to an extent path
  19. *
  20. * ext4_find_extent wrapper. Return an extent path pointer on success,
  21. * or an error pointer on failure.
  22. */
  23. static inline struct ext4_ext_path *
  24. get_ext_path(struct inode *inode, ext4_lblk_t lblock,
  25. struct ext4_ext_path *path)
  26. {
  27. path = ext4_find_extent(inode, lblock, path, EXT4_EX_NOCACHE);
  28. if (IS_ERR(path))
  29. return path;
  30. if (path[ext_depth(inode)].p_ext == NULL) {
  31. ext4_free_ext_path(path);
  32. return ERR_PTR(-ENODATA);
  33. }
  34. return path;
  35. }
  36. /**
  37. * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem
  38. * @first: inode to be locked
  39. * @second: inode to be locked
  40. *
  41. * Acquire write lock of i_data_sem of the two inodes
  42. */
  43. void
  44. ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
  45. {
  46. if (first < second) {
  47. down_write(&EXT4_I(first)->i_data_sem);
  48. down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER);
  49. } else {
  50. down_write(&EXT4_I(second)->i_data_sem);
  51. down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER);
  52. }
  53. }
  54. /**
  55. * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
  56. *
  57. * @orig_inode: original inode structure to be released its lock first
  58. * @donor_inode: donor inode structure to be released its lock second
  59. * Release write lock of i_data_sem of two inodes (orig and donor).
  60. */
  61. void
  62. ext4_double_up_write_data_sem(struct inode *orig_inode,
  63. struct inode *donor_inode)
  64. {
  65. up_write(&EXT4_I(orig_inode)->i_data_sem);
  66. up_write(&EXT4_I(donor_inode)->i_data_sem);
  67. }
  68. /**
  69. * mext_check_coverage - Check that all extents in range has the same type
  70. *
  71. * @inode: inode in question
  72. * @from: block offset of inode
  73. * @count: block count to be checked
  74. * @unwritten: extents expected to be unwritten
  75. * @err: pointer to save error value
  76. *
  77. * Return 1 if all extents in range has expected type, and zero otherwise.
  78. */
  79. static int
  80. mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
  81. int unwritten, int *err)
  82. {
  83. struct ext4_ext_path *path = NULL;
  84. struct ext4_extent *ext;
  85. int ret = 0;
  86. ext4_lblk_t last = from + count;
  87. while (from < last) {
  88. path = get_ext_path(inode, from, path);
  89. if (IS_ERR(path)) {
  90. *err = PTR_ERR(path);
  91. return ret;
  92. }
  93. ext = path[ext_depth(inode)].p_ext;
  94. if (unwritten != ext4_ext_is_unwritten(ext))
  95. goto out;
  96. from += ext4_ext_get_actual_len(ext);
  97. }
  98. ret = 1;
  99. out:
  100. ext4_free_ext_path(path);
  101. return ret;
  102. }
/**
 * mext_folio_double_lock - Grab and lock folio on both @inode1 and @inode2
 *
 * @inode1:	the inode structure
 * @inode2:	the inode structure
 * @index1:	folio index into @inode1's mapping
 * @index2:	folio index into @inode2's mapping
 * @folio:	result folio vector; folio[0] belongs to @inode1, folio[1]
 *		to @inode2, regardless of lock acquisition order
 *
 * Grab two locked folios for the inodes, acquiring them in inode-address
 * order so two racing callers on the same pair cannot deadlock on folio
 * locks.  Returns 0 on success with both folios locked and referenced, or
 * a negative errno with no folio held.
 */
static int
mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
		       pgoff_t index1, pgoff_t index2, struct folio *folio[2])
{
	struct address_space *mapping[2];
	unsigned int flags;

	BUG_ON(!inode1 || !inode2);
	/*
	 * Order the two mappings by inode address; swap the indexes along
	 * with them so index1 always pairs with mapping[0].
	 */
	if (inode1 < inode2) {
		mapping[0] = inode1->i_mapping;
		mapping[1] = inode2->i_mapping;
	} else {
		swap(index1, index2);
		mapping[0] = inode2->i_mapping;
		mapping[1] = inode1->i_mapping;
	}

	/*
	 * NOFS scope: folio allocation here must not recurse into the
	 * filesystem via reclaim while we are in the middle of a move.
	 */
	flags = memalloc_nofs_save();
	folio[0] = __filemap_get_folio(mapping[0], index1, FGP_WRITEBEGIN,
			mapping_gfp_mask(mapping[0]));
	if (IS_ERR(folio[0])) {
		memalloc_nofs_restore(flags);
		return PTR_ERR(folio[0]);
	}

	folio[1] = __filemap_get_folio(mapping[1], index2, FGP_WRITEBEGIN,
			mapping_gfp_mask(mapping[1]));
	memalloc_nofs_restore(flags);
	if (IS_ERR(folio[1])) {
		/* Drop the first folio's lock and reference on failure. */
		folio_unlock(folio[0]);
		folio_put(folio[0]);
		return PTR_ERR(folio[1]);
	}
	/*
	 * __filemap_get_folio() may not wait on folio's writeback if
	 * BDI not demand that. But it is reasonable to be very conservative
	 * here and explicitly wait on folio's writeback
	 */
	folio_wait_writeback(folio[0]);
	folio_wait_writeback(folio[1]);
	/* Restore caller's orig/donor ordering if we locked swapped. */
	if (inode1 > inode2)
		swap(folio[0], folio[1]);
	return 0;
}
/*
 * Force folio buffers uptodate w/o dropping folio's lock.
 *
 * Ensure every buffer of @folio covering byte range [@from, @to) is
 * uptodate, reading from disk where needed, while the folio stays locked.
 * Reads for all needed buffers are submitted asynchronously first, then
 * waited on in a second pass, so multiple block reads can overlap.
 * Returns 0 on success or a negative errno (-EIO on read failure).
 */
static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
{
	struct inode *inode = folio->mapping->host;
	sector_t block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, block_start, block_end;
	int nr = 0;			/* number of reads submitted */
	bool partial = false;		/* some buffer outside range not uptodate */

	BUG_ON(!folio_test_locked(folio));
	BUG_ON(folio_test_writeback(folio));

	if (folio_test_uptodate(folio))
		return 0;

	blocksize = i_blocksize(inode);
	head = folio_buffers(folio);
	if (!head)
		head = create_empty_buffers(folio, blocksize, 0);

	/* First logical block of the folio. */
	block = folio_pos(folio) >> inode->i_blkbits;
	block_end = 0;
	bh = head;
	do {
		block_start = block_end;
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			/* Outside the requested range: only note that the
			 * folio as a whole cannot be marked uptodate. */
			if (!buffer_uptodate(bh))
				partial = true;
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		if (!buffer_mapped(bh)) {
			int err = ext4_get_block(inode, block, bh, 0);
			if (err)
				return err;
			if (!buffer_mapped(bh)) {
				/* A hole: zero-fill instead of reading. */
				folio_zero_range(folio, block_start, blocksize);
				set_buffer_uptodate(bh);
				continue;
			}
		}
		lock_buffer(bh);
		if (buffer_uptodate(bh)) {
			unlock_buffer(bh);
			continue;
		}
		/* Submit async read; completion unlocks the buffer. */
		ext4_read_bh_nowait(bh, 0, NULL, false);
		nr++;
	} while (block++, (bh = bh->b_this_page) != head);

	/* No io required */
	if (!nr)
		goto out;

	/* Second pass: wait for the submitted reads to complete. */
	bh = head;
	do {
		if (bh_offset(bh) + blocksize <= from)
			continue;
		if (bh_offset(bh) >= to)
			break;
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			continue;
		return -EIO;
	} while ((bh = bh->b_this_page) != head);
out:
	/* Folio is uptodate only if every buffer, in or out of range, is. */
	if (!partial)
		folio_mark_uptodate(folio);
	return 0;
}
/**
 * move_extent_per_page - Move extent data per page
 *
 * @o_filp:			file structure of original file
 * @donor_inode:		donor inode
 * @orig_page_offset:		page index on original file
 * @donor_page_offset:		page index on donor file
 * @data_offset_in_page:	block index where data swapping starts
 * @block_len_in_page:		the number of blocks to be swapped
 * @unwritten:			orig extent is unwritten or not
 * @err:			pointer to save return value
 *
 * Save the data in original inode blocks and replace original inode extents
 * with donor inode extents by calling ext4_swap_extents().
 * Finally, write out the saved data in new original inode blocks. Return
 * replaced block count; 0 with *err set on failure.  On -ENOSPC/-EBUSY the
 * whole operation is retried from scratch (see the "again" label).
 */
static int
move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
		     pgoff_t orig_page_offset, pgoff_t donor_page_offset,
		     int data_offset_in_page,
		     int block_len_in_page, int unwritten, int *err)
{
	struct inode *orig_inode = file_inode(o_filp);
	struct folio *folio[2] = {NULL, NULL};
	handle_t *handle;
	ext4_lblk_t orig_blk_offset, donor_blk_offset;
	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
	unsigned int tmp_data_size, data_size, replaced_size;
	int i, err2, jblocks, retries = 0;
	int replaced_count = 0;
	/* Byte offset within the page where the swapped range starts. */
	int from = data_offset_in_page << orig_inode->i_blkbits;
	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
	struct super_block *sb = orig_inode->i_sb;
	struct buffer_head *bh = NULL;

	/*
	 * It needs twice the amount of ordinary journal buffers because
	 * inode and donor_inode may change each different metadata blocks.
	 */
again:
	*err = 0;
	jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
	if (IS_ERR(handle)) {
		*err = PTR_ERR(handle);
		return 0;
	}

	orig_blk_offset = orig_page_offset * blocks_per_page +
		data_offset_in_page;

	donor_blk_offset = donor_page_offset * blocks_per_page +
		data_offset_in_page;

	/* Calculate data_size */
	if ((orig_blk_offset + block_len_in_page - 1) ==
	    ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
		/* Replace the last block */
		tmp_data_size = orig_inode->i_size & (blocksize - 1);
		/*
		 * If data_size equal zero, it shows data_size is multiples of
		 * blocksize. So we set appropriate value.
		 */
		if (tmp_data_size == 0)
			tmp_data_size = blocksize;

		data_size = tmp_data_size +
			((block_len_in_page - 1) << orig_inode->i_blkbits);
	} else
		data_size = block_len_in_page << orig_inode->i_blkbits;

	replaced_size = data_size;

	*err = mext_folio_double_lock(orig_inode, donor_inode, orig_page_offset,
				      donor_page_offset, folio);
	if (unlikely(*err < 0))
		goto stop_journal;
	/*
	 * If orig extent was unwritten it can become initialized
	 * at any time after i_data_sem was dropped, in order to
	 * serialize with delalloc we have recheck extent while we
	 * hold page's lock, if it is still the case data copy is not
	 * necessary, just swap data blocks between orig and donor.
	 */

	/* This path only handles order-0 folios of equal size. */
	VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]);
	VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]);
	VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]);

	if (unwritten) {
		ext4_double_down_write_data_sem(orig_inode, donor_inode);
		/* If any of extents in range became initialized we have to
		 * fallback to data copying */
		unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
						block_len_in_page, 1, err);
		if (*err)
			goto drop_data_sem;

		unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
						 block_len_in_page, 1, err);
		if (*err)
			goto drop_data_sem;

		if (!unwritten) {
			ext4_double_up_write_data_sem(orig_inode, donor_inode);
			goto data_copy;
		}
		/* Buffers must be released before swapping extents; a
		 * pinned buffer makes the swap unsafe, so bail with -EBUSY. */
		if (!filemap_release_folio(folio[0], 0) ||
		    !filemap_release_folio(folio[1], 0)) {
			*err = -EBUSY;
			goto drop_data_sem;
		}
		replaced_count = ext4_swap_extents(handle, orig_inode,
						   donor_inode, orig_blk_offset,
						   donor_blk_offset,
						   block_len_in_page, 1, err);
	drop_data_sem:
		ext4_double_up_write_data_sem(orig_inode, donor_inode);
		goto unlock_folios;
	}
data_copy:
	*err = mext_page_mkuptodate(folio[0], from, from + replaced_size);
	if (*err)
		goto unlock_folios;

	/* At this point all buffers in range are uptodate, old mapping layout
	 * is no longer required, try to drop it now. */
	if (!filemap_release_folio(folio[0], 0) ||
	    !filemap_release_folio(folio[1], 0)) {
		*err = -EBUSY;
		goto unlock_folios;
	}
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
					   orig_blk_offset, donor_blk_offset,
					   block_len_in_page, 1, err);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	if (*err) {
		/* Partial swap: shrink the range to what actually moved so
		 * the data-copy below still covers the swapped blocks. */
		if (replaced_count) {
			block_len_in_page = replaced_count;
			replaced_size =
				block_len_in_page << orig_inode->i_blkbits;
		} else
			goto unlock_folios;
	}
	/* Perform all necessary steps similar write_begin()/write_end()
	 * but keeping in mind that i_size will not change */
	bh = folio_buffers(folio[0]);
	if (!bh)
		bh = create_empty_buffers(folio[0],
				1 << orig_inode->i_blkbits, 0);
	/* Advance to the first buffer of the swapped range. */
	for (i = 0; i < data_offset_in_page; i++)
		bh = bh->b_this_page;
	/* Re-map each buffer to the (newly swapped-in) blocks. */
	for (i = 0; i < block_len_in_page; i++) {
		*err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
		if (*err < 0)
			goto repair_branches;
		bh = bh->b_this_page;
	}

	block_commit_write(&folio[0]->page, from, from + replaced_size);

	/* Even in case of data=writeback it is reasonable to pin
	 * inode to transaction, to prevent unexpected data loss */
	*err = ext4_jbd2_inode_add_write(handle, orig_inode,
			(loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);

unlock_folios:
	folio_unlock(folio[0]);
	folio_put(folio[0]);
	folio_unlock(folio[1]);
	folio_put(folio[1]);
stop_journal:
	ext4_journal_stop(handle);
	if (*err == -ENOSPC &&
	    ext4_should_retry_alloc(sb, &retries))
		goto again;
	/* Buffer was busy because probably is pinned to journal transaction,
	 * force transaction commit may help to free it. */
	if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
	    jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
		goto again;
	return replaced_count;

repair_branches:
	/*
	 * This should never ever happen!
	 * Extents are swapped already, but we are not able to copy data.
	 * Try to swap extents to it's original places
	 */
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
					   orig_blk_offset, donor_blk_offset,
					   block_len_in_page, 0, &err2);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	if (replaced_count != block_len_in_page) {
		ext4_error_inode_block(orig_inode, (sector_t)(orig_blk_offset),
				       EIO, "Unable to copy data block,"
				       " data will be lost.");
		*err = -EIO;
	}
	replaced_count = 0;
	goto unlock_folios;
}
/**
 * mext_check_arguments - Check whether move extent can be done
 *
 * @orig_inode:		original inode
 * @donor_inode:	donor inode
 * @orig_start:		logical start offset in block for orig
 * @donor_start:	logical start offset in block for donor
 * @len:		the number of blocks to be moved; may be clamped
 *			down here to the shorter of the two files' EOFs
 *
 * Check the arguments of ext4_move_extents() whether the files can be
 * exchanged with each other.
 * Return 0 on success, or a negative error value on failure.
 */
static int
mext_check_arguments(struct inode *orig_inode,
		     struct inode *donor_inode, __u64 orig_start,
		     __u64 donor_start, __u64 *len)
{
	__u64 orig_eof, donor_eof;
	unsigned int blkbits = orig_inode->i_blkbits;
	unsigned int blocksize = 1 << blkbits;

	/* EOF in blocks, rounded up to a full block. */
	orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
	donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;

	/* Swapping blocks into a setuid/setgid file could plant data. */
	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
		ext4_debug("ext4 move extent: suid or sgid is set"
			   " to donor file [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
		return -EPERM;

	/* Ext4 move extent does not support swap files */
	if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
		ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -ETXTBSY;
	}

	/* NOTE(review): this rejects only when *both* files are quota
	 * files (&&, not ||) — confirm this matches the intended policy. */
	if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) {
		ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EOPNOTSUPP;
	}

	/* Ext4 move extent supports only extent based file */
	if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
		ext4_debug("ext4 move extent: orig file is not extents "
			   "based file [ino:orig %lu]\n", orig_inode->i_ino);
		return -EOPNOTSUPP;
	} else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
		ext4_debug("ext4 move extent: donor file is not extents "
			   "based file [ino:donor %lu]\n", donor_inode->i_ino);
		return -EOPNOTSUPP;
	}

	if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
		ext4_debug("ext4 move extent: File size is 0 byte\n");
		return -EINVAL;
	}

	/* Start offset should be same */
	if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
	    (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
		ext4_debug("ext4 move extent: orig and donor's start "
			   "offsets are not aligned [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	/* Whole range must lie within the extent tree's addressable space. */
	if ((orig_start >= EXT_MAX_BLOCKS) ||
	    (donor_start >= EXT_MAX_BLOCKS) ||
	    (*len > EXT_MAX_BLOCKS) ||
	    (donor_start + *len >= EXT_MAX_BLOCKS) ||
	    (orig_start + *len >= EXT_MAX_BLOCKS))  {
		ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
			   "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	/*
	 * Clamp *len so the range ends at the shorter of the two EOFs.
	 * NOTE(review): the "- 1" in the clamp conditions looks like it
	 * permits a range one block past EOF — confirm against the extent
	 * swap semantics before changing.
	 */
	if (orig_eof <= orig_start)
		*len = 0;
	else if (orig_eof < orig_start + *len - 1)
		*len = orig_eof - orig_start;
	if (donor_eof <= donor_start)
		*len = 0;
	else if (donor_eof < donor_start + *len - 1)
		*len = donor_eof - donor_start;
	if (!*len) {
		ext4_debug("ext4 move extent: len should not be 0 "
			   "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
			   donor_inode->i_ino);
		return -EINVAL;
	}

	return 0;
}
/**
 * ext4_move_extents - Exchange the specified range of a file
 *
 * @o_filp:		file structure of the original file
 * @d_filp:		file structure of the donor file
 * @orig_blk:		start offset in block for orig
 * @donor_blk:		start offset in block for donor
 * @len:		the number of blocks to be moved
 * @moved_len:		moved block length
 *
 * This function returns 0 and moved block length is set in moved_len
 * if succeed, otherwise returns error value.
 *
 * Holes in the original file are skipped (the donor offset is advanced
 * by the same amount).  Work is chunked so that each move_extent_per_page()
 * call stays within a single page of the original file.
 */
int
ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
		  __u64 donor_blk, __u64 len, __u64 *moved_len)
{
	struct inode *orig_inode = file_inode(o_filp);
	struct inode *donor_inode = file_inode(d_filp);
	struct ext4_ext_path *path = NULL;
	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
	ext4_lblk_t o_end, o_start = orig_blk;
	ext4_lblk_t d_start = donor_blk;
	int ret;

	if (orig_inode->i_sb != donor_inode->i_sb) {
		ext4_debug("ext4 move extent: The argument files "
			   "should be in same FS [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	/* orig and donor should be different inodes */
	if (orig_inode == donor_inode) {
		ext4_debug("ext4 move extent: The argument files should not "
			   "be same inode [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	/* Regular file check */
	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
		ext4_debug("ext4 move extent: The argument files should be "
			   "regular file [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	/* TODO: it's not obvious how to swap blocks for inodes with full
	   journaling enabled */
	if (ext4_should_journal_data(orig_inode) ||
	    ext4_should_journal_data(donor_inode)) {
		ext4_msg(orig_inode->i_sb, KERN_ERR,
			 "Online defrag not supported with data journaling");
		return -EOPNOTSUPP;
	}

	if (IS_ENCRYPTED(orig_inode) || IS_ENCRYPTED(donor_inode)) {
		ext4_msg(orig_inode->i_sb, KERN_ERR,
			 "Online defrag not supported for encrypted files");
		return -EOPNOTSUPP;
	}

	/* Protect orig and donor inodes against a truncate */
	lock_two_nondirectories(orig_inode, donor_inode);

	/* Wait for all existing dio workers */
	inode_dio_wait(orig_inode);
	inode_dio_wait(donor_inode);

	/* Protect extent tree against block allocations via delalloc */
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	/* Check the filesystem environment whether move_extent can be done */
	ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
				   donor_blk, &len);
	if (ret)
		goto out;
	o_end = o_start + len;

	*moved_len = 0;
	while (o_start < o_end) {
		struct ext4_extent *ex;
		ext4_lblk_t cur_blk, next_blk;
		pgoff_t orig_page_index, donor_page_index;
		int offset_in_page;
		int unwritten, cur_len;

		path = get_ext_path(orig_inode, o_start, path);
		if (IS_ERR(path)) {
			ret = PTR_ERR(path);
			goto out;
		}
		ex = path[path->p_depth].p_ext;
		cur_blk = le32_to_cpu(ex->ee_block);
		cur_len = ext4_ext_get_actual_len(ex);
		/* Check hole before the start pos */
		if (cur_blk + cur_len - 1 < o_start) {
			next_blk = ext4_ext_next_allocated_block(path);
			if (next_blk == EXT_MAX_BLOCKS) {
				ret = -ENODATA;
				goto out;
			}
			/* Skip the hole on both files, keeping alignment. */
			d_start += next_blk - o_start;
			o_start = next_blk;
			continue;
		/* Check hole after the start pos */
		} else if (cur_blk > o_start) {
			/* Skip hole */
			d_start += cur_blk - o_start;
			o_start = cur_blk;
			/* Extent inside requested range ?*/
			if (cur_blk >= o_end)
				goto out;
		} else { /* in_range(o_start, o_blk, o_len) */
			/* Extent starts before o_start: count only the tail. */
			cur_len += cur_blk - o_start;
		}
		unwritten = ext4_ext_is_unwritten(ex);
		if (o_end - o_start < cur_len)
			cur_len = o_end - o_start;

		orig_page_index = o_start >> (PAGE_SHIFT -
					       orig_inode->i_blkbits);
		donor_page_index = d_start >> (PAGE_SHIFT -
					       donor_inode->i_blkbits);
		offset_in_page = o_start % blocks_per_page;
		/* Never cross a page boundary in one chunk. */
		if (cur_len > blocks_per_page - offset_in_page)
			cur_len = blocks_per_page - offset_in_page;
		/*
		 * Up semaphore to avoid following problems:
		 * a. transaction deadlock among ext4_journal_start,
		 *    ->write_begin via pagefault, and jbd2_journal_commit
		 * b. racing with ->read_folio, ->write_begin, and
		 *    ext4_get_block in move_extent_per_page
		 */
		ext4_double_up_write_data_sem(orig_inode, donor_inode);
		/* Swap original branches with new branches */
		*moved_len += move_extent_per_page(o_filp, donor_inode,
				    orig_page_index, donor_page_index,
				    offset_in_page, cur_len,
				    unwritten, &ret);
		ext4_double_down_write_data_sem(orig_inode, donor_inode);
		if (ret < 0)
			break;
		o_start += cur_len;
		d_start += cur_len;
	}

out:
	if (*moved_len) {
		/* Drop stale preallocations on both inodes after the swap. */
		ext4_discard_preallocations(orig_inode);
		ext4_discard_preallocations(donor_inode);
	}

	ext4_free_ext_path(path);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	unlock_two_nondirectories(orig_inode, donor_inode);

	return ret;
}