| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928 |
- // SPDX-License-Identifier: GPL-2.0-or-later
- /*
- * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <djwong@kernel.org>
- */
- #include "xfs.h"
- #include "xfs_shared.h"
- #include "xfs_format.h"
- #include "xfs_log_format.h"
- #include "xfs_trans_resv.h"
- #include "xfs_mount.h"
- #include "xfs_defer.h"
- #include "xfs_inode.h"
- #include "xfs_trans.h"
- #include "xfs_quota.h"
- #include "xfs_bmap_util.h"
- #include "xfs_reflink.h"
- #include "xfs_trace.h"
- #include "xfs_exchrange.h"
- #include "xfs_exchmaps.h"
- #include "xfs_sb.h"
- #include "xfs_icache.h"
- #include "xfs_log.h"
- #include "xfs_rtbitmap.h"
- #include <linux/fsnotify.h>
- /* Lock (and optionally join) two inodes for a file range exchange. */
- void
- xfs_exchrange_ilock(
- struct xfs_trans *tp,
- struct xfs_inode *ip1,
- struct xfs_inode *ip2)
- {
- if (ip1 != ip2)
- xfs_lock_two_inodes(ip1, XFS_ILOCK_EXCL,
- ip2, XFS_ILOCK_EXCL);
- else
- xfs_ilock(ip1, XFS_ILOCK_EXCL);
- if (tp) {
- xfs_trans_ijoin(tp, ip1, 0);
- if (ip2 != ip1)
- xfs_trans_ijoin(tp, ip2, 0);
- }
- }
- /* Unlock two inodes after a file range exchange operation. */
- void
- xfs_exchrange_iunlock(
- struct xfs_inode *ip1,
- struct xfs_inode *ip2)
- {
- if (ip2 != ip1)
- xfs_iunlock(ip2, XFS_ILOCK_EXCL);
- xfs_iunlock(ip1, XFS_ILOCK_EXCL);
- }
- /*
- * Estimate the resource requirements to exchange file contents between the two
- * files. The caller is required to hold the IOLOCK and the MMAPLOCK and to
- * have flushed both inodes' pagecache and active direct-ios.
- */
- int
- xfs_exchrange_estimate(
- struct xfs_exchmaps_req *req)
- {
- int error;
- xfs_exchrange_ilock(NULL, req->ip1, req->ip2);
- error = xfs_exchmaps_estimate(req);
- xfs_exchrange_iunlock(req->ip1, req->ip2);
- return error;
- }
- /*
- * Check that file2's metadata agree with the snapshot that we took for the
- * range commit request.
- *
- * This should be called after the filesystem has locked /all/ inode metadata
- * against modification.
- */
- STATIC int
- xfs_exchrange_check_freshness(
- const struct xfs_exchrange *fxr,
- struct xfs_inode *ip2)
- {
- struct inode *inode2 = VFS_I(ip2);
- struct timespec64 ctime = inode_get_ctime(inode2);
- struct timespec64 mtime = inode_get_mtime(inode2);
- trace_xfs_exchrange_freshness(fxr, ip2);
- /* Check that file2 hasn't otherwise been modified. */
- if (fxr->file2_ino != ip2->i_ino ||
- fxr->file2_gen != inode2->i_generation ||
- !timespec64_equal(&fxr->file2_ctime, &ctime) ||
- !timespec64_equal(&fxr->file2_mtime, &mtime))
- return -EBUSY;
- return 0;
- }
- #define QRETRY_IP1 (0x1)
- #define QRETRY_IP2 (0x2)
- /*
- * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
- * this if quota enforcement is disabled or if both inodes' dquots are the
- * same. The qretry structure must be initialized to zeroes before the first
- * call to this function.
- */
- STATIC int
- xfs_exchrange_reserve_quota(
- struct xfs_trans *tp,
- const struct xfs_exchmaps_req *req,
- unsigned int *qretry)
- {
- int64_t ddelta, rdelta;
- int ip1_error = 0;
- int error;
- /*
- * Don't bother with a quota reservation if we're not enforcing them
- * or the two inodes have the same dquots.
- */
- if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
- (req->ip1->i_udquot == req->ip2->i_udquot &&
- req->ip1->i_gdquot == req->ip2->i_gdquot &&
- req->ip1->i_pdquot == req->ip2->i_pdquot))
- return 0;
- *qretry = 0;
- /*
- * For each file, compute the net gain in the number of regular blocks
- * that will be mapped into that file and reserve that much quota. The
- * quota counts must be able to absorb at least that much space.
- */
- ddelta = req->ip2_bcount - req->ip1_bcount;
- rdelta = req->ip2_rtbcount - req->ip1_rtbcount;
- if (ddelta > 0 || rdelta > 0) {
- error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
- ddelta > 0 ? ddelta : 0,
- rdelta > 0 ? rdelta : 0,
- false);
- if (error == -EDQUOT || error == -ENOSPC) {
- /*
- * Save this error and see what happens if we try to
- * reserve quota for ip2. Then report both.
- */
- *qretry |= QRETRY_IP1;
- ip1_error = error;
- error = 0;
- }
- if (error)
- return error;
- }
- if (ddelta < 0 || rdelta < 0) {
- error = xfs_trans_reserve_quota_nblks(tp, req->ip2,
- ddelta < 0 ? -ddelta : 0,
- rdelta < 0 ? -rdelta : 0,
- false);
- if (error == -EDQUOT || error == -ENOSPC)
- *qretry |= QRETRY_IP2;
- if (error)
- return error;
- }
- if (ip1_error)
- return ip1_error;
- /*
- * For each file, forcibly reserve the gross gain in mapped blocks so
- * that we don't trip over any quota block reservation assertions.
- * We must reserve the gross gain because the quota code subtracts from
- * bcount the number of blocks that we unmap; it does not add that
- * quantity back to the quota block reservation.
- */
- error = xfs_trans_reserve_quota_nblks(tp, req->ip1, req->ip1_bcount,
- req->ip1_rtbcount, true);
- if (error)
- return error;
- return xfs_trans_reserve_quota_nblks(tp, req->ip2, req->ip2_bcount,
- req->ip2_rtbcount, true);
- }
- /* Exchange the mappings (and hence the contents) of two files' forks. */
- STATIC int
- xfs_exchrange_mappings(
- const struct xfs_exchrange *fxr,
- struct xfs_inode *ip1,
- struct xfs_inode *ip2)
- {
- struct xfs_mount *mp = ip1->i_mount;
- struct xfs_exchmaps_req req = {
- .ip1 = ip1,
- .ip2 = ip2,
- .startoff1 = XFS_B_TO_FSBT(mp, fxr->file1_offset),
- .startoff2 = XFS_B_TO_FSBT(mp, fxr->file2_offset),
- .blockcount = XFS_B_TO_FSB(mp, fxr->length),
- };
- struct xfs_trans *tp;
- unsigned int qretry;
- bool retried = false;
- int error;
- trace_xfs_exchrange_mappings(fxr, ip1, ip2);
- if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
- req.flags |= XFS_EXCHMAPS_SET_SIZES;
- if (fxr->flags & XFS_EXCHANGE_RANGE_FILE1_WRITTEN)
- req.flags |= XFS_EXCHMAPS_INO1_WRITTEN;
- /*
- * Round the request length up to the nearest file allocation unit.
- * The prep function already checked that the request offsets and
- * length in @fxr are safe to round up.
- */
- if (xfs_inode_has_bigrtalloc(ip2))
- req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount);
- error = xfs_exchrange_estimate(&req);
- if (error)
- return error;
- retry:
- /* Allocate the transaction, lock the inodes, and join them. */
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, req.resblks, 0,
- XFS_TRANS_RES_FDBLKS, &tp);
- if (error)
- return error;
- xfs_exchrange_ilock(tp, ip1, ip2);
- trace_xfs_exchrange_before(ip2, 2);
- trace_xfs_exchrange_before(ip1, 1);
- error = xfs_exchmaps_check_forks(mp, &req);
- if (error)
- goto out_trans_cancel;
- /*
- * Reserve ourselves some quota if any of them are in enforcing mode.
- * In theory we only need enough to satisfy the change in the number
- * of blocks between the two ranges being remapped.
- */
- error = xfs_exchrange_reserve_quota(tp, &req, &qretry);
- if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
- xfs_trans_cancel(tp);
- xfs_exchrange_iunlock(ip1, ip2);
- if (qretry & QRETRY_IP1)
- xfs_blockgc_free_quota(ip1, 0);
- if (qretry & QRETRY_IP2)
- xfs_blockgc_free_quota(ip2, 0);
- retried = true;
- goto retry;
- }
- if (error)
- goto out_trans_cancel;
- /* If we got this far on a dry run, all parameters are ok. */
- if (fxr->flags & XFS_EXCHANGE_RANGE_DRY_RUN)
- goto out_trans_cancel;
- /* Update the mtime and ctime of both files. */
- if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME1)
- xfs_trans_ichgtime(tp, ip1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME2)
- xfs_trans_ichgtime(tp, ip2, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_exchange_mappings(tp, &req);
- /*
- * Force the log to persist metadata updates if the caller or the
- * administrator requires this. The generic prep function already
- * flushed the relevant parts of the page cache.
- */
- if (xfs_has_wsync(mp) || (fxr->flags & XFS_EXCHANGE_RANGE_DSYNC))
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp);
- trace_xfs_exchrange_after(ip2, 2);
- trace_xfs_exchrange_after(ip1, 1);
- if (error)
- goto out_unlock;
- /*
- * If the caller wanted us to exchange the contents of two complete
- * files of unequal length, exchange the incore sizes now. This should
- * be safe because we flushed both files' page caches, exchanged all
- * the mappings, and updated the ondisk sizes.
- */
- if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
- loff_t temp;
- temp = i_size_read(VFS_I(ip2));
- i_size_write(VFS_I(ip2), i_size_read(VFS_I(ip1)));
- i_size_write(VFS_I(ip1), temp);
- }
- out_unlock:
- xfs_exchrange_iunlock(ip1, ip2);
- return error;
- out_trans_cancel:
- xfs_trans_cancel(tp);
- goto out_unlock;
- }
- /*
- * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE.
- * This part deals with struct file objects and byte ranges and does not deal
- * with XFS-specific data structures such as xfs_inodes and block ranges. This
- * separation may some day facilitate porting to another filesystem.
- *
- * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in
- * file1 with the same number of bytes starting at fxr.file2_offset in file2.
- * Implementations must call xfs_exchange_range_prep to prepare the two
- * files after locking them against IO but before taking the inode metadata
- * locks; and they must update the inode change and mod
- * times of both files as part of the metadata update. The timestamp update
- * and freshness checks must be done atomically as part of the data exchange
- * operation to ensure correctness of the freshness check.
- * xfs_exchange_range_finish must be called after the operation completes
- * successfully but before locks are dropped.
- */
- /*
- * Performs necessary checks before doing a range exchange, having stabilized
- * mutable inode attributes via i_rwsem.
- */
- static inline int
- xfs_exchange_range_checks(
- struct xfs_exchrange *fxr,
- unsigned int alloc_unit)
- {
- struct inode *inode1 = file_inode(fxr->file1);
- loff_t size1 = i_size_read(inode1);
- struct inode *inode2 = file_inode(fxr->file2);
- loff_t size2 = i_size_read(inode2);
- uint64_t allocmask = alloc_unit - 1;
- int64_t test_len;
- uint64_t blen;
- loff_t tmp;
- int error;
- /* Don't touch certain kinds of inodes */
- if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2))
- return -EPERM;
- if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
- return -ETXTBSY;
- /* Ranges cannot start after EOF. */
- if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
- return -EINVAL;
- if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
- /*
- * If the caller said to exchange to EOF, we set the length of
- * the request large enough to cover everything to the end of
- * both files.
- */
- fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
- size2 - fxr->file2_offset);
- } else {
- /*
- * Otherwise we require both ranges to end within EOF.
- */
- if (fxr->file1_offset + fxr->length > size1 ||
- fxr->file2_offset + fxr->length > size2)
- return -EINVAL;
- }
- /*
- * The start of both ranges must be aligned to the file allocation
- * unit.
- */
- if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) ||
- !IS_ALIGNED(fxr->file2_offset, alloc_unit))
- return -EINVAL;
- /* Ensure offsets don't wrap. */
- if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) ||
- check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
- return -EINVAL;
- /*
- * Make sure we don't hit any file size limits. If we hit any size
- * limits such that test_length was adjusted, we abort the whole
- * operation.
- */
- test_len = fxr->length;
- error = generic_write_check_limits(fxr->file2, fxr->file2_offset,
- &test_len);
- if (error)
- return error;
- error = generic_write_check_limits(fxr->file1, fxr->file1_offset,
- &test_len);
- if (error)
- return error;
- if (test_len != fxr->length)
- return -EINVAL;
- /*
- * If the user wanted us to exchange up to the infile's EOF, round up
- * to the next allocation unit boundary for this check. Do the same
- * for the outfile.
- *
- * Otherwise, reject the range length if it's not aligned to an
- * allocation unit.
- */
- if (fxr->file1_offset + fxr->length == size1)
- blen = ALIGN(size1, alloc_unit) - fxr->file1_offset;
- else if (fxr->file2_offset + fxr->length == size2)
- blen = ALIGN(size2, alloc_unit) - fxr->file2_offset;
- else if (!IS_ALIGNED(fxr->length, alloc_unit))
- return -EINVAL;
- else
- blen = fxr->length;
- /* Don't allow overlapped exchanges within the same file. */
- if (inode1 == inode2 &&
- fxr->file2_offset + blen > fxr->file1_offset &&
- fxr->file1_offset + blen > fxr->file2_offset)
- return -EINVAL;
- /*
- * Ensure that we don't exchange a partial EOF block into the middle of
- * another file.
- */
- if ((fxr->length & allocmask) == 0)
- return 0;
- blen = fxr->length;
- if (fxr->file2_offset + blen < size2)
- blen &= ~allocmask;
- if (fxr->file1_offset + blen < size1)
- blen &= ~allocmask;
- return blen == fxr->length ? 0 : -EINVAL;
- }
- /*
- * Check that the two inodes are eligible for range exchanges, the ranges make
- * sense, and then flush all dirty data. Caller must ensure that the inodes
- * have been locked against any other modifications.
- */
- static inline int
- xfs_exchange_range_prep(
- struct xfs_exchrange *fxr,
- unsigned int alloc_unit)
- {
- struct inode *inode1 = file_inode(fxr->file1);
- struct inode *inode2 = file_inode(fxr->file2);
- bool same_inode = (inode1 == inode2);
- int error;
- /* Check that we don't violate system file offset limits. */
- error = xfs_exchange_range_checks(fxr, alloc_unit);
- if (error || fxr->length == 0)
- return error;
- /* Wait for the completion of any pending IOs on both files */
- inode_dio_wait(inode1);
- if (!same_inode)
- inode_dio_wait(inode2);
- error = filemap_write_and_wait_range(inode1->i_mapping,
- fxr->file1_offset,
- fxr->file1_offset + fxr->length - 1);
- if (error)
- return error;
- error = filemap_write_and_wait_range(inode2->i_mapping,
- fxr->file2_offset,
- fxr->file2_offset + fxr->length - 1);
- if (error)
- return error;
- /*
- * If the files or inodes involved require synchronous writes, amend
- * the request to force the filesystem to flush all data and metadata
- * to disk after the operation completes.
- */
- if (((fxr->file1->f_flags | fxr->file2->f_flags) & O_SYNC) ||
- IS_SYNC(inode1) || IS_SYNC(inode2))
- fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC;
- return 0;
- }
- /*
- * Finish a range exchange operation, if it was successful. Caller must ensure
- * that the inodes are still locked against any other modifications.
- */
- static inline int
- xfs_exchange_range_finish(
- struct xfs_exchrange *fxr)
- {
- int error;
- error = file_remove_privs(fxr->file1);
- if (error)
- return error;
- if (file_inode(fxr->file1) == file_inode(fxr->file2))
- return 0;
- return file_remove_privs(fxr->file2);
- }
- /*
- * Check the alignment of an exchange request when the allocation unit size
- * isn't a power of two. The generic file-level helpers use (fast)
- * bitmask-based alignment checks, but here we have to use slow long division.
- */
- static int
- xfs_exchrange_check_rtalign(
- const struct xfs_exchrange *fxr,
- struct xfs_inode *ip1,
- struct xfs_inode *ip2,
- unsigned int alloc_unit)
- {
- uint64_t length = fxr->length;
- uint64_t blen;
- loff_t size1, size2;
- size1 = i_size_read(VFS_I(ip1));
- size2 = i_size_read(VFS_I(ip2));
- /* The start of both ranges must be aligned to a rt extent. */
- if (!isaligned_64(fxr->file1_offset, alloc_unit) ||
- !isaligned_64(fxr->file2_offset, alloc_unit))
- return -EINVAL;
- if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
- length = max_t(int64_t, size1 - fxr->file1_offset,
- size2 - fxr->file2_offset);
- /*
- * If the user wanted us to exchange up to the infile's EOF, round up
- * to the next rt extent boundary for this check. Do the same for the
- * outfile.
- *
- * Otherwise, reject the range length if it's not rt extent aligned.
- * We already confirmed the starting offsets' rt extent block
- * alignment.
- */
- if (fxr->file1_offset + length == size1)
- blen = roundup_64(size1, alloc_unit) - fxr->file1_offset;
- else if (fxr->file2_offset + length == size2)
- blen = roundup_64(size2, alloc_unit) - fxr->file2_offset;
- else if (!isaligned_64(length, alloc_unit))
- return -EINVAL;
- else
- blen = length;
- /* Don't allow overlapped exchanges within the same file. */
- if (ip1 == ip2 &&
- fxr->file2_offset + blen > fxr->file1_offset &&
- fxr->file1_offset + blen > fxr->file2_offset)
- return -EINVAL;
- /*
- * Ensure that we don't exchange a partial EOF rt extent into the
- * middle of another file.
- */
- if (isaligned_64(length, alloc_unit))
- return 0;
- blen = length;
- if (fxr->file2_offset + length < size2)
- blen = rounddown_64(blen, alloc_unit);
- if (fxr->file1_offset + blen < size1)
- blen = rounddown_64(blen, alloc_unit);
- return blen == length ? 0 : -EINVAL;
- }
- /* Prepare two files to have their data exchanged. */
- STATIC int
- xfs_exchrange_prep(
- struct xfs_exchrange *fxr,
- struct xfs_inode *ip1,
- struct xfs_inode *ip2)
- {
- struct xfs_mount *mp = ip2->i_mount;
- unsigned int alloc_unit = xfs_inode_alloc_unitsize(ip2);
- int error;
- trace_xfs_exchrange_prep(fxr, ip1, ip2);
- /* Verify both files are either real-time or non-realtime */
- if (XFS_IS_REALTIME_INODE(ip1) != XFS_IS_REALTIME_INODE(ip2))
- return -EINVAL;
- /* Check non-power of two alignment issues, if necessary. */
- if (!is_power_of_2(alloc_unit)) {
- error = xfs_exchrange_check_rtalign(fxr, ip1, ip2, alloc_unit);
- if (error)
- return error;
- /*
- * Do the generic file-level checks with the regular block
- * alignment.
- */
- alloc_unit = mp->m_sb.sb_blocksize;
- }
- error = xfs_exchange_range_prep(fxr, alloc_unit);
- if (error || fxr->length == 0)
- return error;
- if (fxr->flags & __XFS_EXCHANGE_RANGE_CHECK_FRESH2) {
- error = xfs_exchrange_check_freshness(fxr, ip2);
- if (error)
- return error;
- }
- /* Attach dquots to both inodes before changing block maps. */
- error = xfs_qm_dqattach(ip2);
- if (error)
- return error;
- error = xfs_qm_dqattach(ip1);
- if (error)
- return error;
- trace_xfs_exchrange_flush(fxr, ip1, ip2);
- /* Flush the relevant ranges of both files. */
- error = xfs_flush_unmap_range(ip2, fxr->file2_offset, fxr->length);
- if (error)
- return error;
- error = xfs_flush_unmap_range(ip1, fxr->file1_offset, fxr->length);
- if (error)
- return error;
- /*
- * Cancel CoW fork preallocations for the ranges of both files. The
- * prep function should have flushed all the dirty data, so the only
- * CoW mappings remaining should be speculative.
- */
- if (xfs_inode_has_cow_data(ip1)) {
- error = xfs_reflink_cancel_cow_range(ip1, fxr->file1_offset,
- fxr->length, true);
- if (error)
- return error;
- }
- if (xfs_inode_has_cow_data(ip2)) {
- error = xfs_reflink_cancel_cow_range(ip2, fxr->file2_offset,
- fxr->length, true);
- if (error)
- return error;
- }
- return 0;
- }
- /*
- * Exchange contents of files. This is the binding between the generic
- * file-level concepts and the XFS inode-specific implementation.
- */
- STATIC int
- xfs_exchrange_contents(
- struct xfs_exchrange *fxr)
- {
- struct inode *inode1 = file_inode(fxr->file1);
- struct inode *inode2 = file_inode(fxr->file2);
- struct xfs_inode *ip1 = XFS_I(inode1);
- struct xfs_inode *ip2 = XFS_I(inode2);
- struct xfs_mount *mp = ip1->i_mount;
- int error;
- if (!xfs_has_exchange_range(mp))
- return -EOPNOTSUPP;
- if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS |
- XFS_EXCHANGE_RANGE_PRIV_FLAGS))
- return -EINVAL;
- if (xfs_is_shutdown(mp))
- return -EIO;
- /* Lock both files against IO */
- error = xfs_ilock2_io_mmap(ip1, ip2);
- if (error)
- goto out_err;
- /* Prepare and then exchange file contents. */
- error = xfs_exchrange_prep(fxr, ip1, ip2);
- if (error)
- goto out_unlock;
- error = xfs_exchrange_mappings(fxr, ip1, ip2);
- if (error)
- goto out_unlock;
- /*
- * Finish the exchange by removing special file privileges like any
- * other file write would do. This may involve turning on support for
- * logged xattrs if either file has security capabilities.
- */
- error = xfs_exchange_range_finish(fxr);
- if (error)
- goto out_unlock;
- out_unlock:
- xfs_iunlock2_io_mmap(ip1, ip2);
- out_err:
- if (error)
- trace_xfs_exchrange_error(ip2, error, _RET_IP_);
- return error;
- }
- /* Exchange parts of two files. */
- static int
- xfs_exchange_range(
- struct xfs_exchrange *fxr)
- {
- struct inode *inode1 = file_inode(fxr->file1);
- struct inode *inode2 = file_inode(fxr->file2);
- loff_t check_len = fxr->length;
- int ret;
- BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
- XFS_EXCHANGE_RANGE_PRIV_FLAGS);
- /* Both files must be on the same mount/filesystem. */
- if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt)
- return -EXDEV;
- if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS |
- __XFS_EXCHANGE_RANGE_CHECK_FRESH2))
- return -EINVAL;
- /* Userspace requests only honored for regular files. */
- if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
- return -EINVAL;
- /* Both files must be opened for read and write. */
- if (!(fxr->file1->f_mode & FMODE_READ) ||
- !(fxr->file1->f_mode & FMODE_WRITE) ||
- !(fxr->file2->f_mode & FMODE_READ) ||
- !(fxr->file2->f_mode & FMODE_WRITE))
- return -EBADF;
- /* Neither file can be opened append-only. */
- if ((fxr->file1->f_flags & O_APPEND) ||
- (fxr->file2->f_flags & O_APPEND))
- return -EBADF;
- /*
- * If we're exchanging to EOF we can't calculate the length until taking
- * the iolock. Pass a 0 length to remap_verify_area similar to the
- * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well.
- */
- if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
- check_len = 0;
- ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true);
- if (ret)
- return ret;
- ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true);
- if (ret)
- return ret;
- /* Update cmtime if the fd/inode don't forbid it. */
- if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
- fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1;
- if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2))
- fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2;
- file_start_write(fxr->file2);
- ret = xfs_exchrange_contents(fxr);
- file_end_write(fxr->file2);
- if (ret)
- return ret;
- fsnotify_modify(fxr->file1);
- if (fxr->file2 != fxr->file1)
- fsnotify_modify(fxr->file2);
- return 0;
- }
- /* Collect exchange-range arguments from userspace. */
- long
- xfs_ioc_exchange_range(
- struct file *file,
- struct xfs_exchange_range __user *argp)
- {
- struct xfs_exchrange fxr = {
- .file2 = file,
- };
- struct xfs_exchange_range args;
- struct fd file1;
- int error;
- if (copy_from_user(&args, argp, sizeof(args)))
- return -EFAULT;
- if (memchr_inv(&args.pad, 0, sizeof(args.pad)))
- return -EINVAL;
- if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
- return -EINVAL;
- fxr.file1_offset = args.file1_offset;
- fxr.file2_offset = args.file2_offset;
- fxr.length = args.length;
- fxr.flags = args.flags;
- file1 = fdget(args.file1_fd);
- if (!fd_file(file1))
- return -EBADF;
- fxr.file1 = fd_file(file1);
- error = xfs_exchange_range(&fxr);
- fdput(file1);
- return error;
- }
- /* Opaque freshness blob for XFS_IOC_COMMIT_RANGE */
- struct xfs_commit_range_fresh {
- xfs_fsid_t fsid; /* m_fixedfsid */
- __u64 file2_ino; /* inode number */
- __s64 file2_mtime; /* modification time */
- __s64 file2_ctime; /* change time */
- __s32 file2_mtime_nsec; /* mod time, nsec */
- __s32 file2_ctime_nsec; /* change time, nsec */
- __u32 file2_gen; /* inode generation */
- __u32 magic; /* zero */
- };
- #define XCR_FRESH_MAGIC 0x444F524B /* DORK */
- /* Set up a commitrange operation by sampling file2's write-related attrs */
- long
- xfs_ioc_start_commit(
- struct file *file,
- struct xfs_commit_range __user *argp)
- {
- struct xfs_commit_range args = { };
- struct timespec64 ts;
- struct xfs_commit_range_fresh *kern_f;
- struct xfs_commit_range_fresh __user *user_f;
- struct inode *inode2 = file_inode(file);
- struct xfs_inode *ip2 = XFS_I(inode2);
- const unsigned int lockflags = XFS_IOLOCK_SHARED |
- XFS_MMAPLOCK_SHARED |
- XFS_ILOCK_SHARED;
- BUILD_BUG_ON(sizeof(struct xfs_commit_range_fresh) !=
- sizeof(args.file2_freshness));
- kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness;
- memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
- xfs_ilock(ip2, lockflags);
- ts = inode_get_ctime(inode2);
- kern_f->file2_ctime = ts.tv_sec;
- kern_f->file2_ctime_nsec = ts.tv_nsec;
- ts = inode_get_mtime(inode2);
- kern_f->file2_mtime = ts.tv_sec;
- kern_f->file2_mtime_nsec = ts.tv_nsec;
- kern_f->file2_ino = ip2->i_ino;
- kern_f->file2_gen = inode2->i_generation;
- kern_f->magic = XCR_FRESH_MAGIC;
- xfs_iunlock(ip2, lockflags);
- user_f = (struct xfs_commit_range_fresh __user *)&argp->file2_freshness;
- if (copy_to_user(user_f, kern_f, sizeof(*kern_f)))
- return -EFAULT;
- return 0;
- }
- /*
- * Exchange file1 and file2 contents if file2 has not been written since the
- * start commit operation.
- */
- long
- xfs_ioc_commit_range(
- struct file *file,
- struct xfs_commit_range __user *argp)
- {
- struct xfs_exchrange fxr = {
- .file2 = file,
- };
- struct xfs_commit_range args;
- struct xfs_commit_range_fresh *kern_f;
- struct xfs_inode *ip2 = XFS_I(file_inode(file));
- struct xfs_mount *mp = ip2->i_mount;
- struct fd file1;
- int error;
- kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness;
- if (copy_from_user(&args, argp, sizeof(args)))
- return -EFAULT;
- if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
- return -EINVAL;
- if (kern_f->magic != XCR_FRESH_MAGIC)
- return -EBUSY;
- if (memcmp(&kern_f->fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t)))
- return -EBUSY;
- fxr.file1_offset = args.file1_offset;
- fxr.file2_offset = args.file2_offset;
- fxr.length = args.length;
- fxr.flags = args.flags | __XFS_EXCHANGE_RANGE_CHECK_FRESH2;
- fxr.file2_ino = kern_f->file2_ino;
- fxr.file2_gen = kern_f->file2_gen;
- fxr.file2_mtime.tv_sec = kern_f->file2_mtime;
- fxr.file2_mtime.tv_nsec = kern_f->file2_mtime_nsec;
- fxr.file2_ctime.tv_sec = kern_f->file2_ctime;
- fxr.file2_ctime.tv_nsec = kern_f->file2_ctime_nsec;
- file1 = fdget(args.file1_fd);
- if (fd_empty(file1))
- return -EBADF;
- fxr.file1 = fd_file(file1);
- error = xfs_exchange_range(&fxr);
- fdput(file1);
- return error;
- }
|