| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614 |
- // SPDX-License-Identifier: GPL-2.0-or-later
- /*
- * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <djwong@kernel.org>
- */
- #include "xfs.h"
- #include "xfs_fs.h"
- #include "xfs_format.h"
- #include "xfs_log_format.h"
- #include "xfs_trans_resv.h"
- #include "xfs_bit.h"
- #include "xfs_shared.h"
- #include "xfs_mount.h"
- #include "xfs_defer.h"
- #include "xfs_inode.h"
- #include "xfs_trans.h"
- #include "xfs_trans_priv.h"
- #include "xfs_exchmaps_item.h"
- #include "xfs_exchmaps.h"
- #include "xfs_log.h"
- #include "xfs_bmap.h"
- #include "xfs_icache.h"
- #include "xfs_bmap_btree.h"
- #include "xfs_trans_space.h"
- #include "xfs_error.h"
- #include "xfs_log_priv.h"
- #include "xfs_log_recover.h"
- #include "xfs_exchrange.h"
- #include "xfs_trace.h"
- struct kmem_cache *xfs_xmi_cache;
- struct kmem_cache *xfs_xmd_cache;
- static const struct xfs_item_ops xfs_xmi_item_ops;
- static inline struct xfs_xmi_log_item *XMI_ITEM(struct xfs_log_item *lip)
- {
- return container_of(lip, struct xfs_xmi_log_item, xmi_item);
- }
- STATIC void
- xfs_xmi_item_free(
- struct xfs_xmi_log_item *xmi_lip)
- {
- kvfree(xmi_lip->xmi_item.li_lv_shadow);
- kmem_cache_free(xfs_xmi_cache, xmi_lip);
- }
- /*
- * Freeing the XMI requires that we remove it from the AIL if it has already
- * been placed there. However, the XMI may not yet have been placed in the AIL
- * when called by xfs_xmi_release() from XMD processing due to the ordering of
- * committed vs unpin operations in bulk insert operations. Hence the reference
- * count to ensure only the last caller frees the XMI.
- */
- STATIC void
- xfs_xmi_release(
- struct xfs_xmi_log_item *xmi_lip)
- {
- ASSERT(atomic_read(&xmi_lip->xmi_refcount) > 0);
- if (atomic_dec_and_test(&xmi_lip->xmi_refcount)) {
- xfs_trans_ail_delete(&xmi_lip->xmi_item, 0);
- xfs_xmi_item_free(xmi_lip);
- }
- }
- STATIC void
- xfs_xmi_item_size(
- struct xfs_log_item *lip,
- int *nvecs,
- int *nbytes)
- {
- *nvecs += 1;
- *nbytes += sizeof(struct xfs_xmi_log_format);
- }
- /*
- * This is called to fill in the vector of log iovecs for the given xmi log
- * item. We use only 1 iovec, and we point that at the xmi_log_format structure
- * embedded in the xmi item.
- */
- STATIC void
- xfs_xmi_item_format(
- struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
- {
- struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
- xmi_lip->xmi_format.xmi_type = XFS_LI_XMI;
- xmi_lip->xmi_format.xmi_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMI_FORMAT,
- &xmi_lip->xmi_format,
- sizeof(struct xfs_xmi_log_format));
- }
- /*
- * The unpin operation is the last place an XMI is manipulated in the log. It
- * is either inserted in the AIL or aborted in the event of a log I/O error. In
- * either case, the XMI transaction has been successfully committed to make it
- * this far. Therefore, we expect whoever committed the XMI to either construct
- * and commit the XMD or drop the XMD's reference in the event of error. Simply
- * drop the log's XMI reference now that the log is done with it.
- */
- STATIC void
- xfs_xmi_item_unpin(
- struct xfs_log_item *lip,
- int remove)
- {
- struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip);
- xfs_xmi_release(xmi_lip);
- }
- /*
- * The XMI has been either committed or aborted if the transaction has been
- * cancelled. If the transaction was cancelled, an XMD isn't going to be
- * constructed and thus we free the XMI here directly.
- */
- STATIC void
- xfs_xmi_item_release(
- struct xfs_log_item *lip)
- {
- xfs_xmi_release(XMI_ITEM(lip));
- }
- /* Allocate and initialize an xmi item. */
- STATIC struct xfs_xmi_log_item *
- xfs_xmi_init(
- struct xfs_mount *mp)
- {
- struct xfs_xmi_log_item *xmi_lip;
- xmi_lip = kmem_cache_zalloc(xfs_xmi_cache, GFP_KERNEL | __GFP_NOFAIL);
- xfs_log_item_init(mp, &xmi_lip->xmi_item, XFS_LI_XMI, &xfs_xmi_item_ops);
- xmi_lip->xmi_format.xmi_id = (uintptr_t)(void *)xmi_lip;
- atomic_set(&xmi_lip->xmi_refcount, 2);
- return xmi_lip;
- }
- static inline struct xfs_xmd_log_item *XMD_ITEM(struct xfs_log_item *lip)
- {
- return container_of(lip, struct xfs_xmd_log_item, xmd_item);
- }
- STATIC void
- xfs_xmd_item_size(
- struct xfs_log_item *lip,
- int *nvecs,
- int *nbytes)
- {
- *nvecs += 1;
- *nbytes += sizeof(struct xfs_xmd_log_format);
- }
- /*
- * This is called to fill in the vector of log iovecs for the given xmd log
- * item. We use only 1 iovec, and we point that at the xmd_log_format structure
- * embedded in the xmd item.
- */
- STATIC void
- xfs_xmd_item_format(
- struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
- {
- struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip);
- struct xfs_log_iovec *vecp = NULL;
- xmd_lip->xmd_format.xmd_type = XFS_LI_XMD;
- xmd_lip->xmd_format.xmd_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format,
- sizeof(struct xfs_xmd_log_format));
- }
- /*
- * The XMD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the XMI and free the
- * XMD.
- */
- STATIC void
- xfs_xmd_item_release(
- struct xfs_log_item *lip)
- {
- struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip);
- xfs_xmi_release(xmd_lip->xmd_intent_log_item);
- kvfree(xmd_lip->xmd_item.li_lv_shadow);
- kmem_cache_free(xfs_xmd_cache, xmd_lip);
- }
- static struct xfs_log_item *
- xfs_xmd_item_intent(
- struct xfs_log_item *lip)
- {
- return &XMD_ITEM(lip)->xmd_intent_log_item->xmi_item;
- }
- static const struct xfs_item_ops xfs_xmd_item_ops = {
- .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED |
- XFS_ITEM_INTENT_DONE,
- .iop_size = xfs_xmd_item_size,
- .iop_format = xfs_xmd_item_format,
- .iop_release = xfs_xmd_item_release,
- .iop_intent = xfs_xmd_item_intent,
- };
- /* Log file mapping exchange information in the intent item. */
- STATIC struct xfs_log_item *
- xfs_exchmaps_create_intent(
- struct xfs_trans *tp,
- struct list_head *items,
- unsigned int count,
- bool sort)
- {
- struct xfs_xmi_log_item *xmi_lip;
- struct xfs_exchmaps_intent *xmi;
- struct xfs_xmi_log_format *xlf;
- ASSERT(count == 1);
- xmi = list_first_entry_or_null(items, struct xfs_exchmaps_intent,
- xmi_list);
- xmi_lip = xfs_xmi_init(tp->t_mountp);
- xlf = &xmi_lip->xmi_format;
- xlf->xmi_inode1 = xmi->xmi_ip1->i_ino;
- xlf->xmi_igen1 = VFS_I(xmi->xmi_ip1)->i_generation;
- xlf->xmi_inode2 = xmi->xmi_ip2->i_ino;
- xlf->xmi_igen2 = VFS_I(xmi->xmi_ip2)->i_generation;
- xlf->xmi_startoff1 = xmi->xmi_startoff1;
- xlf->xmi_startoff2 = xmi->xmi_startoff2;
- xlf->xmi_blockcount = xmi->xmi_blockcount;
- xlf->xmi_isize1 = xmi->xmi_isize1;
- xlf->xmi_isize2 = xmi->xmi_isize2;
- xlf->xmi_flags = xmi->xmi_flags & XFS_EXCHMAPS_LOGGED_FLAGS;
- return &xmi_lip->xmi_item;
- }
- STATIC struct xfs_log_item *
- xfs_exchmaps_create_done(
- struct xfs_trans *tp,
- struct xfs_log_item *intent,
- unsigned int count)
- {
- struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(intent);
- struct xfs_xmd_log_item *xmd_lip;
- xmd_lip = kmem_cache_zalloc(xfs_xmd_cache, GFP_KERNEL | __GFP_NOFAIL);
- xfs_log_item_init(tp->t_mountp, &xmd_lip->xmd_item, XFS_LI_XMD,
- &xfs_xmd_item_ops);
- xmd_lip->xmd_intent_log_item = xmi_lip;
- xmd_lip->xmd_format.xmd_xmi_id = xmi_lip->xmi_format.xmi_id;
- return &xmd_lip->xmd_item;
- }
- /* Add this deferred XMI to the transaction. */
- void
- xfs_exchmaps_defer_add(
- struct xfs_trans *tp,
- struct xfs_exchmaps_intent *xmi)
- {
- trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
- xfs_defer_add(tp, &xmi->xmi_list, &xfs_exchmaps_defer_type);
- }
- static inline struct xfs_exchmaps_intent *xmi_entry(const struct list_head *e)
- {
- return list_entry(e, struct xfs_exchmaps_intent, xmi_list);
- }
- /* Cancel a deferred file mapping exchange. */
- STATIC void
- xfs_exchmaps_cancel_item(
- struct list_head *item)
- {
- struct xfs_exchmaps_intent *xmi = xmi_entry(item);
- kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
- }
- /* Process a deferred file mapping exchange. */
- STATIC int
- xfs_exchmaps_finish_item(
- struct xfs_trans *tp,
- struct xfs_log_item *done,
- struct list_head *item,
- struct xfs_btree_cur **state)
- {
- struct xfs_exchmaps_intent *xmi = xmi_entry(item);
- int error;
- /*
- * Exchange one more mappings between two files. If there's still more
- * work to do, we want to requeue ourselves after all other pending
- * deferred operations have finished. This includes all of the dfops
- * that we queued directly as well as any new ones created in the
- * process of finishing the others. Doing so prevents us from queuing
- * a large number of XMI log items in kernel memory, which in turn
- * prevents us from pinning the tail of the log (while logging those
- * new XMI items) until the first XMI items can be processed.
- */
- error = xfs_exchmaps_finish_one(tp, xmi);
- if (error != -EAGAIN)
- xfs_exchmaps_cancel_item(item);
- return error;
- }
- /* Abort all pending XMIs. */
- STATIC void
- xfs_exchmaps_abort_intent(
- struct xfs_log_item *intent)
- {
- xfs_xmi_release(XMI_ITEM(intent));
- }
- /* Is this recovered XMI ok? */
- static inline bool
- xfs_xmi_validate(
- struct xfs_mount *mp,
- struct xfs_xmi_log_item *xmi_lip)
- {
- struct xfs_xmi_log_format *xlf = &xmi_lip->xmi_format;
- if (!xfs_has_exchange_range(mp))
- return false;
- if (xmi_lip->xmi_format.__pad != 0)
- return false;
- if (xlf->xmi_flags & ~XFS_EXCHMAPS_LOGGED_FLAGS)
- return false;
- if (!xfs_verify_ino(mp, xlf->xmi_inode1) ||
- !xfs_verify_ino(mp, xlf->xmi_inode2))
- return false;
- if (!xfs_verify_fileext(mp, xlf->xmi_startoff1, xlf->xmi_blockcount))
- return false;
- return xfs_verify_fileext(mp, xlf->xmi_startoff2, xlf->xmi_blockcount);
- }
- /*
- * Use the recovered log state to create a new request, estimate resource
- * requirements, and create a new incore intent state.
- */
- STATIC struct xfs_exchmaps_intent *
- xfs_xmi_item_recover_intent(
- struct xfs_mount *mp,
- struct xfs_defer_pending *dfp,
- const struct xfs_xmi_log_format *xlf,
- struct xfs_exchmaps_req *req,
- struct xfs_inode **ipp1,
- struct xfs_inode **ipp2)
- {
- struct xfs_inode *ip1, *ip2;
- struct xfs_exchmaps_intent *xmi;
- int error;
- /*
- * Grab both inodes and set IRECOVERY to prevent trimming of post-eof
- * mappings and freeing of unlinked inodes until we're totally done
- * processing files. The ondisk format of this new log item contains
- * file handle information, which is why recovery for other items do
- * not check the inode generation number.
- */
- error = xlog_recover_iget_handle(mp, xlf->xmi_inode1, xlf->xmi_igen1,
- &ip1);
- if (error) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf,
- sizeof(*xlf));
- return ERR_PTR(error);
- }
- error = xlog_recover_iget_handle(mp, xlf->xmi_inode2, xlf->xmi_igen2,
- &ip2);
- if (error) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf,
- sizeof(*xlf));
- goto err_rele1;
- }
- req->ip1 = ip1;
- req->ip2 = ip2;
- req->startoff1 = xlf->xmi_startoff1;
- req->startoff2 = xlf->xmi_startoff2;
- req->blockcount = xlf->xmi_blockcount;
- req->flags = xlf->xmi_flags & XFS_EXCHMAPS_PARAMS;
- xfs_exchrange_ilock(NULL, ip1, ip2);
- error = xfs_exchmaps_estimate(req);
- xfs_exchrange_iunlock(ip1, ip2);
- if (error)
- goto err_rele2;
- *ipp1 = ip1;
- *ipp2 = ip2;
- xmi = xfs_exchmaps_init_intent(req);
- xfs_defer_add_item(dfp, &xmi->xmi_list);
- return xmi;
- err_rele2:
- xfs_irele(ip2);
- err_rele1:
- xfs_irele(ip1);
- req->ip2 = req->ip1 = NULL;
- return ERR_PTR(error);
- }
- /* Process a file mapping exchange item that was recovered from the log. */
- STATIC int
- xfs_exchmaps_recover_work(
- struct xfs_defer_pending *dfp,
- struct list_head *capture_list)
- {
- struct xfs_exchmaps_req req = { .flags = 0 };
- struct xfs_trans_res resv;
- struct xfs_exchmaps_intent *xmi;
- struct xfs_log_item *lip = dfp->dfp_intent;
- struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip);
- struct xfs_mount *mp = lip->li_log->l_mp;
- struct xfs_trans *tp;
- struct xfs_inode *ip1, *ip2;
- int error = 0;
- if (!xfs_xmi_validate(mp, xmi_lip)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- &xmi_lip->xmi_format,
- sizeof(xmi_lip->xmi_format));
- return -EFSCORRUPTED;
- }
- xmi = xfs_xmi_item_recover_intent(mp, dfp, &xmi_lip->xmi_format, &req,
- &ip1, &ip2);
- if (IS_ERR(xmi))
- return PTR_ERR(xmi);
- trace_xfs_exchmaps_recover(mp, xmi);
- resv = xlog_recover_resv(&M_RES(mp)->tr_write);
- error = xfs_trans_alloc(mp, &resv, req.resblks, 0, 0, &tp);
- if (error)
- goto err_rele;
- xfs_exchrange_ilock(tp, ip1, ip2);
- xfs_exchmaps_ensure_reflink(tp, xmi);
- xfs_exchmaps_upgrade_extent_counts(tp, xmi);
- error = xlog_recover_finish_intent(tp, dfp);
- if (error == -EFSCORRUPTED)
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- &xmi_lip->xmi_format,
- sizeof(xmi_lip->xmi_format));
- if (error)
- goto err_cancel;
- /*
- * Commit transaction, which frees the transaction and saves the inodes
- * for later replay activities.
- */
- error = xfs_defer_ops_capture_and_commit(tp, capture_list);
- goto err_unlock;
- err_cancel:
- xfs_trans_cancel(tp);
- err_unlock:
- xfs_exchrange_iunlock(ip1, ip2);
- err_rele:
- xfs_irele(ip2);
- xfs_irele(ip1);
- return error;
- }
- /* Relog an intent item to push the log tail forward. */
- static struct xfs_log_item *
- xfs_exchmaps_relog_intent(
- struct xfs_trans *tp,
- struct xfs_log_item *intent,
- struct xfs_log_item *done_item)
- {
- struct xfs_xmi_log_item *xmi_lip;
- struct xfs_xmi_log_format *old_xlf, *new_xlf;
- old_xlf = &XMI_ITEM(intent)->xmi_format;
- xmi_lip = xfs_xmi_init(tp->t_mountp);
- new_xlf = &xmi_lip->xmi_format;
- new_xlf->xmi_inode1 = old_xlf->xmi_inode1;
- new_xlf->xmi_inode2 = old_xlf->xmi_inode2;
- new_xlf->xmi_igen1 = old_xlf->xmi_igen1;
- new_xlf->xmi_igen2 = old_xlf->xmi_igen2;
- new_xlf->xmi_startoff1 = old_xlf->xmi_startoff1;
- new_xlf->xmi_startoff2 = old_xlf->xmi_startoff2;
- new_xlf->xmi_blockcount = old_xlf->xmi_blockcount;
- new_xlf->xmi_flags = old_xlf->xmi_flags;
- new_xlf->xmi_isize1 = old_xlf->xmi_isize1;
- new_xlf->xmi_isize2 = old_xlf->xmi_isize2;
- return &xmi_lip->xmi_item;
- }
- const struct xfs_defer_op_type xfs_exchmaps_defer_type = {
- .name = "exchmaps",
- .max_items = 1,
- .create_intent = xfs_exchmaps_create_intent,
- .abort_intent = xfs_exchmaps_abort_intent,
- .create_done = xfs_exchmaps_create_done,
- .finish_item = xfs_exchmaps_finish_item,
- .cancel_item = xfs_exchmaps_cancel_item,
- .recover_work = xfs_exchmaps_recover_work,
- .relog_intent = xfs_exchmaps_relog_intent,
- };
- STATIC bool
- xfs_xmi_item_match(
- struct xfs_log_item *lip,
- uint64_t intent_id)
- {
- return XMI_ITEM(lip)->xmi_format.xmi_id == intent_id;
- }
- static const struct xfs_item_ops xfs_xmi_item_ops = {
- .flags = XFS_ITEM_INTENT,
- .iop_size = xfs_xmi_item_size,
- .iop_format = xfs_xmi_item_format,
- .iop_unpin = xfs_xmi_item_unpin,
- .iop_release = xfs_xmi_item_release,
- .iop_match = xfs_xmi_item_match,
- };
- /*
- * This routine is called to create an in-core file mapping exchange item from
- * the xmi format structure which was logged on disk. It allocates an in-core
- * xmi, copies the exchange information from the format structure into it, and
- * adds the xmi to the AIL with the given LSN.
- */
- STATIC int
- xlog_recover_xmi_commit_pass2(
- struct xlog *log,
- struct list_head *buffer_list,
- struct xlog_recover_item *item,
- xfs_lsn_t lsn)
- {
- struct xfs_mount *mp = log->l_mp;
- struct xfs_xmi_log_item *xmi_lip;
- struct xfs_xmi_log_format *xmi_formatp;
- size_t len;
- len = sizeof(struct xfs_xmi_log_format);
- if (item->ri_buf[0].i_len != len) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
- return -EFSCORRUPTED;
- }
- xmi_formatp = item->ri_buf[0].i_addr;
- if (xmi_formatp->__pad != 0) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
- return -EFSCORRUPTED;
- }
- xmi_lip = xfs_xmi_init(mp);
- memcpy(&xmi_lip->xmi_format, xmi_formatp, len);
- xlog_recover_intent_item(log, &xmi_lip->xmi_item, lsn,
- &xfs_exchmaps_defer_type);
- return 0;
- }
- const struct xlog_recover_item_ops xlog_xmi_item_ops = {
- .item_type = XFS_LI_XMI,
- .commit_pass2 = xlog_recover_xmi_commit_pass2,
- };
- /*
- * This routine is called when an XMD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding XMI if it
- * was still in the log. To do this it searches the AIL for the XMI with an id
- * equal to that in the XMD format structure. If we find it we drop the XMD
- * reference, which removes the XMI from the AIL and frees it.
- */
- STATIC int
- xlog_recover_xmd_commit_pass2(
- struct xlog *log,
- struct list_head *buffer_list,
- struct xlog_recover_item *item,
- xfs_lsn_t lsn)
- {
- struct xfs_xmd_log_format *xmd_formatp;
- xmd_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_xmd_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
- return -EFSCORRUPTED;
- }
- xlog_recover_release_intent(log, XFS_LI_XMI, xmd_formatp->xmd_xmi_id);
- return 0;
- }
- const struct xlog_recover_item_ops xlog_xmd_item_ops = {
- .item_type = XFS_LI_XMD,
- .commit_pass2 = xlog_recover_xmd_commit_pass2,
- };
|