| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
- // SPDX-License-Identifier: GPL-2.0
- /*
- * Copyright (c) 2022 Fujitsu. All Rights Reserved.
- */
- #include "xfs.h"
- #include "xfs_shared.h"
- #include "xfs_format.h"
- #include "xfs_log_format.h"
- #include "xfs_trans_resv.h"
- #include "xfs_mount.h"
- #include "xfs_alloc.h"
- #include "xfs_bit.h"
- #include "xfs_btree.h"
- #include "xfs_inode.h"
- #include "xfs_icache.h"
- #include "xfs_rmap.h"
- #include "xfs_rmap_btree.h"
- #include "xfs_rtalloc.h"
- #include "xfs_trans.h"
- #include "xfs_ag.h"
- #include <linux/mm.h>
- #include <linux/dax.h>
- #include <linux/fs.h>
- struct xfs_failure_info {
- xfs_agblock_t startblock;
- xfs_extlen_t blockcount;
- int mf_flags;
- bool want_shutdown;
- };
- static pgoff_t
- xfs_failure_pgoff(
- struct xfs_mount *mp,
- const struct xfs_rmap_irec *rec,
- const struct xfs_failure_info *notify)
- {
- loff_t pos = XFS_FSB_TO_B(mp, rec->rm_offset);
- if (notify->startblock > rec->rm_startblock)
- pos += XFS_FSB_TO_B(mp,
- notify->startblock - rec->rm_startblock);
- return pos >> PAGE_SHIFT;
- }
- static unsigned long
- xfs_failure_pgcnt(
- struct xfs_mount *mp,
- const struct xfs_rmap_irec *rec,
- const struct xfs_failure_info *notify)
- {
- xfs_agblock_t end_rec;
- xfs_agblock_t end_notify;
- xfs_agblock_t start_cross;
- xfs_agblock_t end_cross;
- start_cross = max(rec->rm_startblock, notify->startblock);
- end_rec = rec->rm_startblock + rec->rm_blockcount;
- end_notify = notify->startblock + notify->blockcount;
- end_cross = min(end_rec, end_notify);
- return XFS_FSB_TO_B(mp, end_cross - start_cross) >> PAGE_SHIFT;
- }
- static int
- xfs_dax_failure_fn(
- struct xfs_btree_cur *cur,
- const struct xfs_rmap_irec *rec,
- void *data)
- {
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_inode *ip;
- struct xfs_failure_info *notify = data;
- struct address_space *mapping;
- pgoff_t pgoff;
- unsigned long pgcnt;
- int error = 0;
- if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
- (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) {
- /* Continue the query because this isn't a failure. */
- if (notify->mf_flags & MF_MEM_PRE_REMOVE)
- return 0;
- notify->want_shutdown = true;
- return 0;
- }
- /* Get files that incore, filter out others that are not in use. */
- error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, XFS_IGET_INCORE,
- 0, &ip);
- /* Continue the rmap query if the inode isn't incore */
- if (error == -ENODATA)
- return 0;
- if (error) {
- notify->want_shutdown = true;
- return 0;
- }
- mapping = VFS_I(ip)->i_mapping;
- pgoff = xfs_failure_pgoff(mp, rec, notify);
- pgcnt = xfs_failure_pgcnt(mp, rec, notify);
- /* Continue the rmap query if the inode isn't a dax file. */
- if (dax_mapping(mapping))
- error = mf_dax_kill_procs(mapping, pgoff, pgcnt,
- notify->mf_flags);
- /* Invalidate the cache in dax pages. */
- if (notify->mf_flags & MF_MEM_PRE_REMOVE)
- invalidate_inode_pages2_range(mapping, pgoff,
- pgoff + pgcnt - 1);
- xfs_irele(ip);
- return error;
- }
- static int
- xfs_dax_notify_failure_freeze(
- struct xfs_mount *mp)
- {
- struct super_block *sb = mp->m_super;
- int error;
- error = freeze_super(sb, FREEZE_HOLDER_KERNEL);
- if (error)
- xfs_emerg(mp, "already frozen by kernel, err=%d", error);
- return error;
- }
- static void
- xfs_dax_notify_failure_thaw(
- struct xfs_mount *mp,
- bool kernel_frozen)
- {
- struct super_block *sb = mp->m_super;
- int error;
- if (kernel_frozen) {
- error = thaw_super(sb, FREEZE_HOLDER_KERNEL);
- if (error)
- xfs_emerg(mp, "still frozen after notify failure, err=%d",
- error);
- }
- /*
- * Also thaw userspace call anyway because the device is about to be
- * removed immediately.
- */
- thaw_super(sb, FREEZE_HOLDER_USERSPACE);
- }
- static int
- xfs_dax_translate_range(
- struct xfs_buftarg *btp,
- u64 offset,
- u64 len,
- xfs_daddr_t *daddr,
- uint64_t *bblen)
- {
- u64 dev_start = btp->bt_dax_part_off;
- u64 dev_len = bdev_nr_bytes(btp->bt_bdev);
- u64 dev_end = dev_start + dev_len - 1;
- /* Notify failure on the whole device. */
- if (offset == 0 && len == U64_MAX) {
- offset = dev_start;
- len = dev_len;
- }
- /* Ignore the range out of filesystem area */
- if (offset + len - 1 < dev_start)
- return -ENXIO;
- if (offset > dev_end)
- return -ENXIO;
- /* Calculate the real range when it touches the boundary */
- if (offset > dev_start)
- offset -= dev_start;
- else {
- len -= dev_start - offset;
- offset = 0;
- }
- if (offset + len - 1 > dev_end)
- len = dev_end - offset + 1;
- *daddr = BTOBB(offset);
- *bblen = BTOBB(len);
- return 0;
- }
- static int
- xfs_dax_notify_logdev_failure(
- struct xfs_mount *mp,
- u64 offset,
- u64 len,
- int mf_flags)
- {
- xfs_daddr_t daddr;
- uint64_t bblen;
- int error;
- /*
- * Return ENXIO instead of shutting down the filesystem if the failed
- * region is beyond the end of the log.
- */
- error = xfs_dax_translate_range(mp->m_logdev_targp,
- offset, len, &daddr, &bblen);
- if (error)
- return error;
- /*
- * In the pre-remove case the failure notification is attempting to
- * trigger a force unmount. The expectation is that the device is
- * still present, but its removal is in progress and can not be
- * cancelled, proceed with accessing the log device.
- */
- if (mf_flags & MF_MEM_PRE_REMOVE)
- return 0;
- xfs_err(mp, "ondisk log corrupt, shutting down fs!");
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
- return -EFSCORRUPTED;
- }
- static int
- xfs_dax_notify_ddev_failure(
- struct xfs_mount *mp,
- xfs_daddr_t daddr,
- xfs_daddr_t bblen,
- int mf_flags)
- {
- struct xfs_failure_info notify = { .mf_flags = mf_flags };
- struct xfs_trans *tp = NULL;
- struct xfs_btree_cur *cur = NULL;
- struct xfs_buf *agf_bp = NULL;
- int error = 0;
- bool kernel_frozen = false;
- xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr);
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
- xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp,
- daddr + bblen - 1);
- xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
- if (mf_flags & MF_MEM_PRE_REMOVE) {
- xfs_info(mp, "Device is about to be removed!");
- /*
- * Freeze fs to prevent new mappings from being created.
- * - Keep going on if others already hold the kernel forzen.
- * - Keep going on if other errors too because this device is
- * starting to fail.
- * - If kernel frozen state is hold successfully here, thaw it
- * here as well at the end.
- */
- kernel_frozen = xfs_dax_notify_failure_freeze(mp) == 0;
- }
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- goto out;
- for (; agno <= end_agno; agno++) {
- struct xfs_rmap_irec ri_low = { };
- struct xfs_rmap_irec ri_high;
- struct xfs_agf *agf;
- struct xfs_perag *pag;
- xfs_agblock_t range_agend;
- pag = xfs_perag_get(mp, agno);
- error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
- if (error) {
- xfs_perag_put(pag);
- break;
- }
- cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
- /*
- * Set the rmap range from ri_low to ri_high, which represents
- * a [start, end] where we looking for the files or metadata.
- */
- memset(&ri_high, 0xFF, sizeof(ri_high));
- ri_low.rm_startblock = XFS_FSB_TO_AGBNO(mp, fsbno);
- if (agno == end_agno)
- ri_high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsbno);
- agf = agf_bp->b_addr;
- range_agend = min(be32_to_cpu(agf->agf_length) - 1,
- ri_high.rm_startblock);
- notify.startblock = ri_low.rm_startblock;
- notify.blockcount = range_agend + 1 - ri_low.rm_startblock;
- error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
- xfs_dax_failure_fn, ¬ify);
- xfs_btree_del_cursor(cur, error);
- xfs_trans_brelse(tp, agf_bp);
- xfs_perag_put(pag);
- if (error)
- break;
- fsbno = XFS_AGB_TO_FSB(mp, agno + 1, 0);
- }
- xfs_trans_cancel(tp);
- /*
- * Shutdown fs from a force umount in pre-remove case which won't fail,
- * so errors can be ignored. Otherwise, shutdown the filesystem with
- * CORRUPT flag if error occured or notify.want_shutdown was set during
- * RMAP querying.
- */
- if (mf_flags & MF_MEM_PRE_REMOVE)
- xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
- else if (error || notify.want_shutdown) {
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
- if (!error)
- error = -EFSCORRUPTED;
- }
- out:
- /* Thaw the fs if it has been frozen before. */
- if (mf_flags & MF_MEM_PRE_REMOVE)
- xfs_dax_notify_failure_thaw(mp, kernel_frozen);
- return error;
- }
- static int
- xfs_dax_notify_failure(
- struct dax_device *dax_dev,
- u64 offset,
- u64 len,
- int mf_flags)
- {
- struct xfs_mount *mp = dax_holder(dax_dev);
- xfs_daddr_t daddr;
- uint64_t bblen;
- int error;
- if (!(mp->m_super->s_flags & SB_BORN)) {
- xfs_warn(mp, "filesystem is not ready for notify_failure()!");
- return -EIO;
- }
- if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
- xfs_debug(mp,
- "notify_failure() not supported on realtime device!");
- return -EOPNOTSUPP;
- }
- if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev &&
- mp->m_logdev_targp != mp->m_ddev_targp) {
- return xfs_dax_notify_logdev_failure(mp, offset, len, mf_flags);
- }
- if (!xfs_has_rmapbt(mp)) {
- xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
- return -EOPNOTSUPP;
- }
- error = xfs_dax_translate_range(mp->m_ddev_targp, offset, len, &daddr,
- &bblen);
- if (error)
- return error;
- return xfs_dax_notify_ddev_failure(mp, daddr, bblen, mf_flags);
- }
- const struct dax_holder_operations xfs_dax_holder_operations = {
- .notify_failure = xfs_dax_notify_failure,
- };
|