commit 12958e9c4c

Merge tag 'xfs-6.8-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Chandan Babu:

 "New features/functionality:

   - Online repair:

       - Reserve disk space for online repairs

       - Fix misinteraction between the AIL and btree bulkloader because
         of which the bulk load fails to queue a buffer for writeback if
         it happens to be on the AIL list

       - Prevent transaction reservation overflows when reaping blocks
         during online repair

       - Whenever possible, bulkloader now copies multiple records into
         a block

       - Support repairing of
           1. Per-AG free space, inode and refcount btrees
           2. Ondisk inodes
           3. File data and attribute fork mappings

       - Verify the contents of
           1. Inode and data fork of realtime bitmap file
           2. Quota files

   - Introduce MF_MEM_PRE_REMOVE. This will be used to notify tasks
     about a pmem device being removed

  Bug fixes:

   - Fix memory leak of recovered attri intent items

   - Fix UAF during log intent recovery

   - Fix realtime geometry integer overflows

   - Prevent scrub from live locking in xchk_iget

   - Prevent fs shutdown when removing files during low free disk space

   - Prevent transaction reservation overflow when extending an RT
     device

   - Prevent incorrect warning from being printed when extending a
     filesystem

   - Fix an off-by-one error in xreap_agextent_binval

   - Serialize access to perag radix tree during deletion operation

   - Fix perag memory leak during growfs

   - Allow allocation of minlen realtime extent when the maximum sized
     realtime free extent is minlen in size

  Cleanups:

   - Remove duplicate boilerplate code spread across functionality
     associated with different log items

   - Cleanup resblks interfaces

   - Pass defer ops pointer to defer helpers instead of an enum

   - Initialize di_crc in xfs_log_dinode to prevent KMSAN warnings

   - Use static_assert() instead of BUILD_BUG_ON_MSG() to validate size
     of structures and structure member offsets. This is done in order
     to be able to share the code with userspace

   - Move XFS documentation under a new directory specific to XFS

   - Do not invoke deferred ops' ->create_done callback if the deferred
     operation does not have an intent item associated with it

   - Remove duplicate inclusion of header files from scrub/health.c

   - Refactor Realtime code

   - Cleanup attr code"

* tag 'xfs-6.8-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (123 commits)
  xfs: use the op name in trace_xlog_intent_recovery_failed
  xfs: fix a use after free in xfs_defer_finish_recovery
  xfs: turn the XFS_DA_OP_REPLACE checks in xfs_attr_shortform_addname into asserts
  xfs: remove xfs_attr_sf_hdr_t
  xfs: remove struct xfs_attr_shortform
  xfs: use xfs_attr_sf_findname in xfs_attr_shortform_getvalue
  xfs: remove xfs_attr_shortform_lookup
  xfs: simplify xfs_attr_sf_findname
  xfs: move the xfs_attr_sf_lookup tracepoint
  xfs: return if_data from xfs_idata_realloc
  xfs: make if_data a void pointer
  xfs: fold xfs_rtallocate_extent into xfs_bmap_rtalloc
  xfs: simplify and optimize the RT allocation fallback cascade
  xfs: reorder the minlen and prod calculations in xfs_bmap_rtalloc
  xfs: remove XFS_RTMIN/XFS_RTMAX
  xfs: remove rt-wrappers from xfs_format.h
  xfs: factor out a xfs_rtalloc_sumlevel helper
  xfs: tidy up xfs_rtallocate_extent_exact
  xfs: merge the calls to xfs_rtallocate_range in xfs_rtallocate_block
  xfs: reflow the tail end of xfs_rtallocate_extent_block
  ...
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -121,8 +121,5 @@ Documentation for filesystem implementations.
    udf
    virtiofs
    vfat
-   xfs-delayed-logging-design
-   xfs-maintainer-entry-profile
-   xfs-self-describing-metadata
-   xfs-online-fsck-design
+   xfs/index
    zonefs
--- /dev/null
+++ b/Documentation/filesystems/xfs/index.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+XFS Filesystem Documentation
+============================
+
+.. toctree::
+   :maxdepth: 2
+   :numbered:
+
+   xfs-delayed-logging-design
+   xfs-maintainer-entry-profile
+   xfs-self-describing-metadata
+   xfs-online-fsck-design
--- a/Documentation/filesystems/xfs/xfs-online-fsck-design.rst
+++ b/Documentation/filesystems/xfs/xfs-online-fsck-design.rst
@@ -962,7 +962,7 @@ disk, but these buffer verifiers cannot provide any consistency checking
 between metadata structures.
 
 For more information, please see the documentation for
-Documentation/filesystems/xfs-self-describing-metadata.rst
+Documentation/filesystems/xfs/xfs-self-describing-metadata.rst
 
 Reverse Mapping
 ---------------
--- a/Documentation/maintainer/maintainer-entry-profile.rst
+++ b/Documentation/maintainer/maintainer-entry-profile.rst
@@ -105,4 +105,4 @@ to do something different in the near future.
    ../driver-api/media/maintainer-entry-profile
    ../driver-api/vfio-pci-device-specific-driver-acceptance
    ../nvme/feature-and-quirk-policy
-   ../filesystems/xfs-maintainer-entry-profile
+   ../filesystems/xfs/xfs-maintainer-entry-profile
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23846,10 +23846,10 @@ S:	Supported
 W:	http://xfs.org/
 C:	irc://irc.oftc.net/xfs
 T:	git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
-P:	Documentation/filesystems/xfs-maintainer-entry-profile.rst
+P:	Documentation/filesystems/xfs/xfs-maintainer-entry-profile.rst
 F:	Documentation/ABI/testing/sysfs-fs-xfs
 F:	Documentation/admin-guide/xfs.rst
-F:	Documentation/filesystems/xfs-*
+F:	Documentation/filesystems/xfs/*
 F:	fs/xfs/
 F:	include/uapi/linux/dqblk_xfs.h
 F:	include/uapi/linux/fsmap.h
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -326,7 +326,8 @@ void kill_dax(struct dax_device *dax_dev)
 		return;
 
 	if (dax_dev->holder_data != NULL)
-		dax_holder_notify_failure(dax_dev, 0, U64_MAX, 0);
+		dax_holder_notify_failure(dax_dev, 0, U64_MAX,
+				MF_MEM_PRE_REMOVE);
 
 	clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
 	synchronize_srcu(&dax_srcu);
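The MF_MEM_PRE_REMOVE flag rides in through the existing notify_failure holder callback, which lets a filesystem holder tell a planned device withdrawal apart from a hard media error. A minimal sketch of such a holder op follows; only struct dax_holder_operations, dax_holder() and MF_MEM_PRE_REMOVE come from the kernel, while the handler and both helper functions are hypothetical stand-ins, not the actual xfs_dax_notify_failure() implementation:

#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>		/* MF_MEM_PRE_REMOVE */

/* Hypothetical helpers standing in for filesystem-specific logic. */
static int example_flush_and_quiesce(struct super_block *sb);
static int example_handle_poison(struct super_block *sb, u64 offset, u64 len);

/* Hypothetical holder callback, not the real xfs_dax_notify_failure(). */
static int example_dax_notify_failure(struct dax_device *dax_dev,
		u64 offset, u64 len, int mf_flags)
{
	struct super_block *sb = dax_holder(dax_dev);

	if (mf_flags & MF_MEM_PRE_REMOVE) {
		/*
		 * The pmem device is about to be removed: flush dirty data
		 * and stop issuing new DAX loads/stores instead of treating
		 * the whole range as poisoned.
		 */
		return example_flush_and_quiesce(sb);
	}

	/* A real media error: unmap and report the failed range. */
	return example_handle_poison(sb, offset, len);
}

static const struct dax_holder_operations example_dax_holder_ops = {
	.notify_failure	= example_dax_notify_failure,
};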
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -145,6 +145,7 @@ ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
 
 xfs-y				+= $(addprefix scrub/, \
 				   trace.o \
+				   agb_bitmap.o \
 				   agheader.o \
 				   alloc.o \
 				   attr.o \
@@ -175,14 +176,32 @@ xfs-$(CONFIG_XFS_RT)		+= $(addprefix scrub/, \
 				   rtsummary.o \
 				   )
 
-xfs-$(CONFIG_XFS_QUOTA)		+= scrub/quota.o
+xfs-$(CONFIG_XFS_QUOTA)		+= $(addprefix scrub/, \
+				   dqiterate.o \
+				   quota.o \
+				   )
 
 # online repair
 ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
 xfs-y				+= $(addprefix scrub/, \
 				   agheader_repair.o \
+				   alloc_repair.o \
+				   bmap_repair.o \
+				   cow_repair.o \
+				   ialloc_repair.o \
+				   inode_repair.o \
+				   newbt.o \
 				   reap.o \
+				   refcount_repair.o \
 				   repair.o \
 				   )
+
+xfs-$(CONFIG_XFS_RT)		+= $(addprefix scrub/, \
+				   rtbitmap_repair.o \
+				   )
+
+xfs-$(CONFIG_XFS_QUOTA)		+= $(addprefix scrub/, \
+				   quota_repair.o \
+				   )
 endif
 endif
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -332,6 +332,31 @@ xfs_agino_range(
 	return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last);
 }
 
+/*
+ * Free perag within the specified AG range, it is only used to free unused
+ * perags under the error handling path.
+ */
+void
+xfs_free_unused_perag_range(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agstart,
+	xfs_agnumber_t		agend)
+{
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		index;
+
+	for (index = agstart; index < agend; index++) {
+		spin_lock(&mp->m_perag_lock);
+		pag = radix_tree_delete(&mp->m_perag_tree, index);
+		spin_unlock(&mp->m_perag_lock);
+		if (!pag)
+			break;
+		xfs_buf_hash_destroy(pag);
+		xfs_defer_drain_free(&pag->pag_intents_drain);
+		kmem_free(pag);
+	}
+}
+
 int
 xfs_initialize_perag(
 	struct xfs_mount	*mp,
@@ -424,19 +449,14 @@ xfs_initialize_perag(
 
 out_remove_pag:
 	xfs_defer_drain_free(&pag->pag_intents_drain);
+	spin_lock(&mp->m_perag_lock);
 	radix_tree_delete(&mp->m_perag_tree, index);
+	spin_unlock(&mp->m_perag_lock);
 out_free_pag:
 	kmem_free(pag);
 out_unwind_new_pags:
 	/* unwind any prior newly initialized pags */
-	for (index = first_initialised; index < agcount; index++) {
-		pag = radix_tree_delete(&mp->m_perag_tree, index);
-		if (!pag)
-			break;
-		xfs_buf_hash_destroy(pag);
-		xfs_defer_drain_free(&pag->pag_intents_drain);
-		kmem_free(pag);
-	}
+	xfs_free_unused_perag_range(mp, first_initialised, agcount);
 	return error;
 }
 
@@ -984,7 +1004,7 @@ xfs_ag_shrink_space(
 		if (err2 != -ENOSPC)
 			goto resv_err;
 
-		err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
+		err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
 				XFS_AG_RESV_NONE, true);
 		if (err2)
 			goto resv_err;
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -80,6 +80,16 @@ struct xfs_perag {
 	 */
 	uint16_t	pag_checked;
 	uint16_t	pag_sick;
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+	/*
+	 * Alternate btree heights so that online repair won't trip the write
+	 * verifiers while rebuilding the AG btrees.
+	 */
+	uint8_t		pagf_repair_levels[XFS_BTNUM_AGF];
+	uint8_t		pagf_repair_refcount_level;
+#endif
+
 	spinlock_t	pag_state_lock;
 
 	spinlock_t	pagb_lock;	/* lock for pagb_tree */
@@ -133,6 +143,8 @@ __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA)
 __XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES)
 __XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET)
 
+void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart,
+		xfs_agnumber_t agend);
 int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
 		xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
 int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -411,6 +411,8 @@ xfs_ag_resv_free_extent(
 		fallthrough;
 	case XFS_AG_RESV_NONE:
 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
+		fallthrough;
+	case XFS_AG_RESV_IGNORE:
 		return;
 	}
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -246,11 +246,9 @@ xfs_alloc_btrec_to_irec(
 /* Simple checks for free space records. */
 xfs_failaddr_t
 xfs_alloc_check_irec(
-	struct xfs_btree_cur		*cur,
-	const struct xfs_alloc_rec_incore *irec)
+	struct xfs_perag		*pag,
+	const struct xfs_alloc_rec_incore *irec)
 {
-	struct xfs_perag		*pag = cur->bc_ag.pag;
-
 	if (irec->ar_blockcount == 0)
 		return __this_address;
 
@@ -299,7 +297,7 @@ xfs_alloc_get_rec(
 		return error;
 
 	xfs_alloc_btrec_to_irec(rec, &irec);
-	fa = xfs_alloc_check_irec(cur, &irec);
+	fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec);
 	if (fa)
 		return xfs_alloc_complain_bad_rec(cur, fa, &irec);
 
@@ -2514,7 +2512,7 @@ xfs_defer_agfl_block(
 	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
 
 	xfs_extent_free_get_group(mp, xefi);
-	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
+	xfs_defer_add(tp, &xefi->xefi_list, &xfs_agfl_free_defer_type);
 	return 0;
 }
 
@@ -2522,14 +2520,15 @@ xfs_defer_agfl_block(
  * Add the extent to the list of extents to be free at transaction end.
  * The list is maintained sorted (by block number).
  */
-int
-__xfs_free_extent_later(
+static int
+xfs_defer_extent_free(
 	struct xfs_trans		*tp,
 	xfs_fsblock_t			bno,
 	xfs_filblks_t			len,
 	const struct xfs_owner_info	*oinfo,
 	enum xfs_ag_resv_type		type,
-	bool				skip_discard)
+	bool				skip_discard,
+	struct xfs_defer_pending	**dfpp)
 {
 	struct xfs_extent_free_item	*xefi;
 	struct xfs_mount		*mp = tp->t_mountp;
@@ -2577,10 +2576,105 @@ __xfs_free_extent_later(
 			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
 
 	xfs_extent_free_get_group(mp, xefi);
-	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
+	*dfpp = xfs_defer_add(tp, &xefi->xefi_list, &xfs_extent_free_defer_type);
 	return 0;
 }
 
+int
+xfs_free_extent_later(
+	struct xfs_trans		*tp,
+	xfs_fsblock_t			bno,
+	xfs_filblks_t			len,
+	const struct xfs_owner_info	*oinfo,
+	enum xfs_ag_resv_type		type,
+	bool				skip_discard)
+{
+	struct xfs_defer_pending	*dontcare = NULL;
+
+	return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard,
+			&dontcare);
+}
+
+/*
+ * Set up automatic freeing of unwritten space in the filesystem.
+ *
+ * This function attached a paused deferred extent free item to the
+ * transaction.  Pausing means that the EFI will be logged in the next
+ * transaction commit, but the pending EFI will not be finished until the
+ * pending item is unpaused.
+ *
+ * If the system goes down after the EFI has been persisted to the log but
+ * before the pending item is unpaused, log recovery will find the EFI, fail
+ * to find the EFD, and free the space.
+ *
+ * If the pending item is unpaused, the next transaction commit will log an
+ * EFD without freeing the space.
+ *
+ * Caller must ensure that the tp, fsbno, len, oinfo, and resv flags of the
+ * @args structure are set to the relevant values.
+ */
+int
+xfs_alloc_schedule_autoreap(
+	const struct xfs_alloc_arg	*args,
+	bool				skip_discard,
+	struct xfs_alloc_autoreap	*aarp)
+{
+	int				error;
+
+	error = xfs_defer_extent_free(args->tp, args->fsbno, args->len,
+			&args->oinfo, args->resv, skip_discard, &aarp->dfp);
+	if (error)
+		return error;
+
+	xfs_defer_item_pause(args->tp, aarp->dfp);
+	return 0;
+}
+
+/*
+ * Cancel automatic freeing of unwritten space in the filesystem.
+ *
+ * Earlier, we created a paused deferred extent free item and attached it to
+ * this transaction so that we could automatically roll back a new space
+ * allocation if the system went down.  Now we want to cancel the paused work
+ * item by marking the EFI stale so we don't actually free the space,
+ * unpausing the pending item and logging an EFD.
+ *
+ * The caller generally should have already mapped the space into the ondisk
+ * filesystem.  If the reserved space was partially used, the caller must call
+ * xfs_free_extent_later to create a new EFI to free the unused space.
+ */
+void
+xfs_alloc_cancel_autoreap(
+	struct xfs_trans		*tp,
+	struct xfs_alloc_autoreap	*aarp)
+{
+	struct xfs_defer_pending	*dfp = aarp->dfp;
+	struct xfs_extent_free_item	*xefi;
+
+	if (!dfp)
+		return;
+
+	list_for_each_entry(xefi, &dfp->dfp_work, xefi_list)
+		xefi->xefi_flags |= XFS_EFI_CANCELLED;
+
+	xfs_defer_item_unpause(tp, dfp);
+}
+
+/*
+ * Commit automatic freeing of unwritten space in the filesystem.
+ *
+ * This unpauses an earlier _schedule_autoreap and commits to freeing the
+ * allocated space.  Call this if none of the reserved space was used.
+ */
+void
+xfs_alloc_commit_autoreap(
+	struct xfs_trans		*tp,
+	struct xfs_alloc_autoreap	*aarp)
+{
+	if (aarp->dfp)
+		xfs_defer_item_unpause(tp, aarp->dfp);
+}
+
 #ifdef DEBUG
 /*
  * Check if an AGF has a free extent record whose length is equal to
@@ -3848,7 +3942,7 @@ xfs_alloc_query_range_helper(
 	xfs_failaddr_t			fa;
 
 	xfs_alloc_btrec_to_irec(rec, &irec);
-	fa = xfs_alloc_check_irec(cur, &irec);
+	fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec);
 	if (fa)
 		return xfs_alloc_complain_bad_rec(cur, fa, &irec);
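The three autoreap entry points above form a small protocol: schedule a paused EFI before newly allocated space is used, then either cancel (keep the space, log only an EFD) or commit (unpause and actually free it). A sketch of the intended calling sequence, with error handling trimmed and the consuming step (example_use_blocks) purely hypothetical:

/*
 * Illustrative calling sequence, not kernel code.  args is a filled-out
 * struct xfs_alloc_arg whose allocation just succeeded.
 */
static int example_alloc_with_autoreap(struct xfs_alloc_arg *args)
{
	struct xfs_alloc_autoreap	autoreap;
	int				error;

	/* Log a paused EFI so log recovery frees the blocks after a crash. */
	error = xfs_alloc_schedule_autoreap(args, false, &autoreap);
	if (error)
		return error;

	error = example_use_blocks(args);	/* hypothetical consumer */
	if (error) {
		/* Space unused: unpause the EFI and let it free the blocks. */
		xfs_alloc_commit_autoreap(args->tp, &autoreap);
		return error;
	}

	/* Space mapped into the fs: mark the EFI cancelled, log an EFD only. */
	xfs_alloc_cancel_autoreap(args->tp, &autoreap);
	return 0;
}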
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -185,7 +185,7 @@ xfs_alloc_get_rec(
 union xfs_btree_rec;
 void xfs_alloc_btrec_to_irec(const union xfs_btree_rec *rec,
 		struct xfs_alloc_rec_incore *irec);
-xfs_failaddr_t xfs_alloc_check_irec(struct xfs_btree_cur *cur,
+xfs_failaddr_t xfs_alloc_check_irec(struct xfs_perag *pag,
 		const struct xfs_alloc_rec_incore *irec);
 
 int xfs_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags,
@@ -231,7 +231,7 @@ xfs_buf_to_agfl_bno(
 	return bp->b_addr;
 }
 
-int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
+int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
 		xfs_filblks_t len, const struct xfs_owner_info *oinfo,
 		enum xfs_ag_resv_type type, bool skip_discard);
 
@@ -255,18 +255,18 @@ void xfs_extent_free_get_group(struct xfs_mount *mp,
 #define XFS_EFI_SKIP_DISCARD	(1U << 0) /* don't issue discard */
 #define XFS_EFI_ATTR_FORK	(1U << 1) /* freeing attr fork block */
 #define XFS_EFI_BMBT_BLOCK	(1U << 2) /* freeing bmap btree block */
+#define XFS_EFI_CANCELLED	(1U << 3) /* dont actually free the space */
 
-static inline int
-xfs_free_extent_later(
-	struct xfs_trans		*tp,
-	xfs_fsblock_t			bno,
-	xfs_filblks_t			len,
-	const struct xfs_owner_info	*oinfo,
-	enum xfs_ag_resv_type		type)
-{
-	return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
-}
+struct xfs_alloc_autoreap {
+	struct xfs_defer_pending	*dfp;
+};
+
+int xfs_alloc_schedule_autoreap(const struct xfs_alloc_arg *args,
+		bool skip_discard, struct xfs_alloc_autoreap *aarp);
+void xfs_alloc_cancel_autoreap(struct xfs_trans *tp,
+		struct xfs_alloc_autoreap *aarp);
+void xfs_alloc_commit_autoreap(struct xfs_trans *tp,
+		struct xfs_alloc_autoreap *aarp);
 
 extern struct kmem_cache	*xfs_extfree_item_cache;
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -323,7 +323,18 @@ xfs_allocbt_verify(
 	if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC))
 		btnum = XFS_BTNUM_CNTi;
 	if (pag && xfs_perag_initialised_agf(pag)) {
-		if (level >= pag->pagf_levels[btnum])
+		unsigned int	maxlevel = pag->pagf_levels[btnum];
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+		/*
+		 * Online repair could be rewriting the free space btrees, so
+		 * we'll validate against the larger of either tree while this
+		 * is going on.
+		 */
+		maxlevel = max_t(unsigned int, maxlevel,
+				pag->pagf_repair_levels[btnum]);
+#endif
+
+		if (level >= maxlevel)
 			return __this_address;
 	} else if (level >= mp->m_alloc_maxlevels)
 		return __this_address;
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -862,8 +862,11 @@ xfs_attr_lookup(
 	if (!xfs_inode_hasattr(dp))
 		return -ENOATTR;
 
-	if (dp->i_af.if_format == XFS_DINODE_FMT_LOCAL)
-		return xfs_attr_sf_findname(args, NULL, NULL);
+	if (dp->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
+		if (xfs_attr_sf_findname(args))
+			return -EEXIST;
+		return -ENOATTR;
+	}
 
 	if (xfs_attr_is_leaf(dp)) {
 		error = xfs_attr_leaf_hasname(args, &bp);
@@ -880,11 +883,10 @@ xfs_attr_lookup(
 	return error;
 }
 
-static int
-xfs_attr_intent_init(
+static void
+xfs_attr_defer_add(
 	struct xfs_da_args	*args,
-	unsigned int		op_flags,	/* op flag (set or remove) */
-	struct xfs_attr_intent	**attr)		/* new xfs_attr_intent */
+	unsigned int		op_flags)
 {
 
 	struct xfs_attr_intent	*new;
@@ -893,66 +895,22 @@ xfs_attr_intent_init(
 	new->xattri_op_flags = op_flags;
 	new->xattri_da_args = args;
 
-	*attr = new;
-	return 0;
-}
-
-/* Sets an attribute for an inode as a deferred operation */
-static int
-xfs_attr_defer_add(
-	struct xfs_da_args	*args)
-{
-	struct xfs_attr_intent	*new;
-	int			error = 0;
-
-	error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_SET, &new);
-	if (error)
-		return error;
-
-	new->xattri_dela_state = xfs_attr_init_add_state(args);
-	xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
-	trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp);
-
-	return 0;
-}
-
-/* Sets an attribute for an inode as a deferred operation */
-static int
-xfs_attr_defer_replace(
-	struct xfs_da_args	*args)
-{
-	struct xfs_attr_intent	*new;
-	int			error = 0;
-
-	error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_REPLACE, &new);
-	if (error)
-		return error;
-
-	new->xattri_dela_state = xfs_attr_init_replace_state(args);
-	xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
-	trace_xfs_attr_defer_replace(new->xattri_dela_state, args->dp);
-
-	return 0;
-}
-
-/* Removes an attribute for an inode as a deferred operation */
-static int
-xfs_attr_defer_remove(
-	struct xfs_da_args	*args)
-{
-
-	struct xfs_attr_intent	*new;
-	int			error;
-
-	error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_REMOVE, &new);
-	if (error)
-		return error;
-
-	new->xattri_dela_state = xfs_attr_init_remove_state(args);
-	xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
-	trace_xfs_attr_defer_remove(new->xattri_dela_state, args->dp);
-
-	return 0;
+	switch (op_flags) {
+	case XFS_ATTRI_OP_FLAGS_SET:
+		new->xattri_dela_state = xfs_attr_init_add_state(args);
+		break;
+	case XFS_ATTRI_OP_FLAGS_REPLACE:
+		new->xattri_dela_state = xfs_attr_init_replace_state(args);
+		break;
+	case XFS_ATTRI_OP_FLAGS_REMOVE:
+		new->xattri_dela_state = xfs_attr_init_remove_state(args);
+		break;
+	default:
+		ASSERT(0);
+	}
+
+	xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type);
+	trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp);
 }
 
 /*
@@ -1038,16 +996,16 @@ xfs_attr_set(
 	error = xfs_attr_lookup(args);
 	switch (error) {
 	case -EEXIST:
-		/* if no value, we are performing a remove operation */
 		if (!args->value) {
-			error = xfs_attr_defer_remove(args);
+			/* if no value, we are performing a remove operation */
+			xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE);
 			break;
 		}
 
 		/* Pure create fails if the attr already exists */
 		if (args->attr_flags & XATTR_CREATE)
 			goto out_trans_cancel;
-
-		error = xfs_attr_defer_replace(args);
+		xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE);
 		break;
 	case -ENOATTR:
 		/* Can't remove what isn't there. */
@@ -1057,14 +1015,11 @@ xfs_attr_set(
 		/* Pure replace fails if no existing attr to replace. */
 		if (args->attr_flags & XATTR_REPLACE)
 			goto out_trans_cancel;
-
-		error = xfs_attr_defer_add(args);
+		xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET);
 		break;
 	default:
 		goto out_trans_cancel;
 	}
-	if (error)
-		goto out_trans_cancel;
 
 	/*
 	 * If this is a synchronous mount, make sure that the
@@ -1097,10 +1052,9 @@ xfs_attr_set(
 
 static inline int xfs_attr_sf_totsize(struct xfs_inode *dp)
 {
-	struct xfs_attr_shortform *sf;
+	struct xfs_attr_sf_hdr *sf = dp->i_af.if_data;
 
-	sf = (struct xfs_attr_shortform *)dp->i_af.if_u1.if_data;
-	return be16_to_cpu(sf->hdr.totsize);
+	return be16_to_cpu(sf->totsize);
 }
 
 /*
@@ -1112,19 +1066,13 @@ xfs_attr_shortform_addname(
 	struct xfs_da_args	*args)
 {
 	int			newsize, forkoff;
-	int			error;
 
 	trace_xfs_attr_sf_addname(args);
 
-	error = xfs_attr_shortform_lookup(args);
-	switch (error) {
-	case -ENOATTR:
-		if (args->op_flags & XFS_DA_OP_REPLACE)
-			return error;
-		break;
-	case -EEXIST:
-		if (!(args->op_flags & XFS_DA_OP_REPLACE))
-			return error;
+	if (xfs_attr_sf_findname(args)) {
+		int		error;
+
+		ASSERT(args->op_flags & XFS_DA_OP_REPLACE);
+
 		error = xfs_attr_sf_removename(args);
 		if (error)
@@ -1137,11 +1085,8 @@ xfs_attr_shortform_addname(
 		 * around.
 		 */
 		args->op_flags &= ~XFS_DA_OP_REPLACE;
-		break;
-	case 0:
-		break;
-	default:
-		return error;
+	} else {
+		ASSERT(!(args->op_flags & XFS_DA_OP_REPLACE));
 	}
 
 	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -690,56 +690,32 @@ xfs_attr_shortform_create(
 	ASSERT(ifp->if_bytes == 0);
 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS)
 		ifp->if_format = XFS_DINODE_FMT_LOCAL;
-	xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
-	hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
+
+	hdr = xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
 	memset(hdr, 0, sizeof(*hdr));
 	hdr->totsize = cpu_to_be16(sizeof(*hdr));
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
 }
 
 /*
- * Return -EEXIST if attr is found, or -ENOATTR if not
- * args:  args containing attribute name and namelen
- * sfep:  If not null, pointer will be set to the last attr entry found on
-	  -EEXIST.  On -ENOATTR pointer is left at the last entry in the list
- * basep: If not null, pointer is set to the byte offset of the entry in the
- *	  list on -EEXIST.  On -ENOATTR, pointer is left at the byte offset of
- *	  the last entry in the list
+ * Return the entry if the attr in args is found, or NULL if not.
  */
-int
+struct xfs_attr_sf_entry *
 xfs_attr_sf_findname(
-	struct xfs_da_args		*args,
-	struct xfs_attr_sf_entry	**sfep,
-	unsigned int			*basep)
+	struct xfs_da_args		*args)
 {
-	struct xfs_attr_shortform	*sf;
-	struct xfs_attr_sf_entry	*sfe;
-	unsigned int			base = sizeof(struct xfs_attr_sf_hdr);
-	int				size = 0;
-	int				end;
-	int				i;
+	struct xfs_attr_sf_hdr		*sf = args->dp->i_af.if_data;
+	struct xfs_attr_sf_entry	*sfe;
 
-	sf = (struct xfs_attr_shortform *)args->dp->i_af.if_u1.if_data;
-	sfe = &sf->list[0];
-	end = sf->hdr.count;
-	for (i = 0; i < end; sfe = xfs_attr_sf_nextentry(sfe),
-			base += size, i++) {
-		size = xfs_attr_sf_entsize(sfe);
-		if (!xfs_attr_match(args, sfe->namelen, sfe->nameval,
-				sfe->flags))
-			continue;
-		break;
+	for (sfe = xfs_attr_sf_firstentry(sf);
+	     sfe < xfs_attr_sf_endptr(sf);
+	     sfe = xfs_attr_sf_nextentry(sfe)) {
+		if (xfs_attr_match(args, sfe->namelen, sfe->nameval,
+				sfe->flags))
+			return sfe;
 	}
 
-	if (sfep != NULL)
-		*sfep = sfe;
-
-	if (basep != NULL)
-		*basep = base;
-
-	if (i == end)
-		return -ENOATTR;
-	return -EEXIST;
+	return NULL;
 }
 
 /*
@@ -751,38 +727,31 @@ xfs_attr_shortform_add(
 	struct xfs_da_args		*args,
 	int				forkoff)
 {
-	struct xfs_attr_shortform	*sf;
+	struct xfs_inode		*dp = args->dp;
+	struct xfs_mount		*mp = dp->i_mount;
+	struct xfs_ifork		*ifp = &dp->i_af;
+	struct xfs_attr_sf_hdr		*sf = ifp->if_data;
 	struct xfs_attr_sf_entry	*sfe;
-	int				offset, size;
-	struct xfs_mount		*mp;
-	struct xfs_inode		*dp;
-	struct xfs_ifork		*ifp;
+	int				size;
 
 	trace_xfs_attr_sf_add(args);
 
-	dp = args->dp;
-	mp = dp->i_mount;
 	dp->i_forkoff = forkoff;
 
-	ifp = &dp->i_af;
 	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
-	sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
-	if (xfs_attr_sf_findname(args, &sfe, NULL) == -EEXIST)
-		ASSERT(0);
+	ASSERT(!xfs_attr_sf_findname(args));
 
-	offset = (char *)sfe - (char *)sf;
 	size = xfs_attr_sf_entsize_byname(args->namelen, args->valuelen);
-	xfs_idata_realloc(dp, size, XFS_ATTR_FORK);
-	sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
-	sfe = (struct xfs_attr_sf_entry *)((char *)sf + offset);
+	sf = xfs_idata_realloc(dp, size, XFS_ATTR_FORK);
 
+	sfe = xfs_attr_sf_endptr(sf);
 	sfe->namelen = args->namelen;
 	sfe->valuelen = args->valuelen;
 	sfe->flags = args->attr_filter;
 	memcpy(sfe->nameval, args->name, args->namelen);
 	memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
-	sf->hdr.count++;
-	be16_add_cpu(&sf->hdr.totsize, size);
+	sf->count++;
+	be16_add_cpu(&sf->totsize, size);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
 
 	xfs_sbversion_add_attr2(mp, args->trans);
@@ -811,48 +780,43 @@ int
 xfs_attr_sf_removename(
 	struct xfs_da_args		*args)
 {
-	struct xfs_attr_shortform	*sf;
+	struct xfs_inode		*dp = args->dp;
+	struct xfs_mount		*mp = dp->i_mount;
+	struct xfs_attr_sf_hdr		*sf = dp->i_af.if_data;
 	struct xfs_attr_sf_entry	*sfe;
-	int				size = 0, end, totsize;
-	unsigned int			base;
-	struct xfs_mount		*mp;
-	struct xfs_inode		*dp;
-	int				error;
+	uint16_t			totsize = be16_to_cpu(sf->totsize);
+	void				*next, *end;
+	int				size = 0;
 
 	trace_xfs_attr_sf_remove(args);
 
-	dp = args->dp;
-	mp = dp->i_mount;
-	sf = (struct xfs_attr_shortform *)dp->i_af.if_u1.if_data;
-
-	error = xfs_attr_sf_findname(args, &sfe, &base);
-
-	/*
-	 * If we are recovering an operation, finding nothing to
-	 * remove is not an error - it just means there was nothing
-	 * to clean up.
-	 */
-	if (error == -ENOATTR && (args->op_flags & XFS_DA_OP_RECOVERY))
-		return 0;
-	if (error != -EEXIST)
-		return error;
-	size = xfs_attr_sf_entsize(sfe);
+	sfe = xfs_attr_sf_findname(args);
+	if (!sfe) {
+		/*
+		 * If we are recovering an operation, finding nothing to remove
+		 * is not an error, it just means there was nothing to clean up.
+		 */
+		if (args->op_flags & XFS_DA_OP_RECOVERY)
+			return 0;
+		return -ENOATTR;
+	}
 
 	/*
 	 * Fix up the attribute fork data, covering the hole
 	 */
-	end = base + size;
-	totsize = be16_to_cpu(sf->hdr.totsize);
-	if (end != totsize)
-		memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end);
-	sf->hdr.count--;
-	be16_add_cpu(&sf->hdr.totsize, -size);
+	size = xfs_attr_sf_entsize(sfe);
+	next = xfs_attr_sf_nextentry(sfe);
+	end = xfs_attr_sf_endptr(sf);
+	if (next < end)
+		memmove(sfe, next, end - next);
+	sf->count--;
+	totsize -= size;
+	sf->totsize = cpu_to_be16(totsize);
 
 	/*
 	 * Fix up the start offset of the attribute fork
 	 */
-	totsize -= size;
-	if (totsize == sizeof(xfs_attr_sf_hdr_t) && xfs_has_attr2(mp) &&
+	if (totsize == sizeof(struct xfs_attr_sf_hdr) && xfs_has_attr2(mp) &&
 	    (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
 	    !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE))) {
 		xfs_attr_fork_remove(dp, args->trans);
@@ -860,7 +824,7 @@ xfs_attr_sf_removename(
 		xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
 		dp->i_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
 		ASSERT(dp->i_forkoff);
-		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
+		ASSERT(totsize > sizeof(struct xfs_attr_sf_hdr) ||
 			(args->op_flags & XFS_DA_OP_ADDNAME) ||
 			!xfs_has_attr2(mp) ||
 			dp->i_df.if_format == XFS_DINODE_FMT_BTREE);
@@ -873,33 +837,6 @@ xfs_attr_sf_removename(
 	return 0;
 }
 
-/*
- * Look up a name in a shortform attribute list structure.
- */
-/*ARGSUSED*/
-int
-xfs_attr_shortform_lookup(xfs_da_args_t *args)
-{
-	struct xfs_attr_shortform *sf;
-	struct xfs_attr_sf_entry *sfe;
-	int i;
-	struct xfs_ifork *ifp;
-
-	trace_xfs_attr_sf_lookup(args);
-
-	ifp = &args->dp->i_af;
-	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
-	sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
-	sfe = &sf->list[0];
-	for (i = 0; i < sf->hdr.count;
-			sfe = xfs_attr_sf_nextentry(sfe), i++) {
-		if (xfs_attr_match(args, sfe->namelen, sfe->nameval,
-				sfe->flags))
-			return -EEXIST;
-	}
-	return -ENOATTR;
-}
-
 /*
  * Retrieve the attribute value and length.
  *
@@ -909,23 +846,19 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
  */
 int
 xfs_attr_shortform_getvalue(
-	struct xfs_da_args	*args)
+	struct xfs_da_args		*args)
 {
-	struct xfs_attr_shortform *sf;
-	struct xfs_attr_sf_entry *sfe;
-	int i;
+	struct xfs_attr_sf_entry	*sfe;
 
 	ASSERT(args->dp->i_af.if_format == XFS_DINODE_FMT_LOCAL);
-	sf = (struct xfs_attr_shortform *)args->dp->i_af.if_u1.if_data;
-	sfe = &sf->list[0];
-	for (i = 0; i < sf->hdr.count;
-			sfe = xfs_attr_sf_nextentry(sfe), i++) {
-		if (xfs_attr_match(args, sfe->namelen, sfe->nameval,
-				sfe->flags))
-			return xfs_attr_copy_value(args,
-				&sfe->nameval[args->namelen], sfe->valuelen);
-	}
-	return -ENOATTR;
+
+	trace_xfs_attr_sf_lookup(args);
+
+	sfe = xfs_attr_sf_findname(args);
+	if (!sfe)
+		return -ENOATTR;
+	return xfs_attr_copy_value(args, &sfe->nameval[args->namelen],
+			sfe->valuelen);
 }
 
 /* Convert from using the shortform to the leaf format. */
@@ -933,26 +866,23 @@ int
 xfs_attr_shortform_to_leaf(
 	struct xfs_da_args		*args)
 {
-	struct xfs_inode		*dp;
-	struct xfs_attr_shortform	*sf;
+	struct xfs_inode		*dp = args->dp;
+	struct xfs_ifork		*ifp = &dp->i_af;
+	struct xfs_attr_sf_hdr		*sf = ifp->if_data;
 	struct xfs_attr_sf_entry	*sfe;
+	int				size = be16_to_cpu(sf->totsize);
 	struct xfs_da_args		nargs;
 	char				*tmpbuffer;
-	int				error, i, size;
+	int				error, i;
 	xfs_dablk_t			blkno;
 	struct xfs_buf			*bp;
-	struct xfs_ifork		*ifp;
 
 	trace_xfs_attr_sf_to_leaf(args);
 
-	dp = args->dp;
-	ifp = &dp->i_af;
-	sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
-	size = be16_to_cpu(sf->hdr.totsize);
 	tmpbuffer = kmem_alloc(size, 0);
 	ASSERT(tmpbuffer != NULL);
-	memcpy(tmpbuffer, ifp->if_u1.if_data, size);
-	sf = (struct xfs_attr_shortform *)tmpbuffer;
+	memcpy(tmpbuffer, ifp->if_data, size);
+	sf = (struct xfs_attr_sf_hdr *)tmpbuffer;
 
 	xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
 	xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK);
@@ -975,8 +905,8 @@ xfs_attr_shortform_to_leaf(
 	nargs.trans = args->trans;
 	nargs.op_flags = XFS_DA_OP_OKNOENT;
 
-	sfe = &sf->list[0];
-	for (i = 0; i < sf->hdr.count; i++) {
+	sfe = xfs_attr_sf_firstentry(sf);
+	for (i = 0; i < sf->count; i++) {
 		nargs.name = sfe->nameval;
 		nargs.namelen = sfe->namelen;
 		nargs.value = &sfe->nameval[nargs.namelen];
@@ -1040,23 +970,16 @@ xfs_attr_shortform_allfit(
 	return xfs_attr_shortform_bytesfit(dp, bytes);
 }
 
-/* Verify the consistency of an inline attribute fork. */
+/* Verify the consistency of a raw inline attribute fork. */
 xfs_failaddr_t
 xfs_attr_shortform_verify(
-	struct xfs_inode		*ip)
+	struct xfs_attr_sf_hdr		*sfp,
+	size_t				size)
 {
-	struct xfs_attr_shortform	*sfp;
-	struct xfs_attr_sf_entry	*sfep;
+	struct xfs_attr_sf_entry	*sfep = xfs_attr_sf_firstentry(sfp);
 	struct xfs_attr_sf_entry	*next_sfep;
 	char				*endp;
-	struct xfs_ifork		*ifp;
 	int				i;
-	int64_t				size;
-
-	ASSERT(ip->i_af.if_format == XFS_DINODE_FMT_LOCAL);
-	ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
-	sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
-	size = ifp->if_bytes;
 
 	/*
 	 * Give up if the attribute is way too short.
@@ -1067,8 +990,7 @@ xfs_attr_shortform_verify(
 	endp = (char *)sfp + size;
 
 	/* Check all reported entries */
-	sfep = &sfp->list[0];
-	for (i = 0; i < sfp->hdr.count; i++) {
+	for (i = 0; i < sfp->count; i++) {
 		/*
 		 * struct xfs_attr_sf_entry has a variable length.
 		 * Check the fixed-offset parts of the structure are
@@ -1244,14 +1166,10 @@ xfs_attr3_leaf_to_node(
 	if (error)
 		goto out;
 
-	/* copy leaf to new buffer, update identifiers */
-	xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
-	bp2->b_ops = bp1->b_ops;
-	memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize);
-	if (xfs_has_crc(mp)) {
-		struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
-		hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp2));
-	}
+	/*
+	 * Copy leaf to new buffer and log it.
+	 */
+	xfs_da_buf_copy(bp2, bp1, args->geo->blksize);
 	xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);
 
 	/*
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -47,16 +47,14 @@ struct xfs_attr3_icleaf_hdr {
  */
 void	xfs_attr_shortform_create(struct xfs_da_args *args);
 void	xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
-int	xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
 int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
 int	xfs_attr_sf_removename(struct xfs_da_args *args);
-int	xfs_attr_sf_findname(struct xfs_da_args *args,
-		struct xfs_attr_sf_entry **sfep,
-		unsigned int *basep);
+struct xfs_attr_sf_entry *xfs_attr_sf_findname(struct xfs_da_args *args);
 int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
-xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip);
+xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_attr_sf_hdr *sfp,
+		size_t size);
 void	xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
 
 /*
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -6,14 +6,6 @@
 #ifndef __XFS_ATTR_SF_H__
 #define	__XFS_ATTR_SF_H__
 
-/*
- * Attribute storage when stored inside the inode.
- *
- * Small attribute lists are packed as tightly as possible so as
- * to fit into the literal area of the inode.
- */
-typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
-
 /*
  * We generate this then sort it, attr_list() must return things in hash-order.
  */
@@ -41,11 +33,25 @@ static inline int xfs_attr_sf_entsize(struct xfs_attr_sf_entry *sfep)
 	return struct_size(sfep, nameval, sfep->namelen + sfep->valuelen);
 }
 
-/* next entry in struct */
+/* first entry in the SF attr fork */
+static inline struct xfs_attr_sf_entry *
+xfs_attr_sf_firstentry(struct xfs_attr_sf_hdr *hdr)
+{
+	return (struct xfs_attr_sf_entry *)(hdr + 1);
+}
+
+/* next entry after sfep */
 static inline struct xfs_attr_sf_entry *
 xfs_attr_sf_nextentry(struct xfs_attr_sf_entry *sfep)
{
	return (void *)sfep + xfs_attr_sf_entsize(sfep);
}

+/* pointer to the space after the last entry, e.g. for adding a new one */
+static inline struct xfs_attr_sf_entry *
+xfs_attr_sf_endptr(struct xfs_attr_sf_hdr *sf)
+{
+	return (void *)sf + be16_to_cpu(sf->totsize);
+}
+
 #endif /* __XFS_ATTR_SF_H__ */
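Taken together, these three helpers replace the open-coded indexed loops that the attr code used to carry: xfs_attr_sf_firstentry() gives the entry right after the header, xfs_attr_sf_nextentry() advances by each entry's variable size, and xfs_attr_sf_endptr() bounds the walk via totsize. A minimal sketch of a walk over a shortform attr fork using only these helpers; the visiting callback is illustrative, not from the kernel:

/*
 * Illustrative walker, not kernel code: visit every shortform entry
 * between the first entry and the end pointer, exactly as the new
 * xfs_attr_sf_findname() loop does.
 */
static void example_walk_sf_attrs(struct xfs_attr_sf_hdr *sf)
{
	struct xfs_attr_sf_entry *sfe;

	for (sfe = xfs_attr_sf_firstentry(sf);
	     sfe < xfs_attr_sf_endptr(sf);
	     sfe = xfs_attr_sf_nextentry(sfe)) {
		/* sfe->nameval holds namelen name bytes, then value bytes */
		example_visit(sfe->nameval, sfe->namelen,
			      &sfe->nameval[sfe->namelen],
			      sfe->valuelen);	/* hypothetical callback */
	}
}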
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -575,7 +575,7 @@ xfs_bmap_btree_to_extents(
 
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
-			XFS_AG_RESV_NONE);
+			XFS_AG_RESV_NONE, false);
 	if (error)
 		return error;
 
@@ -747,7 +747,7 @@ xfs_bmap_local_to_extents_empty(
 	ASSERT(ifp->if_nextents == 0);
 
 	xfs_bmap_forkoff_reset(ip, whichfork);
-	ifp->if_u1.if_root = NULL;
+	ifp->if_data = NULL;
 	ifp->if_height = 0;
 	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -832,7 +832,7 @@ xfs_bmap_local_to_extents(
 	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 	flags |= XFS_ILOG_CORE;
 
-	ifp->if_u1.if_root = NULL;
+	ifp->if_data = NULL;
 	ifp->if_height = 0;
 
 	rec.br_startoff = 0;
@@ -3044,7 +3044,8 @@ xfs_bmap_extsize_align(
 
 #define XFS_ALLOC_GAP_UNITS	4
 
-void
+/* returns true if ap->blkno was modified */
+bool
 xfs_bmap_adjacent(
 	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
 {
@@ -3079,13 +3080,14 @@ xfs_bmap_adjacent(
 		if (adjust &&
 		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
 			ap->blkno += adjust;
+		return true;
 	}
 	/*
 	 * If not at eof, then compare the two neighbor blocks.
 	 * Figure out whether either one gives us a good starting point,
 	 * and pick the better one.
 	 */
-	else if (!ap->eof) {
+	if (!ap->eof) {
 		xfs_fsblock_t	gotbno;		/* right side block number */
 		xfs_fsblock_t	gotdiff=0;	/* right side difference */
 		xfs_fsblock_t	prevbno;	/* left side block number */
@@ -3165,14 +3167,21 @@ xfs_bmap_adjacent(
 		 * If both valid, pick the better one, else the only good
 		 * one, else ap->blkno is already set (to 0 or the inode block).
 		 */
-		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
+		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) {
 			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
-		else if (prevbno != NULLFSBLOCK)
+			return true;
+		}
+		if (prevbno != NULLFSBLOCK) {
 			ap->blkno = prevbno;
-		else if (gotbno != NULLFSBLOCK)
+			return true;
+		}
+		if (gotbno != NULLFSBLOCK) {
 			ap->blkno = gotbno;
+			return true;
+		}
 	}
 #undef ISVALID
+	return false;
 }
 
 int
@@ -3263,11 +3272,14 @@ xfs_bmap_btalloc_select_lengths(
 }
 
 /* Update all inode and quota accounting for the allocation we just did. */
-static void
-xfs_bmap_btalloc_accounting(
-	struct xfs_bmalloca	*ap,
-	struct xfs_alloc_arg	*args)
+void
+xfs_bmap_alloc_account(
+	struct xfs_bmalloca	*ap)
 {
+	bool			isrt = XFS_IS_REALTIME_INODE(ap->ip) &&
+					(ap->flags & XFS_BMAPI_ATTRFORK);
+	uint			fld;
+
 	if (ap->flags & XFS_BMAPI_COWFORK) {
 		/*
 		 * COW fork blocks are in-core only and thus are treated as
@@ -3279,7 +3291,7 @@ xfs_bmap_btalloc_accounting(
 		 * yet.
 		 */
 		if (ap->wasdel) {
-			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
+			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
 			return;
 		}
 
@@ -3291,22 +3303,25 @@ xfs_bmap_btalloc_accounting(
 		 * This essentially transfers the transaction quota reservation
 		 * to that of a delalloc extent.
 		 */
-		ap->ip->i_delayed_blks += args->len;
-		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
-				-(long)args->len);
+		ap->ip->i_delayed_blks += ap->length;
+		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, isrt ?
+				XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
+				-(long)ap->length);
 		return;
 	}
 
 	/* data/attr fork only */
-	ap->ip->i_nblocks += args->len;
+	ap->ip->i_nblocks += ap->length;
 	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
 	if (ap->wasdel) {
-		ap->ip->i_delayed_blks -= args->len;
-		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
+		ap->ip->i_delayed_blks -= ap->length;
+		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
+		fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT;
+	} else {
+		fld = isrt ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
 	}
-	xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
-		ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
-		args->len);
+
+	xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length);
 }
 
 static int
@@ -3380,7 +3395,7 @@ xfs_bmap_process_allocated_extent(
 		ap->offset = orig_offset;
 	else if (ap->offset + ap->length < orig_offset + orig_length)
 		ap->offset = orig_offset + orig_length - ap->length;
-	xfs_bmap_btalloc_accounting(ap, args);
+	xfs_bmap_alloc_account(ap);
 }
 
 #ifdef DEBUG
@@ -5010,7 +5025,6 @@ xfs_bmap_del_extent_real(
 	xfs_fileoff_t		del_endoff;	/* first offset past del */
 	int			do_fx;	/* free extent at end of routine */
 	int			error;	/* error return value */
-	int			flags = 0;/* inode logging flags */
 	struct xfs_bmbt_irec	got;	/* current extent entry */
 	xfs_fileoff_t		got_endoff;	/* first offset past got */
 	int			i;	/* temp state */
@@ -5023,6 +5037,8 @@ xfs_bmap_del_extent_real(
 	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
 	struct xfs_bmbt_irec	old;
 
+	*logflagsp = 0;
+
 	mp = ip->i_mount;
 	XFS_STATS_INC(mp, xs_del_exlist);
 
@@ -5035,7 +5051,6 @@ xfs_bmap_del_extent_real(
 	ASSERT(got_endoff >= del_endoff);
 	ASSERT(!isnullstartblock(got.br_startblock));
 	qfield = 0;
-	error = 0;
 
 	/*
 	 * If it's the case where the directory code is running with no block
@@ -5051,13 +5066,13 @@ xfs_bmap_del_extent_real(
 	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
 		return -ENOSPC;
 
-	flags = XFS_ILOG_CORE;
+	*logflagsp = XFS_ILOG_CORE;
 	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
 		if (!(bflags & XFS_BMAPI_REMAP)) {
 			error = xfs_rtfree_blocks(tp, del->br_startblock,
 					del->br_blockcount);
 			if (error)
-				goto done;
+				return error;
 		}
 
 		do_fx = 0;
@@ -5072,11 +5087,9 @@ xfs_bmap_del_extent_real(
 	if (cur) {
 		error = xfs_bmbt_lookup_eq(cur, &got, &i);
 		if (error)
-			goto done;
-		if (XFS_IS_CORRUPT(mp, i != 1)) {
-			error = -EFSCORRUPTED;
-			goto done;
-		}
+			return error;
+		if (XFS_IS_CORRUPT(mp, i != 1))
+			return -EFSCORRUPTED;
 	}
 
 	if (got.br_startoff == del->br_startoff)
@@ -5093,17 +5106,15 @@ xfs_bmap_del_extent_real(
 		xfs_iext_prev(ifp, icur);
 		ifp->if_nextents--;
 
-		flags |= XFS_ILOG_CORE;
+		*logflagsp |= XFS_ILOG_CORE;
 		if (!cur) {
-			flags |= xfs_ilog_fext(whichfork);
+			*logflagsp |= xfs_ilog_fext(whichfork);
 			break;
 		}
 		if ((error = xfs_btree_delete(cur, &i)))
-			goto done;
-		if (XFS_IS_CORRUPT(mp, i != 1)) {
-			error = -EFSCORRUPTED;
-			goto done;
-		}
+			return error;
+		if (XFS_IS_CORRUPT(mp, i != 1))
+			return -EFSCORRUPTED;
 		break;
 	case BMAP_LEFT_FILLING:
 		/*
@@ -5114,12 +5125,12 @@ xfs_bmap_del_extent_real(
 		got.br_blockcount -= del->br_blockcount;
 		xfs_iext_update_extent(ip, state, icur, &got);
 		if (!cur) {
-			flags |= xfs_ilog_fext(whichfork);
+			*logflagsp |= xfs_ilog_fext(whichfork);
 			break;
 		}
 		error = xfs_bmbt_update(cur, &got);
 		if (error)
-			goto done;
+			return error;
 		break;
 	case BMAP_RIGHT_FILLING:
 		/*
@@ -5128,12 +5139,12 @@ xfs_bmap_del_extent_real(
 		got.br_blockcount -= del->br_blockcount;
 		xfs_iext_update_extent(ip, state, icur, &got);
 		if (!cur) {
-			flags |= xfs_ilog_fext(whichfork);
+			*logflagsp |= xfs_ilog_fext(whichfork);
 			break;
 		}
 		error = xfs_bmbt_update(cur, &got);
 		if (error)
-			goto done;
+			return error;
 		break;
 	case 0:
 		/*
@@ -5150,18 +5161,18 @@ xfs_bmap_del_extent_real(
 		new.br_state = got.br_state;
 		new.br_startblock = del_endblock;
 
-		flags |= XFS_ILOG_CORE;
+		*logflagsp |= XFS_ILOG_CORE;
 		if (cur) {
 			error = xfs_bmbt_update(cur, &got);
 			if (error)
-				goto done;
+				return error;
 			error = xfs_btree_increment(cur, 0, &i);
 			if (error)
-				goto done;
+				return error;
 			cur->bc_rec.b = new;
 			error = xfs_btree_insert(cur, &i);
 			if (error && error != -ENOSPC)
-				goto done;
+				return error;
 			/*
 			 * If get no-space back from btree insert, it tried a
 			 * split, and we have a zero block reservation. Fix up
@@ -5174,33 +5185,28 @@ xfs_bmap_del_extent_real(
 				 */
 				error = xfs_bmbt_lookup_eq(cur, &got, &i);
 				if (error)
-					goto done;
-				if (XFS_IS_CORRUPT(mp, i != 1)) {
-					error = -EFSCORRUPTED;
-					goto done;
-				}
+					return error;
+				if (XFS_IS_CORRUPT(mp, i != 1))
+					return -EFSCORRUPTED;
 				/*
 				 * Update the btree record back
 				 * to the original value.
 				 */
 				error = xfs_bmbt_update(cur, &old);
 				if (error)
-					goto done;
+					return error;
 				/*
 				 * Reset the extent record back
 				 * to the original value.
 				 */
 				xfs_iext_update_extent(ip, state, icur, &old);
-				flags = 0;
-				error = -ENOSPC;
-				goto done;
+				*logflagsp = 0;
+				return -ENOSPC;
 			}
-			if (XFS_IS_CORRUPT(mp, i != 1)) {
-				error = -EFSCORRUPTED;
-				goto done;
-			}
+			if (XFS_IS_CORRUPT(mp, i != 1))
+				return -EFSCORRUPTED;
 		} else
-			flags |= xfs_ilog_fext(whichfork);
+			*logflagsp |= xfs_ilog_fext(whichfork);
 
 		ifp->if_nextents++;
 		xfs_iext_next(ifp, icur);
@@ -5218,13 +5224,13 @@ xfs_bmap_del_extent_real(
 		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
 			xfs_refcount_decrease_extent(tp, del);
 		} else {
-			error = __xfs_free_extent_later(tp, del->br_startblock,
+			error = xfs_free_extent_later(tp, del->br_startblock,
 					del->br_blockcount, NULL,
 					XFS_AG_RESV_NONE,
 					((bflags & XFS_BMAPI_NODISCARD) ||
 					del->br_state == XFS_EXT_UNWRITTEN));
 			if (error)
-				goto done;
+				return error;
 		}
 	}
 
@@ -5239,9 +5245,7 @@ xfs_bmap_del_extent_real(
 	if (qfield && !(bflags & XFS_BMAPI_REMAP))
 		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
 
-done:
-	*logflagsp = flags;
-	return error;
+	return 0;
 }
 
 /*
@@ -5250,7 +5254,7 @@ xfs_bmap_del_extent_real(
  * that value.  If not all extents in the block range can be removed then
  * *done is set.
  */
-int						/* error */
+static int
 __xfs_bunmapi(
 	struct xfs_trans	*tp,		/* transaction pointer */
 	struct xfs_inode	*ip,		/* incore inode */
@@ -6102,7 +6106,7 @@ __xfs_bmap_add(
 	bi->bi_bmap = *bmap;
 
 	xfs_bmap_update_get_group(tp->t_mountp, bi);
-	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
+	xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type);
 	return 0;
 }
 
@@ -6179,19 +6183,18 @@ xfs_bmap_finish_one(
 	return error;
 }
 
-/* Check that an inode's extent does not have invalid flags or bad ranges. */
+/* Check that an extent does not have invalid flags or bad ranges. */
 xfs_failaddr_t
-xfs_bmap_validate_extent(
-	struct xfs_inode	*ip,
+xfs_bmap_validate_extent_raw(
+	struct xfs_mount	*mp,
+	bool			rtfile,
 	int			whichfork,
 	struct xfs_bmbt_irec	*irec)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-
 	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
 		return __this_address;
 
-	if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) {
+	if (rtfile && whichfork == XFS_DATA_FORK) {
 		if (!xfs_verify_rtbext(mp, irec->br_startblock,
 					irec->br_blockcount))
 			return __this_address;
@@ -6221,3 +6224,53 @@ xfs_bmap_intent_destroy_cache(void)
 	kmem_cache_destroy(xfs_bmap_intent_cache);
 	xfs_bmap_intent_cache = NULL;
 }
+
+/* Check that an inode's extent does not have invalid flags or bad ranges. */
+xfs_failaddr_t
+xfs_bmap_validate_extent(
+	struct xfs_inode	*ip,
+	int			whichfork,
+	struct xfs_bmbt_irec	*irec)
+{
+	return xfs_bmap_validate_extent_raw(ip->i_mount,
+			XFS_IS_REALTIME_INODE(ip), whichfork, irec);
+}
+
+/*
+ * Used in xfs_itruncate_extents().  This is the maximum number of extents
+ * freed from a file in a single transaction.
+ */
+#define	XFS_ITRUNC_MAX_EXTENTS	2
+
+/*
+ * Unmap every extent in part of an inode's fork.  We don't do any higher level
+ * invalidation work at all.
+ */
+int
+xfs_bunmapi_range(
+	struct xfs_trans	**tpp,
+	struct xfs_inode	*ip,
+	uint32_t		flags,
+	xfs_fileoff_t		startoff,
+	xfs_fileoff_t		endoff)
+{
+	xfs_filblks_t		unmap_len = endoff - startoff + 1;
+	int			error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	while (unmap_len > 0) {
+		ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
+		error = __xfs_bunmapi(*tpp, ip, startoff, &unmap_len, flags,
+				XFS_ITRUNC_MAX_EXTENTS);
+		if (error)
+			goto out;
+
+		/* free the just unmapped extents */
+		error = xfs_defer_finish(tpp);
+		if (error)
+			goto out;
+	}
+out:
+	return error;
+}
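With __xfs_bunmapi now static, xfs_bunmapi_range is the exported way to get the unmap-then-xfs_defer_finish loop that callers used to open-code. A minimal sketch of a caller, assuming only what the diff above shows (the wrapper name is hypothetical and the surrounding transaction setup is elided):

/* Illustrative only: punch out every data fork mapping in [start, end]. */
static int example_punch_range(struct xfs_trans **tpp, struct xfs_inode *ip,
		xfs_fileoff_t start, xfs_fileoff_t end)
{
	/*
	 * endoff is inclusive, and the helper rolls *tpp internally via
	 * xfs_defer_finish(), so the caller must hold ILOCK_EXCL and keep
	 * using the transaction it hands back.
	 */
	return xfs_bunmapi_range(tpp, ip, 0, start, end);
}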
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -116,6 +116,8 @@ static inline int xfs_bmapi_whichfork(uint32_t bmapi_flags)
 	return XFS_DATA_FORK;
 }
 
+void	xfs_bmap_alloc_account(struct xfs_bmalloca *ap);
+
 /*
  * Special values for xfs_bmbt_irec_t br_startblock field.
  */
@@ -190,9 +192,6 @@ int	xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
 int	xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags,
 		xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap);
-int	__xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
-		xfs_fileoff_t bno, xfs_filblks_t *rlen, uint32_t flags,
-		xfs_extnum_t nexts);
 int	xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags,
 		xfs_extnum_t nexts, int *done);
@@ -263,6 +262,8 @@ static inline uint32_t xfs_bmap_fork_to_state(int whichfork)
 	}
 }
 
+xfs_failaddr_t xfs_bmap_validate_extent_raw(struct xfs_mount *mp, bool rtfile,
+		int whichfork, struct xfs_bmbt_irec *irec);
 xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork,
 		struct xfs_bmbt_irec *irec);
 int	xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork,
@@ -271,6 +272,8 @@ int	xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork,
 int	xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock,
 		uint32_t flags);
+int	xfs_bunmapi_range(struct xfs_trans **tpp, struct xfs_inode *ip,
+		uint32_t flags, xfs_fileoff_t startoff, xfs_fileoff_t endoff);
 
 extern struct kmem_cache	*xfs_bmap_intent_cache;
@@ -15,6 +15,7 @@
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
@@ -272,7 +273,7 @@ xfs_bmbt_free_block(

	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
	error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
			XFS_AG_RESV_NONE);
			XFS_AG_RESV_NONE, false);
	if (error)
		return error;

@@ -288,10 +289,7 @@ xfs_bmbt_get_minrecs(
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp;

		ifp = xfs_ifork_ptr(cur->bc_ino.ip,
				cur->bc_ino.whichfork);
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_bmbt_maxrecs(cur->bc_mp,
				ifp->if_broot_bytes, level == 0) / 2;
@@ -306,10 +304,7 @@ xfs_bmbt_get_maxrecs(
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp;

		ifp = xfs_ifork_ptr(cur->bc_ino.ip,
				cur->bc_ino.whichfork);
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_bmbt_maxrecs(cur->bc_mp,
				ifp->if_broot_bytes, level == 0);
@@ -543,23 +538,19 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
	.keys_contiguous	= xfs_bmbt_keys_contiguous,
};

/*
 * Allocate a new bmap btree cursor.
 */
struct xfs_btree_cur *				/* new bmap btree cursor */
xfs_bmbt_init_cursor(
	struct xfs_mount	*mp,		/* file system mount point */
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* inode owning the btree */
	int			whichfork)	/* data or attr fork */
static struct xfs_btree_cur *
xfs_bmbt_init_common(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur;

	ASSERT(whichfork != XFS_COW_FORK);

	cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_BMAP,
			mp->m_bm_maxlevels[whichfork], xfs_bmbt_cur_cache);
	cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
	cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2);

	cur->bc_ops = &xfs_bmbt_ops;
@@ -567,10 +558,30 @@ xfs_bmbt_init_cursor(
	if (xfs_has_crc(mp))
		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;

	cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork);
	cur->bc_ino.ip = ip;
	cur->bc_ino.allocated = 0;
	cur->bc_ino.flags = 0;

	return cur;
}

/*
 * Allocate a new bmap btree cursor.
 */
struct xfs_btree_cur *
xfs_bmbt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur;

	cur = xfs_bmbt_init_common(mp, tp, ip, whichfork);

	cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
	cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork);
	cur->bc_ino.whichfork = whichfork;

	return cur;
@@ -587,6 +598,76 @@ xfs_bmbt_block_maxrecs(
	return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
}

/*
 * Allocate a new bmap btree cursor for reloading an inode block mapping data
 * structure.  Note that callers can use the staged cursor to reload extents
 * format inode forks if they rebuild the iext tree and commit the staged
 * cursor immediately.
 */
struct xfs_btree_cur *
xfs_bmbt_stage_cursor(
	struct xfs_mount	*mp,
	struct xfs_inode	*ip,
	struct xbtree_ifakeroot	*ifake)
{
	struct xfs_btree_cur	*cur;
	struct xfs_btree_ops	*ops;

	/* data fork always has larger maxheight */
	cur = xfs_bmbt_init_common(mp, NULL, ip, XFS_DATA_FORK);
	cur->bc_nlevels = ifake->if_levels;
	cur->bc_ino.forksize = ifake->if_fork_size;

	/* Don't let anyone think we're attached to the real fork yet. */
	cur->bc_ino.whichfork = -1;
	xfs_btree_stage_ifakeroot(cur, ifake, &ops);
	ops->update_cursor = NULL;
	return cur;
}

/*
 * Swap in the new inode fork root.  Once we pass this point the newly rebuilt
 * mappings are in place and we have to kill off any old btree blocks.
 */
void
xfs_bmbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp,
	int			whichfork)
{
	struct xbtree_ifakeroot	*ifake = cur->bc_ino.ifake;
	struct xfs_ifork	*ifp;
	static const short	brootflag[2] = {XFS_ILOG_DBROOT, XFS_ILOG_ABROOT};
	static const short	extflag[2] = {XFS_ILOG_DEXT, XFS_ILOG_AEXT};
	int			flags = XFS_ILOG_CORE;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(whichfork != XFS_COW_FORK);

	/*
	 * Free any resources hanging off the real fork, then shallow-copy the
	 * staging fork's contents into the real fork to transfer everything
	 * we just built.
	 */
	ifp = xfs_ifork_ptr(cur->bc_ino.ip, whichfork);
	xfs_idestroy_fork(ifp);
	memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_EXTENTS:
		flags |= extflag[whichfork];
		break;
	case XFS_DINODE_FMT_BTREE:
		flags |= brootflag[whichfork];
		break;
	default:
		ASSERT(0);
		break;
	}
	xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
	xfs_btree_commit_ifakeroot(cur, tp, whichfork, &xfs_bmbt_ops);
}

/*
 * Calculate number of records in a bmap btree block.
 */

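The staged-cursor commit above is a build-aside-then-swap pattern: the repair code assembles a complete replacement root off to the side, then a single shallow copy makes it live. A small standalone C sketch of the same idea, with entirely invented names and none of the kernel's locking or logging:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy "fork" holding a root pointer, standing in for struct xfs_ifork. */
struct fork {
	int	*root;
	size_t	nrecs;
};

/* Build a replacement root off to the side (the "staging" fork). */
static void stage_build(struct fork *fake, const int *recs, size_t n)
{
	fake->root = malloc(n * sizeof(*recs));
	memcpy(fake->root, recs, n * sizeof(*recs));
	fake->nrecs = n;
}

/* Commit: free the old root, then shallow-copy the staged fork over it. */
static void stage_commit(struct fork *live, struct fork *fake)
{
	free(live->root);
	memcpy(live, fake, sizeof(*live));	/* one atomic-looking swap */
}

int main(void)
{
	int old[] = { 1, 2 }, repl[] = { 3, 4, 5 };
	struct fork live = { 0 }, fake = { 0 };

	stage_build(&live, old, 2);
	stage_build(&fake, repl, 3);
	stage_commit(&live, &fake);
	printf("live fork now has %zu records\n", live.nrecs);
	free(live.root);
	return 0;
}
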
@@ -11,6 +11,7 @@ struct xfs_btree_block;
struct xfs_mount;
struct xfs_inode;
struct xfs_trans;
struct xbtree_ifakeroot;

/*
 * Btree block header size depends on a superblock flag.
@@ -106,6 +107,10 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,

extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
		struct xfs_trans *, struct xfs_inode *, int);
struct xfs_btree_cur *xfs_bmbt_stage_cursor(struct xfs_mount *mp,
		struct xfs_inode *ip, struct xbtree_ifakeroot *ifake);
void xfs_bmbt_commit_staged_btree(struct xfs_btree_cur *cur,
		struct xfs_trans *tp, int whichfork);

extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp,
		unsigned long long len);

@@ -1330,7 +1330,7 @@ xfs_btree_get_buf_block(
 * Read in the buffer at the given ptr and return the buffer and
 * the block pointer within the buffer.
 */
STATIC int
int
xfs_btree_read_buf_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
@@ -5212,3 +5212,29 @@ xfs_btree_destroy_cur_caches(void)
	xfs_rmapbt_destroy_cur_cache();
	xfs_refcountbt_destroy_cur_cache();
}

/* Move the btree cursor before the first record. */
int
xfs_btree_goto_left_edge(
	struct xfs_btree_cur	*cur)
{
	int			stat = 0;
	int			error;

	memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
	if (error)
		return error;
	if (!stat)
		return 0;

	error = xfs_btree_decrement(cur, 0, &stat);
	if (error)
		return error;
	if (stat != 0) {
		ASSERT(0);
		return -EFSCORRUPTED;
	}

	return 0;
}

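The goto_left_edge helper above positions a cursor before the first record by doing a <=-lookup with an all-zero key and then stepping back off whatever matched. A toy userspace model of that positioning logic over a sorted array (illustrative names only, not kernel API):

#include <assert.h>
#include <stdio.h>

/* Toy cursor over a sorted array of non-negative keys. */
struct cur {
	const int	*recs;
	int		nrecs;
	int		idx;	/* -1 means "before the first record" */
};

/* <=-lookup for key 0: land on the last record <= 0, if any. */
static int lookup_le_zero(struct cur *c)
{
	c->idx = -1;
	while (c->idx + 1 < c->nrecs && c->recs[c->idx + 1] <= 0)
		c->idx++;
	return c->idx >= 0;	/* "stat": did we land on a record? */
}

static int goto_left_edge(struct cur *c)
{
	if (!lookup_le_zero(c))
		return 0;	/* already before the first record */
	c->idx--;		/* step back off the matched record */
	assert(c->idx == -1);	/* anything else would mean corruption */
	return 0;
}

int main(void)
{
	const int recs[] = { 0, 3, 7 };
	struct cur c = { recs, 3, 0 };

	goto_left_edge(&c);
	printf("cursor index after goto_left_edge: %d\n", c.idx);
	return 0;
}
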
@@ -700,6 +700,9 @@ void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur,
int xfs_btree_get_buf_block(struct xfs_btree_cur *cur,
		const union xfs_btree_ptr *ptr, struct xfs_btree_block **block,
		struct xfs_buf **bpp);
int xfs_btree_read_buf_block(struct xfs_btree_cur *cur,
		const union xfs_btree_ptr *ptr, int flags,
		struct xfs_btree_block **block, struct xfs_buf **bpp);
void xfs_btree_set_sibling(struct xfs_btree_cur *cur,
		struct xfs_btree_block *block, const union xfs_btree_ptr *ptr,
		int lr);
@@ -735,4 +738,6 @@ xfs_btree_alloc_cursor(
int __init xfs_btree_init_cur_caches(void);
void xfs_btree_destroy_cur_caches(void);

int xfs_btree_goto_left_edge(struct xfs_btree_cur *cur);

#endif	/* __XFS_BTREE_H__ */

@@ -333,20 +333,41 @@ xfs_btree_commit_ifakeroot(
/*
 * Put a btree block that we're loading onto the ordered list and release it.
 * The btree blocks will be written to disk when bulk loading is finished.
 * If we reach the dirty buffer threshold, flush them to disk before
 * continuing.
 */
static void
static int
xfs_btree_bload_drop_buf(
	struct list_head	*buffers_list,
	struct xfs_buf		**bpp)
	struct xfs_btree_bload	*bbl,
	struct list_head	*buffers_list,
	struct xfs_buf		**bpp)
{
	if (*bpp == NULL)
		return;
	struct xfs_buf		*bp = *bpp;
	int			error;

	if (!xfs_buf_delwri_queue(*bpp, buffers_list))
		ASSERT(0);
	if (!bp)
		return 0;

	xfs_buf_relse(*bpp);
	/*
	 * Mark this buffer XBF_DONE (i.e. uptodate) so that a subsequent
	 * xfs_buf_read will not pointlessly reread the contents from the disk.
	 */
	bp->b_flags |= XBF_DONE;

	xfs_buf_delwri_queue_here(bp, buffers_list);
	xfs_buf_relse(bp);
	*bpp = NULL;
	bbl->nr_dirty++;

	if (!bbl->max_dirty || bbl->nr_dirty < bbl->max_dirty)
		return 0;

	error = xfs_buf_delwri_submit(buffers_list);
	if (error)
		return error;

	bbl->nr_dirty = 0;
	return 0;
}

/*
@@ -384,7 +405,7 @@ xfs_btree_bload_prep_block(
		ASSERT(*bpp == NULL);

		/* Allocate a new incore btree root block. */
		new_size = bbl->iroot_size(cur, nr_this_block, priv);
		new_size = bbl->iroot_size(cur, level, nr_this_block, priv);
		ifp->if_broot = kmem_zalloc(new_size, 0);
		ifp->if_broot_bytes = (int)new_size;

@@ -418,7 +439,10 @@ xfs_btree_bload_prep_block(
	 */
	if (*blockp)
		xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB);
	xfs_btree_bload_drop_buf(buffers_list, bpp);

	ret = xfs_btree_bload_drop_buf(bbl, buffers_list, bpp);
	if (ret)
		return ret;

	/* Initialize the new btree block. */
	xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block);
@@ -436,22 +460,19 @@ STATIC int
xfs_btree_bload_leaf(
	struct xfs_btree_cur		*cur,
	unsigned int			recs_this_block,
	xfs_btree_bload_get_record_fn	get_record,
	xfs_btree_bload_get_records_fn	get_records,
	struct xfs_btree_block		*block,
	void				*priv)
{
	unsigned int			j;
	unsigned int			j = 1;
	int				ret;

	/* Fill the leaf block with records. */
	for (j = 1; j <= recs_this_block; j++) {
		union xfs_btree_rec	*block_rec;

		ret = get_record(cur, priv);
		if (ret)
	while (j <= recs_this_block) {
		ret = get_records(cur, j, block, recs_this_block - j + 1, priv);
		if (ret < 0)
			return ret;
		block_rec = xfs_btree_rec_addr(cur, j, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
		j += ret;
	}

	return 0;
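The reworked leaf loader above now asks the callback for a batch of records per call instead of one at a time, advancing by however many were actually delivered. A standalone C sketch of that callback contract (the names are hypothetical; only the loop shape mirrors xfs_btree_bload_leaf):

#include <stdio.h>

/* Fill up to nr_wanted record slots starting at idx; return how many were
 * loaded, or a negative errno on failure. */
static int get_records(int *block, unsigned int idx, unsigned int nr_wanted,
		       int *next_val)
{
	unsigned int i;

	for (i = 0; i < nr_wanted; i++)
		block[idx + i] = (*next_val)++;
	return (int)nr_wanted;
}

int main(void)
{
	int block[8], next = 100;
	unsigned int j = 1, recs_this_block = 6;

	/* mirrors the while loop in xfs_btree_bload_leaf */
	while (j <= recs_this_block) {
		int ret = get_records(block, j, recs_this_block - j + 1, &next);

		if (ret < 0)
			return 1;
		j += ret;
	}
	printf("loaded records %d..%d\n", block[1], block[recs_this_block]);
	return 0;
}
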
@@ -485,7 +506,12 @@ xfs_btree_bload_node(

		ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr));

		ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block,
		/*
		 * Read the lower-level block in case the buffer for it has
		 * been reclaimed.  LRU refs will be set on the block, which is
		 * desirable if the new btree commits.
		 */
		ret = xfs_btree_read_buf_block(cur, child_ptr, 0, &child_block,
				&child_bp);
		if (ret)
			return ret;
@@ -570,7 +596,14 @@ xfs_btree_bload_level_geometry(
	unsigned int		desired_npb;
	unsigned int		maxnr;

	maxnr = cur->bc_ops->get_maxrecs(cur, level);
	/*
	 * Compute the absolute maximum number of records that we can store in
	 * the ondisk block or inode root.
	 */
	if (cur->bc_ops->get_dmaxrecs)
		maxnr = cur->bc_ops->get_dmaxrecs(cur, level);
	else
		maxnr = cur->bc_ops->get_maxrecs(cur, level);

	/*
	 * Compute the number of blocks we need to fill each block with the
@@ -764,6 +797,7 @@ xfs_btree_bload(
	cur->bc_nlevels = bbl->btree_height;
	xfs_btree_set_ptr_null(cur, &child_ptr);
	xfs_btree_set_ptr_null(cur, &ptr);
	bbl->nr_dirty = 0;

	xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level,
			&avg_per_block, &blocks, &blocks_with_extra);
@@ -789,7 +823,7 @@ xfs_btree_bload(
		trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr,
				nr_this_block);

		ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record,
		ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_records,
				block, priv);
		if (ret)
			goto out;
@@ -802,7 +836,10 @@ xfs_btree_bload(
		xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1);
	}
	total_blocks += blocks;
	xfs_btree_bload_drop_buf(&buffers_list, &bp);

	ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp);
	if (ret)
		goto out;

	/* Populate the internal btree nodes. */
	for (level = 1; level < cur->bc_nlevels; level++) {
@@ -844,7 +881,11 @@ xfs_btree_bload(
			xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1);
		}
		total_blocks += blocks;
		xfs_btree_bload_drop_buf(&buffers_list, &bp);

		ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp);
		if (ret)
			goto out;

		xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1);
	}

@@ -37,12 +37,6 @@ struct xbtree_ifakeroot {

	/* Number of bytes available for this fork in the inode. */
	unsigned int		if_fork_size;

	/* Fork format. */
	unsigned int		if_format;

	/* Number of records. */
	unsigned int		if_extents;
};

/* Cursor interactions with fake roots for inode-rooted btrees. */
@@ -53,19 +47,24 @@ void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp,
		int whichfork, const struct xfs_btree_ops *ops);

/* Bulk loading of staged btrees. */
typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv);
typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur,
		unsigned int idx, struct xfs_btree_block *block,
		unsigned int nr_wanted, void *priv);
typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur,
		union xfs_btree_ptr *ptr, void *priv);
typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,
		unsigned int nr_this_level, void *priv);
		unsigned int level, unsigned int nr_this_level, void *priv);

struct xfs_btree_bload {
	/*
	 * This function will be called nr_records times to load records into
	 * the btree.  The function does this by setting the cursor's bc_rec
	 * field in in-core format.  Records must be returned in sort order.
	 * This function will be called to load @nr_wanted records into the
	 * btree.  The implementation does this by setting the cursor's bc_rec
	 * field in in-core format and using init_rec_from_cur to set the
	 * records in the btree block.  Records must be returned in sort order.
	 * The function must return the number of records loaded or the usual
	 * negative errno.
	 */
	xfs_btree_bload_get_record_fn	get_record;
	xfs_btree_bload_get_records_fn	get_records;

	/*
	 * This function will be called nr_blocks times to obtain a pointer
@@ -113,6 +112,16 @@ struct xfs_btree_bload {
	 * height of the new btree.
	 */
	unsigned int			btree_height;

	/*
	 * Flush the new btree block buffer list to disk after this many blocks
	 * have been formatted.  Zero prohibits writing any buffers until all
	 * blocks have been formatted.
	 */
	uint16_t			max_dirty;

	/* Number of dirty buffers. */
	uint16_t			nr_dirty;
};

int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur,
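The max_dirty/nr_dirty pair documented above implements a simple write throttle: count formatted buffers and flush the whole delwri list whenever the count hits the threshold, with zero meaning "hold everything until the end". A self-contained C model of that throttle (invented names, no kernel calls):

#include <stdio.h>

struct bload {
	unsigned short	max_dirty;	/* 0 = never flush early */
	unsigned short	nr_dirty;
};

static int drop_buf(struct bload *bbl)
{
	bbl->nr_dirty++;
	if (!bbl->max_dirty || bbl->nr_dirty < bbl->max_dirty)
		return 0;

	/* stand-in for xfs_buf_delwri_submit() on the buffer list */
	printf("flushing %u dirty buffers\n", bbl->nr_dirty);
	bbl->nr_dirty = 0;
	return 0;
}

int main(void)
{
	struct bload bbl = { .max_dirty = 4 };
	int i;

	for (i = 0; i < 10; i++)
		drop_buf(&bbl);
	if (bbl.nr_dirty)
		printf("final flush of %u buffers\n", bbl.nr_dirty);
	return 0;
}
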
@@ -421,6 +421,25 @@ xfs_da3_node_read_mapped(
	return xfs_da3_node_set_type(tp, *bpp);
}

/*
 * Copy src directory/attr leaf/node buffer to the dst.
 * For v5 file systems make sure the right blkno is stamped in.
 */
void
xfs_da_buf_copy(
	struct xfs_buf	*dst,
	struct xfs_buf	*src,
	size_t		size)
{
	struct xfs_da3_blkinfo	*da3 = dst->b_addr;

	memcpy(dst->b_addr, src->b_addr, size);
	dst->b_ops = src->b_ops;
	xfs_trans_buf_copy_type(dst, src);
	if (xfs_has_crc(dst->b_mount))
		da3->blkno = cpu_to_be64(xfs_buf_daddr(dst));
}

/*========================================================================
 * Routines used for growing the Btree.
 *========================================================================*/
@@ -690,12 +709,6 @@ xfs_da3_root_split(
		btree = icnodehdr.btree;
		size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
		level = icnodehdr.level;

		/*
		 * we are about to copy oldroot to bp, so set up the type
		 * of bp while we know exactly what it will be.
		 */
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	} else {
		struct xfs_dir3_icleaf_hdr leafhdr;

@@ -707,31 +720,17 @@ xfs_da3_root_split(
		size = (int)((char *)&leafhdr.ents[leafhdr.count] -
			(char *)leaf);
		level = 0;

		/*
		 * we are about to copy oldroot to bp, so set up the type
		 * of bp while we know exactly what it will be.
		 */
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
	}

	/*
	 * we can copy most of the information in the node from one block to
	 * another, but for CRC enabled headers we have to make sure that the
	 * block specific identifiers are kept intact. We update the buffer
	 * directly for this.
	 * Copy old root to new buffer and log it.
	 */
	memcpy(node, oldroot, size);
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;

		node3->hdr.info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
	}
	xfs_da_buf_copy(bp, blk1->bp, size);
	xfs_trans_log_buf(tp, bp, 0, size - 1);

	bp->b_ops = blk1->bp->b_ops;
	xfs_trans_buf_copy_type(bp, blk1->bp);
	/*
	 * Update blk1 to point to new buffer.
	 */
	blk1->bp = bp;
	blk1->blkno = blkno;

@@ -1220,21 +1219,14 @@ xfs_da3_root_join(
	xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);

	/*
	 * This could be copying a leaf back into the root block in the case of
	 * there only being a single leaf block left in the tree. Hence we have
	 * to update the b_ops pointer as well to match the buffer type change
	 * that could occur. For dir3 blocks we also need to update the block
	 * number in the buffer header.
	 * Copy child to root buffer and log it.
	 */
	memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
	root_blk->bp->b_ops = bp->b_ops;
	xfs_trans_buf_copy_type(root_blk->bp, bp);
	if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
		struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
		da3->blkno = cpu_to_be64(xfs_buf_daddr(root_blk->bp));
	}
	xfs_da_buf_copy(root_blk->bp, bp, args->geo->blksize);
	xfs_trans_log_buf(args->trans, root_blk->bp, 0,
			args->geo->blksize - 1);
	/*
	 * Now we can drop the child buffer.
	 */
	error = xfs_da_shrink_inode(args, child, bp);
	return error;
}
@@ -2317,9 +2309,10 @@ xfs_da3_swap_lastblock(
	/*
	 * Copy the last block into the dead buffer and log it.
	 */
	memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
	xfs_da_buf_copy(dead_buf, last_buf, args->geo->blksize);
	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
	dead_info = dead_buf->b_addr;

	/*
	 * Get values from the moved block.
	 */
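The new xfs_da_buf_copy() helper factors out a subtle pattern: v5 block headers are self-describing, so after a byte-for-byte copy the destination must be restamped with its own block number or the verifier will reject it. A toy C model of the copy-then-restamp idea (the struct and field names are made up):

#include <stdio.h>
#include <string.h>

struct block {
	unsigned long long	blkno;	/* self-describing address */
	char			payload[32];
};

static void buf_copy(struct block *dst, const struct block *src,
		     unsigned long long dst_addr, int has_crc)
{
	memcpy(dst, src, sizeof(*dst));
	/* like da3->blkno = cpu_to_be64(xfs_buf_daddr(dst)) on v5 */
	if (has_crc)
		dst->blkno = dst_addr;
}

int main(void)
{
	struct block a = { 100, "old root" }, b = { 0 };

	buf_copy(&b, &a, 200, 1);
	printf("copied payload '%s', dst blkno %llu\n", b.payload, b.blkno);
	return 0;
}
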
@@ -219,6 +219,8 @@ int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
		const struct xfs_buf_ops *ops);
int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
		struct xfs_buf *dead_buf);
void	xfs_da_buf_copy(struct xfs_buf *dst, struct xfs_buf *src,
		size_t size);

uint xfs_da_hashname(const uint8_t *name_string, int name_length);
enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,

@@ -578,20 +578,25 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
#define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */

/*
 * Entries are packed toward the top as tight as possible.
 * Attribute storage when stored inside the inode.
 *
 * Small attribute lists are packed as tightly as possible so as to fit into the
 * literal area of the inode.
 *
 * These "shortform" attribute forks consist of a single xfs_attr_sf_hdr header
 * followed by zero or more xfs_attr_sf_entry structures.
 */
struct xfs_attr_shortform {
	struct xfs_attr_sf_hdr {	/* constant-structure header block */
		__be16	totsize;	/* total bytes in shortform list */
		__u8	count;		/* count of active entries */
		__u8	padding;
	} hdr;
	struct xfs_attr_sf_entry {
		uint8_t namelen;	/* actual length of name (no NULL) */
		uint8_t valuelen;	/* actual length of value (no NULL) */
		uint8_t flags;		/* flags bits (see xfs_attr_leaf.h) */
		uint8_t nameval[];	/* name & value bytes concatenated */
	} list[];			/* variable sized array */
struct xfs_attr_sf_hdr {	/* constant-structure header block */
	__be16	totsize;	/* total bytes in shortform list */
	__u8	count;		/* count of active entries */
	__u8	padding;
};

struct xfs_attr_sf_entry {
	__u8	namelen;	/* actual length of name (no NULL) */
	__u8	valuelen;	/* actual length of value (no NULL) */
	__u8	flags;		/* flags bits (XFS_ATTR_*) */
	__u8	nameval[];	/* name & value bytes concatenated */
};

typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */
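With the nested struct xfs_attr_shortform gone, a shortform attr fork is just a header followed by variable-size entries, and walkers advance by each entry's computed size. A standalone C sketch of that walk, using simplified stand-in structs rather than the real on-disk definitions:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct sf_hdr { uint16_t totsize; uint8_t count; uint8_t padding; };
struct sf_entry { uint8_t namelen; uint8_t valuelen; uint8_t flags;
		  uint8_t nameval[]; };

/* entry size = fixed header plus concatenated name and value bytes */
static size_t entry_size(const struct sf_entry *sfe)
{
	return sizeof(*sfe) + sfe->namelen + sfe->valuelen;
}

int main(void)
{
	uint8_t buf[64];
	struct sf_hdr *hdr = (struct sf_hdr *)buf;
	struct sf_entry *sfe = (struct sf_entry *)(hdr + 1);
	int i;

	/* build two entries: "ab"="1" and "c"="23" */
	hdr->count = 2;
	sfe->namelen = 2; sfe->valuelen = 1; sfe->flags = 0;
	memcpy(sfe->nameval, "ab1", 3);
	sfe = (struct sf_entry *)((uint8_t *)sfe + entry_size(sfe));
	sfe->namelen = 1; sfe->valuelen = 2; sfe->flags = 0;
	memcpy(sfe->nameval, "c23", 3);

	/* walk the packed list the way a verifier would */
	sfe = (struct sf_entry *)(hdr + 1);
	for (i = 0; i < hdr->count; i++) {
		printf("attr %d: namelen=%u valuelen=%u\n",
		       i, sfe->namelen, sfe->valuelen);
		sfe = (struct sf_entry *)((uint8_t *)sfe + entry_size(sfe));
	}
	return 0;
}
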
@@ -26,6 +26,7 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_trans_priv.h"

static struct kmem_cache	*xfs_defer_pending_cache;

@@ -181,16 +182,89 @@ static struct kmem_cache *xfs_defer_pending_cache;
 * Note that the continuation requested between t2 and t3 is likely to
 * reoccur.
 */
STATIC struct xfs_log_item *
xfs_defer_barrier_create_intent(
	struct xfs_trans		*tp,
	struct list_head		*items,
	unsigned int			count,
	bool				sort)
{
	return NULL;
}

static const struct xfs_defer_op_type *defer_op_types[] = {
	[XFS_DEFER_OPS_TYPE_BMAP]	= &xfs_bmap_update_defer_type,
	[XFS_DEFER_OPS_TYPE_REFCOUNT]	= &xfs_refcount_update_defer_type,
	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
	[XFS_DEFER_OPS_TYPE_ATTR]	= &xfs_attr_defer_type,
STATIC void
xfs_defer_barrier_abort_intent(
	struct xfs_log_item		*intent)
{
	/* empty */
}

STATIC struct xfs_log_item *
xfs_defer_barrier_create_done(
	struct xfs_trans		*tp,
	struct xfs_log_item		*intent,
	unsigned int			count)
{
	return NULL;
}

STATIC int
xfs_defer_barrier_finish_item(
	struct xfs_trans		*tp,
	struct xfs_log_item		*done,
	struct list_head		*item,
	struct xfs_btree_cur		**state)
{
	ASSERT(0);
	return -EFSCORRUPTED;
}

STATIC void
xfs_defer_barrier_cancel_item(
	struct list_head		*item)
{
	ASSERT(0);
}

static const struct xfs_defer_op_type xfs_barrier_defer_type = {
	.max_items	= 1,
	.create_intent	= xfs_defer_barrier_create_intent,
	.abort_intent	= xfs_defer_barrier_abort_intent,
	.create_done	= xfs_defer_barrier_create_done,
	.finish_item	= xfs_defer_barrier_finish_item,
	.cancel_item	= xfs_defer_barrier_cancel_item,
};

/* Create a log intent done item for a log intent item. */
static inline void
xfs_defer_create_done(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{
	struct xfs_log_item		*lip;

	/* If there is no log intent item, there can be no log done item. */
	if (!dfp->dfp_intent)
		return;

	/*
	 * Mark the transaction dirty, even on error.  This ensures the
	 * transaction is aborted, which:
	 *
	 * 1.) releases the log intent item and frees the log done item
	 * 2.) shuts down the filesystem
	 */
	tp->t_flags |= XFS_TRANS_DIRTY;
	lip = dfp->dfp_ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
	if (!lip)
		return;

	tp->t_flags |= XFS_TRANS_HAS_INTENT_DONE;
	xfs_trans_add_item(tp, lip);
	set_bit(XFS_LI_DIRTY, &lip->li_flags);
	dfp->dfp_done = lip;
}

/*
 * Ensure there's a log intent item associated with this deferred work item if
 * the operation must be restarted on crash.  Returns 1 if there's a log item;
@@ -202,18 +276,21 @@ xfs_defer_create_intent(
	struct xfs_defer_pending	*dfp,
	bool				sort)
{
	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
	struct xfs_log_item		*lip;

	if (dfp->dfp_intent)
		return 1;

	lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort);
	lip = dfp->dfp_ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count,
			sort);
	if (!lip)
		return 0;
	if (IS_ERR(lip))
		return PTR_ERR(lip);

	tp->t_flags |= XFS_TRANS_DIRTY;
	xfs_trans_add_item(tp, lip);
	set_bit(XFS_LI_DIRTY, &lip->li_flags);
	dfp->dfp_intent = lip;
	return 1;
}
@@ -245,23 +322,50 @@ xfs_defer_create_intents(
	return ret;
}

STATIC void
static inline void
xfs_defer_pending_abort(
	struct xfs_mount		*mp,
	struct xfs_defer_pending	*dfp)
{
	trace_xfs_defer_pending_abort(mp, dfp);

	if (dfp->dfp_intent && !dfp->dfp_done) {
		dfp->dfp_ops->abort_intent(dfp->dfp_intent);
		dfp->dfp_intent = NULL;
	}
}

static inline void
xfs_defer_pending_cancel_work(
	struct xfs_mount		*mp,
	struct xfs_defer_pending	*dfp)
{
	struct list_head		*pwi;
	struct list_head		*n;

	trace_xfs_defer_cancel_list(mp, dfp);

	list_del(&dfp->dfp_list);
	list_for_each_safe(pwi, n, &dfp->dfp_work) {
		list_del(pwi);
		dfp->dfp_count--;
		trace_xfs_defer_cancel_item(mp, dfp, pwi);
		dfp->dfp_ops->cancel_item(pwi);
	}
	ASSERT(dfp->dfp_count == 0);
	kmem_cache_free(xfs_defer_pending_cache, dfp);
}

STATIC void
xfs_defer_pending_abort_list(
	struct xfs_mount		*mp,
	struct list_head		*dop_list)
{
	struct xfs_defer_pending	*dfp;
	const struct xfs_defer_op_type	*ops;

	/* Abort intent items that don't have a done item. */
	list_for_each_entry(dfp, dop_list, dfp_list) {
		ops = defer_op_types[dfp->dfp_type];
		trace_xfs_defer_pending_abort(mp, dfp);
		if (dfp->dfp_intent && !dfp->dfp_done) {
			ops->abort_intent(dfp->dfp_intent);
			dfp->dfp_intent = NULL;
		}
	}
	list_for_each_entry(dfp, dop_list, dfp_list)
		xfs_defer_pending_abort(mp, dfp);
}

/* Abort all the intents that were committed. */
@@ -271,7 +375,7 @@ xfs_defer_trans_abort(
	struct list_head		*dop_pending)
{
	trace_xfs_defer_trans_abort(tp, _RET_IP_);
	xfs_defer_pending_abort(tp->t_mountp, dop_pending);
	xfs_defer_pending_abort_list(tp->t_mountp, dop_pending);
}

/*
@@ -389,27 +493,31 @@ xfs_defer_cancel_list(
{
	struct xfs_defer_pending	*dfp;
	struct xfs_defer_pending	*pli;
	struct list_head		*pwi;
	struct list_head		*n;
	const struct xfs_defer_op_type	*ops;

	/*
	 * Free the pending items.  Caller should already have arranged
	 * for the intent items to be released.
	 */
	list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
		ops = defer_op_types[dfp->dfp_type];
		trace_xfs_defer_cancel_list(mp, dfp);
		list_del(&dfp->dfp_list);
		list_for_each_safe(pwi, n, &dfp->dfp_work) {
			list_del(pwi);
			dfp->dfp_count--;
			trace_xfs_defer_cancel_item(mp, dfp, pwi);
			ops->cancel_item(pwi);
		}
		ASSERT(dfp->dfp_count == 0);
		kmem_cache_free(xfs_defer_pending_cache, dfp);
	list_for_each_entry_safe(dfp, pli, dop_list, dfp_list)
		xfs_defer_pending_cancel_work(mp, dfp);
}

static inline void
xfs_defer_relog_intent(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{
	struct xfs_log_item		*lip;

	xfs_defer_create_done(tp, dfp);

	lip = dfp->dfp_ops->relog_intent(tp, dfp->dfp_intent, dfp->dfp_done);
	if (lip) {
		xfs_trans_add_item(tp, lip);
		set_bit(XFS_LI_DIRTY, &lip->li_flags);
	}
	dfp->dfp_done = NULL;
	dfp->dfp_intent = lip;
}

/*
@@ -417,7 +525,7 @@ xfs_defer_cancel_list(
 * done item to release the intent item; and then log a new intent item.
 * The caller should provide a fresh transaction and roll it after we're done.
 */
static int
static void
xfs_defer_relog(
	struct xfs_trans		**tpp,
	struct list_head		*dfops)
@@ -456,31 +564,28 @@ xfs_defer_relog(

		trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
		XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
		dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
	}

	if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
		return xfs_defer_trans_roll(tpp);
	return 0;
		xfs_defer_relog_intent(*tpp, dfp);
	}
}

/*
 * Log an intent-done item for the first pending intent, and finish the work
 * items.
 */
static int
int
xfs_defer_finish_one(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{
	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
	const struct xfs_defer_op_type	*ops = dfp->dfp_ops;
	struct xfs_btree_cur		*state = NULL;
	struct list_head		*li, *n;
	int				error;

	trace_xfs_defer_pending_finish(tp->t_mountp, dfp);

	dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
	xfs_defer_create_done(tp, dfp);
	list_for_each_safe(li, n, &dfp->dfp_work) {
		list_del(li);
		dfp->dfp_count--;
@@ -517,6 +622,24 @@ xfs_defer_finish_one(
	return error;
}

/* Move all paused deferred work from @tp to @paused_list. */
static void
xfs_defer_isolate_paused(
	struct xfs_trans		*tp,
	struct list_head		*paused_list)
{
	struct xfs_defer_pending	*dfp;
	struct xfs_defer_pending	*pli;

	list_for_each_entry_safe(dfp, pli, &tp->t_dfops, dfp_list) {
		if (!(dfp->dfp_flags & XFS_DEFER_PAUSED))
			continue;

		list_move_tail(&dfp->dfp_list, paused_list);
		trace_xfs_defer_isolate_paused(tp->t_mountp, dfp);
	}
}

/*
 * Finish all the pending work.  This involves logging intent items for
 * any work items that wandered in since the last transaction roll (if
@@ -532,6 +655,7 @@ xfs_defer_finish_noroll(
	struct xfs_defer_pending	*dfp = NULL;
	int				error = 0;
	LIST_HEAD(dop_pending);
	LIST_HEAD(dop_paused);

	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);

@@ -550,6 +674,8 @@ xfs_defer_finish_noroll(
		 */
		int has_intents = xfs_defer_create_intents(*tp);

		xfs_defer_isolate_paused(*tp, &dop_paused);

		list_splice_init(&(*tp)->t_dfops, &dop_pending);

		if (has_intents < 0) {
@@ -562,22 +688,33 @@ xfs_defer_finish_noroll(
				goto out_shutdown;

			/* Relog intent items to keep the log moving. */
			error = xfs_defer_relog(tp, &dop_pending);
			if (error)
				goto out_shutdown;
			xfs_defer_relog(tp, &dop_pending);
			xfs_defer_relog(tp, &dop_paused);

			if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
				error = xfs_defer_trans_roll(tp);
				if (error)
					goto out_shutdown;
			}
		}

		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
				       dfp_list);
		dfp = list_first_entry_or_null(&dop_pending,
				struct xfs_defer_pending, dfp_list);
		if (!dfp)
			break;
		error = xfs_defer_finish_one(*tp, dfp);
		if (error && error != -EAGAIN)
			goto out_shutdown;
	}

	/* Requeue the paused items in the outgoing transaction. */
	list_splice_tail_init(&dop_paused, &(*tp)->t_dfops);

	trace_xfs_defer_finish_done(*tp, _RET_IP_);
	return 0;

out_shutdown:
	list_splice_tail_init(&dop_paused, &dop_pending);
	xfs_defer_trans_abort(*tp, &dop_pending);
	xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
	trace_xfs_defer_finish_error(*tp, error);
@@ -590,6 +727,9 @@ int
xfs_defer_finish(
	struct xfs_trans	**tp)
{
#ifdef DEBUG
	struct xfs_defer_pending *dfp;
#endif
	int			error;

	/*
@@ -609,7 +749,10 @@ xfs_defer_finish(
	}

	/* Reset LOWMODE now that we've finished all the dfops. */
	ASSERT(list_empty(&(*tp)->t_dfops));
#ifdef DEBUG
	list_for_each_entry(dfp, &(*tp)->t_dfops, dfp_list)
		ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED);
#endif
	(*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
	return 0;
}
@@ -621,48 +764,165 @@ xfs_defer_cancel(
	struct xfs_mount	*mp = tp->t_mountp;

	trace_xfs_defer_cancel(tp, _RET_IP_);
	xfs_defer_trans_abort(tp, &tp->t_dfops);
	xfs_defer_cancel_list(mp, &tp->t_dfops);
}

/* Add an item for later deferred processing. */
void
xfs_defer_add(
/*
 * Return the last pending work item attached to this transaction if it matches
 * the deferred op type.
 */
static inline struct xfs_defer_pending *
xfs_defer_find_last(
	struct xfs_trans		*tp,
	enum xfs_defer_ops_type		type,
	struct list_head		*li)
	const struct xfs_defer_op_type	*ops)
{
	struct xfs_defer_pending	*dfp = NULL;

	/* No dfops at all? */
	if (list_empty(&tp->t_dfops))
		return NULL;

	dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending,
			dfp_list);

	/* Wrong type? */
	if (dfp->dfp_ops != ops)
		return NULL;
	return dfp;
}

/*
 * Decide if we can add a deferred work item to the last dfops item attached
 * to the transaction.
 */
static inline bool
xfs_defer_can_append(
	struct xfs_defer_pending	*dfp,
	const struct xfs_defer_op_type	*ops)
{
	/* Already logged? */
	if (dfp->dfp_intent)
		return false;

	/* Paused items cannot absorb more work */
	if (dfp->dfp_flags & XFS_DEFER_PAUSED)
		return false;

	/* Already full? */
	if (ops->max_items && dfp->dfp_count >= ops->max_items)
		return false;

	return true;
}

/* Create a new pending item at the end of the transaction list. */
static inline struct xfs_defer_pending *
xfs_defer_alloc(
	struct xfs_trans		*tp,
	const struct xfs_defer_op_type	*ops)
{
	struct xfs_defer_pending	*dfp;

	dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
			GFP_NOFS | __GFP_NOFAIL);
	dfp->dfp_ops = ops;
	INIT_LIST_HEAD(&dfp->dfp_work);
	list_add_tail(&dfp->dfp_list, &tp->t_dfops);

	return dfp;
}

/* Add an item for later deferred processing. */
struct xfs_defer_pending *
xfs_defer_add(
	struct xfs_trans		*tp,
	struct list_head		*li,
	const struct xfs_defer_op_type	*ops)
{
	struct xfs_defer_pending	*dfp = NULL;
	const struct xfs_defer_op_type	*ops = defer_op_types[type];

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);

	/*
	 * Add the item to a pending item at the end of the intake list.
	 * If the last pending item has the same type, reuse it.  Else,
	 * create a new pending item at the end of the intake list.
	 */
	if (!list_empty(&tp->t_dfops)) {
		dfp = list_last_entry(&tp->t_dfops,
				struct xfs_defer_pending, dfp_list);
		if (dfp->dfp_type != type ||
		    (ops->max_items && dfp->dfp_count >= ops->max_items))
			dfp = NULL;
	}
	if (!dfp) {
		dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
				GFP_NOFS | __GFP_NOFAIL);
		dfp->dfp_type = type;
		dfp->dfp_intent = NULL;
		dfp->dfp_done = NULL;
		dfp->dfp_count = 0;
		INIT_LIST_HEAD(&dfp->dfp_work);
		list_add_tail(&dfp->dfp_list, &tp->t_dfops);
	}
	dfp = xfs_defer_find_last(tp, ops);
	if (!dfp || !xfs_defer_can_append(dfp, ops))
		dfp = xfs_defer_alloc(tp, ops);

	list_add_tail(li, &dfp->dfp_work);
	xfs_defer_add_item(dfp, li);
	trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
	dfp->dfp_count++;
	return dfp;
}

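The rework above splits xfs_defer_add() into find-last/can-append/alloc helpers: append to the tail pending item when it has the same ops and room, otherwise start a fresh one. A self-contained C model of that append-or-allocate policy (all names invented; a singly linked list stands in for the transaction's dfops list):

#include <stdio.h>
#include <stdlib.h>

struct ops { const char *name; unsigned int max_items; };
struct pending {
	const struct ops	*ops;
	unsigned int		count;
	int			logged;		/* stand-in for dfp_intent */
	struct pending		*next;
};

static struct pending *find_last(struct pending *head)
{
	if (!head)
		return NULL;
	while (head->next)
		head = head->next;
	return head;
}

static int can_append(const struct pending *dfp, const struct ops *ops)
{
	if (dfp->ops != ops)
		return 0;	/* wrong type */
	if (dfp->logged)
		return 0;	/* already has a log intent item */
	if (ops->max_items && dfp->count >= ops->max_items)
		return 0;	/* full */
	return 1;
}

static struct pending *defer_add(struct pending **head, const struct ops *ops)
{
	struct pending *dfp = find_last(*head);

	if (!dfp || !can_append(dfp, ops)) {
		struct pending *n = calloc(1, sizeof(*n));

		n->ops = ops;
		if (dfp)
			dfp->next = n;
		else
			*head = n;
		dfp = n;
	}
	dfp->count++;
	return dfp;
}

int main(void)
{
	const struct ops bmap = { "bmap", 2 };
	struct pending *head = NULL, *p, *n;
	int i;

	for (i = 0; i < 5; i++)
		defer_add(&head, &bmap);
	for (p = head; p; p = n) {
		printf("pending '%s' holds %u item(s)\n", p->ops->name,
		       p->count);
		n = p->next;
		free(p);
	}
	return 0;
}
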
/*
 * Add a defer ops barrier to force two otherwise adjacent deferred work items
 * to be tracked separately and have separate log items.
 */
void
xfs_defer_add_barrier(
	struct xfs_trans		*tp)
{
	struct xfs_defer_pending	*dfp;

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/* If the last defer op added was a barrier, we're done. */
	dfp = xfs_defer_find_last(tp, &xfs_barrier_defer_type);
	if (dfp)
		return;

	dfp = xfs_defer_alloc(tp, &xfs_barrier_defer_type);

	trace_xfs_defer_add_item(tp->t_mountp, dfp, NULL);
}

/*
 * Create a pending deferred work item to replay the recovered intent item
 * and add it to the list.
 */
void
xfs_defer_start_recovery(
	struct xfs_log_item		*lip,
	struct list_head		*r_dfops,
	const struct xfs_defer_op_type	*ops)
{
	struct xfs_defer_pending	*dfp;

	dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
			GFP_NOFS | __GFP_NOFAIL);
	dfp->dfp_ops = ops;
	dfp->dfp_intent = lip;
	INIT_LIST_HEAD(&dfp->dfp_work);
	list_add_tail(&dfp->dfp_list, r_dfops);
}

/*
 * Cancel a deferred work item created to recover a log intent item.  @dfp
 * will be freed after this function returns.
 */
void
xfs_defer_cancel_recovery(
	struct xfs_mount		*mp,
	struct xfs_defer_pending	*dfp)
{
	xfs_defer_pending_abort(mp, dfp);
	xfs_defer_pending_cancel_work(mp, dfp);
}

/* Replay the deferred work item created from a recovered log intent item. */
int
xfs_defer_finish_recovery(
	struct xfs_mount		*mp,
	struct xfs_defer_pending	*dfp,
	struct list_head		*capture_list)
{
	const struct xfs_defer_op_type	*ops = dfp->dfp_ops;
	int				error;

	/* dfp is freed by recover_work and must not be accessed afterwards */
	error = ops->recover_work(dfp, capture_list);
	if (error)
		trace_xlog_intent_recovery_failed(mp, ops, error);
	return error;
}

/*
@@ -769,7 +1029,7 @@ xfs_defer_ops_capture_abort(
{
	unsigned short			i;

	xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
	xfs_defer_pending_abort_list(mp, &dfc->dfc_dfops);
	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);

	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
@@ -938,3 +1198,36 @@ xfs_defer_destroy_item_caches(void)
	xfs_rmap_intent_destroy_cache();
	xfs_defer_destroy_cache();
}

/*
 * Mark a deferred work item so that it will be requeued indefinitely without
 * being finished.  Caller must ensure there are no data dependencies on this
 * work item in the meantime.
 */
void
xfs_defer_item_pause(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{
	ASSERT(!(dfp->dfp_flags & XFS_DEFER_PAUSED));

	dfp->dfp_flags |= XFS_DEFER_PAUSED;

	trace_xfs_defer_item_pause(tp->t_mountp, dfp);
}

/*
 * Release a paused deferred work item so that it will be finished during the
 * next transaction roll.
 */
void
xfs_defer_item_unpause(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{
	ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED);

	dfp->dfp_flags &= ~XFS_DEFER_PAUSED;

	trace_xfs_defer_item_unpause(tp->t_mountp, dfp);
}

@@ -10,19 +10,6 @@ struct xfs_btree_cur;
struct xfs_defer_op_type;
struct xfs_defer_capture;

/*
 * Header for deferred operation list.
 */
enum xfs_defer_ops_type {
	XFS_DEFER_OPS_TYPE_BMAP,
	XFS_DEFER_OPS_TYPE_REFCOUNT,
	XFS_DEFER_OPS_TYPE_RMAP,
	XFS_DEFER_OPS_TYPE_FREE,
	XFS_DEFER_OPS_TYPE_AGFL_FREE,
	XFS_DEFER_OPS_TYPE_ATTR,
	XFS_DEFER_OPS_TYPE_MAX,
};

/*
 * Save a log intent item and a list of extents, so that we can replay
 * whatever action had to happen to the extent list and file the log done
@@ -33,19 +20,35 @@ struct xfs_defer_pending {
	struct list_head		dfp_work;	/* work items */
	struct xfs_log_item		*dfp_intent;	/* log intent item */
	struct xfs_log_item		*dfp_done;	/* log done item */
	const struct xfs_defer_op_type	*dfp_ops;
	unsigned int			dfp_count;	/* # extent items */
	enum xfs_defer_ops_type		dfp_type;
	unsigned int			dfp_flags;
};

void xfs_defer_add(struct xfs_trans *tp, enum xfs_defer_ops_type type,
		struct list_head *h);
/*
 * Create a log intent item for this deferred item, but don't actually finish
 * the work.  Caller must clear this before the final transaction commit.
 */
#define XFS_DEFER_PAUSED	(1U << 0)

#define XFS_DEFER_PENDING_STRINGS \
	{ XFS_DEFER_PAUSED,	"paused" }

void xfs_defer_item_pause(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
void xfs_defer_item_unpause(struct xfs_trans *tp, struct xfs_defer_pending *dfp);

struct xfs_defer_pending *xfs_defer_add(struct xfs_trans *tp, struct list_head *h,
		const struct xfs_defer_op_type *ops);
int xfs_defer_finish_noroll(struct xfs_trans **tp);
int xfs_defer_finish(struct xfs_trans **tp);
int xfs_defer_finish_one(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
void xfs_defer_cancel(struct xfs_trans *);
void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);

/* Description of a deferred type. */
struct xfs_defer_op_type {
	const char		*name;
	unsigned int		max_items;
	struct xfs_log_item *(*create_intent)(struct xfs_trans *tp,
			struct list_head *items, unsigned int count, bool sort);
	void (*abort_intent)(struct xfs_log_item *intent);
@@ -56,7 +59,11 @@ struct xfs_defer_op_type {
	void (*finish_cleanup)(struct xfs_trans *tp,
			struct xfs_btree_cur *state, int error);
	void (*cancel_item)(struct list_head *item);
	unsigned int		max_items;
	int (*recover_work)(struct xfs_defer_pending *dfp,
			    struct list_head *capture_list);
	struct xfs_log_item *(*relog_intent)(struct xfs_trans *tp,
			struct xfs_log_item *intent,
			struct xfs_log_item *done_item);
};

extern const struct xfs_defer_op_type xfs_bmap_update_defer_type;
@@ -125,7 +132,25 @@ void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
		struct xfs_defer_capture *d);
void xfs_defer_resources_rele(struct xfs_defer_resources *dres);

void xfs_defer_start_recovery(struct xfs_log_item *lip,
		struct list_head *r_dfops, const struct xfs_defer_op_type *ops);
void xfs_defer_cancel_recovery(struct xfs_mount *mp,
		struct xfs_defer_pending *dfp);
int xfs_defer_finish_recovery(struct xfs_mount *mp,
		struct xfs_defer_pending *dfp, struct list_head *capture_list);

static inline void
xfs_defer_add_item(
	struct xfs_defer_pending	*dfp,
	struct list_head		*work)
{
	list_add_tail(work, &dfp->dfp_work);
	dfp->dfp_count++;
}

int __init xfs_defer_init_item_caches(void);
void xfs_defer_destroy_item_caches(void);

void xfs_defer_add_barrier(struct xfs_trans *tp);

#endif /* __XFS_DEFER_H__ */

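The header change above replaces the enum-indexed defer_op_types[] table with a direct ops pointer in each pending item, so the generic code only ever calls through struct xfs_defer_op_type function pointers. A small C sketch of that ops-table style (toy types, not the real interface):

#include <stdio.h>

struct item { int payload; };

struct op_type {
	const char	*name;
	unsigned int	max_items;
	int		(*finish_item)(struct item *it);
};

static int toy_finish(struct item *it)
{
	printf("finishing item %d\n", it->payload);
	return 0;
}

static const struct op_type toy_defer_type = {
	.name		= "toy",
	.max_items	= 2,
	.finish_item	= toy_finish,
};

/* Generic driver: knows nothing about the concrete deferred type. */
static int finish_all(const struct op_type *ops, struct item *items, int n)
{
	int i, error;

	for (i = 0; i < n; i++) {
		error = ops->finish_item(&items[i]);
		if (error)
			return error;
	}
	return 0;
}

int main(void)
{
	struct item items[] = { { 1 }, { 2 } };

	return finish_all(&toy_defer_type, items, 2);
}
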
@@ -196,7 +196,7 @@ xfs_dir_isempty(
		return 1;
	if (dp->i_disk_size > xfs_inode_data_fork_size(dp))
		return 0;
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	sfp = dp->i_df.if_data;
	return !sfp->count;
}

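This hunk and the xfs_dir2_sf.c changes further down share one theme: if_data is now a plain void pointer and xfs_idata_realloc() returns the (possibly moved) data pointer, so callers assign it directly instead of re-reading a cached pointer that may be stale after the realloc. A toy C model of that return-the-new-pointer convention (invented names only):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct fork { void *if_data; size_t bytes; };

static void *idata_realloc(struct fork *ifp, long byte_diff)
{
	ifp->bytes += byte_diff;
	ifp->if_data = realloc(ifp->if_data, ifp->bytes);
	return ifp->if_data;	/* callers use this, never the old pointer */
}

int main(void)
{
	struct fork df = { malloc(8), 8 };
	char *sfp;

	strcpy(df.if_data, "header");
	sfp = idata_realloc(&df, 8);	/* grow; old pointer may be invalid */
	printf("still readable after grow: %s\n", sfp);
	free(df.if_data);
	return 0;
}
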
@@ -1089,7 +1089,7 @@ xfs_dir2_sf_to_block(
	int			newoffset;	/* offset from current entry */
	unsigned int		offset = geo->data_entry_offset;
	xfs_dir2_sf_entry_t	*sfep;		/* sf entry pointer */
	xfs_dir2_sf_hdr_t	*oldsfp;	/* old shortform header  */
	struct xfs_dir2_sf_hdr	*oldsfp = ifp->if_data;
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform header  */
	__be16			*tagp;		/* end of data entry */
	struct xfs_name		name;
@@ -1099,10 +1099,8 @@ xfs_dir2_sf_to_block(
	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(dp->i_disk_size >= offsetof(struct xfs_dir2_sf_hdr, parent));

	oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data;

	ASSERT(ifp->if_bytes == dp->i_disk_size);
	ASSERT(ifp->if_u1.if_data != NULL);
	ASSERT(oldsfp != NULL);
	ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
	ASSERT(dp->i_df.if_nextents == 0);

@@ -175,7 +175,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_mount *mp,
		struct xfs_dir2_sf_hdr *sfp, int64_t size);
int xfs_dir2_sf_entsize(struct xfs_mount *mp,
		struct xfs_dir2_sf_hdr *hdr, int len);
void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,

@@ -364,25 +364,23 @@ int						/* error */
xfs_dir2_sf_addname(
	xfs_da_args_t		*args)		/* operation arguments */
{
	xfs_inode_t		*dp;		/* incore directory inode */
	struct xfs_inode	*dp = args->dp;
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	int			error;		/* error return value */
	int			incr_isize;	/* total change in size */
	int			new_isize;	/* size after adding name */
	int			objchange;	/* changing to 8-byte inodes */
	xfs_dir2_data_aoff_t	offset = 0;	/* offset for new entry */
	int			pick;		/* which algorithm to use */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
	xfs_dir2_sf_entry_t	*sfep = NULL;	/* shortform entry */

	trace_xfs_dir2_sf_addname(args);

	ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT);
	dp = args->dp;
	ASSERT(dp->i_df.if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(dp->i_disk_size >= offsetof(struct xfs_dir2_sf_hdr, parent));
	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
	ASSERT(dp->i_df.if_u1.if_data != NULL);
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	ASSERT(sfp != NULL);
	ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
	/*
	 * Compute entry (and change in) size.
@@ -462,20 +460,17 @@ xfs_dir2_sf_addname_easy(
{
	struct xfs_inode	*dp = args->dp;
	struct xfs_mount	*mp = dp->i_mount;
	int			byteoff;	/* byte offset in sf dir */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	int			byteoff = (int)((char *)sfep - (char *)sfp);

	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	byteoff = (int)((char *)sfep - (char *)sfp);
	/*
	 * Grow the in-inode space.
	 */
	xfs_idata_realloc(dp, xfs_dir2_sf_entsize(mp, sfp, args->namelen),
	sfp = xfs_idata_realloc(dp, xfs_dir2_sf_entsize(mp, sfp, args->namelen),
			XFS_DATA_FORK);
	/*
	 * Need to set up again due to realloc of the inode data.
	 */
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
	/*
	 * Fill in the new entry.
@@ -528,11 +523,10 @@ xfs_dir2_sf_addname_hard(
	/*
	 * Copy the old directory to the stack buffer.
	 */
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	old_isize = (int)dp->i_disk_size;
	buf = kmem_alloc(old_isize, 0);
	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
	memcpy(oldsfp, sfp, old_isize);
	memcpy(oldsfp, dp->i_df.if_data, old_isize);
	/*
	 * Loop over the old directory finding the place we're going
	 * to insert the new entry.
@@ -556,11 +550,8 @@ xfs_dir2_sf_addname_hard(
	 * the data.
	 */
	xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK);
	xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK);
	/*
	 * Reset the pointer since the buffer was reallocated.
	 */
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	sfp = xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK);

	/*
	 * Copy the first part of the directory, including the header.
	 */
@@ -610,11 +601,10 @@ xfs_dir2_sf_addname_pick(
	int			i;		/* entry number */
	xfs_dir2_data_aoff_t	offset;		/* data block offset */
	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	int			size;		/* entry's data size */
	int			used;		/* data bytes used */

	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	size = xfs_dir2_data_entsize(mp, args->namelen);
	offset = args->geo->data_first_offset;
	sfep = xfs_dir2_sf_firstentry(sfp);
@@ -673,14 +663,13 @@ xfs_dir2_sf_check(
{
	struct xfs_inode	*dp = args->dp;
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	int			i;		/* entry number */
	int			i8count;	/* number of big inode#s */
	xfs_ino_t		ino;		/* entry inode number */
	int			offset;		/* data offset */
	xfs_dir2_sf_entry_t	*sfep;		/* shortform dir entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */

	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	offset = args->geo->data_first_offset;
	ino = xfs_dir2_sf_get_parent_ino(sfp);
	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
@@ -707,11 +696,10 @@ xfs_dir2_sf_check(
/* Verify the consistency of an inline directory. */
xfs_failaddr_t
xfs_dir2_sf_verify(
	struct xfs_inode		*ip)
	struct xfs_mount		*mp,
	struct xfs_dir2_sf_hdr		*sfp,
	int64_t				size)
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_dir2_sf_hdr		*sfp;
	struct xfs_dir2_sf_entry	*sfep;
	struct xfs_dir2_sf_entry	*next_sfep;
	char				*endp;
@@ -719,15 +707,9 @@ xfs_dir2_sf_verify(
	int				i;
	int				i8count;
	int				offset;
	int64_t				size;
	int				error;
	uint8_t				filetype;

	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);

	sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
	size = ifp->if_bytes;

	/*
	 * Give up if the directory is way too short.
	 */
@@ -834,15 +816,13 @@ xfs_dir2_sf_create(
	ASSERT(dp->i_df.if_bytes == 0);
	i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
	size = xfs_dir2_sf_hdr_size(i8count);

	/*
	 * Make a buffer for the data.
	 * Make a buffer for the data and fill in the header.
	 */
	xfs_idata_realloc(dp, size, XFS_DATA_FORK);
	/*
	 * Fill in the header,
	 */
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	sfp = xfs_idata_realloc(dp, size, XFS_DATA_FORK);
	sfp->i8count = i8count;

	/*
	 * Now can put in the inode number, since i8count is set.
	 */
@@ -864,9 +844,9 @@ xfs_dir2_sf_lookup(
{
	struct xfs_inode	*dp = args->dp;
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	int			i;		/* entry index */
	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
	enum xfs_dacmp		cmp;		/* comparison result */
	xfs_dir2_sf_entry_t	*ci_sfep;	/* case-insens. entry */

@@ -877,8 +857,7 @@ xfs_dir2_sf_lookup(
	ASSERT(dp->i_df.if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(dp->i_disk_size >= offsetof(struct xfs_dir2_sf_hdr, parent));
	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
	ASSERT(dp->i_df.if_u1.if_data != NULL);
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	ASSERT(sfp != NULL);
	ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
	/*
	 * Special case for .
@@ -940,13 +919,13 @@ xfs_dir2_sf_removename(
{
	struct xfs_inode	*dp = args->dp;
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	int			byteoff;	/* offset of removed entry */
	int			entsize;	/* this entry's size */
	int			i;		/* shortform entry index */
	int			newsize;	/* new inode size */
	int			oldsize;	/* old inode size */
	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */

	trace_xfs_dir2_sf_removename(args);

@@ -954,8 +933,7 @@ xfs_dir2_sf_removename(
	oldsize = (int)dp->i_disk_size;
	ASSERT(oldsize >= offsetof(struct xfs_dir2_sf_hdr, parent));
	ASSERT(dp->i_df.if_bytes == oldsize);
	ASSERT(dp->i_df.if_u1.if_data != NULL);
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	ASSERT(sfp != NULL);
	ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
	/*
	 * Loop over the old directory entries.
@@ -992,11 +970,12 @@ xfs_dir2_sf_removename(
	 */
	sfp->count--;
	dp->i_disk_size = newsize;

	/*
	 * Reallocate, making it smaller.
	 */
	xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	sfp = xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);

	/*
	 * Are we changing inode number size?
	 */
@@ -1019,13 +998,12 @@ xfs_dir2_sf_replace_needblock(
	struct xfs_inode	*dp,
	xfs_ino_t		inum)
{
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	int			newsize;
	struct xfs_dir2_sf_hdr	*sfp;

	if (dp->i_df.if_format != XFS_DINODE_FMT_LOCAL)
		return false;

	sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;
	newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;

	return inum > XFS_DIR2_MAX_SHORT_INUM &&
@@ -1041,19 +1019,18 @@ xfs_dir2_sf_replace(
{
	struct xfs_inode	*dp = args->dp;
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	int			i;		/* entry index */
	xfs_ino_t		ino=0;		/* entry old inode number */
	int			i8elevated;	/* sf_toino8 set i8count=1 */
	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */

	trace_xfs_dir2_sf_replace(args);
|
||||
|
||||
ASSERT(dp->i_df.if_format == XFS_DINODE_FMT_LOCAL);
|
||||
ASSERT(dp->i_disk_size >= offsetof(struct xfs_dir2_sf_hdr, parent));
|
||||
ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
|
||||
ASSERT(dp->i_df.if_u1.if_data != NULL);
|
||||
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
|
||||
ASSERT(sfp != NULL);
|
||||
ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
|
||||
|
||||
/*
|
||||
@ -1076,7 +1053,7 @@ xfs_dir2_sf_replace(
|
||||
*/
|
||||
xfs_dir2_sf_toino8(args);
|
||||
i8elevated = 1;
|
||||
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
|
||||
sfp = dp->i_df.if_data;
|
||||
} else
|
||||
i8elevated = 0;
|
||||
|
||||
@ -1157,11 +1134,11 @@ xfs_dir2_sf_toino4(
|
||||
{
|
||||
struct xfs_inode *dp = args->dp;
|
||||
struct xfs_mount *mp = dp->i_mount;
|
||||
struct xfs_dir2_sf_hdr *oldsfp = dp->i_df.if_data;
|
||||
char *buf; /* old dir's buffer */
|
||||
int i; /* entry index */
|
||||
int newsize; /* new inode size */
|
||||
xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
|
||||
xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
|
||||
int oldsize; /* old inode size */
|
||||
xfs_dir2_sf_entry_t *sfep; /* new sf entry */
|
||||
xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
|
||||
@ -1175,7 +1152,6 @@ xfs_dir2_sf_toino4(
|
||||
*/
|
||||
oldsize = dp->i_df.if_bytes;
|
||||
buf = kmem_alloc(oldsize, 0);
|
||||
oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
|
||||
ASSERT(oldsfp->i8count == 1);
|
||||
memcpy(buf, oldsfp, oldsize);
|
||||
/*
|
||||
@ -1188,7 +1164,7 @@ xfs_dir2_sf_toino4(
|
||||
* Reset our pointers, the data has moved.
|
||||
*/
|
||||
oldsfp = (xfs_dir2_sf_hdr_t *)buf;
|
||||
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
|
||||
sfp = dp->i_df.if_data;
|
||||
/*
|
||||
* Fill in the new header.
|
||||
*/
|
||||
@ -1230,11 +1206,11 @@ xfs_dir2_sf_toino8(
|
||||
{
|
||||
struct xfs_inode *dp = args->dp;
|
||||
struct xfs_mount *mp = dp->i_mount;
|
||||
struct xfs_dir2_sf_hdr *oldsfp = dp->i_df.if_data;
|
||||
char *buf; /* old dir's buffer */
|
||||
int i; /* entry index */
|
||||
int newsize; /* new inode size */
|
||||
xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
|
||||
xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
|
||||
int oldsize; /* old inode size */
|
||||
xfs_dir2_sf_entry_t *sfep; /* new sf entry */
|
||||
xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
|
||||
@ -1248,7 +1224,6 @@ xfs_dir2_sf_toino8(
|
||||
*/
|
||||
oldsize = dp->i_df.if_bytes;
|
||||
buf = kmem_alloc(oldsize, 0);
|
||||
oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
|
||||
ASSERT(oldsfp->i8count == 0);
|
||||
memcpy(buf, oldsfp, oldsize);
|
||||
/*
|
||||
@ -1261,7 +1236,7 @@ xfs_dir2_sf_toino8(
|
||||
* Reset our pointers, the data has moved.
|
||||
*/
|
||||
oldsfp = (xfs_dir2_sf_hdr_t *)buf;
|
||||
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
|
||||
sfp = dp->i_df.if_data;
|
||||
/*
|
||||
* Fill in the new header.
|
||||
*/
|
||||
|
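The shortform-directory hunks above all follow one conversion: callers stop reaching into the old if_u1 union and instead use the flattened if_data pointer, and xfs_idata_realloc() now returns the (possibly moved) inline buffer so callers no longer re-read the fork by hand. A minimal userspace sketch of that calling convention (the ifork struct here is a hypothetical stand-in; the kernel uses krealloc with GFP_NOFS | __GFP_NOFAIL rather than libc realloc, so allocation failure handling is elided):

#include <stdlib.h>

struct ifork {			/* hypothetical stand-in for xfs_ifork */
	long	 if_bytes;
	void	*if_data;	/* inline data; was if_u1.if_data */
};

/* Grow or shrink the inline data and hand the new buffer back. */
static void *
ifork_data_realloc(struct ifork *ifp, long byte_diff)
{
	long new_size = ifp->if_bytes + byte_diff;

	if (byte_diff) {
		if (new_size == 0) {
			free(ifp->if_data);
			ifp->if_data = NULL;
		} else {
			ifp->if_data = realloc(ifp->if_data, new_size);
		}
		ifp->if_bytes = new_size;
	}
	return ifp->if_data;	/* caller: sfp = ifork_data_realloc(...) */
}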
@@ -1008,7 +1008,7 @@ enum xfs_dinode_fmt {
* Return pointers to the data or attribute forks.
*/
#define XFS_DFORK_DPTR(dip) \
((char *)dip + xfs_dinode_size(dip->di_version))
((void *)dip + xfs_dinode_size(dip->di_version))
#define XFS_DFORK_APTR(dip) \
(XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
#define XFS_DFORK_PTR(dip,w) \
@@ -1156,20 +1156,6 @@ static inline bool xfs_dinode_has_large_extent_counts(
#define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64kB */
#define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */

#define XFS_BLOCKSIZE(mp) ((mp)->m_sb.sb_blocksize)
#define XFS_BLOCKMASK(mp) ((mp)->m_blockmask)

/*
* RT bit manipulation macros.
*/
#define XFS_RTMIN(a,b) ((a) < (b) ? (a) : (b))
#define XFS_RTMAX(a,b) ((a) > (b) ? (a) : (b))

#define XFS_RTLOBIT(w) xfs_lowbit32(w)
#define XFS_RTHIBIT(w) xfs_highbit32(w)

#define XFS_RTBLOCKLOG(b) xfs_highbit64(b)

/*
* Dquot and dquot block format definitions
*/
@@ -1272,6 +1258,9 @@ static inline time64_t xfs_dq_bigtime_to_unix(uint32_t ondisk_seconds)
#define XFS_DQ_GRACE_MIN ((int64_t)0)
#define XFS_DQ_GRACE_MAX ((int64_t)U32_MAX)

/* Maximum id value for a quota record */
#define XFS_DQ_ID_MAX (U32_MAX)

/*
* This is the main portion of the on-disk representation of quota information
* for a user. We pad this with some more expansion room to construct the on
@@ -68,6 +68,11 @@ struct xfs_fsop_geom;
#define XFS_SICK_INO_SYMLINK (1 << 6) /* symbolic link remote target */
#define XFS_SICK_INO_PARENT (1 << 7) /* parent pointers */

#define XFS_SICK_INO_BMBTD_ZAPPED (1 << 8) /* data fork erased */
#define XFS_SICK_INO_BMBTA_ZAPPED (1 << 9) /* attr fork erased */
#define XFS_SICK_INO_DIR_ZAPPED (1 << 10) /* directory erased */
#define XFS_SICK_INO_SYMLINK_ZAPPED (1 << 11) /* symlink erased */

/* Primary evidence of health problems in a given group. */
#define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \
XFS_SICK_FS_UQUOTA | \
@@ -97,6 +102,11 @@ struct xfs_fsop_geom;
XFS_SICK_INO_SYMLINK | \
XFS_SICK_INO_PARENT)

#define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \
XFS_SICK_INO_BMBTA_ZAPPED | \
XFS_SICK_INO_DIR_ZAPPED | \
XFS_SICK_INO_SYMLINK_ZAPPED)

/* These functions must be provided by the xfs implementation. */

void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask);
@@ -95,18 +95,28 @@ xfs_inobt_btrec_to_irec(
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
}

/* Compute the freecount of an incore inode record. */
uint8_t
xfs_inobt_rec_freecount(
const struct xfs_inobt_rec_incore *irec)
{
uint64_t realfree = irec->ir_free;

if (xfs_inobt_issparse(irec->ir_holemask))
realfree &= xfs_inobt_irec_to_allocmask(irec);
return hweight64(realfree);
}

/* Simple checks for inode records. */
xfs_failaddr_t
xfs_inobt_check_irec(
struct xfs_btree_cur *cur,
struct xfs_perag *pag,
const struct xfs_inobt_rec_incore *irec)
{
uint64_t realfree;

/* Record has to be properly aligned within the AG. */
if (!xfs_verify_agino(cur->bc_ag.pag, irec->ir_startino))
if (!xfs_verify_agino(pag, irec->ir_startino))
return __this_address;
if (!xfs_verify_agino(cur->bc_ag.pag,
if (!xfs_verify_agino(pag,
irec->ir_startino + XFS_INODES_PER_CHUNK - 1))
return __this_address;
if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
@@ -115,12 +125,7 @@ xfs_inobt_check_irec(
if (irec->ir_freecount > XFS_INODES_PER_CHUNK)
return __this_address;

/* if there are no holes, return the first available offset */
if (!xfs_inobt_issparse(irec->ir_holemask))
realfree = irec->ir_free;
else
realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec);
if (hweight64(realfree) != irec->ir_freecount)
if (xfs_inobt_rec_freecount(irec) != irec->ir_freecount)
return __this_address;

return NULL;
@@ -164,7 +169,7 @@ xfs_inobt_get_rec(
return error;

xfs_inobt_btrec_to_irec(mp, rec, irec);
fa = xfs_inobt_check_irec(cur, irec);
fa = xfs_inobt_check_irec(cur->bc_ag.pag, irec);
if (fa)
return xfs_inobt_complain_bad_rec(cur, fa, irec);

@@ -1854,7 +1859,7 @@ xfs_difree_inode_chunk(
return xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, sagbno),
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
}

/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1900,7 +1905,8 @@ xfs_difree_inode_chunk(
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
error = xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
&XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
&XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE,
false);
if (error)
return error;

@@ -2739,7 +2745,7 @@ xfs_ialloc_count_inodes_rec(
xfs_failaddr_t fa;

xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
fa = xfs_inobt_check_irec(cur, &irec);
fa = xfs_inobt_check_irec(cur->bc_ag.pag, &irec);
if (fa)
return xfs_inobt_complain_bad_rec(cur, fa, &irec);
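The new xfs_inobt_rec_freecount() helper centralizes the sparse-aware popcount that xfs_inobt_check_irec() used to open-code: free bits that fall inside the holes of a sparse inode chunk must not be counted. A freestanding sketch of the same computation, with the holemask expansion modeled as a plain parameter rather than the kernel's xfs_inobt_irec_to_allocmask():

#include <stdint.h>

/*
 * Freestanding model of the freecount helper: ir_free has one bit per
 * inode in the chunk; allocmask marks the inodes that are actually
 * backed by disk blocks (all ones for a fully allocated chunk).
 */
static uint8_t
inobt_rec_freecount(uint64_t ir_free, uint64_t allocmask, int is_sparse)
{
	uint64_t realfree = ir_free;

	if (is_sparse)
		realfree &= allocmask;	/* ignore free bits inside holes */
	return (uint8_t)__builtin_popcountll(realfree);	/* hweight64() */
}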
@@ -79,6 +79,7 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
*/
int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
xfs_inobt_rec_incore_t *rec, int *stat);
uint8_t xfs_inobt_rec_freecount(const struct xfs_inobt_rec_incore *irec);

/*
* Inode chunk initialisation routine
@@ -93,7 +94,7 @@ union xfs_btree_rec;
void xfs_inobt_btrec_to_irec(struct xfs_mount *mp,
const union xfs_btree_rec *rec,
struct xfs_inobt_rec_incore *irec);
xfs_failaddr_t xfs_inobt_check_irec(struct xfs_btree_cur *cur,
xfs_failaddr_t xfs_inobt_check_irec(struct xfs_perag *pag,
const struct xfs_inobt_rec_incore *irec);
int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur,
xfs_agblock_t bno, xfs_extlen_t len,
@@ -161,7 +161,7 @@ __xfs_inobt_free_block(
xfs_inobt_mod_blockcount(cur, -1);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_INOBT, resv);
&XFS_RMAP_OINFO_INOBT, resv, false);
}

STATIC int
@@ -158,7 +158,7 @@ static void *
xfs_iext_find_first_leaf(
struct xfs_ifork *ifp)
{
struct xfs_iext_node *node = ifp->if_u1.if_root;
struct xfs_iext_node *node = ifp->if_data;
int height;

if (!ifp->if_height)
@@ -176,7 +176,7 @@ static void *
xfs_iext_find_last_leaf(
struct xfs_ifork *ifp)
{
struct xfs_iext_node *node = ifp->if_u1.if_root;
struct xfs_iext_node *node = ifp->if_data;
int height, i;

if (!ifp->if_height)
@@ -306,7 +306,7 @@ xfs_iext_find_level(
xfs_fileoff_t offset,
int level)
{
struct xfs_iext_node *node = ifp->if_u1.if_root;
struct xfs_iext_node *node = ifp->if_data;
int height, i;

if (!ifp->if_height)
@@ -402,12 +402,12 @@ xfs_iext_grow(
int i;

if (ifp->if_height == 1) {
struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
struct xfs_iext_leaf *prev = ifp->if_data;

node->keys[0] = xfs_iext_leaf_key(prev, 0);
node->ptrs[0] = prev;
} else {
struct xfs_iext_node *prev = ifp->if_u1.if_root;
struct xfs_iext_node *prev = ifp->if_data;

ASSERT(ifp->if_height > 1);

@@ -418,7 +418,7 @@ xfs_iext_grow(
for (i = 1; i < KEYS_PER_NODE; i++)
node->keys[i] = XFS_IEXT_KEY_INVALID;

ifp->if_u1.if_root = node;
ifp->if_data = node;
ifp->if_height++;
}

@@ -430,7 +430,7 @@ xfs_iext_update_node(
int level,
void *ptr)
{
struct xfs_iext_node *node = ifp->if_u1.if_root;
struct xfs_iext_node *node = ifp->if_data;
int height, i;

for (height = ifp->if_height; height > level; height--) {
@@ -583,11 +583,11 @@ xfs_iext_alloc_root(
{
ASSERT(ifp->if_bytes == 0);

ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
ifp->if_data = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
ifp->if_height = 1;

/* now that we have a node step into it */
cur->leaf = ifp->if_u1.if_root;
cur->leaf = ifp->if_data;
cur->pos = 0;
}

@@ -603,9 +603,9 @@ xfs_iext_realloc_root(
if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
new_size = NODE_SIZE;

new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL);
new = krealloc(ifp->if_data, new_size, GFP_NOFS | __GFP_NOFAIL);
memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
ifp->if_u1.if_root = new;
ifp->if_data = new;
cur->leaf = new;
}

@@ -622,13 +622,11 @@ static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
}

void
xfs_iext_insert(
struct xfs_inode *ip,
xfs_iext_insert_raw(
struct xfs_ifork *ifp,
struct xfs_iext_cursor *cur,
struct xfs_bmbt_irec *irec,
int state)
struct xfs_bmbt_irec *irec)
{
struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state);
xfs_fileoff_t offset = irec->br_startoff;
struct xfs_iext_leaf *new = NULL;
int nr_entries, i;
@@ -662,12 +660,23 @@ xfs_iext_insert(
xfs_iext_set(cur_rec(cur), irec);
ifp->if_bytes += sizeof(struct xfs_iext_rec);

trace_xfs_iext_insert(ip, cur, state, _RET_IP_);

if (new)
xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
}

void
xfs_iext_insert(
struct xfs_inode *ip,
struct xfs_iext_cursor *cur,
struct xfs_bmbt_irec *irec,
int state)
{
struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state);

xfs_iext_insert_raw(ifp, cur, irec);
trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
}

static struct xfs_iext_node *
xfs_iext_rebalance_node(
struct xfs_iext_node *parent,
@@ -777,8 +786,8 @@ xfs_iext_remove_node(
* If we are at the root and only one entry is left we can just
* free this node and update the root pointer.
*/
ASSERT(node == ifp->if_u1.if_root);
ifp->if_u1.if_root = node->ptrs[0];
ASSERT(node == ifp->if_data);
ifp->if_data = node->ptrs[0];
ifp->if_height--;
kmem_free(node);
}
@@ -854,8 +863,8 @@ xfs_iext_free_last_leaf(
struct xfs_ifork *ifp)
{
ifp->if_height--;
kmem_free(ifp->if_u1.if_root);
ifp->if_u1.if_root = NULL;
kmem_free(ifp->if_data);
ifp->if_data = NULL;
}

void
@@ -872,7 +881,7 @@ xfs_iext_remove(
trace_xfs_iext_remove(ip, cur, state, _RET_IP_);

ASSERT(ifp->if_height > 0);
ASSERT(ifp->if_u1.if_root != NULL);
ASSERT(ifp->if_data != NULL);
ASSERT(xfs_iext_valid(ifp, cur));

xfs_iext_inc_seq(ifp);
@@ -1042,9 +1051,9 @@ void
xfs_iext_destroy(
struct xfs_ifork *ifp)
{
xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);
xfs_iext_destroy_node(ifp->if_data, ifp->if_height);

ifp->if_bytes = 0;
ifp->if_height = 0;
ifp->if_u1.if_root = NULL;
ifp->if_data = NULL;
}
@@ -50,12 +50,15 @@ xfs_init_local_fork(
mem_size++;

if (size) {
ifp->if_u1.if_data = kmem_alloc(mem_size, KM_NOFS);
memcpy(ifp->if_u1.if_data, data, size);
char *new_data = kmem_alloc(mem_size, KM_NOFS);

memcpy(new_data, data, size);
if (zero_terminate)
ifp->if_u1.if_data[size] = '\0';
new_data[size] = '\0';

ifp->if_data = new_data;
} else {
ifp->if_u1.if_data = NULL;
ifp->if_data = NULL;
}

ifp->if_bytes = size;
@@ -125,7 +128,7 @@ xfs_iformat_extents(
}

ifp->if_bytes = 0;
ifp->if_u1.if_root = NULL;
ifp->if_data = NULL;
ifp->if_height = 0;
if (size) {
dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
@@ -212,7 +215,7 @@ xfs_iformat_btree(
ifp->if_broot, size);

ifp->if_bytes = 0;
ifp->if_u1.if_root = NULL;
ifp->if_data = NULL;
ifp->if_height = 0;
return 0;
}
@@ -276,10 +279,9 @@ static uint16_t
xfs_dfork_attr_shortform_size(
struct xfs_dinode *dip)
{
struct xfs_attr_shortform *atp =
(struct xfs_attr_shortform *)XFS_DFORK_APTR(dip);
struct xfs_attr_sf_hdr *sf = XFS_DFORK_APTR(dip);

return be16_to_cpu(atp->hdr.totsize);
return be16_to_cpu(sf->totsize);
}

void
@@ -493,7 +495,7 @@ xfs_iroot_realloc(
* byte_diff -- the change in the number of bytes, positive or negative,
* requested for the if_data array.
*/
void
void *
xfs_idata_realloc(
struct xfs_inode *ip,
int64_t byte_diff,
@@ -505,21 +507,18 @@ xfs_idata_realloc(
ASSERT(new_size >= 0);
ASSERT(new_size <= xfs_inode_fork_size(ip, whichfork));

if (byte_diff == 0)
return;

if (new_size == 0) {
kmem_free(ifp->if_u1.if_data);
ifp->if_u1.if_data = NULL;
ifp->if_bytes = 0;
return;
if (byte_diff) {
ifp->if_data = krealloc(ifp->if_data, new_size,
GFP_NOFS | __GFP_NOFAIL);
if (new_size == 0)
ifp->if_data = NULL;
ifp->if_bytes = new_size;
}

ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, new_size,
GFP_NOFS | __GFP_NOFAIL);
ifp->if_bytes = new_size;
return ifp->if_data;
}

/* Free all memory and reset a fork back to its initial state. */
void
xfs_idestroy_fork(
struct xfs_ifork *ifp)
@@ -531,8 +530,8 @@ xfs_idestroy_fork(

switch (ifp->if_format) {
case XFS_DINODE_FMT_LOCAL:
kmem_free(ifp->if_u1.if_data);
ifp->if_u1.if_data = NULL;
kmem_free(ifp->if_data);
ifp->if_data = NULL;
break;
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
@@ -625,9 +624,9 @@ xfs_iflush_fork(
case XFS_DINODE_FMT_LOCAL:
if ((iip->ili_fields & dataflag[whichfork]) &&
(ifp->if_bytes > 0)) {
ASSERT(ifp->if_u1.if_data != NULL);
ASSERT(ifp->if_data != NULL);
ASSERT(ifp->if_bytes <= xfs_inode_fork_size(ip, whichfork));
memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
memcpy(cp, ifp->if_data, ifp->if_bytes);
}
break;

@@ -702,19 +701,27 @@ xfs_ifork_verify_local_data(
xfs_failaddr_t fa = NULL;

switch (VFS_I(ip)->i_mode & S_IFMT) {
case S_IFDIR:
fa = xfs_dir2_sf_verify(ip);
case S_IFDIR: {
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
struct xfs_dir2_sf_hdr *sfp = ifp->if_data;

fa = xfs_dir2_sf_verify(mp, sfp, ifp->if_bytes);
break;
case S_IFLNK:
fa = xfs_symlink_shortform_verify(ip);
}
case S_IFLNK: {
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);

fa = xfs_symlink_shortform_verify(ifp->if_data, ifp->if_bytes);
break;
}
default:
break;
}

if (fa) {
xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
ip->i_df.if_u1.if_data, ip->i_df.if_bytes, fa);
ip->i_df.if_data, ip->i_df.if_bytes, fa);
return -EFSCORRUPTED;
}

@@ -729,14 +736,17 @@ xfs_ifork_verify_local_attr(
struct xfs_ifork *ifp = &ip->i_af;
xfs_failaddr_t fa;

if (!xfs_inode_has_attr_fork(ip))
if (!xfs_inode_has_attr_fork(ip)) {
fa = __this_address;
else
fa = xfs_attr_shortform_verify(ip);
} else {
struct xfs_ifork *ifp = &ip->i_af;

ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
fa = xfs_attr_shortform_verify(ifp->if_data, ifp->if_bytes);
}
if (fa) {
xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
ifp->if_u1.if_data, ifp->if_bytes, fa);
ifp->if_data, ifp->if_bytes, fa);
return -EFSCORRUPTED;
}
@@ -13,14 +13,12 @@ struct xfs_dinode;
* File incore extent information, present for each of data & attr forks.
*/
struct xfs_ifork {
int64_t if_bytes; /* bytes in if_u1 */
int64_t if_bytes; /* bytes in if_data */
struct xfs_btree_block *if_broot; /* file's incore btree root */
unsigned int if_seq; /* fork mod counter */
int if_height; /* height of the extent tree */
union {
void *if_root; /* extent tree root */
char *if_data; /* inline file data */
} if_u1;
void *if_data; /* extent tree root or
inline data */
xfs_extnum_t if_nextents; /* # of extents in this fork */
short if_broot_bytes; /* bytes allocated for root */
int8_t if_format; /* format of this fork */
@@ -170,7 +168,7 @@ int xfs_iformat_attr_fork(struct xfs_inode *, struct xfs_dinode *);
void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_ifork *ifp);
void xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff,
void * xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff,
int whichfork);
void xfs_iroot_realloc(struct xfs_inode *, int, int);
int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
@@ -180,6 +178,9 @@ void xfs_init_local_fork(struct xfs_inode *ip, int whichfork,
const void *data, int64_t size);

xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp);
void xfs_iext_insert_raw(struct xfs_ifork *ifp,
struct xfs_iext_cursor *cur,
struct xfs_bmbt_irec *irec);
void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur,
struct xfs_bmbt_irec *, int);
void xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *,
@@ -11,6 +11,7 @@
* define how recovery should work for that type of log item.
*/
struct xlog_recover_item;
struct xfs_defer_op_type;

/* Sorting hat for log items as they're read in. */
enum xlog_recover_reorder {
@@ -153,4 +154,11 @@ xlog_recover_resv(const struct xfs_trans_res *r)
return ret;
}

struct xfs_defer_pending;

void xlog_recover_intent_item(struct xlog *log, struct xfs_log_item *lip,
xfs_lsn_t lsn, const struct xfs_defer_op_type *ops);
int xlog_recover_finish_intent(struct xfs_trans *tp,
struct xfs_defer_pending *dfp);

#endif /* __XFS_LOG_RECOVER_H__ */
@@ -7,16 +7,16 @@
#define __XFS_ONDISK_H

#define XFS_CHECK_STRUCT_SIZE(structname, size) \
BUILD_BUG_ON_MSG(sizeof(structname) != (size), "XFS: sizeof(" \
#structname ") is wrong, expected " #size)
static_assert(sizeof(structname) == (size), \
"XFS: sizeof(" #structname ") is wrong, expected " #size)

#define XFS_CHECK_OFFSET(structname, member, off) \
BUILD_BUG_ON_MSG(offsetof(structname, member) != (off), \
static_assert(offsetof(structname, member) == (off), \
"XFS: offsetof(" #structname ", " #member ") is wrong, " \
"expected " #off)

#define XFS_CHECK_VALUE(value, expected) \
BUILD_BUG_ON_MSG((value) != (expected), \
static_assert((value) == (expected), \
"XFS: value of " #value " is wrong, expected " #expected)

static inline void __init
@@ -93,13 +93,13 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9);
XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr_shortform, 4);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.totsize, 0);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.count, 2);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].namelen, 4);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].valuelen, 5);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].flags, 6);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].nameval, 7);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_hdr, 4);
XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, totsize, 0);
XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, count, 2);
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, namelen, 0);
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, valuelen, 1);
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, flags, 2);
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, nameval, 3);
XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t, 12);
XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t, 16);
XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t, 8);
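Replacing BUILD_BUG_ON_MSG() with C11 static_assert() means the same on-disk layout checks can compile unchanged in userspace (xfsprogs) without kernel headers. A minimal standalone illustration of the pattern, using a hypothetical struct in place of the real on-disk types:

#include <assert.h>	/* C11 static_assert */
#include <stddef.h>
#include <stdint.h>

struct demo_sf_hdr {	/* hypothetical on-disk header */
	uint16_t	totsize;
	uint8_t		count;
	uint8_t		pad;
};

/* The same checks compile in kernel and userspace builds alike. */
static_assert(sizeof(struct demo_sf_hdr) == 4,
	      "XFS: sizeof(demo_sf_hdr) is wrong, expected 4");
static_assert(offsetof(struct demo_sf_hdr, count) == 2,
	      "XFS: offsetof(demo_sf_hdr, count) is wrong, expected 2");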
@@ -123,11 +123,9 @@ xfs_refcount_btrec_to_irec(
/* Simple checks for refcount records. */
xfs_failaddr_t
xfs_refcount_check_irec(
struct xfs_btree_cur *cur,
struct xfs_perag *pag,
const struct xfs_refcount_irec *irec)
{
struct xfs_perag *pag = cur->bc_ag.pag;

if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
return __this_address;

@@ -179,7 +177,7 @@ xfs_refcount_get_rec(
return error;

xfs_refcount_btrec_to_irec(rec, irec);
fa = xfs_refcount_check_irec(cur, irec);
fa = xfs_refcount_check_irec(cur->bc_ag.pag, irec);
if (fa)
return xfs_refcount_complain_bad_rec(cur, fa, irec);

@@ -1153,7 +1151,7 @@ xfs_refcount_adjust_extents(
tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
goto out_error;
}
@@ -1215,7 +1213,7 @@ xfs_refcount_adjust_extents(
ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
goto out_error;
}
@@ -1458,7 +1456,7 @@ __xfs_refcount_add(
ri->ri_blockcount = blockcount;

xfs_refcount_update_get_group(tp->t_mountp, ri);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type);
}

/*
@@ -1899,7 +1897,7 @@ xfs_refcount_recover_extent(
INIT_LIST_HEAD(&rr->rr_list);
xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);

if (xfs_refcount_check_irec(cur, &rr->rr_rrec) != NULL ||
if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL ||
XFS_IS_CORRUPT(cur->bc_mp,
rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
kfree(rr);
@@ -1985,7 +1983,7 @@ xfs_refcount_recover_cow_leftovers(
/* Free the block. */
error = xfs_free_extent_later(tp, fsb,
rr->rr_rrec.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
goto out_trans;

@@ -2033,6 +2031,47 @@ xfs_refcount_has_records(
return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}

struct xfs_refcount_query_range_info {
xfs_refcount_query_range_fn fn;
void *priv;
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_refcount_query_range_helper(
struct xfs_btree_cur *cur,
const union xfs_btree_rec *rec,
void *priv)
{
struct xfs_refcount_query_range_info *query = priv;
struct xfs_refcount_irec irec;
xfs_failaddr_t fa;

xfs_refcount_btrec_to_irec(rec, &irec);
fa = xfs_refcount_check_irec(cur->bc_ag.pag, &irec);
if (fa)
return xfs_refcount_complain_bad_rec(cur, fa, &irec);

return query->fn(cur, &irec, query->priv);
}

/* Find all refcount records between two keys. */
int
xfs_refcount_query_range(
struct xfs_btree_cur *cur,
const struct xfs_refcount_irec *low_rec,
const struct xfs_refcount_irec *high_rec,
xfs_refcount_query_range_fn fn,
void *priv)
{
union xfs_btree_irec low_brec = { .rc = *low_rec };
union xfs_btree_irec high_brec = { .rc = *high_rec };
struct xfs_refcount_query_range_info query = { .priv = priv, .fn = fn };

return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_refcount_query_range_helper, &query);
}

int __init
xfs_refcount_intent_init_cache(void)
{
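The new xfs_refcount_query_range() gives callers a validated walk over all refcount records between two keys, in the style of the existing btree range-query helpers. A hedged usage sketch (the callback signature matches the typedef added in the header hunk below; the cursor setup and the helper name here are illustrative, not from the patch):

/* Count records with more than one owner; illustrative only. */
STATIC int
count_shared_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*rec,
	void				*priv)
{
	unsigned long long		*nr = priv;

	if (rec->rc_refcount > 1)
		(*nr)++;
	return 0;
}

/*
 * Caller, with @cur already positioned on a refcount btree:
 *
 *	struct xfs_refcount_irec	low = { .rc_startblock = 0 };
 *	struct xfs_refcount_irec	high = { .rc_startblock = -1U };
 *	unsigned long long		nr = 0;
 *
 *	error = xfs_refcount_query_range(cur, &low, &high,
 *			count_shared_helper, &nr);
 */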
@@ -117,7 +117,7 @@ extern int xfs_refcount_has_records(struct xfs_btree_cur *cur,
union xfs_btree_rec;
extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec);
xfs_failaddr_t xfs_refcount_check_irec(struct xfs_btree_cur *cur,
xfs_failaddr_t xfs_refcount_check_irec(struct xfs_perag *pag,
const struct xfs_refcount_irec *irec);
extern int xfs_refcount_insert(struct xfs_btree_cur *cur,
struct xfs_refcount_irec *irec, int *stat);
@@ -127,4 +127,14 @@ extern struct kmem_cache *xfs_refcount_intent_cache;
int __init xfs_refcount_intent_init_cache(void);
void xfs_refcount_intent_destroy_cache(void);

typedef int (*xfs_refcount_query_range_fn)(
struct xfs_btree_cur *cur,
const struct xfs_refcount_irec *rec,
void *priv);

int xfs_refcount_query_range(struct xfs_btree_cur *cur,
const struct xfs_refcount_irec *low_rec,
const struct xfs_refcount_irec *high_rec,
xfs_refcount_query_range_fn fn, void *priv);

#endif /* __XFS_REFCOUNT_H__ */
@@ -112,7 +112,7 @@ xfs_refcountbt_free_block(
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, false);
}

STATIC int
@@ -226,7 +226,18 @@ xfs_refcountbt_verify(

level = be16_to_cpu(block->bb_level);
if (pag && xfs_perag_initialised_agf(pag)) {
if (level >= pag->pagf_refcount_level)
unsigned int maxlevel = pag->pagf_refcount_level;

#ifdef CONFIG_XFS_ONLINE_REPAIR
/*
* Online repair could be rewriting the refcount btree, so
* we'll validate against the larger of either tree while this
* is going on.
*/
maxlevel = max_t(unsigned int, maxlevel,
pag->pagf_repair_refcount_level);
#endif
if (level >= maxlevel)
return __this_address;
} else if (level >= mp->m_refc_maxlevels)
return __this_address;
@@ -2567,7 +2567,7 @@ __xfs_rmap_add(
ri->ri_bmap = *bmap;

xfs_rmap_update_get_group(tp->t_mountp, ri);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type);
}

/* Map an extent into a file. */
@@ -184,7 +184,7 @@ xfs_rtfind_back(
* Calculate first (leftmost) bit number to look at,
* and mask for all the relevant bits in this word.
*/
firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0);
firstbit = max_t(xfs_srtblock_t, bit - len + 1, 0);
mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) <<
firstbit;
/*
@@ -195,7 +195,7 @@ xfs_rtfind_back(
/*
* Different. Mark where we are and return.
*/
i = bit - XFS_RTHIBIT(wdiff);
i = bit - xfs_highbit32(wdiff);
*rtx = start - i + 1;
return 0;
}
@@ -233,7 +233,7 @@ xfs_rtfind_back(
/*
* Different, mark where we are and return.
*/
i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
i += XFS_NBWORD - 1 - xfs_highbit32(wdiff);
*rtx = start - i + 1;
return 0;
}
@@ -272,7 +272,7 @@ xfs_rtfind_back(
/*
* Different, mark where we are and return.
*/
i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
i += XFS_NBWORD - 1 - xfs_highbit32(wdiff);
*rtx = start - i + 1;
return 0;
} else
@@ -338,7 +338,7 @@ xfs_rtfind_forw(
* Calculate last (rightmost) bit number to look at,
* and mask for all the relevant bits in this word.
*/
lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
lastbit = min(bit + len, XFS_NBWORD);
mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
/*
* Calculate the difference between the value there
@@ -348,7 +348,7 @@ xfs_rtfind_forw(
/*
* Different. Mark where we are and return.
*/
i = XFS_RTLOBIT(wdiff) - bit;
i = xfs_lowbit32(wdiff) - bit;
*rtx = start + i - 1;
return 0;
}
@@ -386,7 +386,7 @@ xfs_rtfind_forw(
/*
* Different, mark where we are and return.
*/
i += XFS_RTLOBIT(wdiff);
i += xfs_lowbit32(wdiff);
*rtx = start + i - 1;
return 0;
}
@@ -423,7 +423,7 @@ xfs_rtfind_forw(
/*
* Different, mark where we are and return.
*/
i += XFS_RTLOBIT(wdiff);
i += xfs_lowbit32(wdiff);
*rtx = start + i - 1;
return 0;
} else
@@ -452,63 +452,9 @@ xfs_trans_log_rtsummary(
}

/*
* Read and/or modify the summary information for a given extent size,
* bitmap block combination.
* Keeps track of a current summary block, so we don't keep reading
* it from the buffer cache.
*
* Summary information is returned in *sum if specified.
* If no delta is specified, returns summary only.
* Modify the summary information for a given extent size, bitmap block
* combination.
*/
int
xfs_rtmodify_summary_int(
struct xfs_rtalloc_args *args,
int log, /* log2 of extent size */
xfs_fileoff_t bbno, /* bitmap block number */
int delta, /* change to make to summary info */
xfs_suminfo_t *sum) /* out: summary info for this block */
{
struct xfs_mount *mp = args->mp;
int error;
xfs_fileoff_t sb; /* summary fsblock */
xfs_rtsumoff_t so; /* index into the summary file */
unsigned int infoword;

/*
* Compute entry number in the summary file.
*/
so = xfs_rtsumoffs(mp, log, bbno);
/*
* Compute the block number in the summary file.
*/
sb = xfs_rtsumoffs_to_block(mp, so);

error = xfs_rtsummary_read_buf(args, sb);
if (error)
return error;

/*
* Point to the summary information, modify/log it, and/or copy it out.
*/
infoword = xfs_rtsumoffs_to_infoword(mp, so);
if (delta) {
xfs_suminfo_t val = xfs_suminfo_add(args, infoword, delta);

if (mp->m_rsum_cache) {
if (val == 0 && log + 1 == mp->m_rsum_cache[bbno])
mp->m_rsum_cache[bbno] = log;
if (val != 0 && log >= mp->m_rsum_cache[bbno])
mp->m_rsum_cache[bbno] = log + 1;
}
xfs_trans_log_rtsummary(args, infoword);
if (sum)
*sum = val;
} else if (sum) {
*sum = xfs_suminfo_get(args, infoword);
}
return 0;
}

int
xfs_rtmodify_summary(
struct xfs_rtalloc_args *args,
@@ -516,7 +462,49 @@ xfs_rtmodify_summary(
xfs_fileoff_t bbno, /* bitmap block number */
int delta) /* in/out: summary block number */
{
return xfs_rtmodify_summary_int(args, log, bbno, delta, NULL);
struct xfs_mount *mp = args->mp;
xfs_rtsumoff_t so = xfs_rtsumoffs(mp, log, bbno);
unsigned int infoword;
xfs_suminfo_t val;
int error;

error = xfs_rtsummary_read_buf(args, xfs_rtsumoffs_to_block(mp, so));
if (error)
return error;

infoword = xfs_rtsumoffs_to_infoword(mp, so);
val = xfs_suminfo_add(args, infoword, delta);

if (mp->m_rsum_cache) {
if (val == 0 && log + 1 == mp->m_rsum_cache[bbno])
mp->m_rsum_cache[bbno] = log;
if (val != 0 && log >= mp->m_rsum_cache[bbno])
mp->m_rsum_cache[bbno] = log + 1;
}

xfs_trans_log_rtsummary(args, infoword);
return 0;
}

/*
* Read and return the summary information for a given extent size, bitmap block
* combination.
*/
int
xfs_rtget_summary(
struct xfs_rtalloc_args *args,
int log, /* log2 of extent size */
xfs_fileoff_t bbno, /* bitmap block number */
xfs_suminfo_t *sum) /* out: summary info for this block */
{
struct xfs_mount *mp = args->mp;
xfs_rtsumoff_t so = xfs_rtsumoffs(mp, log, bbno);
int error;

error = xfs_rtsummary_read_buf(args, xfs_rtsumoffs_to_block(mp, so));
if (!error)
*sum = xfs_suminfo_get(args, xfs_rtsumoffs_to_infoword(mp, so));
return error;
}

/* Log rtbitmap block from the word @from to the byte before @next. */
@@ -585,7 +573,7 @@ xfs_rtmodify_range(
/*
* Compute first bit not changed and mask of relevant bits.
*/
lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
lastbit = min(bit + len, XFS_NBWORD);
mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
/*
* Set/clear the active bits.
@@ -720,7 +708,7 @@ xfs_rtfree_range(
*/
if (preblock < start) {
error = xfs_rtmodify_summary(args,
XFS_RTBLOCKLOG(start - preblock),
xfs_highbit64(start - preblock),
xfs_rtx_to_rbmblock(mp, preblock), -1);
if (error) {
return error;
@@ -732,7 +720,7 @@ xfs_rtfree_range(
*/
if (postblock > end) {
error = xfs_rtmodify_summary(args,
XFS_RTBLOCKLOG(postblock - end),
xfs_highbit64(postblock - end),
xfs_rtx_to_rbmblock(mp, end + 1), -1);
if (error) {
return error;
@@ -743,7 +731,7 @@ xfs_rtfree_range(
* (new) free extent.
*/
return xfs_rtmodify_summary(args,
XFS_RTBLOCKLOG(postblock + 1 - preblock),
xfs_highbit64(postblock + 1 - preblock),
xfs_rtx_to_rbmblock(mp, preblock), 1);
}

@@ -799,7 +787,7 @@ xfs_rtcheck_range(
/*
* Compute first bit not examined.
*/
lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
lastbit = min(bit + len, XFS_NBWORD);
/*
* Mask of relevant bits.
*/
@@ -812,7 +800,7 @@ xfs_rtcheck_range(
/*
* Different, compute first wrong bit and return.
*/
i = XFS_RTLOBIT(wdiff) - bit;
i = xfs_lowbit32(wdiff) - bit;
*new = start + i;
*stat = 0;
return 0;
@@ -851,7 +839,7 @@ xfs_rtcheck_range(
/*
* Different, compute first wrong bit and return.
*/
i += XFS_RTLOBIT(wdiff);
i += xfs_lowbit32(wdiff);
*new = start + i;
*stat = 0;
return 0;
@@ -889,7 +877,7 @@ xfs_rtcheck_range(
/*
* Different, compute first wrong bit and return.
*/
i += XFS_RTLOBIT(wdiff);
i += xfs_lowbit32(wdiff);
*new = start + i;
*stat = 0;
return 0;
@@ -1130,6 +1118,20 @@ xfs_rtbitmap_blockcount(
return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize);
}

/*
* Compute the maximum level number of the realtime summary file, as defined by
* mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct
* use of rt volumes with more than 2^32 extents.
*/
uint8_t
xfs_compute_rextslog(
xfs_rtbxlen_t rtextents)
{
if (!rtextents)
return 0;
return xfs_highbit64(rtextents);
}

/*
* Compute the number of rtbitmap words needed to populate every block of a
* bitmap that is large enough to track the given number of rt extents.
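xfs_compute_rextslog() exists because sb_rextents is 64 bits wide, so the old open-coded highbit32() silently truncated on rt volumes with more than 2^32 extents. A standalone illustration, using a gcc builtin in place of the kernel's xfs_highbit64():

#include <stdint.h>

/* Standalone model of xfs_compute_rextslog(). */
static uint8_t
compute_rextslog(uint64_t rtextents)
{
	if (!rtextents)
		return 0;
	return (uint8_t)(63 - __builtin_clzll(rtextents)); /* xfs_highbit64 */
}

/*
 * With rtextents = 1ULL << 33:
 *   compute_rextslog()        -> 33 (correct)
 *   highbit32 of the low word -> nonsense, since (uint32_t)rtextents == 0
 */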
@@ -321,8 +321,8 @@ int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock);
int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxlen_t len, int val);
int xfs_rtmodify_summary_int(struct xfs_rtalloc_args *args, int log,
xfs_fileoff_t bbno, int delta, xfs_suminfo_t *sum);
int xfs_rtget_summary(struct xfs_rtalloc_args *args, int log,
xfs_fileoff_t bbno, xfs_suminfo_t *sum);
int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log,
xfs_fileoff_t bbno, int delta);
int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
@@ -351,6 +351,20 @@ xfs_rtfree_extent(
int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
xfs_filblks_t rtlen);

uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);

/* Do we support an rt volume having this number of rtextents? */
static inline bool
xfs_validate_rtextents(
xfs_rtbxlen_t rtextents)
{
/* No runt rt volumes */
if (rtextents == 0)
return false;

return true;
}

xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t
rtextents);
unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp,
@@ -369,6 +383,8 @@ unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp,
# define xfs_rtsummary_read_buf(a,b) (-ENOSYS)
# define xfs_rtbuf_cache_relse(a) (0)
# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
# define xfs_compute_rextslog(rtx) (0)
# define xfs_validate_rtextents(rtx) (false)
static inline xfs_filblks_t
xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
{
@@ -25,6 +25,7 @@
#include "xfs_da_format.h"
#include "xfs_health.h"
#include "xfs_ag.h"
#include "xfs_rtbitmap.h"

/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -508,8 +509,9 @@ xfs_validate_sb_common(
rbmblocks = howmany_64(sbp->sb_rextents,
NBBY * sbp->sb_blocksize);

if (sbp->sb_rextents != rexts ||
sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) ||
if (!xfs_validate_rtextents(rexts) ||
sbp->sb_rextents != rexts ||
sbp->sb_rextslog != xfs_compute_rextslog(rexts) ||
sbp->sb_rbmblocks != rbmblocks) {
xfs_notice(mp,
"realtime geometry sanity check failed");
@@ -139,7 +139,7 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
uint32_t size, struct xfs_buf *bp);
void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_inode *ip, struct xfs_ifork *ifp);
xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
xfs_failaddr_t xfs_symlink_shortform_verify(void *sfp, int64_t size);

/* Computed inode geometry for the filesystem. */
struct xfs_ino_geometry {
@@ -175,7 +175,7 @@ xfs_symlink_local_to_remote(

if (!xfs_has_crc(mp)) {
bp->b_ops = NULL;
memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
memcpy(bp->b_addr, ifp->if_data, ifp->if_bytes);
xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
return;
}
@@ -191,7 +191,7 @@ xfs_symlink_local_to_remote(

buf = bp->b_addr;
buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
memcpy(buf, ifp->if_data, ifp->if_bytes);
xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
ifp->if_bytes - 1);
}
@@ -202,15 +202,11 @@ xfs_symlink_local_to_remote(
*/
xfs_failaddr_t
xfs_symlink_shortform_verify(
struct xfs_inode *ip)
void *sfp,
int64_t size)
{
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
char *sfp = (char *)ifp->if_u1.if_data;
int size = ifp->if_bytes;
char *endp = sfp + size;

ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);

/*
* Zero length symlinks should never occur in memory as they are
* never allowed to exist on disk.
@@ -51,7 +51,6 @@ typedef void * xfs_failaddr_t;
#define NULLRFSBLOCK ((xfs_rfsblock_t)-1)
#define NULLRTBLOCK ((xfs_rtblock_t)-1)
#define NULLFILEOFF ((xfs_fileoff_t)-1)
#define NULLRTEXTNO ((xfs_rtxnum_t)-1)

#define NULLAGBLOCK ((xfs_agblock_t)-1)
#define NULLAGNUMBER ((xfs_agnumber_t)-1)
@@ -208,6 +207,13 @@ enum xfs_ag_resv_type {
XFS_AG_RESV_AGFL,
XFS_AG_RESV_METADATA,
XFS_AG_RESV_RMAPBT,

/*
* Don't increase fdblocks when freeing extent. This is a pony for
* the bnobt repair functions to re-free the free space without
* altering fdblocks. If you think you need this you're wrong.
*/
XFS_AG_RESV_IGNORE,
};

/* Results of scanning a btree keyspace to check occupancy. */
fs/xfs/scrub/agb_bitmap.c (new file, 103 lines)
@@ -0,0 +1,103 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "bitmap.h"
#include "scrub/agb_bitmap.h"

/*
* Record all btree blocks seen while iterating all records of a btree.
*
* We know that the btree query_all function starts at the left edge and walks
* towards the right edge of the tree. Therefore, we know that we can walk up
* the btree cursor towards the root; if the pointer for a given level points
* to the first record/key in that block, we haven't seen this block before;
* and therefore we need to remember that we saw this block in the btree.
*
* So if our btree is:
*
* 4
* / | \
* 1 2 3
*
* Pretend for this example that each leaf block has 100 btree records. For
* the first btree record, we'll observe that bc_levels[0].ptr == 1, so we
* record that we saw block 1. Then we observe that bc_levels[1].ptr == 1, so
* we record block 4. The list is [1, 4].
*
* For the second btree record, we see that bc_levels[0].ptr == 2, so we exit
* the loop. The list remains [1, 4].
*
* For the 101st btree record, we've moved onto leaf block 2. Now
* bc_levels[0].ptr == 1 again, so we record that we saw block 2. We see that
* bc_levels[1].ptr == 2, so we exit the loop. The list is now [1, 4, 2].
*
* For the 102nd record, bc_levels[0].ptr == 2, so we continue.
*
* For the 201st record, we've moved on to leaf block 3.
* bc_levels[0].ptr == 1, so we add 3 to the list. Now it is [1, 4, 2, 3].
*
* For the 300th record we just exit, with the list being [1, 4, 2, 3].
*/

/* Mark a btree block to the agblock bitmap. */
STATIC int
xagb_bitmap_visit_btblock(
struct xfs_btree_cur *cur,
int level,
void *priv)
{
struct xagb_bitmap *bitmap = priv;
struct xfs_buf *bp;
xfs_fsblock_t fsbno;
xfs_agblock_t agbno;

xfs_btree_get_block(cur, level, &bp);
if (!bp)
return 0;

fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);

return xagb_bitmap_set(bitmap, agbno, 1);
}

/* Mark all (per-AG) btree blocks in the agblock bitmap. */
int
xagb_bitmap_set_btblocks(
struct xagb_bitmap *bitmap,
struct xfs_btree_cur *cur)
{
return xfs_btree_visit_blocks(cur, xagb_bitmap_visit_btblock,
XFS_BTREE_VISIT_ALL, bitmap);
}

/*
* Record all the buffers pointed to by the btree cursor. Callers already
* engaged in a btree walk should call this function to capture the list of
* blocks going from the leaf towards the root.
*/
int
xagb_bitmap_set_btcur_path(
struct xagb_bitmap *bitmap,
struct xfs_btree_cur *cur)
{
int i;
int error;

for (i = 0; i < cur->bc_nlevels && cur->bc_levels[i].ptr == 1; i++) {
error = xagb_bitmap_visit_btblock(cur, i, bitmap);
if (error)
return error;
}

return 0;
}
fs/xfs/scrub/agb_bitmap.h (new file, 68 lines)
@@ -0,0 +1,68 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_AGB_BITMAP_H__
#define __XFS_SCRUB_AGB_BITMAP_H__

/* Bitmaps, but for type-checked for xfs_agblock_t */

struct xagb_bitmap {
struct xbitmap32 agbitmap;
};

static inline void xagb_bitmap_init(struct xagb_bitmap *bitmap)
{
xbitmap32_init(&bitmap->agbitmap);
}

static inline void xagb_bitmap_destroy(struct xagb_bitmap *bitmap)
{
xbitmap32_destroy(&bitmap->agbitmap);
}

static inline int xagb_bitmap_clear(struct xagb_bitmap *bitmap,
xfs_agblock_t start, xfs_extlen_t len)
{
return xbitmap32_clear(&bitmap->agbitmap, start, len);
}
static inline int xagb_bitmap_set(struct xagb_bitmap *bitmap,
xfs_agblock_t start, xfs_extlen_t len)
{
return xbitmap32_set(&bitmap->agbitmap, start, len);
}

static inline bool xagb_bitmap_test(struct xagb_bitmap *bitmap,
xfs_agblock_t start, xfs_extlen_t *len)
{
return xbitmap32_test(&bitmap->agbitmap, start, len);
}

static inline int xagb_bitmap_disunion(struct xagb_bitmap *bitmap,
struct xagb_bitmap *sub)
{
return xbitmap32_disunion(&bitmap->agbitmap, &sub->agbitmap);
}

static inline uint32_t xagb_bitmap_hweight(struct xagb_bitmap *bitmap)
{
return xbitmap32_hweight(&bitmap->agbitmap);
}
static inline bool xagb_bitmap_empty(struct xagb_bitmap *bitmap)
{
return xbitmap32_empty(&bitmap->agbitmap);
}

static inline int xagb_bitmap_walk(struct xagb_bitmap *bitmap,
xbitmap32_walk_fn fn, void *priv)
{
return xbitmap32_walk(&bitmap->agbitmap, fn, priv);
}

int xagb_bitmap_set_btblocks(struct xagb_bitmap *bitmap,
struct xfs_btree_cur *cur);
int xagb_bitmap_set_btcur_path(struct xagb_bitmap *bitmap,
struct xfs_btree_cur *cur);

#endif /* __XFS_SCRUB_AGB_BITMAP_H__ */
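The xagb_bitmap wrappers above give scrub a type-checked xfs_agblock_t view over the generic 32-bit bitmap. A hedged sketch of how a caller might combine them to collect and count the blocks of a per-AG btree (count_btree_blocks is illustrative, not a function from this patch; it only uses the API declared in the header above):

/*
 * Illustrative caller: record every block of a per-AG btree, then
 * report how many blocks it occupies. @cur is owned by the caller.
 */
static int
count_btree_blocks(
	struct xfs_btree_cur	*cur,
	uint32_t		*nr_blocks)
{
	struct xagb_bitmap	btblocks;
	int			error;

	xagb_bitmap_init(&btblocks);
	error = xagb_bitmap_set_btblocks(&btblocks, cur);
	if (!error)
		*nr_blocks = xagb_bitmap_hweight(&btblocks);
	xagb_bitmap_destroy(&btblocks);
	return error;
}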
@@ -26,6 +26,7 @@
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/reap.h"

/* Superblock */
@@ -72,7 +73,7 @@ xrep_superblock(
/* Write this to disk. */
xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
return error;
return 0;
}

/* AGF */
@@ -341,7 +342,7 @@ xrep_agf_commit_new(
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);

return 0;
return xrep_roll_ag_trans(sc);
}

/* Repair the AGF. v5 filesystems only. */
@@ -494,12 +495,11 @@ xrep_agfl_walk_rmap(
/* Strike out the blocks that are cross-linked according to the rmapbt. */
STATIC int
xrep_agfl_check_extent(
uint64_t start,
uint64_t len,
uint32_t agbno,
uint32_t len,
void *priv)
{
struct xrep_agfl *ra = priv;
xfs_agblock_t agbno = start;
xfs_agblock_t last_agbno = agbno + len - 1;
int error;

@@ -647,8 +647,8 @@ struct xrep_agfl_fill {
/* Fill the AGFL with whatever blocks are in this extent. */
static int
xrep_agfl_fill(
uint64_t start,
uint64_t len,
uint32_t start,
uint32_t len,
void *priv)
{
struct xrep_agfl_fill *af = priv;
@@ -789,6 +789,9 @@ xrep_agfl(
/* Dump any AGFL overflow. */
error = xrep_reap_agblocks(sc, &agfl_extents, &XFS_RMAP_OINFO_AG,
XFS_AG_RESV_AGFL);
if (error)
goto err;

err:
xagb_bitmap_destroy(&agfl_extents);
return error;
@@ -962,7 +965,7 @@ xrep_agi_commit_new(
pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);

return 0;
return xrep_roll_ag_trans(sc);
}

/* Repair the AGI. */
@@ -9,13 +9,16 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
 #include "xfs_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_rmap.h"
+#include "xfs_ag.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/btree.h"
-#include "xfs_ag.h"
+#include "scrub/repair.h"
 
 /*
  * Set us up to scrub free space btrees.
@@ -24,10 +27,19 @@ int
 xchk_setup_ag_allocbt(
 	struct xfs_scrub	*sc)
 {
+	int			error;
+
 	if (xchk_need_intent_drain(sc))
 		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
 
-	return xchk_setup_ag_btree(sc, false);
+	error = xchk_setup_ag_btree(sc, false);
+	if (error)
+		return error;
+
+	if (xchk_could_repair(sc))
+		return xrep_setup_ag_allocbt(sc);
+
+	return 0;
 }
 
 /* Free space btree scrubber. */
@@ -127,7 +139,7 @@ xchk_allocbt_rec(
 	struct xchk_alloc	*ca = bs->private;
 
 	xfs_alloc_btrec_to_irec(rec, &irec);
-	if (xfs_alloc_check_irec(bs->cur, &irec) != NULL) {
+	if (xfs_alloc_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
 		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 		return 0;
 	}
@@ -138,33 +150,29 @@ xchk_allocbt_rec(
 	return 0;
 }
 
-/* Scrub the freespace btrees for some AG. */
-STATIC int
+/* Scrub one of the freespace btrees for some AG. */
+int
 xchk_allocbt(
-	struct xfs_scrub	*sc,
-	xfs_btnum_t		which)
+	struct xfs_scrub	*sc)
 {
 	struct xchk_alloc	ca = { };
 	struct xfs_btree_cur	*cur;
 
-	cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
+	switch (sc->sm->sm_type) {
+	case XFS_SCRUB_TYPE_BNOBT:
+		cur = sc->sa.bno_cur;
+		break;
+	case XFS_SCRUB_TYPE_CNTBT:
+		cur = sc->sa.cnt_cur;
+		break;
+	default:
+		ASSERT(0);
+		return -EIO;
+	}
+
 	return xchk_btree(sc, cur, xchk_allocbt_rec, &XFS_RMAP_OINFO_AG, &ca);
 }
 
-int
-xchk_bnobt(
-	struct xfs_scrub	*sc)
-{
-	return xchk_allocbt(sc, XFS_BTNUM_BNO);
-}
-
-int
-xchk_cntbt(
-	struct xfs_scrub	*sc)
-{
-	return xchk_allocbt(sc, XFS_BTNUM_CNT);
-}
-
 /* xref check that the extent is not free */
 void
 xchk_xref_is_used_space(
fs/xfs/scrub/alloc_repair.c (new file, 934 lines)
@@ -0,0 +1,934 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_inode.h"
#include "xfs_refcount.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
#include "xfs_bmap.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Free Space Btree Repair
 * =======================
 *
 * The reverse mappings are supposed to record all space usage for the entire
 * AG.  Therefore, we can recreate the free extent records in an AG by looking
 * for gaps in the physical extents recorded in the rmapbt.  These records are
 * staged in @free_records.  Identifying the gaps is more difficult on a
 * reflink filesystem because rmap records are allowed to overlap.
 *
 * Because the final step of building a new index is to free the space used by
 * the old index, repair needs to find that space.  Unfortunately, all
 * structures that live in the free space (bnobt, cntbt, rmapbt, agfl) share
 * the same rmapbt owner code (OWN_AG), so this is not straightforward.
 *
 * The scan of the reverse mapping information records the space used by OWN_AG
 * in @old_allocbt_blocks, which (at this stage) is somewhat misnamed.  While
 * walking the rmapbt records, we create a second bitmap @not_allocbt_blocks to
 * record all visited rmap btree blocks and all blocks owned by the AGFL.
 *
 * After that is where the definition of old_allocbt_blocks shifts.  This
 * expression identifies possible former bnobt/cntbt blocks:
 *
 *	(OWN_AG blocks) & ~(rmapbt blocks | agfl blocks);
 *
 * Substituting from above definitions, that becomes:
 *
 *	old_allocbt_blocks & ~not_allocbt_blocks
 *
 * The OWN_AG bitmap itself isn't needed after this point, so what we really do
 * instead is:
 *
 *	old_allocbt_blocks &= ~not_allocbt_blocks;
 *
 * After this point, @old_allocbt_blocks is a bitmap of alleged former
 * bnobt/cntbt blocks.  The xagb_bitmap_disunion operation modifies its first
 * parameter in place to avoid copying records around.
 *
 * Next, some of the space described by @free_records is diverted to the newbt
 * reservation and used to format new btree blocks.  The remaining records are
 * written to the new btree indices.  We reconstruct both bnobt and cntbt at
 * the same time since we've already done all the work.
 *
 * We use the prefix 'xrep_abt' here because we regenerate both free space
 * allocation btrees at the same time.
 */

struct xrep_abt {
	/* Blocks owned by the rmapbt or the agfl. */
	struct xagb_bitmap	not_allocbt_blocks;

	/* All OWN_AG blocks. */
	struct xagb_bitmap	old_allocbt_blocks;

	/*
	 * New bnobt information.  All btree block reservations are added to
	 * the reservation list in new_bnobt.
	 */
	struct xrep_newbt	new_bnobt;

	/* new cntbt information */
	struct xrep_newbt	new_cntbt;

	/* Free space extents. */
	struct xfarray		*free_records;

	struct xfs_scrub	*sc;

	/* Number of non-null records in @free_records. */
	uint64_t		nr_real_records;

	/* get_records()'s position in the free space record array. */
	xfarray_idx_t		array_cur;

	/*
	 * Next block we anticipate seeing in the rmap records.  If the next
	 * rmap record is greater than next_agbno, we have found unused space.
	 */
	xfs_agblock_t		next_agbno;

	/* Number of free blocks in this AG. */
	xfs_agblock_t		nr_blocks;

	/* Longest free extent we found in the AG. */
	xfs_agblock_t		longest;
};
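
Editorial aside, not part of the merge: the set algebra in the long comment above reduces to a single call once both bitmaps are populated. A minimal sketch follows, assuming only the xagb_bitmap API and the struct xrep_abt fields defined here; the real call sits in xrep_abt_find_freespace() below.

/* Illustration only: the "old_allocbt_blocks &= ~not_allocbt_blocks" step. */
static int xrep_abt_isolate_old_btblocks(struct xrep_abt *ra)
{
	/*
	 * Subtract the rmapbt/AGFL blocks from the OWN_AG blocks in place,
	 * leaving only the alleged former bnobt/cntbt blocks behind.
	 */
	return xagb_bitmap_disunion(&ra->old_allocbt_blocks,
			&ra->not_allocbt_blocks);
}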

/* Set up to repair AG free space btrees. */
int
xrep_setup_ag_allocbt(
	struct xfs_scrub	*sc)
{
	unsigned int		busy_gen;

	/*
	 * Make sure the busy extent list is clear because we can't put extents
	 * on there twice.
	 */
	busy_gen = READ_ONCE(sc->sa.pag->pagb_gen);
	if (xfs_extent_busy_list_empty(sc->sa.pag))
		return 0;

	return xfs_extent_busy_flush(sc->tp, sc->sa.pag, busy_gen, 0);
}

/* Check for any obvious conflicts in the free extent. */
STATIC int
xrep_abt_check_free_ext(
	struct xfs_scrub	*sc,
	const struct xfs_alloc_rec_incore *rec)
{
	enum xbtree_recpacking	outcome;
	int			error;

	if (xfs_alloc_check_irec(sc->sa.pag, rec) != NULL)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->ar_startblock, rec->ar_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be shared or CoW staging. */
	if (sc->sa.refc_cur) {
		error = xfs_refcount_has_records(sc->sa.refc_cur,
				XFS_REFC_DOMAIN_SHARED, rec->ar_startblock,
				rec->ar_blockcount, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;

		error = xfs_refcount_has_records(sc->sa.refc_cur,
				XFS_REFC_DOMAIN_COW, rec->ar_startblock,
				rec->ar_blockcount, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;
	}

	return 0;
}

/*
 * Stash a free space record for all the space since the last bno we found
 * all the way up to @end.
 */
static int
xrep_abt_stash(
	struct xrep_abt		*ra,
	xfs_agblock_t		end)
{
	struct xfs_alloc_rec_incore arec = {
		.ar_startblock	= ra->next_agbno,
		.ar_blockcount	= end - ra->next_agbno,
	};
	struct xfs_scrub	*sc = ra->sc;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	error = xrep_abt_check_free_ext(ra->sc, &arec);
	if (error)
		return error;

	trace_xrep_abt_found(sc->mp, sc->sa.pag->pag_agno, &arec);

	error = xfarray_append(ra->free_records, &arec);
	if (error)
		return error;

	ra->nr_blocks += arec.ar_blockcount;
	return 0;
}

/* Record extents that aren't in use from gaps in the rmap records. */
STATIC int
xrep_abt_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_abt		*ra = priv;
	int			error;

	/* Record all the OWN_AG blocks... */
	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
		error = xagb_bitmap_set(&ra->old_allocbt_blocks,
				rec->rm_startblock, rec->rm_blockcount);
		if (error)
			return error;
	}

	/* ...and all the rmapbt blocks... */
	error = xagb_bitmap_set_btcur_path(&ra->not_allocbt_blocks, cur);
	if (error)
		return error;

	/* ...and all the free space. */
	if (rec->rm_startblock > ra->next_agbno) {
		error = xrep_abt_stash(ra, rec->rm_startblock);
		if (error)
			return error;
	}

	/*
	 * rmap records can overlap on reflink filesystems, so project
	 * next_agbno as far out into the AG space as we currently know about.
	 */
	ra->next_agbno = max_t(xfs_agblock_t, ra->next_agbno,
			rec->rm_startblock + rec->rm_blockcount);
	return 0;
}

/* Collect an AGFL block for the not-to-release list. */
static int
xrep_abt_walk_agfl(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{
	struct xrep_abt		*ra = priv;

	return xagb_bitmap_set(&ra->not_allocbt_blocks, agbno, 1);
}

/*
 * Compare two free space extents by block number.  We want to sort in order of
 * increasing block number.
 */
static int
xrep_bnobt_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_alloc_rec_incore *ap = a;
	const struct xfs_alloc_rec_incore *bp = b;

	if (ap->ar_startblock > bp->ar_startblock)
		return 1;
	else if (ap->ar_startblock < bp->ar_startblock)
		return -1;
	return 0;
}

/*
 * Re-sort the free extents by block number so that we can put the records into
 * the bnobt in the correct order.  Make sure the records do not overlap in
 * physical space.
 */
STATIC int
xrep_bnobt_sort_records(
	struct xrep_abt		*ra)
{
	struct xfs_alloc_rec_incore arec;
	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
	xfs_agblock_t		next_agbno = 0;
	int			error;

	error = xfarray_sort(ra->free_records, xrep_bnobt_extent_cmp, 0);
	if (error)
		return error;

	while ((error = xfarray_iter(ra->free_records, &cur, &arec)) == 1) {
		if (arec.ar_startblock < next_agbno)
			return -EFSCORRUPTED;

		next_agbno = arec.ar_startblock + arec.ar_blockcount;
	}

	return error;
}

/*
 * Compare two free space extents by length and then block number.  We want
 * to sort first in order of increasing length and then in order of increasing
 * block number.
 */
static int
xrep_cntbt_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_alloc_rec_incore *ap = a;
	const struct xfs_alloc_rec_incore *bp = b;

	if (ap->ar_blockcount > bp->ar_blockcount)
		return 1;
	else if (ap->ar_blockcount < bp->ar_blockcount)
		return -1;
	return xrep_bnobt_extent_cmp(a, b);
}

/*
 * Sort the free extents by length so that we can put the records into the
 * cntbt in the correct order.  Don't let userspace kill us if we're resorting
 * after allocating btree blocks.
 */
STATIC int
xrep_cntbt_sort_records(
	struct xrep_abt		*ra,
	bool			is_resort)
{
	return xfarray_sort(ra->free_records, xrep_cntbt_extent_cmp,
			is_resort ? 0 : XFARRAY_SORT_KILLABLE);
}

/*
 * Iterate all reverse mappings to find (1) the gaps between rmap records (all
 * unowned space), (2) the OWN_AG extents (which encompass the free space
 * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
 * blocks.  The free space is (1) + (2) - (3) - (4).
 */
STATIC int
xrep_abt_find_freespace(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_buf		*agfl_bp;
	xfs_agblock_t		agend;
	int			error;

	xagb_bitmap_init(&ra->not_allocbt_blocks);

	xrep_ag_btcur_init(sc, &sc->sa);

	/*
	 * Iterate all the reverse mappings to find gaps in the physical
	 * mappings, all the OWN_AG blocks, and all the rmapbt extents.
	 */
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_abt_walk_rmap, ra);
	if (error)
		goto err;

	/* Insert a record for space between the last rmap and EOAG. */
	agend = be32_to_cpu(agf->agf_length);
	if (ra->next_agbno < agend) {
		error = xrep_abt_stash(ra, agend);
		if (error)
			goto err;
	}

	/* Collect all the AGFL blocks. */
	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
	if (error)
		goto err;

	error = xfs_agfl_walk(mp, agf, agfl_bp, xrep_abt_walk_agfl, ra);
	if (error)
		goto err_agfl;

	/* Compute the old bnobt/cntbt blocks. */
	error = xagb_bitmap_disunion(&ra->old_allocbt_blocks,
			&ra->not_allocbt_blocks);
	if (error)
		goto err_agfl;

	ra->nr_real_records = xfarray_length(ra->free_records);
err_agfl:
	xfs_trans_brelse(sc->tp, agfl_bp);
err:
	xchk_ag_btcur_free(&sc->sa);
	xagb_bitmap_destroy(&ra->not_allocbt_blocks);
	return error;
}

/*
 * We're going to use the observed free space records to reserve blocks for the
 * new free space btrees, so we play an iterative game where we try to converge
 * on the number of blocks we need:
 *
 * 1. Estimate how many blocks we'll need to store the records.
 * 2. If the first free record has more blocks than we need, we're done.
 *    We will have to re-sort the records prior to building the cntbt.
 * 3. If that record has exactly the number of blocks we need, null out the
 *    record.  We're done.
 * 4. Otherwise, we still need more blocks.  Null out the record, subtract its
 *    length from the number of blocks we need, and go back to step 1.
 *
 * Fortunately, we don't have to do any transaction work to play this game, so
 * we don't have to tear down the staging cursors.
 */
STATIC int
xrep_abt_reserve_space(
	struct xrep_abt		*ra,
	struct xfs_btree_cur	*bno_cur,
	struct xfs_btree_cur	*cnt_cur,
	bool			*needs_resort)
{
	struct xfs_scrub	*sc = ra->sc;
	xfarray_idx_t		record_nr;
	unsigned int		allocated = 0;
	int			error = 0;

	record_nr = xfarray_length(ra->free_records) - 1;
	do {
		struct xfs_alloc_rec_incore arec;
		uint64_t		required;
		unsigned int		desired;
		unsigned int		len;

		/* Compute how many blocks we'll need. */
		error = xfs_btree_bload_compute_geometry(cnt_cur,
				&ra->new_cntbt.bload, ra->nr_real_records);
		if (error)
			break;

		error = xfs_btree_bload_compute_geometry(bno_cur,
				&ra->new_bnobt.bload, ra->nr_real_records);
		if (error)
			break;

		/* How many btree blocks do we need to store all records? */
		required = ra->new_bnobt.bload.nr_blocks +
			   ra->new_cntbt.bload.nr_blocks;
		ASSERT(required < INT_MAX);

		/* If we've reserved enough blocks, we're done. */
		if (allocated >= required)
			break;

		desired = required - allocated;

		/* We need space but there's none left; bye! */
		if (ra->nr_real_records == 0) {
			error = -ENOSPC;
			break;
		}

		/* Grab the first record from the list. */
		error = xfarray_load(ra->free_records, record_nr, &arec);
		if (error)
			break;

		ASSERT(arec.ar_blockcount <= UINT_MAX);
		len = min_t(unsigned int, arec.ar_blockcount, desired);

		trace_xrep_newbt_alloc_ag_blocks(sc->mp, sc->sa.pag->pag_agno,
				arec.ar_startblock, len, XFS_RMAP_OWN_AG);

		error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
				arec.ar_startblock, len);
		if (error)
			break;
		allocated += len;
		ra->nr_blocks -= len;

		if (arec.ar_blockcount > desired) {
			/*
			 * Record has more space than we need.  The number of
			 * free records doesn't change, so shrink the free
			 * record, inform the caller that the records are no
			 * longer sorted by length, and exit.
			 */
			arec.ar_startblock += desired;
			arec.ar_blockcount -= desired;
			error = xfarray_store(ra->free_records, record_nr,
					&arec);
			if (error)
				break;

			*needs_resort = true;
			return 0;
		}

		/*
		 * We're going to use up the entire record, so unset it and
		 * move on to the next one.  This changes the number of free
		 * records (but doesn't break the sorting order), so we must
		 * go around the loop once more to re-run _bload_init.
		 */
		error = xfarray_unset(ra->free_records, record_nr);
		if (error)
			break;
		ra->nr_real_records--;
		record_nr--;
	} while (1);

	return error;
}
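
Editorial aside, not part of the merge: the convergence game described before xrep_abt_reserve_space() is easier to see with plain integers. The sketch below strips out transactions, btree geometry, and the partial-record/resort case; needed() is a stand-in for xfs_btree_bload_compute_geometry() and assumes four records fit per block, and free_ext[] is sorted by increasing length just like ra->free_records.

/* Illustration only: the block-reservation convergence loop, simplified. */
static unsigned int needed(unsigned int nrecs)
{
	return nrecs / 4 + 1;	/* pretend each btree block holds 4 records */
}

static int reserve_blocks(unsigned int *free_ext, unsigned int nr)
{
	unsigned int	allocated = 0;

	while (nr > 0) {
		/* Step 1: estimate blocks for both new btrees. */
		unsigned int	required = 2 * needed(nr);

		/* Steps 2-3: once enough is reserved, we have converged. */
		if (allocated >= required)
			return 0;

		/* Step 4: consume the largest remaining free extent. */
		allocated += free_ext[--nr];
	}
	return -ENOSPC;		/* no free space left to reserve */
}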

STATIC int
xrep_abt_dispose_one(
	struct xrep_abt		*ra,
	struct xrep_newbt_resv	*resv)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_agblock_t		free_agbno = resv->agbno + resv->used;
	xfs_extlen_t		free_aglen = resv->len - resv->used;
	int			error;

	ASSERT(pag == resv->pag);

	/* Add a deferred rmap for each extent we used. */
	if (resv->used > 0)
		xfs_rmap_alloc_extent(sc->tp, pag->pag_agno, resv->agbno,
				resv->used, XFS_RMAP_OWN_AG);

	/*
	 * For each reserved btree block we didn't use, add it to the free
	 * space btree.  We didn't touch fdblocks when we reserved them, so
	 * we don't touch it now.
	 */
	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, ra->new_bnobt.oinfo.oi_owner);

	error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
			&ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
	if (error)
		return error;

	return xrep_defer_finish(sc);
}

/*
 * Deal with all the space we reserved.  Blocks that were allocated for the
 * free space btrees need to have a (deferred) rmap added for the OWN_AG
 * allocation, and blocks that didn't get used can be freed via the usual
 * (deferred) means.
 */
STATIC void
xrep_abt_dispose_reservations(
	struct xrep_abt		*ra,
	int			error)
{
	struct xrep_newbt_resv	*resv, *n;

	if (error)
		goto junkit;

	list_for_each_entry_safe(resv, n, &ra->new_bnobt.resv_list, list) {
		error = xrep_abt_dispose_one(ra, resv);
		if (error)
			goto junkit;
	}

junkit:
	list_for_each_entry_safe(resv, n, &ra->new_bnobt.resv_list, list) {
		xfs_perag_put(resv->pag);
		list_del(&resv->list);
		kfree(resv);
	}

	xrep_newbt_cancel(&ra->new_bnobt);
	xrep_newbt_cancel(&ra->new_cntbt);
}

/* Retrieve free space data for bulk load. */
STATIC int
xrep_abt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_alloc_rec_incore *arec = &cur->bc_rec.a;
	struct xrep_abt		*ra = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load_next(ra->free_records, &ra->array_cur,
				arec);
		if (error)
			return error;

		ra->longest = max(ra->longest, arec->ar_blockcount);

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_abt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_abt		*ra = priv;

	return xrep_newbt_claim_block(cur, &ra->new_bnobt, ptr);
}

/*
 * Reset the AGF counters to reflect the free space btrees that we just
 * rebuilt, then reinitialize the per-AG data.
 */
STATIC int
xrep_abt_reset_counters(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	unsigned int		freesp_btreeblks = 0;

	/*
	 * Compute the contribution to agf_btreeblks for the new free space
	 * btrees.  This is the computed btree size minus anything we didn't
	 * use.
	 */
	freesp_btreeblks += ra->new_bnobt.bload.nr_blocks - 1;
	freesp_btreeblks += ra->new_cntbt.bload.nr_blocks - 1;

	freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_bnobt);
	freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_cntbt);

	/*
	 * The AGF header contains extra information related to the free space
	 * btrees, so we must update those fields here.
	 */
	agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
				(be32_to_cpu(agf->agf_rmap_blocks) - 1));
	agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
	agf->agf_longest = cpu_to_be32(ra->longest);
	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
						 XFS_AGF_LONGEST |
						 XFS_AGF_FREEBLKS);

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem.  If the new
	 * tree is shorter than the old one, the allocbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the updated
	 * AGF to capture all the new values.
	 */
	pag->pagf_repair_levels[XFS_BTNUM_BNOi] = pag->pagf_levels[XFS_BTNUM_BNOi];
	pag->pagf_repair_levels[XFS_BTNUM_CNTi] = pag->pagf_levels[XFS_BTNUM_CNTi];

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}

/*
 * Use the collected free space information to stage new free space btrees.
 * If this is successful we'll return with the new btree root
 * information logged to the repair transaction but not yet committed.
 */
STATIC int
xrep_abt_build_new_trees(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_btree_cur	*bno_cur;
	struct xfs_btree_cur	*cnt_cur;
	struct xfs_perag	*pag = sc->sa.pag;
	bool			needs_resort = false;
	int			error;

	/*
	 * Sort the free extents by length so that we can set up the free space
	 * btrees in as few extents as possible.  This reduces the amount of
	 * deferred rmap / free work we have to do at the end.
	 */
	error = xrep_cntbt_sort_records(ra, false);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.
	 */
	xrep_newbt_init_bare(&ra->new_bnobt, sc);
	xrep_newbt_init_bare(&ra->new_cntbt, sc);

	ra->new_bnobt.bload.get_records = xrep_abt_get_records;
	ra->new_cntbt.bload.get_records = xrep_abt_get_records;

	ra->new_bnobt.bload.claim_block = xrep_abt_claim_block;
	ra->new_cntbt.bload.claim_block = xrep_abt_claim_block;

	/* Allocate cursors for the staged btrees. */
	bno_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_bnobt.afake,
			pag, XFS_BTNUM_BNO);
	cnt_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_cntbt.afake,
			pag, XFS_BTNUM_CNT);

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/* Reserve the space we'll need for the new btrees. */
	error = xrep_abt_reserve_space(ra, bno_cur, cnt_cur, &needs_resort);
	if (error)
		goto err_cur;

	/*
	 * If we need to re-sort the free extents by length, do so so that we
	 * can put the records into the cntbt in the correct order.
	 */
	if (needs_resort) {
		error = xrep_cntbt_sort_records(ra, needs_resort);
		if (error)
			goto err_cur;
	}

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree.  Update the alternate incore btree
	 * height so that we don't trip the verifiers when writing the new
	 * btree blocks to disk.
	 */
	pag->pagf_repair_levels[XFS_BTNUM_BNOi] =
			ra->new_bnobt.bload.btree_height;
	pag->pagf_repair_levels[XFS_BTNUM_CNTi] =
			ra->new_cntbt.bload.btree_height;

	/* Load the free space by length tree. */
	ra->array_cur = XFARRAY_CURSOR_INIT;
	ra->longest = 0;
	error = xfs_btree_bload(cnt_cur, &ra->new_cntbt.bload, ra);
	if (error)
		goto err_levels;

	error = xrep_bnobt_sort_records(ra);
	if (error)
		return error;

	/* Load the free space by block number tree. */
	ra->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bno_cur, &ra->new_bnobt.bload, ra);
	if (error)
		goto err_levels;

	/*
	 * Install the new btrees in the AG header.  After this point the old
	 * btrees are no longer accessible and the new trees are live.
	 */
	xfs_allocbt_commit_staged_btree(bno_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(bno_cur, 0);
	xfs_allocbt_commit_staged_btree(cnt_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(cnt_cur, 0);

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_abt_reset_counters(ra);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	xrep_abt_dispose_reservations(ra, error);

	return xrep_roll_ag_trans(sc);

err_levels:
	pag->pagf_repair_levels[XFS_BTNUM_BNOi] = 0;
	pag->pagf_repair_levels[XFS_BTNUM_CNTi] = 0;
err_cur:
	xfs_btree_del_cursor(cnt_cur, error);
	xfs_btree_del_cursor(bno_cur, error);
err_newbt:
	xrep_abt_dispose_reservations(ra, error);
	return error;
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_abt_remove_old_trees(
	struct xrep_abt		*ra)
{
	struct xfs_perag	*pag = ra->sc->sa.pag;
	int			error;

	/* Free the old btree blocks if they're not in use. */
	error = xrep_reap_agblocks(ra->sc, &ra->old_allocbt_blocks,
			&XFS_RMAP_OINFO_AG, XFS_AG_RESV_IGNORE);
	if (error)
		return error;

	/*
	 * Now that we've zapped all the old allocbt blocks we can turn off
	 * the alternate height mechanism.
	 */
	pag->pagf_repair_levels[XFS_BTNUM_BNOi] = 0;
	pag->pagf_repair_levels[XFS_BTNUM_CNTi] = 0;
	return 0;
}

/* Repair the freespace btrees for some AG. */
int
xrep_allocbt(
	struct xfs_scrub	*sc)
{
	struct xrep_abt		*ra;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	int			error;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	ra = kzalloc(sizeof(struct xrep_abt), XCHK_GFP_FLAGS);
	if (!ra)
		return -ENOMEM;
	ra->sc = sc;

	/* We rebuild both data structures. */
	sc->sick_mask = XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT;

	/*
	 * Make sure the busy extent list is clear because we can't put extents
	 * on there twice.  In theory we cleared this before we started, but
	 * let's not risk the filesystem.
	 */
	if (!xfs_extent_busy_list_empty(sc->sa.pag)) {
		error = -EDEADLOCK;
		goto out_ra;
	}

	/* Set up enough storage to handle maximally fragmented free space. */
	descr = xchk_xfile_ag_descr(sc, "free space records");
	error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
			sizeof(struct xfs_alloc_rec_incore),
			&ra->free_records);
	kfree(descr);
	if (error)
		goto out_ra;

	/* Collect the free space data and find the old btree blocks. */
	xagb_bitmap_init(&ra->old_allocbt_blocks);
	error = xrep_abt_find_freespace(ra);
	if (error)
		goto out_bitmap;

	/* Rebuild the free space information. */
	error = xrep_abt_build_new_trees(ra);
	if (error)
		goto out_bitmap;

	/* Kill the old trees. */
	error = xrep_abt_remove_old_trees(ra);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&ra->old_allocbt_blocks);
	xfarray_destroy(ra->free_records);
out_ra:
	kfree(ra);
	return error;
}

/* Make sure both btrees are ok after we've rebuilt them. */
int
xrep_revalidate_allocbt(
	struct xfs_scrub	*sc)
{
	__u32			old_type = sc->sm->sm_type;
	int			error;

	/*
	 * We must update sm_type temporarily so that the tree-to-tree cross
	 * reference checks will work in the correct direction, and also so
	 * that tracing will report correctly if there are more errors.
	 */
	sc->sm->sm_type = XFS_SCRUB_TYPE_BNOBT;
	error = xchk_allocbt(sc);
	if (error)
		goto out;

	sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
	error = xchk_allocbt(sc);
out:
	sc->sm->sm_type = old_type;
	return error;
}
@@ -527,28 +527,23 @@ xchk_xattr_check_sf(
 	struct xfs_scrub		*sc)
 {
 	struct xchk_xattr_buf		*ab = sc->buf;
-	struct xfs_attr_shortform	*sf;
-	struct xfs_attr_sf_entry	*sfe;
+	struct xfs_ifork		*ifp = &sc->ip->i_af;
+	struct xfs_attr_sf_hdr		*sf = ifp->if_data;
+	struct xfs_attr_sf_entry	*sfe = xfs_attr_sf_firstentry(sf);
 	struct xfs_attr_sf_entry	*next;
-	struct xfs_ifork		*ifp;
-	unsigned char			*end;
+	unsigned char			*end = ifp->if_data + ifp->if_bytes;
 	int				i;
 	int				error = 0;
 
-	ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
-
 	bitmap_zero(ab->usedmap, ifp->if_bytes);
-	sf = (struct xfs_attr_shortform *)sc->ip->i_af.if_u1.if_data;
-	end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
-	xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(sf->hdr));
+	xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*sf));
 
-	sfe = &sf->list[0];
 	if ((unsigned char *)sfe > end) {
 		xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0);
 		return 0;
 	}
 
-	for (i = 0; i < sf->hdr.count; i++) {
+	for (i = 0; i < sf->count; i++) {
 		unsigned char		*name = sfe->nameval;
 		unsigned char		*value = &sfe->nameval[sfe->namelen];
@@ -16,7 +16,9 @@
 
 #include <linux/interval_tree_generic.h>
 
-struct xbitmap_node {
+/* u64 bitmap */
+
+struct xbitmap64_node {
 	struct rb_node		bn_rbnode;
 
 	/* First set bit of this interval and subtree. */
@@ -39,72 +41,72 @@ struct xbitmap_node {
  * forward-declare them anyway for clarity.
  */
 static inline void
-xbitmap_tree_insert(struct xbitmap_node *node, struct rb_root_cached *root);
+xbitmap64_tree_insert(struct xbitmap64_node *node, struct rb_root_cached *root);
 
 static inline void
-xbitmap_tree_remove(struct xbitmap_node *node, struct rb_root_cached *root);
+xbitmap64_tree_remove(struct xbitmap64_node *node, struct rb_root_cached *root);
 
-static inline struct xbitmap_node *
-xbitmap_tree_iter_first(struct rb_root_cached *root, uint64_t start,
+static inline struct xbitmap64_node *
+xbitmap64_tree_iter_first(struct rb_root_cached *root, uint64_t start,
 		uint64_t last);
 
-static inline struct xbitmap_node *
-xbitmap_tree_iter_next(struct xbitmap_node *node, uint64_t start,
+static inline struct xbitmap64_node *
+xbitmap64_tree_iter_next(struct xbitmap64_node *node, uint64_t start,
 		uint64_t last);
 
-INTERVAL_TREE_DEFINE(struct xbitmap_node, bn_rbnode, uint64_t,
-		__bn_subtree_last, START, LAST, static inline, xbitmap_tree)
+INTERVAL_TREE_DEFINE(struct xbitmap64_node, bn_rbnode, uint64_t,
+		__bn_subtree_last, START, LAST, static inline, xbitmap64_tree)
 
 /* Iterate each interval of a bitmap.  Do not change the bitmap. */
-#define for_each_xbitmap_extent(bn, bitmap) \
+#define for_each_xbitmap64_extent(bn, bitmap) \
 	for ((bn) = rb_entry_safe(rb_first(&(bitmap)->xb_root.rb_root), \
-			struct xbitmap_node, bn_rbnode); \
+			struct xbitmap64_node, bn_rbnode); \
 	     (bn) != NULL; \
 	     (bn) = rb_entry_safe(rb_next(&(bn)->bn_rbnode), \
-			struct xbitmap_node, bn_rbnode))
+			struct xbitmap64_node, bn_rbnode))
 
 /* Clear a range of this bitmap. */
 int
-xbitmap_clear(
-	struct xbitmap		*bitmap,
+xbitmap64_clear(
+	struct xbitmap64	*bitmap,
 	uint64_t		start,
 	uint64_t		len)
 {
-	struct xbitmap_node	*bn;
-	struct xbitmap_node	*new_bn;
+	struct xbitmap64_node	*bn;
+	struct xbitmap64_node	*new_bn;
 	uint64_t		last = start + len - 1;
 
-	while ((bn = xbitmap_tree_iter_first(&bitmap->xb_root, start, last))) {
+	while ((bn = xbitmap64_tree_iter_first(&bitmap->xb_root, start, last))) {
 		if (bn->bn_start < start && bn->bn_last > last) {
 			uint64_t	old_last = bn->bn_last;
 
 			/* overlaps with the entire clearing range */
-			xbitmap_tree_remove(bn, &bitmap->xb_root);
+			xbitmap64_tree_remove(bn, &bitmap->xb_root);
 			bn->bn_last = start - 1;
-			xbitmap_tree_insert(bn, &bitmap->xb_root);
+			xbitmap64_tree_insert(bn, &bitmap->xb_root);
 
 			/* add an extent */
-			new_bn = kmalloc(sizeof(struct xbitmap_node),
+			new_bn = kmalloc(sizeof(struct xbitmap64_node),
 					XCHK_GFP_FLAGS);
 			if (!new_bn)
 				return -ENOMEM;
 			new_bn->bn_start = last + 1;
 			new_bn->bn_last = old_last;
-			xbitmap_tree_insert(new_bn, &bitmap->xb_root);
+			xbitmap64_tree_insert(new_bn, &bitmap->xb_root);
 		} else if (bn->bn_start < start) {
 			/* overlaps with the left side of the clearing range */
-			xbitmap_tree_remove(bn, &bitmap->xb_root);
+			xbitmap64_tree_remove(bn, &bitmap->xb_root);
 			bn->bn_last = start - 1;
-			xbitmap_tree_insert(bn, &bitmap->xb_root);
+			xbitmap64_tree_insert(bn, &bitmap->xb_root);
 		} else if (bn->bn_last > last) {
 			/* overlaps with the right side of the clearing range */
-			xbitmap_tree_remove(bn, &bitmap->xb_root);
+			xbitmap64_tree_remove(bn, &bitmap->xb_root);
 			bn->bn_start = last + 1;
-			xbitmap_tree_insert(bn, &bitmap->xb_root);
+			xbitmap64_tree_insert(bn, &bitmap->xb_root);
 			break;
 		} else {
 			/* in the middle of the clearing range */
-			xbitmap_tree_remove(bn, &bitmap->xb_root);
+			xbitmap64_tree_remove(bn, &bitmap->xb_root);
 			kfree(bn);
 		}
 	}
@@ -114,59 +116,59 @@ xbitmap_clear(
 
 /* Set a range of this bitmap. */
 int
-xbitmap_set(
-	struct xbitmap		*bitmap,
+xbitmap64_set(
+	struct xbitmap64	*bitmap,
 	uint64_t		start,
 	uint64_t		len)
 {
-	struct xbitmap_node	*left;
-	struct xbitmap_node	*right;
+	struct xbitmap64_node	*left;
+	struct xbitmap64_node	*right;
 	uint64_t		last = start + len - 1;
 	int			error;
 
 	/* Is this whole range already set? */
-	left = xbitmap_tree_iter_first(&bitmap->xb_root, start, last);
+	left = xbitmap64_tree_iter_first(&bitmap->xb_root, start, last);
 	if (left && left->bn_start <= start && left->bn_last >= last)
 		return 0;
 
 	/* Clear out everything in the range we want to set. */
-	error = xbitmap_clear(bitmap, start, len);
+	error = xbitmap64_clear(bitmap, start, len);
 	if (error)
 		return error;
 
 	/* Do we have a left-adjacent extent? */
-	left = xbitmap_tree_iter_first(&bitmap->xb_root, start - 1, start - 1);
+	left = xbitmap64_tree_iter_first(&bitmap->xb_root, start - 1, start - 1);
 	ASSERT(!left || left->bn_last + 1 == start);
 
 	/* Do we have a right-adjacent extent? */
-	right = xbitmap_tree_iter_first(&bitmap->xb_root, last + 1, last + 1);
+	right = xbitmap64_tree_iter_first(&bitmap->xb_root, last + 1, last + 1);
 	ASSERT(!right || right->bn_start == last + 1);
 
 	if (left && right) {
 		/* combine left and right adjacent extent */
-		xbitmap_tree_remove(left, &bitmap->xb_root);
-		xbitmap_tree_remove(right, &bitmap->xb_root);
+		xbitmap64_tree_remove(left, &bitmap->xb_root);
+		xbitmap64_tree_remove(right, &bitmap->xb_root);
 		left->bn_last = right->bn_last;
-		xbitmap_tree_insert(left, &bitmap->xb_root);
+		xbitmap64_tree_insert(left, &bitmap->xb_root);
 		kfree(right);
 	} else if (left) {
 		/* combine with left extent */
-		xbitmap_tree_remove(left, &bitmap->xb_root);
+		xbitmap64_tree_remove(left, &bitmap->xb_root);
 		left->bn_last = last;
-		xbitmap_tree_insert(left, &bitmap->xb_root);
+		xbitmap64_tree_insert(left, &bitmap->xb_root);
 	} else if (right) {
 		/* combine with right extent */
-		xbitmap_tree_remove(right, &bitmap->xb_root);
+		xbitmap64_tree_remove(right, &bitmap->xb_root);
 		right->bn_start = start;
-		xbitmap_tree_insert(right, &bitmap->xb_root);
+		xbitmap64_tree_insert(right, &bitmap->xb_root);
 	} else {
 		/* add an extent */
-		left = kmalloc(sizeof(struct xbitmap_node), XCHK_GFP_FLAGS);
+		left = kmalloc(sizeof(struct xbitmap64_node), XCHK_GFP_FLAGS);
 		if (!left)
 			return -ENOMEM;
 		left->bn_start = start;
 		left->bn_last = last;
-		xbitmap_tree_insert(left, &bitmap->xb_root);
+		xbitmap64_tree_insert(left, &bitmap->xb_root);
 	}
 
 	return 0;
@@ -174,21 +176,21 @@ xbitmap_set(
 
 /* Free everything related to this bitmap. */
 void
-xbitmap_destroy(
-	struct xbitmap		*bitmap)
+xbitmap64_destroy(
+	struct xbitmap64	*bitmap)
 {
-	struct xbitmap_node	*bn;
+	struct xbitmap64_node	*bn;
 
-	while ((bn = xbitmap_tree_iter_first(&bitmap->xb_root, 0, -1ULL))) {
-		xbitmap_tree_remove(bn, &bitmap->xb_root);
+	while ((bn = xbitmap64_tree_iter_first(&bitmap->xb_root, 0, -1ULL))) {
+		xbitmap64_tree_remove(bn, &bitmap->xb_root);
 		kfree(bn);
 	}
 }
 
 /* Set up a per-AG block bitmap. */
 void
-xbitmap_init(
-	struct xbitmap		*bitmap)
+xbitmap64_init(
+	struct xbitmap64	*bitmap)
 {
 	bitmap->xb_root = RB_ROOT_CACHED;
 }
@@ -208,18 +210,18 @@ xbitmap_init(
  * This is the logical equivalent of bitmap &= ~sub.
  */
 int
-xbitmap_disunion(
-	struct xbitmap		*bitmap,
-	struct xbitmap		*sub)
+xbitmap64_disunion(
+	struct xbitmap64	*bitmap,
+	struct xbitmap64	*sub)
 {
-	struct xbitmap_node	*bn;
+	struct xbitmap64_node	*bn;
 	int			error;
 
-	if (xbitmap_empty(bitmap) || xbitmap_empty(sub))
+	if (xbitmap64_empty(bitmap) || xbitmap64_empty(sub))
 		return 0;
 
-	for_each_xbitmap_extent(bn, sub) {
-		error = xbitmap_clear(bitmap, bn->bn_start,
+	for_each_xbitmap64_extent(bn, sub) {
+		error = xbitmap64_clear(bitmap, bn->bn_start,
 				bn->bn_last - bn->bn_start + 1);
 		if (error)
 			return error;
@@ -228,104 +230,15 @@ xbitmap_disunion(
 	return 0;
 }
 
-/*
- * Record all btree blocks seen while iterating all records of a btree.
- *
- * We know that the btree query_all function starts at the left edge and walks
- * towards the right edge of the tree.  Therefore, we know that we can walk up
- * the btree cursor towards the root; if the pointer for a given level points
- * to the first record/key in that block, we haven't seen this block before;
- * and therefore we need to remember that we saw this block in the btree.
- *
- * So if our btree is:
- *
- *    4
- *  / | \
- * 1  2  3
- *
- * Pretend for this example that each leaf block has 100 btree records.  For
- * the first btree record, we'll observe that bc_levels[0].ptr == 1, so we
- * record that we saw block 1.  Then we observe that bc_levels[1].ptr == 1, so
- * we record block 4.  The list is [1, 4].
- *
- * For the second btree record, we see that bc_levels[0].ptr == 2, so we exit
- * the loop.  The list remains [1, 4].
- *
- * For the 101st btree record, we've moved onto leaf block 2.  Now
- * bc_levels[0].ptr == 1 again, so we record that we saw block 2.  We see that
- * bc_levels[1].ptr == 2, so we exit the loop.  The list is now [1, 4, 2].
- *
- * For the 102nd record, bc_levels[0].ptr == 2, so we continue.
- *
- * For the 201st record, we've moved on to leaf block 3.
- * bc_levels[0].ptr == 1, so we add 3 to the list.  Now it is [1, 4, 2, 3].
- *
- * For the 300th record we just exit, with the list being [1, 4, 2, 3].
- */
-
-/* Mark a btree block to the agblock bitmap. */
-STATIC int
-xagb_bitmap_visit_btblock(
-	struct xfs_btree_cur	*cur,
-	int			level,
-	void			*priv)
-{
-	struct xagb_bitmap	*bitmap = priv;
-	struct xfs_buf		*bp;
-	xfs_fsblock_t		fsbno;
-	xfs_agblock_t		agbno;
-
-	xfs_btree_get_block(cur, level, &bp);
-	if (!bp)
-		return 0;
-
-	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
-	agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
-
-	return xagb_bitmap_set(bitmap, agbno, 1);
-}
-
-/* Mark all (per-AG) btree blocks in the agblock bitmap. */
-int
-xagb_bitmap_set_btblocks(
-	struct xagb_bitmap	*bitmap,
-	struct xfs_btree_cur	*cur)
-{
-	return xfs_btree_visit_blocks(cur, xagb_bitmap_visit_btblock,
-			XFS_BTREE_VISIT_ALL, bitmap);
-}
-
-/*
- * Record all the buffers pointed to by the btree cursor.  Callers already
- * engaged in a btree walk should call this function to capture the list of
- * blocks going from the leaf towards the root.
- */
-int
-xagb_bitmap_set_btcur_path(
-	struct xagb_bitmap	*bitmap,
-	struct xfs_btree_cur	*cur)
-{
-	int			i;
-	int			error;
-
-	for (i = 0; i < cur->bc_nlevels && cur->bc_levels[i].ptr == 1; i++) {
-		error = xagb_bitmap_visit_btblock(cur, i, bitmap);
-		if (error)
-			return error;
-	}
-
-	return 0;
-}
-
 /* How many bits are set in this bitmap? */
 uint64_t
-xbitmap_hweight(
-	struct xbitmap		*bitmap)
+xbitmap64_hweight(
+	struct xbitmap64	*bitmap)
 {
-	struct xbitmap_node	*bn;
+	struct xbitmap64_node	*bn;
 	uint64_t		ret = 0;
 
-	for_each_xbitmap_extent(bn, bitmap)
+	for_each_xbitmap64_extent(bn, bitmap)
 		ret += bn->bn_last - bn->bn_start + 1;
 
 	return ret;
@@ -333,15 +246,15 @@ xbitmap_hweight(
 
 /* Call a function for every run of set bits in this bitmap. */
 int
-xbitmap_walk(
-	struct xbitmap		*bitmap,
-	xbitmap_walk_fn		fn,
+xbitmap64_walk(
+	struct xbitmap64	*bitmap,
+	xbitmap64_walk_fn	fn,
 	void			*priv)
 {
-	struct xbitmap_node	*bn;
+	struct xbitmap64_node	*bn;
 	int			error = 0;
 
-	for_each_xbitmap_extent(bn, bitmap) {
+	for_each_xbitmap64_extent(bn, bitmap) {
 		error = fn(bn->bn_start, bn->bn_last - bn->bn_start + 1, priv);
 		if (error)
 			break;
@@ -352,23 +265,297 @@ xbitmap_walk(
 
 /* Does this bitmap have no bits set at all? */
 bool
-xbitmap_empty(
-	struct xbitmap		*bitmap)
+xbitmap64_empty(
+	struct xbitmap64	*bitmap)
 {
 	return bitmap->xb_root.rb_root.rb_node == NULL;
 }
 
 /* Is the start of the range set or clear?  And for how long? */
 bool
-xbitmap_test(
-	struct xbitmap		*bitmap,
+xbitmap64_test(
+	struct xbitmap64	*bitmap,
 	uint64_t		start,
 	uint64_t		*len)
 {
-	struct xbitmap_node	*bn;
+	struct xbitmap64_node	*bn;
 	uint64_t		last = start + *len - 1;
 
-	bn = xbitmap_tree_iter_first(&bitmap->xb_root, start, last);
+	bn = xbitmap64_tree_iter_first(&bitmap->xb_root, start, last);
 	if (!bn)
 		return false;
 	if (bn->bn_start <= start) {
 		if (bn->bn_last < last)
 			*len = bn->bn_last - start + 1;
 		return true;
 	}
 	*len = bn->bn_start - start;
 	return false;
 }
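
Editorial aside, not part of the merge: a minimal sketch of the xbitmap64 calling convention implemented above. Note how xbitmap64_test() shrinks *len to the length of the run it found starting at the queried bit.

/* Illustration only: typical xbitmap64 usage. */
static int xbitmap64_demo(void)
{
	struct xbitmap64	bm;
	uint64_t		len = 100;
	int			error;

	xbitmap64_init(&bm);
	error = xbitmap64_set(&bm, 0, 100);		/* set [0, 99] */
	if (!error)
		error = xbitmap64_clear(&bm, 40, 20);	/* clear [40, 59] */

	/* Bit 0 is set and the run is 40 bits long, so *len becomes 40. */
	if (!error && xbitmap64_test(&bm, 0, &len))
		ASSERT(len == 40);

	xbitmap64_destroy(&bm);
	return error;
}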
|
||||
|
||||
/* u32 bitmap */
|
||||
|
||||
struct xbitmap32_node {
|
||||
struct rb_node bn_rbnode;
|
||||
|
||||
/* First set bit of this interval and subtree. */
|
||||
uint32_t bn_start;
|
||||
|
||||
/* Last set bit of this interval. */
|
||||
uint32_t bn_last;
|
||||
|
||||
/* Last set bit of this subtree. Do not touch this. */
|
||||
uint32_t __bn_subtree_last;
|
||||
};
|
||||
|
||||
/* Define our own interval tree type with uint32_t parameters. */
|
||||
|
||||
/*
|
||||
* These functions are defined by the INTERVAL_TREE_DEFINE macro, but we'll
|
||||
* forward-declare them anyway for clarity.
|
||||
*/
|
||||
static inline void
|
||||
xbitmap32_tree_insert(struct xbitmap32_node *node, struct rb_root_cached *root);
|
||||
|
||||
static inline void
|
||||
xbitmap32_tree_remove(struct xbitmap32_node *node, struct rb_root_cached *root);
|
||||
|
||||
static inline struct xbitmap32_node *
|
||||
xbitmap32_tree_iter_first(struct rb_root_cached *root, uint32_t start,
|
||||
uint32_t last);
|
||||
|
||||
static inline struct xbitmap32_node *
|
||||
xbitmap32_tree_iter_next(struct xbitmap32_node *node, uint32_t start,
|
||||
uint32_t last);
|
||||
|
||||
INTERVAL_TREE_DEFINE(struct xbitmap32_node, bn_rbnode, uint32_t,
|
||||
__bn_subtree_last, START, LAST, static inline, xbitmap32_tree)
|
||||
|
||||
/* Iterate each interval of a bitmap. Do not change the bitmap. */
|
||||
#define for_each_xbitmap32_extent(bn, bitmap) \
|
||||
for ((bn) = rb_entry_safe(rb_first(&(bitmap)->xb_root.rb_root), \
|
||||
struct xbitmap32_node, bn_rbnode); \
|
||||
(bn) != NULL; \
|
||||
(bn) = rb_entry_safe(rb_next(&(bn)->bn_rbnode), \
|
||||
struct xbitmap32_node, bn_rbnode))
|
||||
|
||||
/* Clear a range of this bitmap. */
|
||||
int
|
||||
xbitmap32_clear(
|
||||
struct xbitmap32 *bitmap,
|
||||
uint32_t start,
|
||||
uint32_t len)
|
||||
{
|
||||
struct xbitmap32_node *bn;
|
||||
struct xbitmap32_node *new_bn;
|
||||
uint32_t last = start + len - 1;
|
||||
|
||||
while ((bn = xbitmap32_tree_iter_first(&bitmap->xb_root, start, last))) {
|
||||
if (bn->bn_start < start && bn->bn_last > last) {
|
||||
uint32_t old_last = bn->bn_last;
|
||||
|
||||
/* overlaps with the entire clearing range */
|
||||
xbitmap32_tree_remove(bn, &bitmap->xb_root);
|
||||
bn->bn_last = start - 1;
|
||||
xbitmap32_tree_insert(bn, &bitmap->xb_root);
|
||||
|
||||
/* add an extent */
|
||||
new_bn = kmalloc(sizeof(struct xbitmap32_node),
|
||||
XCHK_GFP_FLAGS);
|
||||
if (!new_bn)
|
||||
return -ENOMEM;
|
||||
new_bn->bn_start = last + 1;
|
||||
new_bn->bn_last = old_last;
|
||||
xbitmap32_tree_insert(new_bn, &bitmap->xb_root);
|
||||
} else if (bn->bn_start < start) {
|
||||
/* overlaps with the left side of the clearing range */
|
||||
xbitmap32_tree_remove(bn, &bitmap->xb_root);
|
||||
bn->bn_last = start - 1;
|
||||
xbitmap32_tree_insert(bn, &bitmap->xb_root);
|
||||
} else if (bn->bn_last > last) {
|
||||
/* overlaps with the right side of the clearing range */
|
||||
xbitmap32_tree_remove(bn, &bitmap->xb_root);
|
||||
bn->bn_start = last + 1;
|
||||
xbitmap32_tree_insert(bn, &bitmap->xb_root);
|
||||
break;
|
||||
} else {
|
||||
/* in the middle of the clearing range */
|
||||
xbitmap32_tree_remove(bn, &bitmap->xb_root);
|
||||
kfree(bn);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set a range of this bitmap. */
|
||||
int
|
||||
xbitmap32_set(
|
||||
struct xbitmap32 *bitmap,
|
||||
uint32_t start,
|
||||
uint32_t len)
|
||||
{
|
||||
struct xbitmap32_node *left;
|
||||
struct xbitmap32_node *right;
|
||||
uint32_t last = start + len - 1;
|
||||
int error;
|
||||
|
||||
/* Is this whole range already set? */
|
||||
left = xbitmap32_tree_iter_first(&bitmap->xb_root, start, last);
|
||||
if (left && left->bn_start <= start && left->bn_last >= last)
|
||||
return 0;
|
||||
|
||||
/* Clear out everything in the range we want to set. */
|
||||
error = xbitmap32_clear(bitmap, start, len);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* Do we have a left-adjacent extent? */
|
||||
left = xbitmap32_tree_iter_first(&bitmap->xb_root, start - 1, start - 1);
|
||||
ASSERT(!left || left->bn_last + 1 == start);
|
||||
|
||||
/* Do we have a right-adjacent extent? */
|
||||
right = xbitmap32_tree_iter_first(&bitmap->xb_root, last + 1, last + 1);
|
||||
ASSERT(!right || right->bn_start == last + 1);
|
||||
|
||||
if (left && right) {
|
||||
/* combine left and right adjacent extent */
|
||||
xbitmap32_tree_remove(left, &bitmap->xb_root);
|
||||
xbitmap32_tree_remove(right, &bitmap->xb_root);
|
||||
left->bn_last = right->bn_last;
|
||||
xbitmap32_tree_insert(left, &bitmap->xb_root);
|
||||
kfree(right);
|
||||
} else if (left) {
|
||||
/* combine with left extent */
|
||||
xbitmap32_tree_remove(left, &bitmap->xb_root);
|
||||
left->bn_last = last;
|
||||
xbitmap32_tree_insert(left, &bitmap->xb_root);
|
||||
} else if (right) {
|
||||
/* combine with right extent */
|
||||
xbitmap32_tree_remove(right, &bitmap->xb_root);
|
||||
right->bn_start = start;
|
||||
xbitmap32_tree_insert(right, &bitmap->xb_root);
|
||||
} else {
|
||||
/* add an extent */
|
||||
left = kmalloc(sizeof(struct xbitmap32_node), XCHK_GFP_FLAGS);
|
||||
if (!left)
|
||||
return -ENOMEM;
|
||||
left->bn_start = start;
|
||||
left->bn_last = last;
|
||||
xbitmap32_tree_insert(left, &bitmap->xb_root);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Free everything related to this bitmap. */
|
||||
void
|
||||
xbitmap32_destroy(
|
||||
struct xbitmap32 *bitmap)
|
||||
{
|
||||
struct xbitmap32_node *bn;
|
||||
|
||||
while ((bn = xbitmap32_tree_iter_first(&bitmap->xb_root, 0, -1U))) {
|
||||
xbitmap32_tree_remove(bn, &bitmap->xb_root);
|
||||
kfree(bn);
|
||||
}
|
||||
}
|
||||
|
||||
/* Set up a per-AG block bitmap. */
|
||||
void
|
||||
xbitmap32_init(
|
||||
struct xbitmap32 *bitmap)
|
||||
{
|
||||
bitmap->xb_root = RB_ROOT_CACHED;
|
||||
}

/*
 * Remove all the blocks mentioned in @sub from the extents in @bitmap.
 *
 * The intent is that callers will iterate the rmapbt for all of its records
 * for a given owner to generate @bitmap; and iterate all the blocks of the
 * metadata structures that are not being rebuilt and have the same rmapbt
 * owner to generate @sub.  This routine subtracts all the extents
 * mentioned in @sub from all the extents linked in @bitmap, which leaves
 * @bitmap as the list of blocks that are not accounted for, which we assume
 * are the dead blocks of the old metadata structure.  The blocks mentioned in
 * @bitmap can be reaped.
 *
 * This is the logical equivalent of bitmap &= ~sub.
 */
int
xbitmap32_disunion(
	struct xbitmap32	*bitmap,
	struct xbitmap32	*sub)
{
	struct xbitmap32_node	*bn;
	int			error;

	if (xbitmap32_empty(bitmap) || xbitmap32_empty(sub))
		return 0;

	for_each_xbitmap32_extent(bn, sub) {
		error = xbitmap32_clear(bitmap, bn->bn_start,
				bn->bn_last - bn->bn_start + 1);
		if (error)
			return error;
	}

	return 0;
}
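
/*
 * Illustrative sketch (not part of the patch): the "bitmap &= ~sub"
 * semantics described above.  Marking [0, 99], subtracting [40, 49], and
 * counting the survivors leaves 90 bits set across two extents.
 */
static inline int
xbitmap32_disunion_example(void)
{
	struct xbitmap32	bitmap, sub;
	int			error;

	xbitmap32_init(&bitmap);
	xbitmap32_init(&sub);
	error = xbitmap32_set(&bitmap, 0, 100);
	if (!error)
		error = xbitmap32_set(&sub, 40, 10);
	if (!error)
		error = xbitmap32_disunion(&bitmap, &sub);
	if (!error)
		ASSERT(xbitmap32_hweight(&bitmap) == 90);
	xbitmap32_destroy(&sub);
	xbitmap32_destroy(&bitmap);
	return error;
}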

/* How many bits are set in this bitmap? */
uint32_t
xbitmap32_hweight(
	struct xbitmap32	*bitmap)
{
	struct xbitmap32_node	*bn;
	uint32_t		ret = 0;

	for_each_xbitmap32_extent(bn, bitmap)
		ret += bn->bn_last - bn->bn_start + 1;

	return ret;
}

/* Call a function for every run of set bits in this bitmap. */
int
xbitmap32_walk(
	struct xbitmap32	*bitmap,
	xbitmap32_walk_fn	fn,
	void			*priv)
{
	struct xbitmap32_node	*bn;
	int			error = 0;

	for_each_xbitmap32_extent(bn, bitmap) {
		error = fn(bn->bn_start, bn->bn_last - bn->bn_start + 1, priv);
		if (error)
			break;
	}

	return error;
}
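
/*
 * Illustrative sketch (not part of the patch): a walk callback that stops
 * iteration early by returning -ECANCELED, per the iterator return-code
 * convention documented in scrub/bitmap.h.  The walk returns the callback's
 * value, so callers that use -ECANCELED purely as a "stop" signal should
 * filter it out afterwards.
 */
static int
xbitmap32_walk_example_fn(
	uint32_t	start,
	uint32_t	len,
	void		*priv)
{
	unsigned int	*extents_seen = priv;

	if (++(*extents_seen) > 16)
		return -ECANCELED;	/* stop walking; not an error */
	return 0;
}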

/* Does this bitmap have no bits set at all? */
bool
xbitmap32_empty(
	struct xbitmap32	*bitmap)
{
	return bitmap->xb_root.rb_root.rb_node == NULL;
}

/* Is the start of the range set or clear?  And for how long? */
bool
xbitmap32_test(
	struct xbitmap32	*bitmap,
	uint32_t		start,
	uint32_t		*len)
{
	struct xbitmap32_node	*bn;
	uint32_t		last = start + *len - 1;

	bn = xbitmap32_tree_iter_first(&bitmap->xb_root, start, last);
	if (!bn)
		return false;
	if (bn->bn_start <= start) {
		if (bn->bn_last < last)
			*len = bn->bn_last - start + 1;
		return true;
	}
	*len = bn->bn_start - start;
	return false;
}
@@ -6,17 +6,19 @@
#ifndef __XFS_SCRUB_BITMAP_H__
#define __XFS_SCRUB_BITMAP_H__

struct xbitmap {
/* u64 bitmap */

struct xbitmap64 {
	struct rb_root_cached	xb_root;
};

void xbitmap_init(struct xbitmap *bitmap);
void xbitmap_destroy(struct xbitmap *bitmap);
void xbitmap64_init(struct xbitmap64 *bitmap);
void xbitmap64_destroy(struct xbitmap64 *bitmap);

int xbitmap_clear(struct xbitmap *bitmap, uint64_t start, uint64_t len);
int xbitmap_set(struct xbitmap *bitmap, uint64_t start, uint64_t len);
int xbitmap_disunion(struct xbitmap *bitmap, struct xbitmap *sub);
uint64_t xbitmap_hweight(struct xbitmap *bitmap);
int xbitmap64_clear(struct xbitmap64 *bitmap, uint64_t start, uint64_t len);
int xbitmap64_set(struct xbitmap64 *bitmap, uint64_t start, uint64_t len);
int xbitmap64_disunion(struct xbitmap64 *bitmap, struct xbitmap64 *sub);
uint64_t xbitmap64_hweight(struct xbitmap64 *bitmap);

/*
 * Return codes for the bitmap iterator functions are 0 to continue iterating,
@@ -25,84 +27,39 @@ uint64_t xbitmap_hweight(struct xbitmap *bitmap);
 * iteration, because neither bitmap iterator ever generates that error code on
 * its own.  Callers must not modify the bitmap while walking it.
 */
typedef int (*xbitmap_walk_fn)(uint64_t start, uint64_t len, void *priv);
int xbitmap_walk(struct xbitmap *bitmap, xbitmap_walk_fn fn,
typedef int (*xbitmap64_walk_fn)(uint64_t start, uint64_t len, void *priv);
int xbitmap64_walk(struct xbitmap64 *bitmap, xbitmap64_walk_fn fn,
		void *priv);

bool xbitmap_empty(struct xbitmap *bitmap);
bool xbitmap_test(struct xbitmap *bitmap, uint64_t start, uint64_t *len);
bool xbitmap64_empty(struct xbitmap64 *bitmap);
bool xbitmap64_test(struct xbitmap64 *bitmap, uint64_t start, uint64_t *len);

/* Bitmaps, but for type-checked for xfs_agblock_t */
/* u32 bitmap */

struct xagb_bitmap {
	struct xbitmap	agbitmap;
struct xbitmap32 {
	struct rb_root_cached	xb_root;
};

static inline void xagb_bitmap_init(struct xagb_bitmap *bitmap)
{
	xbitmap_init(&bitmap->agbitmap);
}
void xbitmap32_init(struct xbitmap32 *bitmap);
void xbitmap32_destroy(struct xbitmap32 *bitmap);

static inline void xagb_bitmap_destroy(struct xagb_bitmap *bitmap)
{
	xbitmap_destroy(&bitmap->agbitmap);
}
int xbitmap32_clear(struct xbitmap32 *bitmap, uint32_t start, uint32_t len);
int xbitmap32_set(struct xbitmap32 *bitmap, uint32_t start, uint32_t len);
int xbitmap32_disunion(struct xbitmap32 *bitmap, struct xbitmap32 *sub);
uint32_t xbitmap32_hweight(struct xbitmap32 *bitmap);

static inline int xagb_bitmap_clear(struct xagb_bitmap *bitmap,
		xfs_agblock_t start, xfs_extlen_t len)
{
	return xbitmap_clear(&bitmap->agbitmap, start, len);
}
static inline int xagb_bitmap_set(struct xagb_bitmap *bitmap,
		xfs_agblock_t start, xfs_extlen_t len)
{
	return xbitmap_set(&bitmap->agbitmap, start, len);
}
/*
 * Return codes for the bitmap iterator functions are 0 to continue iterating,
 * and non-zero to stop iterating.  Any non-zero value will be passed up to the
 * iteration caller.  The special value -ECANCELED can be used to stop
 * iteration, because neither bitmap iterator ever generates that error code on
 * its own.  Callers must not modify the bitmap while walking it.
 */
typedef int (*xbitmap32_walk_fn)(uint32_t start, uint32_t len, void *priv);
int xbitmap32_walk(struct xbitmap32 *bitmap, xbitmap32_walk_fn fn,
		void *priv);

static inline bool
xagb_bitmap_test(
	struct xagb_bitmap	*bitmap,
	xfs_agblock_t		start,
	xfs_extlen_t		*len)
{
	uint64_t		biglen = *len;
	bool			ret;

	ret = xbitmap_test(&bitmap->agbitmap, start, &biglen);

	if (start + biglen >= UINT_MAX) {
		ASSERT(0);
		biglen = UINT_MAX - start;
	}

	*len = biglen;
	return ret;
}

static inline int xagb_bitmap_disunion(struct xagb_bitmap *bitmap,
		struct xagb_bitmap *sub)
{
	return xbitmap_disunion(&bitmap->agbitmap, &sub->agbitmap);
}

static inline uint32_t xagb_bitmap_hweight(struct xagb_bitmap *bitmap)
{
	return xbitmap_hweight(&bitmap->agbitmap);
}
static inline bool xagb_bitmap_empty(struct xagb_bitmap *bitmap)
{
	return xbitmap_empty(&bitmap->agbitmap);
}

static inline int xagb_bitmap_walk(struct xagb_bitmap *bitmap,
		xbitmap_walk_fn fn, void *priv)
{
	return xbitmap_walk(&bitmap->agbitmap, fn, priv);
}

int xagb_bitmap_set_btblocks(struct xagb_bitmap *bitmap,
		struct xfs_btree_cur *cur);
int xagb_bitmap_set_btcur_path(struct xagb_bitmap *bitmap,
		struct xfs_btree_cur *cur);
bool xbitmap32_empty(struct xbitmap32 *bitmap);
bool xbitmap32_test(struct xbitmap32 *bitmap, uint32_t start, uint32_t *len);

#endif /* __XFS_SCRUB_BITMAP_H__ */
@@ -19,9 +19,11 @@
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_health.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/health.h"
#include "xfs_ag.h"

/* Set us up with an inode's bmap. */
@@ -48,9 +50,18 @@ xchk_setup_inode_bmap(
	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;
		bool			is_repair = xchk_could_repair(sc);

		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);

		/* Break all our leases, we're going to mess with things. */
		if (is_repair) {
			error = xfs_break_layouts(VFS_I(sc->ip),
					&sc->ilock_flags, BREAK_WRITE);
			if (error)
				goto out;
		}

		inode_dio_wait(VFS_I(sc->ip));

		/*
@@ -71,6 +82,15 @@ xchk_setup_inode_bmap(
		error = filemap_fdatawait_keep_errors(mapping);
		if (error && (error != -ENOSPC && error != -EIO))
			goto out;

		/* Drop the page cache if we're repairing block mappings. */
		if (is_repair) {
			error = invalidate_inode_pages2(
					VFS_I(sc->ip)->i_mapping);
			if (error)
				goto out;
		}

	}

	/* Got the inode, lock it and we're ready to go. */
@@ -78,6 +98,10 @@ xchk_setup_inode_bmap(
	if (error)
		goto out;

	error = xchk_ino_dqattach(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode */
@@ -632,6 +656,82 @@ xchk_bmap_check_ag_rmaps(
	return error;
}

/*
 * Decide if we want to scan the reverse mappings to determine if the attr
 * fork /really/ has zero space mappings.
 */
STATIC bool
xchk_bmap_check_empty_attrfork(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*ifp = &ip->i_af;

	/*
	 * If the dinode repair found a bad attr fork, it will reset the fork
	 * to extents format with zero records and wait for this scrubber
	 * to reconstruct the block mappings.  If the fork is not in this
	 * state, then the fork cannot have been zapped.
	 */
	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
		return false;

	/*
	 * Files can have an attr fork in EXTENTS format with zero records for
	 * several reasons:
	 *
	 * a) an attr set created a fork but ran out of space
	 * b) attr replace deleted an old attr but failed during the set step
	 * c) the data fork was in btree format when all attrs were deleted, so
	 *    the fork was left in place
	 * d) the inode repair code zapped the fork
	 *
	 * Only in case (d) do we want to scan the rmapbt to see if we need to
	 * rebuild the attr fork.  The fork zap code clears all DAC permission
	 * bits and zeroes the uid and gid, so avoid the scan if any of those
	 * three conditions are not met.
	 */
	if ((VFS_I(ip)->i_mode & 0777) != 0)
		return false;
	if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID))
		return false;
	if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID))
		return false;

	return true;
}

/*
 * Decide if we want to scan the reverse mappings to determine if the data
 * fork /really/ has zero space mappings.
 */
STATIC bool
xchk_bmap_check_empty_datafork(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*ifp = &ip->i_df;

	/* Don't support realtime rmap checks yet. */
	if (XFS_IS_REALTIME_INODE(ip))
		return false;

	/*
	 * If the dinode repair found a bad data fork, it will reset the fork
	 * to extents format with zero records and wait for this scrubber
	 * to reconstruct the block mappings.  If the fork is not in this
	 * state, then the fork cannot have been zapped.
	 */
	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
		return false;

	/*
	 * If we encounter an empty data fork along with evidence that the fork
	 * might not really be empty, we need to scan the reverse mappings to
	 * decide if we're going to rebuild the fork.  Data forks with nonzero
	 * file size are scanned.
	 */
	return i_size_read(VFS_I(ip)) != 0;
}

/*
 * Decide if we want to walk every rmap btree in the fs to make sure that each
 * rmap for this file fork has corresponding bmbt entries.
@@ -641,7 +741,6 @@ xchk_bmap_want_check_rmaps(
	struct xchk_bmap_info	*info)
{
	struct xfs_scrub	*sc = info->sc;
	struct xfs_ifork	*ifp;

	if (!xfs_has_rmapbt(sc->mp))
		return false;
@@ -650,28 +749,10 @@ xchk_bmap_want_check_rmaps(
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return false;

	/* Don't support realtime rmap checks yet. */
	if (info->is_rt)
		return false;
	if (info->whichfork == XFS_ATTR_FORK)
		return xchk_bmap_check_empty_attrfork(sc->ip);

	/*
	 * The inode repair code zaps broken inode forks by resetting them back
	 * to EXTENTS format and zero extent records.  If we encounter a fork
	 * in this state along with evidence that the fork isn't supposed to be
	 * empty, we need to scan the reverse mappings to decide if we're going
	 * to rebuild the fork.  Data forks with nonzero file size are scanned.
	 * xattr forks are never empty of content, so they are always scanned.
	 */
	ifp = xfs_ifork_ptr(sc->ip, info->whichfork);
	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) {
		if (info->whichfork == XFS_DATA_FORK &&
		    i_size_read(VFS_I(sc->ip)) == 0)
			return false;

		return true;
	}

	return false;
	return xchk_bmap_check_empty_datafork(sc->ip);
}

/* Make sure each rmap has a corresponding bmbt entry. */
@@ -939,7 +1020,20 @@ int
xchk_bmap_data(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_DATA_FORK);
	int			error;

	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	error = xchk_bmap(sc, XFS_DATA_FORK);
	if (error)
		return error;

	/* If the data fork is clean, it is clearly not zapped. */
	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED);
	return 0;
}

/* Scrub an inode's attr fork. */
@@ -947,7 +1041,27 @@ int
xchk_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_ATTR_FORK);
	int			error;

	/*
	 * If the attr fork has been zapped, it's possible that forkoff was
	 * reset to zero and hence sc->ip->i_afp is NULL.  We don't want the
	 * NULL ifp check in xchk_bmap to conclude that the attr fork is ok,
	 * so short circuit that logic by setting the corruption flag and
	 * returning immediately.
	 */
	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	error = xchk_bmap(sc, XFS_ATTR_FORK);
	if (error)
		return error;

	/* If the attr fork is clean, it is clearly not zapped. */
	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED);
	return 0;
}

/* Scrub an inode's CoW fork. */
fs/xfs/scrub/bmap_repair.c (new file, 867 lines)
@@ -0,0 +1,867 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_reflink.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Inode Fork Block Mapping (BMBT) Repair
 * ======================================
 *
 * Gather all the rmap records for the inode and fork we're fixing, reset the
 * incore fork, then recreate the btree.
 */

enum reflink_scan_state {
	RLS_IRRELEVANT = -1,	/* not applicable to this file */
	RLS_UNKNOWN,		/* shared extent scans required */
	RLS_SET_IFLAG,		/* iflag must be set */
};

struct xrep_bmap {
	/* Old bmbt blocks */
	struct xfsb_bitmap	old_bmbt_blocks;

	/* New fork. */
	struct xrep_newbt	new_bmapbt;

	/* List of new bmap records. */
	struct xfarray		*bmap_records;

	struct xfs_scrub	*sc;

	/* How many blocks did we find allocated to this file? */
	xfs_rfsblock_t		nblocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		old_bmbt_block_count;

	/* get_records()'s position in the bmap record array. */
	xfarray_idx_t		array_cur;

	/* How many real (non-hole, non-delalloc) mappings do we have? */
	uint64_t		real_mappings;

	/* Which fork are we fixing? */
	int			whichfork;

	/* Should the REFLINK flag be set when the repair is over? */
	enum reflink_scan_state	reflink_scan;

	/* Do we allow unwritten extents? */
	bool			allow_unwritten;
};

/* Is this space extent shared?  Flag the inode if it is. */
STATIC int
xrep_bmap_discover_shared(
	struct xrep_bmap	*rb,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount)
{
	struct xfs_scrub	*sc = rb->sc;
	xfs_agblock_t		agbno;
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	int			error;

	agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
	error = xfs_refcount_find_shared(sc->sa.refc_cur, agbno, blockcount,
			&fbno, &flen, false);
	if (error)
		return error;

	if (fbno != NULLAGBLOCK)
		rb->reflink_scan = RLS_SET_IFLAG;

	return 0;
}

/* Remember this reverse-mapping as a series of bmap records. */
STATIC int
xrep_bmap_from_rmap(
	struct xrep_bmap	*rb,
	xfs_fileoff_t		startoff,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount,
	bool			unwritten)
{
	struct xfs_bmbt_irec	irec = {
		.br_startoff	= startoff,
		.br_startblock	= startblock,
		.br_state	= unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
	};
	struct xfs_bmbt_rec	rbe;
	struct xfs_scrub	*sc = rb->sc;
	int			error = 0;

	/*
	 * If we're repairing the data fork of a non-reflinked regular file on
	 * a reflink filesystem, we need to figure out if this space extent is
	 * shared.
	 */
	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
		if (error)
			return error;
	}

	do {
		xfs_failaddr_t	fa;

		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
				XFS_MAX_BMBT_EXTLEN);

		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &irec);
		if (fa)
			return -EFSCORRUPTED;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(sc->ip, rb->whichfork, &irec);

		if (xchk_should_terminate(sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;

		rb->real_mappings++;

		irec.br_startblock += irec.br_blockcount;
		irec.br_startoff += irec.br_blockcount;
		blockcount -= irec.br_blockcount;
	} while (blockcount > 0);

	return 0;
}
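
/*
 * Illustrative note (not part of the patch): XFS_MAX_BMBT_EXTLEN is the
 * 21-bit blockcount limit of an ondisk bmbt record (2097151 blocks), so a
 * single 3000000-block rmap would be emitted by the loop above as two bmap
 * records: [startoff, 2097151 blocks] followed by
 * [startoff + 2097151, 902849 blocks].
 */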

/* Check for any obvious errors or conflicts in the file mapping. */
STATIC int
xrep_bmap_check_fork_rmap(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec)
{
	struct xfs_scrub	*sc = rb->sc;
	enum xbtree_recpacking	outcome;
	int			error;

	/*
	 * Data extents for rt files are never stored on the data device, but
	 * everything else (xattrs, bmbt blocks) can be.
	 */
	if (XFS_IS_REALTIME_INODE(sc->ip) &&
	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
		return -EFSCORRUPTED;

	/* Check that this is within the AG. */
	if (!xfs_verify_agbext(cur->bc_ag.pag, rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Check the file offset range. */
	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* No contradictory flags. */
	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
			rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rm_startblock, rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}

/* Record extents that belong to this inode's fork. */
STATIC int
xrep_bmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_fsblock_t		fsbno;
	int			error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
	if (error)
		return error;

	/*
	 * Record all blocks allocated to this file even if the extent isn't
	 * for the fork we're rebuilding so that we can reset di_nblocks later.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* If this rmap isn't for the fork we want, we're done. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	/* Reject unwritten extents if we don't allow those. */
	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
		return -EFSCORRUPTED;

	fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
			rec->rm_startblock);

	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
		rb->old_bmbt_block_count += rec->rm_blockcount;
		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsbno,
				rec->rm_blockcount);
	}

	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}

/*
 * Compare two block mapping records.  We want to sort in order of increasing
 * file offset.
 */
static int
xrep_bmap_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_bmbt_rec *ba = a;
	const struct xfs_bmbt_rec *bb = b;
	xfs_fileoff_t		ao = xfs_bmbt_disk_get_startoff(ba);
	xfs_fileoff_t		bo = xfs_bmbt_disk_get_startoff(bb);

	if (ao > bo)
		return 1;
	else if (ao < bo)
		return -1;
	return 0;
}

/*
 * Sort the bmap extents by fork offset or else the records will be in the
 * wrong order.  Ensure there are no overlaps in the file offset ranges.
 */
STATIC int
xrep_bmap_sort_records(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	xfs_fileoff_t		next_off = 0;
	xfarray_idx_t		array_cur;
	int			error;

	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		if (irec.br_startoff < next_off)
			return -EFSCORRUPTED;

		next_off = irec.br_startoff + irec.br_blockcount;
	}

	return 0;
}
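
/*
 * Illustrative note (not part of the patch): after sorting, mappings
 * [startoff 10, 5 blocks] and [startoff 14, 3 blocks] would trip the overlap
 * check above, because the second record begins at offset 14, which is less
 * than the next_off of 15 computed from the first record.
 */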

/* Scan one AG for reverse mappings that we can turn into extent maps. */
STATIC int
xrep_bmap_scan_ag(
	struct xrep_bmap	*rb,
	struct xfs_perag	*pag)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	error = xrep_ag_init(sc, pag, &sc->sa);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb);
	xchk_ag_free(sc, &sc->sa);
	return error;
}

/* Find the delalloc extents from the old incore extent tree. */
STATIC int
xrep_bmap_find_delalloc(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_rec	rbe;
	struct xfs_inode	*ip = rb->sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, rb->whichfork);
	int			error = 0;

	/*
	 * Skip this scan if we don't expect to find delayed allocation
	 * reservations in this fork.
	 */
	if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
		return 0;

	for_each_xfs_iext(ifp, &icur, &irec) {
		if (!isnullstartblock(irec.br_startblock))
			continue;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(ip, rb->whichfork, &irec);

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Collect block mappings for this fork of this inode and decide if we have
 * enough space to rebuild.  Caller is responsible for cleaning up the list if
 * anything goes wrong.
 */
STATIC int
xrep_bmap_find_mappings(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* Iterate the rmaps for extents. */
	for_each_perag(sc->mp, agno, pag) {
		error = xrep_bmap_scan_ag(rb, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	return xrep_bmap_find_delalloc(rb);
}

/* Retrieve real extent mappings for bulk loading the bmap btree. */
STATIC int
xrep_bmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_bmbt_rec	rec;
	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
	struct xrep_bmap	*rb = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		do {
			error = xfarray_load(rb->bmap_records, rb->array_cur++,
					&rec);
			if (error)
				return error;

			xfs_bmbt_disk_get_all(&rec, irec);
		} while (isnullstartblock(irec->br_startblock));

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_bmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;

	return xrep_newbt_claim_block(cur, &rb->new_bmapbt, ptr);
}

/* Figure out how much space we need to create the incore btree root block. */
STATIC size_t
xrep_bmap_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
	ASSERT(level > 0);

	return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level);
}

/* Update the inode counters. */
STATIC int
xrep_bmap_reset_counters(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int64_t			delta;

	if (rb->reflink_scan == RLS_SET_IFLAG)
		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;

	/*
	 * Update the inode block counts to reflect the extents we found in the
	 * rmapbt.
	 */
	delta = ifake->if_blocks - rb->old_bmbt_block_count;
	sc->ip->i_nblocks = rb->nblocks + delta;
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);

	/*
	 * Adjust the quota counts by the difference in size between the old
	 * and new bmbt.
	 */
	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta);
	return 0;
}

/*
 * Create a new iext tree and load it with block mappings.  If the inode is
 * in extents format, that's all we need to do to commit the new mappings.
 * If it is in btree format, this takes care of preloading the incore tree.
 */
STATIC int
xrep_bmap_extents_load(
	struct xrep_bmap	*rb)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
	xfarray_idx_t		array_cur;
	int			error;

	ASSERT(ifp->if_bytes == 0);

	/* Add all the mappings (incl. delalloc) to the incore extent tree. */
	xfs_iext_first(ifp, &icur);
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		xfs_iext_insert_raw(ifp, &icur, &irec);
		if (!isnullstartblock(irec.br_startblock))
			ifp->if_nextents++;

		xfs_iext_next(ifp, &icur);
	}

	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
			ifp->if_nextents);
}

/*
 * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
 * and load the incore extent tree.
 */
STATIC int
xrep_bmap_btree_load(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*bmap_cur)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	/* Compute how many blocks we'll need. */
	error = xfs_btree_bload_compute_geometry(bmap_cur,
			&rb->new_bmapbt.bload, rb->real_mappings);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire bmap
	 * from the number of extents we found, and pump up our transaction to
	 * have sufficient block reservation.  We're allowed to exceed file
	 * quota to repair inconsistent metadata.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
			rb->new_bmapbt.bload.nr_blocks, 0, true);
	if (error)
		return error;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
			rb->new_bmapbt.bload.nr_blocks);
	if (error)
		return error;

	/* Add all observed bmap records. */
	rb->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
	if (error)
		return error;

	/*
	 * Load the new bmap records into the new incore extent tree to
	 * preserve delalloc reservations for regular files.  The directory
	 * code loads the extent tree during xfs_dir_open and assumes
	 * thereafter that it remains loaded, so we must not violate that
	 * assumption.
	 */
	return xrep_bmap_extents_load(rb);
}

/*
 * Use the collected bmap information to stage a new bmap fork.  If this is
 * successful we'll return with the new fork information logged to the repair
 * transaction but not yet committed.  The caller must ensure that the inode
 * is joined to the transaction; the inode will be joined to a clean
 * transaction when the function returns.
 */
STATIC int
xrep_bmap_build_new_fork(
	struct xrep_bmap	*rb)
{
	struct xfs_owner_info	oinfo;
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*bmap_cur;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int			error;

	error = xrep_bmap_sort_records(rb);
	if (error)
		return error;

	/*
	 * Prepare to construct the new fork by initializing the new btree
	 * structure and creating a fake ifork in the ifakeroot structure.
	 */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
			&oinfo);
	if (error)
		return error;

	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;
	bmap_cur = xfs_bmbt_stage_cursor(sc->mp, sc->ip, ifake);

	/*
	 * Figure out the size and format of the new fork, then fill it with
	 * all the bmap records we've found.  Join the inode to the transaction
	 * so that we can roll the transaction while holding the inode locked.
	 */
	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
		error = xrep_bmap_extents_load(rb);
	} else {
		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
		error = xrep_bmap_btree_load(rb, bmap_cur);
	}
	if (error)
		goto err_cur;

	/*
	 * Install the new fork in the inode.  After this point the old mapping
	 * data are no longer accessible and the new tree is live.  We delete
	 * the cursor immediately after committing the staged root because the
	 * staged fork might be in extents format.
	 */
	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
	xfs_btree_del_cursor(bmap_cur, 0);

	/* Reset the inode counters now that we've changed the fork. */
	error = xrep_bmap_reset_counters(rb);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rb->new_bmapbt);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_cur:
	if (bmap_cur)
		xfs_btree_del_cursor(bmap_cur, error);
err_newbt:
	xrep_newbt_cancel(&rb->new_bmapbt);
	return error;
}

/*
 * Now that we've logged the new inode btree, invalidate all of the old blocks
 * and free them, if there were any.
 */
STATIC int
xrep_bmap_remove_old_tree(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_owner_info	oinfo;

	/* Free the old bmbt blocks if they're not in use. */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo);
}

/* Check for garbage inputs.  Returns -ECANCELED if there's nothing to do. */
STATIC int
xrep_bmap_check_inputs(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	if (!xfs_has_rmapbt(sc->mp))
		return -EOPNOTSUPP;

	/* No fork means nothing to rebuild. */
	if (!ifp)
		return -ECANCELED;

	/*
	 * We only know how to repair extent mappings, which is to say that we
	 * only support extents and btree fork format.  Repairs to a local
	 * format fork require a higher level repair function, so we do not
	 * have any work to do here.
	 */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_UUID:
		return -ECANCELED;
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	default:
		return -EFSCORRUPTED;
	}

	if (whichfork == XFS_ATTR_FORK)
		return 0;

	/* Only files, symlinks, and directories get to have data forks. */
	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		/* ok */
		break;
	default:
		return -EINVAL;
	}

	/* Don't know how to rebuild realtime data forks. */
	if (XFS_IS_REALTIME_INODE(sc->ip))
		return -EOPNOTSUPP;

	return 0;
}

/* Set up the initial state of the reflink scan. */
static inline enum reflink_scan_state
xrep_bmap_init_reflink_scan(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	/* cannot share on non-reflink filesystem */
	if (!xfs_has_reflink(sc->mp))
		return RLS_IRRELEVANT;

	/* preserve flag if it's already set */
	if (xfs_is_reflink_inode(sc->ip))
		return RLS_SET_IFLAG;

	/* can only share regular files */
	if (!S_ISREG(VFS_I(sc->ip)->i_mode))
		return RLS_IRRELEVANT;

	/* cannot share attr fork extents */
	if (whichfork != XFS_DATA_FORK)
		return RLS_IRRELEVANT;

	/* cannot share realtime extents */
	if (XFS_IS_REALTIME_INODE(sc->ip))
		return RLS_IRRELEVANT;

	return RLS_UNKNOWN;
}

/* Repair an inode fork. */
int
xrep_bmap(
	struct xfs_scrub	*sc,
	int			whichfork,
	bool			allow_unwritten)
{
	struct xrep_bmap	*rb;
	char			*descr;
	unsigned int		max_bmbt_recs;
	bool			large_extcount;
	int			error = 0;

	error = xrep_bmap_check_inputs(sc, whichfork);
	if (error == -ECANCELED)
		return 0;
	if (error)
		return error;

	rb = kzalloc(sizeof(struct xrep_bmap), XCHK_GFP_FLAGS);
	if (!rb)
		return -ENOMEM;
	rb->sc = sc;
	rb->whichfork = whichfork;
	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
	rb->allow_unwritten = allow_unwritten;

	/* Set up enough storage to handle the max records for this fork. */
	large_extcount = xfs_has_large_extent_counts(sc->mp);
	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
	descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
			whichfork == XFS_DATA_FORK ? "data" : "attr");
	error = xfarray_create(descr, max_bmbt_recs,
			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
	kfree(descr);
	if (error)
		goto out_rb;

	/* Collect all reverse mappings for this fork's extents. */
	xfsb_bitmap_init(&rb->old_bmbt_blocks);
	error = xrep_bmap_find_mappings(rb);
	if (error)
		goto out_bitmap;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the bmap information. */
	error = xrep_bmap_build_new_fork(rb);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_bmap_remove_old_tree(rb);
	if (error)
		goto out_bitmap;

out_bitmap:
	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
	xfarray_destroy(rb->bmap_records);
out_rb:
	kfree(rb);
	return error;
}

/* Repair an inode's data fork. */
int
xrep_bmap_data(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_DATA_FORK, true);
}

/* Repair an inode's attr fork. */
int
xrep_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_ATTR_FORK, false);
}
@@ -25,6 +25,7 @@
#include "xfs_trans_priv.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
@@ -604,6 +605,7 @@ xchk_ag_free(
	struct xchk_ag		*sa)
{
	xchk_ag_btcur_free(sa);
	xrep_reset_perag_resv(sc);
	if (sa->agf_bp) {
		xfs_trans_brelse(sc->tp, sa->agf_bp);
		sa->agf_bp = NULL;
@@ -733,6 +735,8 @@ xchk_iget(
	xfs_ino_t		inum,
	struct xfs_inode	**ipp)
{
	ASSERT(sc->tp != NULL);

	return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
}

@@ -816,6 +820,26 @@ xchk_iget_agi(
	return 0;
}

#ifdef CONFIG_XFS_QUOTA
/*
 * Try to attach dquots to this inode if we think we might want to repair it.
 * Callers must not hold any ILOCKs.  If the dquots are broken and cannot be
 * attached, a quotacheck will be scheduled.
 */
int
xchk_ino_dqattach(
	struct xfs_scrub	*sc)
{
	ASSERT(sc->tp != NULL);
	ASSERT(sc->ip != NULL);

	if (!xchk_could_repair(sc))
		return 0;

	return xrep_ino_dqattach(sc);
}
#endif

/* Install an inode that we opened by handle for scrubbing. */
int
xchk_install_handle_inode(
@@ -882,8 +906,8 @@ xchk_iget_for_scrubbing(
	if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
		return -ENOENT;

	/* Try a regular untrusted iget. */
	error = xchk_iget(sc, sc->sm->sm_ino, &ip);
	/* Try a safe untrusted iget. */
	error = xchk_iget_safe(sc, sc->sm->sm_ino, &ip);
	if (!error)
		return xchk_install_handle_inode(sc, ip);
	if (error == -ENOENT)
@@ -1027,6 +1051,11 @@ xchk_setup_inode_contents(
	error = xchk_trans_alloc(sc, resblks);
	if (error)
		goto out;

	error = xchk_ino_dqattach(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode for us */
@@ -1132,6 +1161,7 @@ xchk_metadata_inode_subtype(
	unsigned int		scrub_type)
{
	__u32			smtype = sc->sm->sm_type;
	unsigned int		sick_mask = sc->sick_mask;
	int			error;

	sc->sm->sm_type = scrub_type;
@@ -1149,6 +1179,7 @@ xchk_metadata_inode_subtype(
		break;
	}

	sc->sick_mask = sick_mask;
	sc->sm->sm_type = smtype;
	return error;
}

@@ -103,9 +103,15 @@ xchk_setup_rtsummary(struct xfs_scrub *sc)
}
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_ino_dqattach(struct xfs_scrub *sc);
int xchk_setup_quota(struct xfs_scrub *sc);
#else
static inline int
xchk_ino_dqattach(struct xfs_scrub *sc)
{
	return 0;
}
static inline int
xchk_setup_quota(struct xfs_scrub *sc)
{
	return -ENOENT;
@@ -151,12 +157,37 @@ void xchk_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags);

void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp);

/*
 * Grab the inode at @inum.  The caller must have created a scrub transaction
 * so that we can confirm the inumber by walking the inobt and not deadlock on
 * a loop in the inobt.
 */
int xchk_iget(struct xfs_scrub *sc, xfs_ino_t inum, struct xfs_inode **ipp);
int xchk_iget_agi(struct xfs_scrub *sc, xfs_ino_t inum,
		struct xfs_buf **agi_bpp, struct xfs_inode **ipp);
void xchk_irele(struct xfs_scrub *sc, struct xfs_inode *ip);
int xchk_install_handle_inode(struct xfs_scrub *sc, struct xfs_inode *ip);

/*
 * Safe version of (untrusted) xchk_iget that uses an empty transaction to
 * avoid deadlocking on loops in the inobt.  This should only be used in a
 * scrub or repair setup routine, and only prior to grabbing a transaction.
 */
static inline int
xchk_iget_safe(struct xfs_scrub *sc, xfs_ino_t inum, struct xfs_inode **ipp)
{
	int	error;

	ASSERT(sc->tp == NULL);

	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;
	error = xchk_iget(sc, inum, ipp);
	xchk_trans_cancel(sc);
	return error;
}
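
/*
 * Illustrative sketch (not part of the patch): a setup routine would call
 * this helper before allocating its real transaction, e.g.:
 *
 *	error = xchk_iget_safe(sc, sc->sm->sm_ino, &ip);
 *	if (error)
 *		return error;
 *	... install the inode, then allocate the real scrub transaction ...
 */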

/*
 * Don't bother cross-referencing if we already found corruption or cross
 * referencing discrepancies.
@@ -167,6 +198,8 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
		XFS_SCRUB_OFLAG_XCORRUPT);
}

bool xchk_dir_looks_zapped(struct xfs_inode *dp);

#ifdef CONFIG_XFS_ONLINE_REPAIR
/* Decide if a repair is required. */
static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
@@ -175,8 +208,21 @@ static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
		XFS_SCRUB_OFLAG_XCORRUPT |
		XFS_SCRUB_OFLAG_PREEN);
}

/*
 * "Should we prepare for a repair?"
 *
 * Return true if the caller permits us to repair metadata and we're not
 * setting up for a post-repair evaluation.
 */
static inline bool xchk_could_repair(const struct xfs_scrub *sc)
{
	return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
		!(sc->flags & XREP_ALREADY_FIXED);
}
#else
# define xchk_needs_repair(sc)		(false)
# define xchk_could_repair(sc)		(false)
#endif /* CONFIG_XFS_ONLINE_REPAIR */

int xchk_metadata_inode_forks(struct xfs_scrub *sc);
@@ -188,6 +234,16 @@ int xchk_metadata_inode_forks(struct xfs_scrub *sc);
#define xchk_xfile_descr(sc, fmt, ...) \
	kasprintf(XCHK_GFP_FLAGS, "XFS (%s): " fmt, \
			(sc)->mp->m_super->s_id, ##__VA_ARGS__)
#define xchk_xfile_ag_descr(sc, fmt, ...) \
	kasprintf(XCHK_GFP_FLAGS, "XFS (%s): AG 0x%x " fmt, \
			(sc)->mp->m_super->s_id, \
			(sc)->sa.pag ? (sc)->sa.pag->pag_agno : (sc)->sm->sm_agno, \
			##__VA_ARGS__)
#define xchk_xfile_ino_descr(sc, fmt, ...) \
	kasprintf(XCHK_GFP_FLAGS, "XFS (%s): inode 0x%llx " fmt, \
			(sc)->mp->m_super->s_id, \
			(sc)->ip ? (sc)->ip->i_ino : (sc)->sm->sm_ino, \
			##__VA_ARGS__)

/*
 * Setting up a hook to wait for intents to drain is costly -- we have to take
fs/xfs/scrub/cow_repair.c (new file, 614 lines)
@@ -0,0 +1,614 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_error.h"
#include "xfs_errortag.h"
#include "xfs_icache.h"
#include "xfs_refcount_btree.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/off_bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/reap.h"

/*
 * CoW Fork Mapping Repair
 * =======================
 *
 * Although CoW staging extents are owned by incore CoW inode forks, on disk
 * they are owned by the refcount btree.  The ondisk metadata does not record
 * any ownership information, which limits what we can do to repair the
 * mappings in the CoW fork.  At most, we can replace ifork mappings that lack
 * an entry in the refcount btree or are described by a reverse mapping record
 * whose owner is not OWN_COW.
 *
 * Replacing extents is also tricky -- we can't touch written CoW fork extents
 * since they are undergoing writeback, and delalloc extents do not require
 * repair since they only exist incore.  Hence the most we can do is find the
 * bad parts of unwritten mappings, allocate a replacement set of blocks, and
 * replace the incore mapping.  We use the regular reaping process to unmap
 * or free the discarded blocks, as appropriate.
 */
struct xrep_cow {
	struct xfs_scrub	*sc;

	/* Bitmap of file offset ranges that need replacing. */
	struct xoff_bitmap	bad_fileoffs;

	/* Bitmap of fsblocks that were removed from the CoW fork. */
	struct xfsb_bitmap	old_cowfork_fsblocks;

	/* CoW fork mappings used to scan for bad CoW staging extents. */
	struct xfs_bmbt_irec	irec;

	/* refcount btree block number of irec.br_startblock */
	unsigned int		irec_startbno;

	/* refcount btree block number of the next refcount record we expect */
	unsigned int		next_bno;
};

/* CoW staging extent. */
struct xrep_cow_extent {
	xfs_fsblock_t	fsbno;
	xfs_extlen_t	len;
};

/*
 * Mark the part of the file range that corresponds to the given physical
 * space.  Caller must ensure that the physical range is within xc->irec.
 */
STATIC int
xrep_cow_mark_file_range(
	struct xrep_cow		*xc,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount)
{
	xfs_fileoff_t		startoff;

	startoff = xc->irec.br_startoff +
				(startblock - xc->irec.br_startblock);

	trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff,
			blockcount);

	return xoff_bitmap_set(&xc->bad_fileoffs, startoff, blockcount);
}

/*
 * Trim @src to fit within the CoW fork mapping being examined, and put the
 * result in @dst.
 */
static inline void
xrep_cow_trim_refcount(
	struct xrep_cow			*xc,
	struct xfs_refcount_irec	*dst,
	const struct xfs_refcount_irec	*src)
{
	unsigned int			adj;

	memcpy(dst, src, sizeof(*dst));

	if (dst->rc_startblock < xc->irec_startbno) {
		adj = xc->irec_startbno - dst->rc_startblock;
		dst->rc_blockcount -= adj;
		dst->rc_startblock += adj;
	}

	if (dst->rc_startblock + dst->rc_blockcount >
	    xc->irec_startbno + xc->irec.br_blockcount) {
		adj = (dst->rc_startblock + dst->rc_blockcount) -
		      (xc->irec_startbno + xc->irec.br_blockcount);
		dst->rc_blockcount -= adj;
	}
}
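
/*
 * Illustrative note (not part of the patch): if the mapping under scan
 * covers agblocks [100, 149] (irec_startbno == 100, br_blockcount == 50)
 * and @src covers [80, 159], the trimmed @dst becomes [100, 149]: the start
 * is pulled up by 20 blocks and the length clipped by another 10.
 */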
|
||||
|
||||
/* Mark any shared CoW staging extents. */
|
||||
STATIC int
|
||||
xrep_cow_mark_shared_staging(
|
||||
struct xfs_btree_cur *cur,
|
||||
const struct xfs_refcount_irec *rec,
|
||||
void *priv)
|
||||
{
|
||||
struct xrep_cow *xc = priv;
|
||||
struct xfs_refcount_irec rrec;
|
||||
xfs_fsblock_t fsbno;
|
||||
|
||||
if (!xfs_refcount_check_domain(rec) ||
|
||||
rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
xrep_cow_trim_refcount(xc, &rrec, rec);
|
||||
|
||||
fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
|
||||
rrec.rc_startblock);
|
||||
return xrep_cow_mark_file_range(xc, fsbno, rrec.rc_blockcount);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark any portion of the CoW fork file offset range where there is not a CoW
|
||||
* staging extent record in the refcountbt, and keep a record of where we did
|
||||
* find correct refcountbt records. Staging records are always cleaned out at
|
||||
* mount time, so any two inodes trying to map the same staging area would have
|
||||
* already taken the fs down due to refcount btree verifier errors. Hence this
|
||||
* inode should be the sole creator of the staging extent records ondisk.
|
||||
*/
|
||||
STATIC int
|
||||
xrep_cow_mark_missing_staging(
|
||||
struct xfs_btree_cur *cur,
|
||||
const struct xfs_refcount_irec *rec,
|
||||
void *priv)
|
||||
{
|
||||
struct xrep_cow *xc = priv;
|
||||
struct xfs_refcount_irec rrec;
|
||||
int error;
|
||||
|
||||
if (!xfs_refcount_check_domain(rec) ||
|
||||
rec->rc_domain != XFS_REFC_DOMAIN_COW)
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
xrep_cow_trim_refcount(xc, &rrec, rec);
|
||||
|
||||
if (xc->next_bno >= rrec.rc_startblock)
|
||||
goto next;
|
||||
|
||||
error = xrep_cow_mark_file_range(xc,
|
||||
XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
|
||||
xc->next_bno),
|
||||
rrec.rc_startblock - xc->next_bno);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
next:
|
||||
xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Mark any area that does not correspond to a CoW staging rmap.  These are
 * cross-linked areas that must be avoided.
 */
STATIC int
xrep_cow_mark_missing_staging_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_cow			*xc = priv;
	xfs_fsblock_t			fsbno;
	xfs_agblock_t			rec_bno;
	xfs_extlen_t			rec_len;
	unsigned int			adj;

	if (rec->rm_owner == XFS_RMAP_OWN_COW)
		return 0;

	rec_bno = rec->rm_startblock;
	rec_len = rec->rm_blockcount;
	if (rec_bno < xc->irec_startbno) {
		adj = xc->irec_startbno - rec_bno;
		rec_len -= adj;
		rec_bno += adj;
	}

	if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) {
		adj = (rec_bno + rec_len) -
		      (xc->irec_startbno + xc->irec.br_blockcount);
		rec_len -= adj;
	}

	fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno, rec_bno);
	return xrep_cow_mark_file_range(xc, fsbno, rec_len);
}

/*
 * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
 * extent and mark the corresponding part of the file range in the bitmap.
 */
STATIC int
xrep_cow_find_bad(
	struct xrep_cow			*xc)
{
	struct xfs_refcount_irec	rc_low = { 0 };
	struct xfs_refcount_irec	rc_high = { 0 };
	struct xfs_rmap_irec		rm_low = { 0 };
	struct xfs_rmap_irec		rm_high = { 0 };
	struct xfs_perag		*pag;
	struct xfs_scrub		*sc = xc->sc;
	xfs_agnumber_t			agno;
	int				error;

	agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock);
	xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock);

	pag = xfs_perag_get(sc->mp, agno);
	if (!pag)
		return -EFSCORRUPTED;

	error = xrep_ag_init(sc, pag, &sc->sa);
	if (error)
		goto out_pag;

	/* Mark any CoW fork extents that are shared. */
	rc_low.rc_startblock = xc->irec_startbno;
	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
	error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
			xrep_cow_mark_shared_staging, xc);
	if (error)
		goto out_sa;

	/* Make sure there are CoW staging extents for the whole mapping. */
	rc_low.rc_startblock = xc->irec_startbno;
	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
	xc->next_bno = xc->irec_startbno;
	error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
			xrep_cow_mark_missing_staging, xc);
	if (error)
		goto out_sa;

	if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
		error = xrep_cow_mark_file_range(xc,
				XFS_AGB_TO_FSB(sc->mp, pag->pag_agno,
					xc->next_bno),
				xc->irec_startbno + xc->irec.br_blockcount -
				xc->next_bno);
		if (error)
			goto out_sa;
	}
	/* Mark any area that has an rmap that isn't a COW staging extent. */
	rm_low.rm_startblock = xc->irec_startbno;
	memset(&rm_high, 0xFF, sizeof(rm_high));
	rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
	error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high,
			xrep_cow_mark_missing_staging_rmap, xc);
	if (error)
		goto out_sa;

	/*
	 * If userspace is forcing us to rebuild the CoW fork or someone turned
	 * on the debugging knob, replace everything in the CoW fork.
	 */
	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
	    XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
		error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
				xc->irec.br_blockcount);
		if (error)
			goto out_sa;
	}

out_sa:
	xchk_ag_free(sc, &sc->sa);
out_pag:
	xfs_perag_put(pag);
	return error;
}
/*
 * Allocate a replacement CoW staging extent of up to the given number of
 * blocks, and fill out the mapping.
 */
STATIC int
xrep_cow_alloc(
	struct xfs_scrub	*sc,
	xfs_extlen_t		maxlen,
	struct xrep_cow_extent	*repl)
{
	struct xfs_alloc_arg	args = {
		.tp		= sc->tp,
		.mp		= sc->mp,
		.oinfo		= XFS_RMAP_OINFO_SKIP_UPDATE,
		.minlen		= 1,
		.maxlen		= maxlen,
		.prod		= 1,
		.resv		= XFS_AG_RESV_NONE,
		.datatype	= XFS_ALLOC_USERDATA,
	};
	int			error;

	error = xfs_trans_reserve_more(sc->tp, maxlen, 0);
	if (error)
		return error;

	error = xfs_alloc_vextent_start_ag(&args,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
	if (error)
		return error;
	if (args.fsbno == NULLFSBLOCK)
		return -ENOSPC;

	xfs_refcount_alloc_cow_extent(sc->tp, args.fsbno, args.len);

	repl->fsbno = args.fsbno;
	repl->len = args.len;
	return 0;
}

/*
 * Look up the current CoW fork mapping so that we only allocate enough to
 * replace a single mapping.  If we don't find a mapping that covers the start
 * of the file range, or we find a delalloc or written extent, something is
 * seriously wrong, since we didn't drop the ILOCK.
 */
static inline int
xrep_cow_find_mapping(
	struct xrep_cow		*xc,
	struct xfs_iext_cursor	*icur,
	xfs_fileoff_t		startoff,
	struct xfs_bmbt_irec	*got)
{
	struct xfs_inode	*ip = xc->sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);

	if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got))
		goto bad;

	if (got->br_startoff > startoff)
		goto bad;

	if (got->br_blockcount == 0)
		goto bad;

	if (isnullstartblock(got->br_startblock))
		goto bad;

	if (xfs_bmap_is_written_extent(got))
		goto bad;

	return 0;
bad:
	ASSERT(0);
	return -EFSCORRUPTED;
}

#define REPLACE_LEFT_SIDE	(1U << 0)
#define REPLACE_RIGHT_SIDE	(1U << 1)
/*
 * Given a CoW fork mapping @got and a replacement mapping @repl, remap the
 * beginning of @got with the space described by @repl.
 */
static inline void
xrep_cow_replace_mapping(
	struct xfs_inode		*ip,
	struct xfs_iext_cursor		*icur,
	const struct xfs_bmbt_irec	*got,
	const struct xrep_cow_extent	*repl)
{
	struct xfs_bmbt_irec		new = *got; /* struct copy */

	ASSERT(repl->len > 0);
	ASSERT(!isnullstartblock(got->br_startblock));

	trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len);

	if (got->br_blockcount == repl->len) {
		/*
		 * The new extent is a complete replacement for the existing
		 * extent.  Update the COW fork record.
		 */
		new.br_startblock = repl->fsbno;
		xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
		return;
	}

	/*
	 * The new extent can replace the beginning of the COW fork record.
	 * Move the left side of @got upwards, then insert the new record.
	 */
	new.br_startoff += repl->len;
	new.br_startblock += repl->len;
	new.br_blockcount -= repl->len;
	xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);

	new.br_startoff = got->br_startoff;
	new.br_startblock = repl->fsbno;
	new.br_blockcount = repl->len;
	xfs_iext_insert(ip, icur, &new, BMAP_COWFORK);
}
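
[Illustrative aside, not part of the patch: the left-side replacement above is plain interval arithmetic. A standalone sketch with hypothetical types and one concrete case:]

/*
 * Standalone sketch of the left-trim arithmetic in
 * xrep_cow_replace_mapping.  Hypothetical plain types stand in for
 * struct xfs_bmbt_irec; the numbers show one concrete case.
 */
#include <assert.h>

struct irec { unsigned long long off, bno, len; };

/* Trim @repl_len blocks off the left of @got; return the right remainder. */
static struct irec trim_left(struct irec got, unsigned long long repl_len)
{
	struct irec new = got;

	new.off += repl_len;	/* file offset moves up... */
	new.bno += repl_len;	/* ...and so does the disk address... */
	new.len -= repl_len;	/* ...while the length shrinks */
	return new;
}

int main(void)
{
	/* Mapping at file offset 100, disk block 5000, 30 blocks long. */
	struct irec got = { 100, 5000, 30 }, right;

	right = trim_left(got, 12);	/* replace the first 12 blocks */
	assert(right.off == 112 && right.bno == 5012 && right.len == 18);
	return 0;
}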
/*
 * Replace the unwritten CoW staging extent backing the given file range with a
 * new space extent that isn't as problematic.
 */
STATIC int
xrep_cow_replace_range(
	struct xrep_cow		*xc,
	xfs_fileoff_t		startoff,
	xfs_extlen_t		*blockcount)
{
	struct xfs_iext_cursor	icur;
	struct xrep_cow_extent	repl;
	struct xfs_bmbt_irec	got;
	struct xfs_scrub	*sc = xc->sc;
	xfs_fileoff_t		nextoff;
	xfs_extlen_t		alloc_len;
	int			error;

	/*
	 * Put the existing CoW fork mapping in @got.  If @got ends before
	 * @repl, truncate @repl so we only replace one extent mapping at a
	 * time.
	 */
	error = xrep_cow_find_mapping(xc, &icur, startoff, &got);
	if (error)
		return error;
	nextoff = min(startoff + *blockcount,
		      got.br_startoff + got.br_blockcount);

	/*
	 * Allocate a replacement extent.  If we don't fill all the blocks,
	 * shorten the quantity that will be deleted in this step.
	 */
	alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN,
			nextoff - startoff);
	error = xrep_cow_alloc(sc, alloc_len, &repl);
	if (error)
		return error;

	/*
	 * Replace the old mapping with the new one, and commit the metadata
	 * changes made so far.
	 */
	xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl);

	xfs_inode_set_cowblocks_tag(sc->ip);
	error = xfs_defer_finish(&sc->tp);
	if (error)
		return error;

	/* Note the old CoW staging extents; we'll reap them all later. */
	error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks, got.br_startblock,
			repl.len);
	if (error)
		return error;

	*blockcount = repl.len;
	return 0;
}
/*
 * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc
 * reservation.
 */
STATIC int
xrep_cow_replace(
	uint64_t		startoff,
	uint64_t		blockcount,
	void			*priv)
{
	struct xrep_cow		*xc = priv;
	int			error = 0;

	while (blockcount > 0) {
		xfs_extlen_t	len = min_t(xfs_filblks_t, blockcount,
					    XFS_MAX_BMBT_EXTLEN);

		error = xrep_cow_replace_range(xc, startoff, &len);
		if (error)
			break;

		blockcount -= len;
		startoff += len;
	}

	return error;
}
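
[Illustrative aside, not part of the patch: xrep_cow_replace walks the range in capped steps and lets the callee shrink each step. A standalone sketch of that loop shape, with made-up names and a made-up 16-block cap:]

/*
 * Standalone sketch of the chunked-replacement loop above: walk a range
 * in steps of at most CAP blocks, where each step may be shortened
 * further by the callee (here a stub that caps large requests at 8).
 */
#include <stdio.h>

#define CAP 16U

/* Stand-in for xrep_cow_replace_range: may shrink *len, never grows it. */
static int replace_range(unsigned long long start, unsigned *len)
{
	if (*len > 8)
		*len = 8;	/* pretend the allocator came up short */
	printf("replaced [%llu, %llu)\n", start, start + *len);
	return 0;
}

static int replace(unsigned long long start, unsigned long long count)
{
	int error = 0;

	while (count > 0) {
		unsigned len = count < CAP ? count : CAP;

		error = replace_range(start, &len);
		if (error)
			break;
		count -= len;
		start += len;
	}
	return error;
}

int main(void)
{
	return replace(100, 20);	/* three steps: 8 + 8 + 4 blocks */
}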
/*
 * Repair an inode's CoW fork.  The CoW fork is an in-core structure, so
 * there's no btree to rebuild.  Instead, we replace any mappings that are
 * cross-linked or lack ondisk CoW fork records in the refcount btree.
 */
int
xrep_bmap_cow(
	struct xfs_scrub	*sc)
{
	struct xrep_cow		*xc;
	struct xfs_iext_cursor	icur;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK);
	int			error;

	if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp))
		return -EOPNOTSUPP;

	if (!ifp)
		return 0;

	/* realtime files aren't supported yet */
	if (XFS_IS_REALTIME_INODE(sc->ip))
		return -EOPNOTSUPP;

	/*
	 * If we're somehow not in extents format, then reinitialize it to
	 * an empty extent mapping fork and exit.
	 */
	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
		ifp->if_format = XFS_DINODE_FMT_EXTENTS;
		ifp->if_nextents = 0;
		return 0;
	}

	xc = kzalloc(sizeof(struct xrep_cow), XCHK_GFP_FLAGS);
	if (!xc)
		return -ENOMEM;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	xc->sc = sc;
	xoff_bitmap_init(&xc->bad_fileoffs);
	xfsb_bitmap_init(&xc->old_cowfork_fsblocks);

	for_each_xfs_iext(ifp, &icur, &xc->irec) {
		if (xchk_should_terminate(sc, &error))
			goto out_bitmap;

		/*
		 * delalloc reservations only exist incore, so there is no
		 * ondisk metadata that we can examine.  Hence we leave them
		 * alone.
		 */
		if (isnullstartblock(xc->irec.br_startblock))
			continue;

		/*
		 * COW fork extents are only in the written state if writeback
		 * is actively writing to disk.  We cannot restart the write
		 * at a different disk address since we've already issued the
		 * IO, so we leave these alone and hope for the best.
		 */
		if (xfs_bmap_is_written_extent(&xc->irec))
			continue;

		error = xrep_cow_find_bad(xc);
		if (error)
			goto out_bitmap;
	}

	/* Replace any bad unwritten mappings with fresh reservations. */
	error = xoff_bitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc);
	if (error)
		goto out_bitmap;

	/*
	 * Reap as many of the old CoW blocks as we can.  They are owned ondisk
	 * by the refcount btree, not the inode, so it is correct to treat them
	 * like inode metadata.
	 */
	error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
			&XFS_RMAP_OINFO_COW);
	if (error)
		goto out_bitmap;

out_bitmap:
	xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
	xoff_bitmap_destroy(&xc->bad_fileoffs);
	kmem_free(xc);
	return error;
}

fs/xfs/scrub/dir.c
@@ -15,10 +15,12 @@
 #include "xfs_icache.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
+#include "xfs_health.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/dabtree.h"
 #include "scrub/readdir.h"
+#include "scrub/health.h"
 
 /* Set us up to scrub directories. */
 int
@@ -760,6 +762,11 @@ xchk_directory(
 	if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
 		return -ENOENT;
 
+	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_DIR_ZAPPED)) {
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		return 0;
+	}
+
 	/* Plausible size? */
 	if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) {
 		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
@@ -784,7 +791,36 @@ xchk_directory(
 
 	/* Look up every name in this directory by hash. */
 	error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, NULL);
-	if (error == -ECANCELED)
-		error = 0;
-	return error;
+	if (error && error != -ECANCELED)
+		return error;
+
+	/* If the dir is clean, it is clearly not zapped. */
+	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_DIR_ZAPPED);
+	return 0;
 }
+
+/*
+ * Decide if this directory has been zapped to satisfy the inode and ifork
+ * verifiers.  Checking and repairing should be postponed until the directory
+ * is fixed.
+ */
+bool
+xchk_dir_looks_zapped(
+	struct xfs_inode	*dp)
+{
+	/* Repair zapped this dir's data fork a short time ago */
+	if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
+		return true;
+
+	/*
+	 * If the dinode repair found a bad data fork, it will reset the fork
+	 * to extents format with zero records and wait for the bmapbtd
+	 * scrubber to reconstruct the block mappings.  Directories always
+	 * contain some content, so this is a clear sign of a zapped directory.
+	 * The state checked by xfs_ifork_zapped is not persisted, so this is
+	 * the secondary strategy if repairs are interrupted by a crash or an
+	 * unmount.
+	 */
+	return dp->i_df.if_format == XFS_DINODE_FMT_EXTENTS &&
+	       dp->i_df.if_nextents == 0;
+}

fs/xfs/scrub/dqiterate.c (new file, 211 lines)
@@ -0,0 +1,211 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_bmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/quota.h"
#include "scrub/trace.h"

/* Initialize a dquot iteration cursor. */
void
xchk_dqiter_init(
	struct xchk_dqiter	*cursor,
	struct xfs_scrub	*sc,
	xfs_dqtype_t		dqtype)
{
	cursor->sc = sc;
	cursor->bmap.br_startoff = NULLFILEOFF;
	cursor->dqtype = dqtype & XFS_DQTYPE_REC_MASK;
	cursor->quota_ip = xfs_quota_inode(sc->mp, cursor->dqtype);
	cursor->id = 0;
}

/*
 * Ensure that the cached data fork mapping for the dqiter cursor is fresh and
 * covers the dquot pointed to by the scan cursor.
 */
STATIC int
xchk_dquot_iter_revalidate_bmap(
	struct xchk_dqiter	*cursor)
{
	struct xfs_quotainfo	*qi = cursor->sc->mp->m_quotainfo;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(cursor->quota_ip,
							XFS_DATA_FORK);
	xfs_fileoff_t		fileoff;
	xfs_dqid_t		this_id = cursor->id;
	int			nmaps = 1;
	int			error;

	fileoff = this_id / qi->qi_dqperchunk;

	/*
	 * If we have a mapping for cursor->id and it's still fresh, there's
	 * no need to reread the bmbt.
	 */
	if (cursor->bmap.br_startoff != NULLFILEOFF &&
	    cursor->if_seq == ifp->if_seq &&
	    cursor->bmap.br_startoff + cursor->bmap.br_blockcount > fileoff)
		return 0;

	/* Look up the data fork mapping for the dquot id of interest. */
	error = xfs_bmapi_read(cursor->quota_ip, fileoff,
			XFS_MAX_FILEOFF - fileoff, &cursor->bmap, &nmaps, 0);
	if (error)
		return error;
	if (!nmaps) {
		ASSERT(nmaps > 0);
		return -EFSCORRUPTED;
	}
	if (cursor->bmap.br_startoff > fileoff) {
		ASSERT(cursor->bmap.br_startoff == fileoff);
		return -EFSCORRUPTED;
	}

	cursor->if_seq = ifp->if_seq;
	trace_xchk_dquot_iter_revalidate_bmap(cursor, cursor->id);
	return 0;
}

/* Advance the dqiter cursor to the next non-sparse region of the quota file. */
STATIC int
xchk_dquot_iter_advance_bmap(
	struct xchk_dqiter	*cursor,
	uint64_t		*next_ondisk_id)
{
	struct xfs_quotainfo	*qi = cursor->sc->mp->m_quotainfo;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(cursor->quota_ip,
							XFS_DATA_FORK);
	xfs_fileoff_t		fileoff;
	uint64_t		next_id;
	int			nmaps = 1;
	int			error;

	/* Find the dquot id for the next non-hole mapping. */
	do {
		fileoff = cursor->bmap.br_startoff + cursor->bmap.br_blockcount;
		if (fileoff > XFS_DQ_ID_MAX / qi->qi_dqperchunk) {
			/* The hole goes beyond the max dquot id, we're done */
			*next_ondisk_id = -1ULL;
			return 0;
		}

		error = xfs_bmapi_read(cursor->quota_ip, fileoff,
				XFS_MAX_FILEOFF - fileoff, &cursor->bmap,
				&nmaps, 0);
		if (error)
			return error;
		if (!nmaps) {
			/* Must have reached the end of the mappings. */
			*next_ondisk_id = -1ULL;
			return 0;
		}
		if (cursor->bmap.br_startoff > fileoff) {
			ASSERT(cursor->bmap.br_startoff == fileoff);
			return -EFSCORRUPTED;
		}
	} while (!xfs_bmap_is_real_extent(&cursor->bmap));

	next_id = cursor->bmap.br_startoff * qi->qi_dqperchunk;
	if (next_id > XFS_DQ_ID_MAX) {
		/* The hole goes beyond the max dquot id, we're done */
		*next_ondisk_id = -1ULL;
		return 0;
	}

	/* Propose jumping forward to the dquot in the next allocated block. */
	*next_ondisk_id = next_id;
	cursor->if_seq = ifp->if_seq;
	trace_xchk_dquot_iter_advance_bmap(cursor, *next_ondisk_id);
	return 0;
}
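
[Illustrative aside, not part of the patch: the id-to-offset mapping above is simple integer division. A standalone sketch with a hypothetical dquots-per-block value; the real qi_dqperchunk depends on filesystem geometry (with 4096-byte blocks and 136-byte ondisk dquots it would work out to 30, but treat that figure as an assumption):]

/*
 * Standalone sketch of the id <-> file offset arithmetic used by the two
 * functions above.  DQPERCHUNK is a hypothetical stand-in for
 * qi->qi_dqperchunk.
 */
#include <assert.h>

#define DQPERCHUNK 30ULL	/* hypothetical dquots per quota-file block */

int main(void)
{
	unsigned long long id = 100;
	unsigned long long fileoff = id / DQPERCHUNK;	    /* block holding id */
	unsigned long long first_id = fileoff * DQPERCHUNK; /* first id in it */

	assert(fileoff == 3);
	assert(first_id == 90);	/* what advance_bmap would propose */
	return 0;
}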
/*
 * Find the id of the next highest incore dquot.  Normally this will correspond
 * exactly with the quota file block mappings, but repair might have erased a
 * mapping because it was crosslinked; in that case, we need to re-allocate the
 * space so that we can reset q_blkno.
 */
STATIC void
xchk_dquot_iter_advance_incore(
	struct xchk_dqiter	*cursor,
	uint64_t		*next_incore_id)
{
	struct xfs_quotainfo	*qi = cursor->sc->mp->m_quotainfo;
	struct radix_tree_root	*tree = xfs_dquot_tree(qi, cursor->dqtype);
	struct xfs_dquot	*dq;
	unsigned int		nr_found;

	*next_incore_id = -1ULL;

	mutex_lock(&qi->qi_tree_lock);
	nr_found = radix_tree_gang_lookup(tree, (void **)&dq, cursor->id, 1);
	if (nr_found)
		*next_incore_id = dq->q_id;
	mutex_unlock(&qi->qi_tree_lock);

	trace_xchk_dquot_iter_advance_incore(cursor, *next_incore_id);
}

/*
 * Walk all incore dquots of this filesystem.  Caller must set *@cursorp to
 * zero before the first call, and must not hold the quota file ILOCK.
 * Returns 1 and a valid *@dqpp; 0 and *@dqpp == NULL when there are no more
 * dquots to iterate; or a negative errno.
 */
int
xchk_dquot_iter(
	struct xchk_dqiter	*cursor,
	struct xfs_dquot	**dqpp)
{
	struct xfs_mount	*mp = cursor->sc->mp;
	struct xfs_dquot	*dq = NULL;
	uint64_t		next_ondisk, next_incore = -1ULL;
	unsigned int		lock_mode;
	int			error = 0;

	if (cursor->id > XFS_DQ_ID_MAX)
		return 0;
	next_ondisk = cursor->id;

	/* Revalidate and/or advance the cursor. */
	lock_mode = xfs_ilock_data_map_shared(cursor->quota_ip);
	error = xchk_dquot_iter_revalidate_bmap(cursor);
	if (!error && !xfs_bmap_is_real_extent(&cursor->bmap))
		error = xchk_dquot_iter_advance_bmap(cursor, &next_ondisk);
	xfs_iunlock(cursor->quota_ip, lock_mode);
	if (error)
		return error;

	if (next_ondisk > cursor->id)
		xchk_dquot_iter_advance_incore(cursor, &next_incore);

	/* Pick the next dquot in the sequence and return it. */
	cursor->id = min(next_ondisk, next_incore);
	if (cursor->id > XFS_DQ_ID_MAX)
		return 0;

	trace_xchk_dquot_iter(cursor, cursor->id);

	error = xfs_qm_dqget(mp, cursor->id, cursor->dqtype, false, &dq);
	if (error)
		return error;

	cursor->id = dq->q_id + 1;
	*dqpp = dq;
	return 1;
}
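
[Illustrative aside, not part of the patch: a sketch of how a scrubber might consume this cursor under the 1/0/-errno contract documented above. The function name and loop body are hypothetical placeholders.]

/*
 * Usage sketch only: walk every dquot of one type.  xchk_dqiter_init()
 * and xchk_dquot_iter() are the real APIs above; the rest is made up.
 */
STATIC int
xchk_quota_walk_example(
	struct xfs_scrub	*sc,
	xfs_dqtype_t		dqtype)
{
	struct xchk_dqiter	cursor = { };
	struct xfs_dquot	*dq;
	int			error;

	xchk_dqiter_init(&cursor, sc, dqtype);
	while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) {
		/* ... examine *dq here ... */
		xfs_qm_dqput(dq);
	}
	return error;	/* 0 when the walk completed, or a negative errno */
}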

fs/xfs/scrub/fsb_bitmap.h (new file, 37 lines)
@@ -0,0 +1,37 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_FSB_BITMAP_H__
#define __XFS_SCRUB_FSB_BITMAP_H__

/* Bitmaps, but type-checked for xfs_fsblock_t */

struct xfsb_bitmap {
	struct xbitmap64	fsbitmap;
};

static inline void xfsb_bitmap_init(struct xfsb_bitmap *bitmap)
{
	xbitmap64_init(&bitmap->fsbitmap);
}

static inline void xfsb_bitmap_destroy(struct xfsb_bitmap *bitmap)
{
	xbitmap64_destroy(&bitmap->fsbitmap);
}

static inline int xfsb_bitmap_set(struct xfsb_bitmap *bitmap,
		xfs_fsblock_t start, xfs_filblks_t len)
{
	return xbitmap64_set(&bitmap->fsbitmap, start, len);
}

static inline int xfsb_bitmap_walk(struct xfsb_bitmap *bitmap,
		xbitmap64_walk_fn fn, void *priv)
{
	return xbitmap64_walk(&bitmap->fsbitmap, fn, priv);
}

#endif /* __XFS_SCRUB_FSB_BITMAP_H__ */
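
[Illustrative aside, not part of the patch: a sketch of the wrapper in use. The callback shape is xbitmap64_walk_fn, the same signature xrep_cow_replace satisfies earlier in this merge; the range and function names here are made up.]

/* Usage sketch only: accumulate one fsblock range and walk it back. */
static int
dump_fsb_range(
	uint64_t		start,
	uint64_t		len,
	void			*priv)
{
	/* ... reap or log the range [start, start + len) ... */
	return 0;
}

static int
fsb_bitmap_example(void)
{
	struct xfsb_bitmap	fsbs;
	int			error;

	xfsb_bitmap_init(&fsbs);
	error = xfsb_bitmap_set(&fsbs, 1000, 16);	/* hypothetical range */
	if (!error)
		error = xfsb_bitmap_walk(&fsbs, dump_fsb_range, NULL);
	xfsb_bitmap_destroy(&fsbs);
	return error;
}

The one-member struct costs nothing at runtime, but it lets the compiler reject code that passes an AG-block bitmap where a filesystem-block bitmap was meant.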

fs/xfs/scrub/health.c
@@ -10,8 +10,6 @@
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_btree.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
 #include "xfs_ag.h"
 #include "xfs_health.h"
 #include "scrub/scrub.h"
@@ -117,6 +115,38 @@ xchk_health_mask_for_scrub_type(
 	return type_to_health_flag[scrub_type].sick_mask;
 }
 
+/*
+ * If the scrub state is clean, add @mask to the scrub sick mask to clear
+ * additional sick flags from the metadata object's sick state.
+ */
+void
+xchk_mark_healthy_if_clean(
+	struct xfs_scrub	*sc,
+	unsigned int		mask)
+{
+	if (!(sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+				  XFS_SCRUB_OFLAG_XCORRUPT)))
+		sc->sick_mask |= mask;
+}
+
+/*
+ * If we're scrubbing a piece of file metadata for the first time, does it look
+ * like it has been zapped?  Skip the check if we just repaired the metadata
+ * and are revalidating it.
+ */
+bool
+xchk_file_looks_zapped(
+	struct xfs_scrub	*sc,
+	unsigned int		mask)
+{
+	ASSERT((mask & ~XFS_SICK_INO_ZAPPED) == 0);
+
+	if (sc->flags & XREP_ALREADY_FIXED)
+		return false;
+
+	return xfs_inode_has_sickness(sc->ip, mask);
+}
+
 /*
  * Update filesystem health assessments based on what we found and did.
  *

fs/xfs/scrub/health.h
@@ -10,5 +10,7 @@ unsigned int xchk_health_mask_for_scrub_type(__u32 scrub_type);
 void xchk_update_health(struct xfs_scrub *sc);
 bool xchk_ag_btree_healthy_enough(struct xfs_scrub *sc, struct xfs_perag *pag,
 		xfs_btnum_t btnum);
+void xchk_mark_healthy_if_clean(struct xfs_scrub *sc, unsigned int mask);
+bool xchk_file_looks_zapped(struct xfs_scrub *sc, unsigned int mask);
 
 #endif /* __XFS_SCRUB_HEALTH_H__ */

fs/xfs/scrub/ialloc.c
@@ -585,7 +585,7 @@ xchk_iallocbt_rec(
 	uint16_t		holemask;
 
 	xfs_inobt_btrec_to_irec(mp, rec, &irec);
-	if (xfs_inobt_check_irec(bs->cur, &irec) != NULL) {
+	if (xfs_inobt_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
 		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 		return 0;
 	}
@@ -708,11 +708,10 @@ xchk_iallocbt_xref_rmap_inodes(
 		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 }
 
-/* Scrub the inode btrees for some AG. */
-STATIC int
+/* Scrub one of the inode btrees for some AG. */
+int
 xchk_iallocbt(
-	struct xfs_scrub	*sc,
-	xfs_btnum_t		which)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_btree_cur	*cur;
 	struct xchk_iallocbt	iabt = {
@@ -720,9 +719,23 @@ xchk_iallocbt(
 		.next_startino		= NULLAGINO,
 		.next_cluster_ino	= NULLAGINO,
 	};
+	xfs_btnum_t		which;
 	int			error;
 
-	cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
+	switch (sc->sm->sm_type) {
+	case XFS_SCRUB_TYPE_INOBT:
+		cur = sc->sa.ino_cur;
+		which = XFS_BTNUM_INO;
+		break;
+	case XFS_SCRUB_TYPE_FINOBT:
+		cur = sc->sa.fino_cur;
+		which = XFS_BTNUM_FINO;
+		break;
+	default:
+		ASSERT(0);
+		return -EIO;
+	}
+
 	error = xchk_btree(sc, cur, xchk_iallocbt_rec, &XFS_RMAP_OINFO_INOBT,
 			&iabt);
 	if (error)
@@ -743,20 +756,6 @@ xchk_iallocbt(
 	return error;
 }
 
-int
-xchk_inobt(
-	struct xfs_scrub	*sc)
-{
-	return xchk_iallocbt(sc, XFS_BTNUM_INO);
-}
-
-int
-xchk_finobt(
-	struct xfs_scrub	*sc)
-{
-	return xchk_iallocbt(sc, XFS_BTNUM_FINO);
-}
-
 /* See if an inode btree has (or doesn't have) an inode chunk record. */
 static inline void
 xchk_xref_inode_check(

fs/xfs/scrub/ialloc_repair.c (new file, 884 lines)
@@ -0,0 +1,884 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Inode Btree Repair
 * ==================
 *
 * A quick refresher of inode btrees on a v5 filesystem:
 *
 * - Inode records are read into memory in units of 'inode clusters'.  However
 *   many inodes fit in a cluster buffer is the smallest number of inodes that
 *   can be allocated or freed.  Clusters are never smaller than one fs block
 *   though they can span multiple blocks.  The size (in fs blocks) is
 *   computed with xfs_icluster_size_fsb().  The fs block alignment of a
 *   cluster is computed with xfs_ialloc_cluster_alignment().
 *
 * - Each inode btree record can describe a single 'inode chunk'.  The chunk
 *   size is defined to be 64 inodes.  If sparse inodes are enabled, every
 *   inobt record must be aligned to the chunk size; if not, every record must
 *   be aligned to the start of a cluster.  It is possible to construct an XFS
 *   geometry where one inobt record maps to multiple inode clusters; it is
 *   also possible to construct a geometry where multiple inobt records map to
 *   different parts of one inode cluster.
 *
 * - If sparse inodes are not enabled, the smallest unit of allocation for
 *   inode records is enough to contain one inode chunk's worth of inodes.
 *
 * - If sparse inodes are enabled, the holemask field will be active.  Each
 *   bit of the holemask represents 4 potential inodes; if set, the
 *   corresponding space does *not* contain inodes and must be left alone.
 *   Clusters cannot be smaller than 4 inodes.  The smallest unit of allocation
 *   of inode records is one inode cluster.
 *
 * So what's the rebuild algorithm?
 *
 * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
 * records.  The OWN_INOBT records are the old inode btree blocks and will be
 * cleared out after we've rebuilt the tree.  Each possible inode cluster
 * within an OWN_INODES record will be read in; for each possible inobt record
 * associated with that cluster, compute the freemask calculated from the
 * i_mode data in the inode chunk.  For sparse inodes the holemask will be
 * calculated by creating the properly aligned inobt record and punching out
 * any chunk that's missing.  Inode allocations and frees grab the AGI first,
 * so repair protects itself from concurrent access by locking the AGI.
 *
 * Once we've reconstructed all the inode records, we can create new inode
 * btree roots and reload the btrees.  We rebuild both inode trees at the same
 * time because they have the same rmap owner and it would be more complex to
 * figure out if the other tree isn't in need of a rebuild and which OWN_INOBT
 * blocks it owns.  We have all the data we need to build both, so dump
 * everything and start over.
 *
 * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
 */
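
[Illustrative aside, not part of the patch: a standalone sketch of the holemask arithmetic described above. Sixteen bits cover a 64-inode chunk, so each bit stands for 4 inodes; clearing bits records sub-regions that really do contain inodes. The helper is a stand-in for xfs_inobt_maskn().]

/* Standalone sketch of the sparse-inode holemask update. */
#include <assert.h>
#include <stdint.h>

#define INODES_PER_HOLEMASK_BIT	4

/* n contiguous bits starting at bit i, like xfs_inobt_maskn(). */
static uint16_t maskn(int i, int n)
{
	return (uint16_t)(((1U << n) - 1) << i);
}

int main(void)
{
	uint16_t holemask = 0xFFFF;	/* start fully sparse */

	/* A 32-inode cluster at chunk offset 0 has been found on disk. */
	holemask &= ~maskn(0 / INODES_PER_HOLEMASK_BIT,
			   32 / INODES_PER_HOLEMASK_BIT);
	assert(holemask == 0xFF00);	/* low 8 bits clear: inodes present */
	return 0;
}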
struct xrep_ibt {
	/* Record under construction. */
	struct xfs_inobt_rec_incore	rie;

	/* new inobt information */
	struct xrep_newbt	new_inobt;

	/* new finobt information */
	struct xrep_newbt	new_finobt;

	/* Old inode btree blocks we found in the rmap. */
	struct xagb_bitmap	old_iallocbt_blocks;

	/* Reconstructed inode records. */
	struct xfarray		*inode_records;

	struct xfs_scrub	*sc;

	/* Number of inodes assigned disk space. */
	unsigned int		icount;

	/* Number of inodes in use. */
	unsigned int		iused;

	/* Number of finobt records needed. */
	unsigned int		finobt_recs;

	/* get_records()'s position in the inode record array. */
	xfarray_idx_t		array_cur;
};

/*
 * Is this inode in use?  If the inode is in memory we can tell from i_mode,
 * otherwise we have to check di_mode in the on-disk buffer.  We only care
 * that the high (i.e. non-permission) bits of _mode are zero.  This should be
 * safe because repair keeps all AG headers locked until the end, and any
 * process trying to perform an inode allocation/free must lock the AGI.
 *
 * @cluster_ag_base is the inode offset of the cluster within the AG.
 * @cluster_bp is the cluster buffer.
 * @cluster_index is the inode offset within the inode cluster.
 */
STATIC int
xrep_ibt_check_ifree(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ag_base,
	struct xfs_buf		*cluster_bp,
	unsigned int		cluster_index,
	bool			*inuse)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_dinode	*dip;
	xfs_ino_t		fsino;
	xfs_agino_t		agino;
	xfs_agnumber_t		agno = ri->sc->sa.pag->pag_agno;
	unsigned int		cluster_buf_base;
	unsigned int		offset;
	int			error;

	agino = cluster_ag_base + cluster_index;
	fsino = XFS_AGINO_TO_INO(mp, agno, agino);

	/* Inode uncached or half assembled, read disk buffer */
	cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
	offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize;
	if (offset >= BBTOB(cluster_bp->b_length))
		return -EFSCORRUPTED;
	dip = xfs_buf_offset(cluster_bp, offset);
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
		return -EFSCORRUPTED;

	if (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)
		return -EFSCORRUPTED;

	/* Will the in-core inode tell us if it's in use? */
	error = xchk_inode_is_allocated(sc, agino, inuse);
	if (!error)
		return 0;

	*inuse = dip->di_mode != 0;
	return 0;
}
/* Stash the accumulated inobt record for rebuilding. */
STATIC int
xrep_ibt_stash(
	struct xrep_ibt		*ri)
{
	int			error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
	if (xfs_inobt_check_irec(ri->sc->sa.pag, &ri->rie) != NULL)
		return -EFSCORRUPTED;

	if (ri->rie.ir_freecount > 0)
		ri->finobt_recs++;

	trace_xrep_ibt_found(ri->sc->mp, ri->sc->sa.pag->pag_agno, &ri->rie);

	error = xfarray_append(ri->inode_records, &ri->rie);
	if (error)
		return error;

	ri->rie.ir_startino = NULLAGINO;
	return 0;
}

/*
 * Given an extent of inodes and an inode cluster buffer, calculate the
 * location of the corresponding inobt record (creating it if necessary),
 * then update the parts of the holemask and freemask of that record that
 * correspond to the inode extent we were given.
 *
 * @cluster_ir_startino is the AG inode number of an inobt record that we're
 * proposing to create for this inode cluster.  If sparse inodes are enabled,
 * we must round down to a chunk boundary to find the actual sparse record.
 * @cluster_bp is the buffer of the inode cluster.
 * @nr_inodes is the number of inodes to check from the cluster.
 */
STATIC int
xrep_ibt_cluster_record(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ir_startino,
	struct xfs_buf		*cluster_bp,
	unsigned int		nr_inodes)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_agino_t		ir_startino;
	unsigned int		cluster_base;
	unsigned int		cluster_index;
	int			error = 0;

	ir_startino = cluster_ir_startino;
	if (xfs_has_sparseinodes(mp))
		ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK);
	cluster_base = cluster_ir_startino - ir_startino;

	/*
	 * If the accumulated inobt record doesn't map this cluster, add it to
	 * the list and reset it.
	 */
	if (ri->rie.ir_startino != NULLAGINO &&
	    ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
		error = xrep_ibt_stash(ri);
		if (error)
			return error;
	}

	if (ri->rie.ir_startino == NULLAGINO) {
		ri->rie.ir_startino = ir_startino;
		ri->rie.ir_free = XFS_INOBT_ALL_FREE;
		ri->rie.ir_holemask = 0xFFFF;
		ri->rie.ir_count = 0;
	}

	/* Record the whole cluster. */
	ri->icount += nr_inodes;
	ri->rie.ir_count += nr_inodes;
	ri->rie.ir_holemask &= ~xfs_inobt_maskn(
				cluster_base / XFS_INODES_PER_HOLEMASK_BIT,
				nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);

	/* Which inodes within this cluster are free? */
	for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
		bool		inuse = false;

		error = xrep_ibt_check_ifree(ri, cluster_ir_startino,
				cluster_bp, cluster_index, &inuse);
		if (error)
			return error;
		if (!inuse)
			continue;
		ri->iused++;
		ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base +
						   cluster_index);
	}
	return 0;
}

/*
 * For each inode cluster covering the physical extent recorded by the rmapbt,
 * we must calculate the properly aligned startino of that cluster, then
 * iterate each cluster to fill in used and filled masks appropriately.  We
 * then use the (startino, used, filled) information to construct the
 * appropriate inode records.
 */
STATIC int
xrep_ibt_process_cluster(
	struct xrep_ibt		*ri,
	xfs_agblock_t		cluster_bno)
{
	struct xfs_imap		imap;
	struct xfs_buf		*cluster_bp;
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		cluster_ag_base;
	xfs_agino_t		irec_index;
	unsigned int		nr_inodes;
	int			error;

	nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster,
			XFS_INODES_PER_CHUNK);

	/*
	 * Grab the inode cluster buffer.  This is safe to do with a broken
	 * inobt because imap_to_bp directly maps the buffer without touching
	 * either inode btree.
	 */
	imap.im_blkno = XFS_AGB_TO_DADDR(mp, sc->sa.pag->pag_agno, cluster_bno);
	imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
	imap.im_boffset = 0;
	error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
	if (error)
		return error;

	/*
	 * Record the contents of each possible inobt record mapping this
	 * cluster.
	 */
	cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno);
	for (irec_index = 0;
	     irec_index < igeo->inodes_per_cluster;
	     irec_index += XFS_INODES_PER_CHUNK) {
		error = xrep_ibt_cluster_record(ri,
				cluster_ag_base + irec_index, cluster_bp,
				nr_inodes);
		if (error)
			break;
	}

	xfs_trans_brelse(sc->tp, cluster_bp);
	return error;
}
/* Check for any obvious conflicts in the inode chunk extent. */
STATIC int
xrep_ibt_check_inode_ext(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		agino;
	enum xbtree_recpacking	outcome;
	int			error;

	/* Inode records must be within the AG. */
	if (!xfs_verify_agbext(sc->sa.pag, agbno, len))
		return -EFSCORRUPTED;

	/* The entire record must align to the inode cluster size. */
	if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) ||
	    !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster))
		return -EFSCORRUPTED;

	/*
	 * The entire record must also adhere to the inode cluster alignment
	 * size if sparse inodes are not enabled.
	 */
	if (!xfs_has_sparseinodes(mp) &&
	    (!IS_ALIGNED(agbno, igeo->cluster_align) ||
	     !IS_ALIGNED(agbno + len, igeo->cluster_align)))
		return -EFSCORRUPTED;

	/*
	 * On a sparse inode fs, this cluster could be part of a sparse chunk.
	 * Sparse clusters must be aligned to sparse chunk alignment.
	 */
	if (xfs_has_sparseinodes(mp) &&
	    (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) ||
	     !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align)))
		return -EFSCORRUPTED;

	/* Make sure the entire range of blocks are valid AG inodes. */
	agino = XFS_AGB_TO_AGINO(mp, agbno);
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1;
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}

/* Found a fragment of the old inode btrees; dispose of them later. */
STATIC int
xrep_ibt_record_old_btree_blocks(
	struct xrep_ibt		*ri,
	const struct xfs_rmap_irec *rec)
{
	if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock,
			rec->rm_blockcount);
}

/* Record extents that belong to inode cluster blocks. */
STATIC int
xrep_ibt_record_inode_blocks(
	struct xrep_ibt		*ri,
	const struct xfs_rmap_irec *rec)
{
	struct xfs_mount	*mp = ri->sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agblock_t		cluster_base;
	int			error;

	error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock,
			rec->rm_blockcount);
	if (error)
		return error;

	trace_xrep_ibt_walk_rmap(mp, ri->sc->sa.pag->pag_agno,
			rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
			rec->rm_offset, rec->rm_flags);

	/*
	 * Record the free/hole masks for each inode cluster that could be
	 * mapped by this rmap record.
	 */
	for (cluster_base = 0;
	     cluster_base < rec->rm_blockcount;
	     cluster_base += igeo->blocks_per_cluster) {
		error = xrep_ibt_process_cluster(ri,
				rec->rm_startblock + cluster_base);
		if (error)
			return error;
	}

	return 0;
}

STATIC int
xrep_ibt_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;
	int			error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	switch (rec->rm_owner) {
	case XFS_RMAP_OWN_INOBT:
		return xrep_ibt_record_old_btree_blocks(ri, rec);
	case XFS_RMAP_OWN_INODES:
		return xrep_ibt_record_inode_blocks(ri, rec);
	}
	return 0;
}

/*
 * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
 * btrees (OWN_INOBT).  Figure out if we have enough free space to reconstruct
 * the inode btrees.  The caller must clean up the lists if anything goes
 * wrong.
 */
STATIC int
xrep_ibt_find_inodes(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	ri->rie.ir_startino = NULLAGINO;

	/* Collect all reverse mappings for inode blocks. */
	xrep_ag_btcur_init(sc, &sc->sa);
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri);
	xchk_ag_btcur_free(&sc->sa);
	if (error)
		return error;

	/* If we have a record ready to go, add it to the array. */
	if (ri->rie.ir_startino != NULLAGINO)
		return xrep_ibt_stash(ri);

	return 0;
}

/* Update the AGI counters. */
STATIC int
xrep_ibt_reset_counters(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_agi		*agi = sc->sa.agi_bp->b_addr;
	unsigned int		freecount = ri->icount - ri->iused;

	/* Trigger inode count recalculation */
	xfs_force_summary_recalc(sc->mp);

	/*
	 * The AGI header contains extra information related to the inode
	 * btrees, so we must update those fields here.
	 */
	agi->agi_count = cpu_to_be32(ri->icount);
	agi->agi_freecount = cpu_to_be32(freecount);
	xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
			   XFS_AGI_COUNT | XFS_AGI_FREECOUNT);

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagi(sc);
}

/* Retrieve finobt data for bulk load. */
STATIC int
xrep_fibt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i;
	struct xrep_ibt		*ri = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		do {
			error = xfarray_load(ri->inode_records,
					ri->array_cur++, irec);
		} while (error == 0 && xfs_inobt_rec_freecount(irec) == 0);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Retrieve inobt data for bulk load. */
STATIC int
xrep_ibt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i;
	struct xrep_ibt		*ri = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load(ri->inode_records, ri->array_cur++, irec);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new inobt blocks to the bulk loader. */
STATIC int
xrep_ibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;

	return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr);
}

/* Feed one of the new finobt blocks to the bulk loader. */
STATIC int
xrep_fibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;

	return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr);
}

/* Make sure the records do not overlap in inumber address space. */
STATIC int
xrep_ibt_check_overlap(
	struct xrep_ibt		*ri)
{
	struct xfs_inobt_rec_incore irec;
	xfarray_idx_t		cur;
	xfs_agino_t		next_agino = 0;
	int			error = 0;

	foreach_xfarray_idx(ri->inode_records, cur) {
		if (xchk_should_terminate(ri->sc, &error))
			return error;

		error = xfarray_load(ri->inode_records, cur, &irec);
		if (error)
			return error;

		if (irec.ir_startino < next_agino)
			return -EFSCORRUPTED;

		next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK;
	}

	return error;
}
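
[Illustrative aside, not part of the patch: because the records arrive sorted by startino, overlap detection reduces to a rolling end-of-previous-record comparison, as sketched here with hypothetical plain integers.]

/* Standalone sketch of the rolling overlap check above. */
#include <stdbool.h>

#define CHUNK 64U	/* like XFS_INODES_PER_CHUNK */

static bool chunks_overlap(const unsigned *starts, int nr)
{
	unsigned next = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (starts[i] < next)
			return true;	/* would be -EFSCORRUPTED above */
		next = starts[i] + CHUNK;
	}
	return false;
}

int main(void)
{
	unsigned ok[] = { 0, 64, 256 }, bad[] = { 0, 32 };

	return chunks_overlap(ok, 3) || !chunks_overlap(bad, 2);
}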
/* Build new inode btrees and dispose of the old one. */
STATIC int
xrep_ibt_build_new_trees(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_btree_cur	*ino_cur;
	struct xfs_btree_cur	*fino_cur = NULL;
	xfs_fsblock_t		fsbno;
	bool			need_finobt;
	int			error;

	need_finobt = xfs_has_finobt(sc->mp);

	/*
	 * Create new btrees for staging all the inobt records we collected
	 * earlier.  The records were collected in order of increasing agino,
	 * so we do not have to sort them.  Ensure there are no overlapping
	 * records.
	 */
	error = xrep_ibt_check_overlap(ri);
	if (error)
		return error;

	/*
	 * The new inode btrees will not be rooted in the AGI until we've
	 * successfully rebuilt the tree.
	 *
	 * Start by setting up the inobt staging cursor.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
			XFS_IBT_BLOCK(sc->mp));
	xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
			XFS_AG_RESV_NONE);
	ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
	ri->new_inobt.bload.get_records = xrep_ibt_get_records;

	ino_cur = xfs_inobt_stage_cursor(sc->sa.pag, &ri->new_inobt.afake,
			XFS_BTNUM_INO);
	error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload,
			xfarray_length(ri->inode_records));
	if (error)
		goto err_inocur;

	/* Set up finobt staging cursor. */
	if (need_finobt) {
		enum xfs_ag_resv_type	resv = XFS_AG_RESV_METADATA;

		if (sc->mp->m_finobt_nores)
			resv = XFS_AG_RESV_NONE;

		fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
				XFS_FIBT_BLOCK(sc->mp));
		xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
				fsbno, resv);
		ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
		ri->new_finobt.bload.get_records = xrep_fibt_get_records;

		fino_cur = xfs_inobt_stage_cursor(sc->sa.pag,
				&ri->new_finobt.afake, XFS_BTNUM_FINO);
		error = xfs_btree_bload_compute_geometry(fino_cur,
				&ri->new_finobt.bload, ri->finobt_recs);
		if (error)
			goto err_finocur;
	}

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_finocur;

	/* Reserve all the space we need to build the new btrees. */
	error = xrep_newbt_alloc_blocks(&ri->new_inobt,
			ri->new_inobt.bload.nr_blocks);
	if (error)
		goto err_finocur;

	if (need_finobt) {
		error = xrep_newbt_alloc_blocks(&ri->new_finobt,
				ri->new_finobt.bload.nr_blocks);
		if (error)
			goto err_finocur;
	}

	/* Add all inobt records. */
	ri->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri);
	if (error)
		goto err_finocur;

	/* Add all finobt records. */
	if (need_finobt) {
		ri->array_cur = XFARRAY_CURSOR_INIT;
		error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri);
		if (error)
			goto err_finocur;
	}

	/*
	 * Install the new btrees in the AG header.  After this point the old
	 * btrees are no longer accessible and the new trees are live.
	 */
	xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp);
	xfs_btree_del_cursor(ino_cur, 0);

	if (fino_cur) {
		xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp);
		xfs_btree_del_cursor(fino_cur, 0);
	}

	/* Reset the AGI counters now that we've changed the inode roots. */
	error = xrep_ibt_reset_counters(ri);
	if (error)
		goto err_finobt;

	/* Free unused blocks and bitmap. */
	if (need_finobt) {
		error = xrep_newbt_commit(&ri->new_finobt);
		if (error)
			goto err_inobt;
	}
	error = xrep_newbt_commit(&ri->new_inobt);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_finocur:
	if (need_finobt)
		xfs_btree_del_cursor(fino_cur, error);
err_inocur:
	xfs_btree_del_cursor(ino_cur, error);
err_finobt:
	if (need_finobt)
		xrep_newbt_cancel(&ri->new_finobt);
err_inobt:
	xrep_newbt_cancel(&ri->new_inobt);
	return error;
}
/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_ibt_remove_old_trees(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	/*
	 * Free the old inode btree blocks if they're not in use.  It's ok to
	 * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG
	 * reservation because we reset the reservation before releasing the
	 * AGI and AGF header buffer locks.
	 */
	error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks,
			&XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE);
	if (error)
		return error;

	/*
	 * If the finobt is enabled and has a per-AG reservation, make sure we
	 * reinitialize the per-AG reservations.
	 */
	if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores)
		sc->flags |= XREP_RESET_PERAG_RESV;

	return 0;
}

/* Repair both inode btrees. */
int
xrep_iallocbt(
	struct xfs_scrub	*sc)
{
	struct xrep_ibt		*ri;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	xfs_agino_t		first_agino, last_agino;
	int			error = 0;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS);
	if (!ri)
		return -ENOMEM;
	ri->sc = sc;

	/* We rebuild both inode btrees. */
	sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;

	/* Set up enough storage to handle an AG with nothing but inodes. */
	xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino);
	last_agino /= XFS_INODES_PER_CHUNK;
	descr = xchk_xfile_ag_descr(sc, "inode index records");
	error = xfarray_create(descr, last_agino,
			sizeof(struct xfs_inobt_rec_incore),
			&ri->inode_records);
	kfree(descr);
	if (error)
		goto out_ri;

	/* Collect the inode data and find the old btree blocks. */
	xagb_bitmap_init(&ri->old_iallocbt_blocks);
	error = xrep_ibt_find_inodes(ri);
	if (error)
		goto out_bitmap;

	/* Rebuild the inode indexes. */
	error = xrep_ibt_build_new_trees(ri);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_ibt_remove_old_trees(ri);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&ri->old_iallocbt_blocks);
	xfarray_destroy(ri->inode_records);
out_ri:
	kfree(ri);
	return error;
}

/* Make sure both btrees are ok after we've rebuilt them. */
int
xrep_revalidate_iallocbt(
	struct xfs_scrub	*sc)
{
	__u32			old_type = sc->sm->sm_type;
	int			error;

	/*
	 * We must update sm_type temporarily so that the tree-to-tree cross
	 * reference checks will work in the correct direction, and also so
	 * that tracing will report correctly if there are more errors.
	 */
	sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT;
	error = xchk_iallocbt(sc);
	if (error)
		goto out;

	if (xfs_has_finobt(sc->mp)) {
		sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
		error = xchk_iallocbt(sc);
	}

out:
	sc->sm->sm_type = old_type;
	return error;
}
@@ -25,6 +25,7 @@
 #include "scrub/common.h"
 #include "scrub/btree.h"
 #include "scrub/trace.h"
+#include "scrub/repair.h"

 /* Prepare the attached inode for scrubbing. */
 static inline int
@@ -39,6 +40,10 @@ xchk_prepare_iscrub(
 	if (error)
 		return error;

+	error = xchk_ino_dqattach(sc);
+	if (error)
+		return error;
+
 	xchk_ilock(sc, XFS_ILOCK_EXCL);
 	return 0;
 }
@@ -95,8 +100,8 @@ xchk_setup_inode(
 	if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
 		return -ENOENT;

-	/* Try a regular untrusted iget. */
-	error = xchk_iget(sc, sc->sm->sm_ino, &ip);
+	/* Try a safe untrusted iget. */
+	error = xchk_iget_safe(sc, sc->sm->sm_ino, &ip);
 	if (!error)
 		return xchk_install_handle_iscrub(sc, ip);
 	if (error == -ENOENT)
@@ -181,8 +186,11 @@ xchk_setup_inode(
 	 * saying the inode is allocated and the icache being unable to load
 	 * the inode until we can flag the corruption in xchk_inode.  The
 	 * scrub function has to note the corruption, since we're not really
-	 * supposed to do that from the setup function.
+	 * supposed to do that from the setup function.  Save the mapping to
+	 * make repairs to the ondisk inode buffer.
 	 */
+	if (xchk_could_repair(sc))
+		xrep_setup_inode(sc, &imap);
 	return 0;

 out_cancel:
@@ -338,6 +346,10 @@ xchk_inode_flags2(
 	if (xfs_dinode_has_bigtime(dip) && !xfs_has_bigtime(mp))
 		goto bad;

+	/* no large extent counts without the filesystem feature */
+	if ((flags2 & XFS_DIFLAG2_NREXT64) && !xfs_has_large_extent_counts(mp))
+		goto bad;
+
 	return;
 bad:
 	xchk_ino_set_corrupt(sc, ino);
@@ -548,7 +560,7 @@ xchk_dinode(
 	}

 	/* di_forkoff */
-	if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
+	if (XFS_DFORK_BOFF(dip) >= mp->m_sb.sb_inodesize)
 		xchk_ino_set_corrupt(sc, ino);
 	if (naextents != 0 && dip->di_forkoff == 0)
 		xchk_ino_set_corrupt(sc, ino);
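One way to read the di_forkoff check rewritten above: di_forkoff is stored in 8-byte units, so XFS_DFORK_BOFF() is di_forkoff shifted left by 3, and any byte offset at or past the inode size cannot be a valid attr fork start. A small arithmetic sketch (plain C; the sizes shown are illustrative, not read from a real inode):

#include <stdint.h>
#include <stdio.h>

/* di_forkoff is stored in 8-byte units; BOFF converts it to bytes. */
static uint32_t dfork_boff(uint8_t di_forkoff)
{
	return (uint32_t)di_forkoff << 3;
}

int main(void)
{
	uint16_t inodesize = 512;	/* example v5 inode size */
	uint8_t forkoff = 64;		/* 64 * 8 = 512 bytes: out of bounds */

	printf("boff=%u corrupt=%d\n", dfork_boff(forkoff),
			dfork_boff(forkoff) >= inodesize);
	return 0;
}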

fs/xfs/scrub/inode_repair.c | 1525 lines (new file; diff suppressed because it is too large)

fs/xfs/scrub/newbt.c | 559 lines (new file):
@@ -0,0 +1,559 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/newbt.h"

/*
 * Estimate proper slack values for a btree that's being reloaded.
 *
 * Under most circumstances, we'll take whatever default loading value the
 * btree bulk loading code calculates for us.  However, there are some
 * exceptions to this rule:
 *
 * (0) If someone turned one of the debug knobs.
 * (1) If this is a per-AG btree and the AG has less than 10% space free.
 * (2) If this is an inode btree and the FS has less than 10% space free.
 *
 * In any of these cases, format the new btree blocks almost completely full
 * to minimize space usage.
 */
static void
xrep_newbt_estimate_slack(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_btree_bload	*bload = &xnr->bload;
	uint64_t		free;
	uint64_t		sz;

	/*
	 * The xfs_globals values are set to -1 (i.e. take the bload defaults)
	 * unless someone has set them otherwise, so we just pull the values
	 * here.
	 */
	bload->leaf_slack = xfs_globals.bload_leaf_slack;
	bload->node_slack = xfs_globals.bload_node_slack;

	if (sc->ops->type == ST_PERAG) {
		free = sc->sa.pag->pagf_freeblks;
		sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
	} else {
		free = percpu_counter_sum(&sc->mp->m_fdblocks);
		sz = sc->mp->m_sb.sb_dblocks;
	}

	/* No further changes if there's more than 10% free space left. */
	if (free >= div_u64(sz, 10))
		return;

	/*
	 * We're low on space; load the btrees as tightly as possible.  Leave
	 * a couple of open slots in each btree block so that we don't end up
	 * splitting the btrees like crazy after a mount.
	 */
	if (bload->leaf_slack < 0)
		bload->leaf_slack = 2;
	if (bload->node_slack < 0)
		bload->node_slack = 2;
}
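The 10% cutoff in xrep_newbt_estimate_slack() is a plain integer-division threshold. A minimal standalone sketch of just that predicate (plain C, not kernel code; div_u64() is modeled with ordinary 64-bit division):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the low-space test in xrep_newbt_estimate_slack(). */
static bool keep_default_slack(uint64_t free, uint64_t sz)
{
	/* div_u64(sz, 10) in the kernel; plain division suffices here */
	return free >= sz / 10;
}

int main(void)
{
	/* A 1M-block AG: 100000 free blocks keeps defaults, 99999 does not. */
	printf("%d %d\n", keep_default_slack(100000, 1000000),
			keep_default_slack(99999, 1000000));
	return 0;
}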
/* Initialize accounting resources for staging a new AG btree. */
void
xrep_newbt_init_ag(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc,
	const struct xfs_owner_info *oinfo,
	xfs_fsblock_t		alloc_hint,
	enum xfs_ag_resv_type	resv)
{
	memset(xnr, 0, sizeof(struct xrep_newbt));
	xnr->sc = sc;
	xnr->oinfo = *oinfo; /* structure copy */
	xnr->alloc_hint = alloc_hint;
	xnr->resv = resv;
	INIT_LIST_HEAD(&xnr->resv_list);
	xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
	xrep_newbt_estimate_slack(xnr);
}

/* Initialize accounting resources for staging a new inode fork btree. */
int
xrep_newbt_init_inode(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc,
	int			whichfork,
	const struct xfs_owner_info *oinfo)
{
	struct xfs_ifork	*ifp;

	ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
	if (!ifp)
		return -ENOMEM;

	xrep_newbt_init_ag(xnr, sc, oinfo,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
			XFS_AG_RESV_NONE);
	xnr->ifake.if_fork = ifp;
	xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
	return 0;
}

/*
 * Initialize accounting resources for staging a new btree.  Callers are
 * expected to add their own reservations (and clean them up) manually.
 */
void
xrep_newbt_init_bare(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc)
{
	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
			XFS_AG_RESV_NONE);
}

/*
 * Designate specific blocks to be used to build our new btree.  @pag must be
 * a passive reference.
 */
STATIC int
xrep_newbt_add_blocks(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	const struct xfs_alloc_arg *args)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xrep_newbt_resv	*resv;
	int			error;

	resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
	if (!resv)
		return -ENOMEM;

	INIT_LIST_HEAD(&resv->list);
	resv->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
	resv->len = args->len;
	resv->used = 0;
	resv->pag = xfs_perag_hold(pag);

	if (args->tp) {
		ASSERT(xnr->oinfo.oi_offset == 0);

		error = xfs_alloc_schedule_autoreap(args, true, &resv->autoreap);
		if (error)
			goto out_pag;
	}

	list_add_tail(&resv->list, &xnr->resv_list);
	return 0;
out_pag:
	xfs_perag_put(resv->pag);
	kfree(resv);
	return error;
}

/*
 * Add an extent to the new btree reservation pool.  Callers are required to
 * reap this reservation manually if the repair is cancelled.  @pag must be a
 * passive reference.
 */
int
xrep_newbt_add_extent(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xfs_alloc_arg	args = {
		.tp		= NULL, /* no autoreap */
		.oinfo		= xnr->oinfo,
		.fsbno		= XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno),
		.len		= len,
		.resv		= xnr->resv,
	};

	return xrep_newbt_add_blocks(xnr, pag, &args);
}

/* Don't let our allocation hint take us beyond this AG */
static inline void
xrep_newbt_validate_ag_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);

	if (agno == sc->sa.pag->pag_agno &&
	    xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
					 XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for a new per-AG btree. */
STATIC int
xrep_newbt_alloc_ag_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	ASSERT(sc->sa.pag != NULL);

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		xfs_agnumber_t		agno;

		xrep_newbt_validate_ag_alloc_hint(xnr);

		error = xfs_alloc_vextent_near_bno(&args, xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_ag_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		if (agno != sc->sa.pag->pag_agno) {
			ASSERT(agno == sc->sa.pag->pag_agno);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, sc->sa.pag, &args);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Don't let our allocation hint take us beyond EOFS */
static inline void
xrep_newbt_validate_file_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;

	if (xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for our new file-based btree. */
STATIC int
xrep_newbt_alloc_file_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		struct xfs_perag	*pag;
		xfs_agnumber_t		agno;

		xrep_newbt_validate_file_alloc_hint(xnr);

		error = xfs_alloc_vextent_start_ag(&args, xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_file_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		pag = xfs_perag_get(mp, agno);
		if (!pag) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, pag, &args);
		xfs_perag_put(pag);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Allocate disk space for our new btree. */
int
xrep_newbt_alloc_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	if (xnr->sc->ip)
		return xrep_newbt_alloc_file_blocks(xnr, nr_blocks);
	return xrep_newbt_alloc_ag_blocks(xnr, nr_blocks);
}

/*
 * Free the unused part of a space extent that was reserved for a new ondisk
 * structure.  Returns the number of EFIs logged or a negative errno.
 */
STATIC int
xrep_newbt_free_extent(
	struct xrep_newbt	*xnr,
	struct xrep_newbt_resv	*resv,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agblock_t		free_agbno = resv->agbno;
	xfs_extlen_t		free_aglen = resv->len;
	xfs_fsblock_t		fsbno;
	int			error;

	if (!btree_committed || resv->used == 0) {
		/*
		 * If we're not committing a new btree or we didn't use the
		 * space reservation, let the existing EFI free the entire
		 * space extent.
		 */
		trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno,
				free_agbno, free_aglen, xnr->oinfo.oi_owner);
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		return 1;
	}

	/*
	 * We used space and committed the btree.  Cancel the autoreap, remove
	 * the written blocks from the reservation, and possibly log a new EFI
	 * to free any unused reservation space.
	 */
	xfs_alloc_cancel_autoreap(sc->tp, &resv->autoreap);
	free_agbno += resv->used;
	free_aglen -= resv->used;

	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, xnr->oinfo.oi_owner);

	ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
	ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);

	/*
	 * Use EFIs to free the reservations.  This reduces the chance
	 * that we leak blocks if the system goes down.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
	error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
			xnr->resv, true);
	if (error)
		return error;

	return 1;
}

/* Free all the accounting info and disk space we reserved for a new btree. */
STATIC int
xrep_newbt_free(
	struct xrep_newbt	*xnr,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xrep_newbt_resv	*resv, *n;
	unsigned int		freed = 0;
	int			error = 0;

	/*
	 * If the filesystem already went down, we can't free the blocks.
	 * Skip ahead to freeing the incore metadata because we can't fix
	 * anything.
	 */
	if (xfs_is_shutdown(sc->mp))
		goto junkit;

	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		int ret;

		ret = xrep_newbt_free_extent(xnr, resv, btree_committed);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
		if (ret < 0) {
			error = ret;
			goto junkit;
		}

		freed += ret;
		if (freed >= XREP_MAX_ITRUNCATE_EFIS) {
			error = xrep_defer_finish(sc);
			if (error)
				goto junkit;
			freed = 0;
		}
	}

	if (freed)
		error = xrep_defer_finish(sc);

junkit:
	/*
	 * If we still have reservations attached to @newbt, cleanup must have
	 * failed and the filesystem is about to go down.  Clean up the incore
	 * reservations and try to commit to freeing the space we used.
	 */
	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
	}

	if (sc->ip) {
		kmem_cache_free(xfs_ifork_cache, xnr->ifake.if_fork);
		xnr->ifake.if_fork = NULL;
	}

	return error;
}

/*
 * Free all the accounting info and unused disk space allocations after
 * committing a new btree.
 */
int
xrep_newbt_commit(
	struct xrep_newbt	*xnr)
{
	return xrep_newbt_free(xnr, true);
}

/*
 * Free all the accounting info and all of the disk space we reserved for a
 * new btree that we're not going to commit.  We want to try to roll things
 * back cleanly for things like ENOSPC midway through allocation.
 */
void
xrep_newbt_cancel(
	struct xrep_newbt	*xnr)
{
	xrep_newbt_free(xnr, false);
}

/* Feed one of the reserved btree blocks to the bulk loader. */
int
xrep_newbt_claim_block(
	struct xfs_btree_cur	*cur,
	struct xrep_newbt	*xnr,
	union xfs_btree_ptr	*ptr)
{
	struct xrep_newbt_resv	*resv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_agblock_t		agbno;

	/*
	 * The first item in the list should always have a free block unless
	 * we're completely out.
	 */
	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
	if (resv->used == resv->len)
		return -ENOSPC;

	/*
	 * Peel off a block from the start of the reservation.  We allocate
	 * blocks in order to place blocks on disk in increasing record or key
	 * order.  The block reservations tend to end up on the list in
	 * decreasing order, which hopefully results in leaf blocks ending up
	 * together.
	 */
	agbno = resv->agbno + resv->used;
	resv->used++;

	/* If we used all the blocks in this reservation, move it to the end. */
	if (resv->used == resv->len)
		list_move_tail(&resv->list, &xnr->resv_list);

	trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
			xnr->oinfo.oi_owner);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
								agbno));
	else
		ptr->s = cpu_to_be32(agbno);

	/* Relog all the EFIs. */
	return xrep_defer_finish(xnr->sc);
}

/* How many reserved blocks are unused? */
unsigned int
xrep_newbt_unused_blocks(
	struct xrep_newbt	*xnr)
{
	struct xrep_newbt_resv	*resv;
	unsigned int		unused = 0;

	list_for_each_entry(resv, &xnr->resv_list, list)
		unused += resv->len - resv->used;
	return unused;
}
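For orientation, this is roughly how a repair function is expected to drive the API above, end to end. A hedged sketch only, written in kernel style but not compilable outside fs/xfs: xrep_mybt_rebuild() and xrep_mybt_bload() are hypothetical stand-ins for a real rebuild routine, while the xrep_newbt_* calls and XFS_RMAP_OINFO_AG are the real interfaces declared in newbt.h below.

/*
 * Hypothetical caller sketch (not kernel code): stage a new per-AG btree,
 * feed blocks to the bulk loader, then commit or roll back.  Error handling
 * is abbreviated.
 */
STATIC int
xrep_mybt_rebuild(
	struct xfs_scrub	*sc)
{
	struct xrep_newbt	xnr;
	int			error;

	/* Set up accounting state; blocks are owned by the AG metadata. */
	xrep_newbt_init_ag(&xnr, sc, &XFS_RMAP_OINFO_AG, NULLFSBLOCK,
			XFS_AG_RESV_NONE);

	/* Grab disk space up front; autoreap EFIs protect against crashes. */
	error = xrep_newbt_alloc_blocks(&xnr, 16);
	if (error)
		goto out_cancel;

	/*
	 * Bulk-load the staged records.  The loader calls back into
	 * xrep_newbt_claim_block() for each new btree block it formats.
	 */
	error = xrep_mybt_bload(sc, &xnr);
	if (error)
		goto out_cancel;

	/* Success: free only the unused tail of the reservations. */
	return xrep_newbt_commit(&xnr);

out_cancel:
	/* Failure: give back everything we reserved. */
	xrep_newbt_cancel(&xnr);
	return error;
}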

fs/xfs/scrub/newbt.h | 68 lines (new file):
@@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_NEWBT_H__
#define __XFS_SCRUB_NEWBT_H__

struct xrep_newbt_resv {
	/* Link to list of extents that we've reserved. */
	struct list_head	list;

	struct xfs_perag	*pag;

	/* Auto-freeing this reservation if we don't commit. */
	struct xfs_alloc_autoreap autoreap;

	/* AG block of the extent we reserved. */
	xfs_agblock_t		agbno;

	/* Length of the reservation. */
	xfs_extlen_t		len;

	/* How much of this reservation has been used. */
	xfs_extlen_t		used;
};

struct xrep_newbt {
	struct xfs_scrub	*sc;

	/* List of extents that we've reserved. */
	struct list_head	resv_list;

	/* Fake root for new btree. */
	union {
		struct xbtree_afakeroot	afake;
		struct xbtree_ifakeroot	ifake;
	};

	/* rmap owner of these blocks */
	struct xfs_owner_info	oinfo;

	/* btree geometry for the bulk loader */
	struct xfs_btree_bload	bload;

	/* Allocation hint */
	xfs_fsblock_t		alloc_hint;

	/* per-ag reservation type */
	enum xfs_ag_resv_type	resv;
};

void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct xfs_scrub *sc);
void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct xfs_scrub *sc,
		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
		enum xfs_ag_resv_type resv);
int xrep_newbt_init_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc,
		int whichfork, const struct xfs_owner_info *oinfo);
int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
int xrep_newbt_add_extent(struct xrep_newbt *xnr, struct xfs_perag *pag,
		xfs_agblock_t agbno, xfs_extlen_t len);
void xrep_newbt_cancel(struct xrep_newbt *xnr);
int xrep_newbt_commit(struct xrep_newbt *xnr);
int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
		union xfs_btree_ptr *ptr);
unsigned int xrep_newbt_unused_blocks(struct xrep_newbt *xnr);

#endif /* __XFS_SCRUB_NEWBT_H__ */

fs/xfs/scrub/off_bitmap.h | 37 lines (new file):
@@ -0,0 +1,37 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_OFF_BITMAP_H__
#define __XFS_SCRUB_OFF_BITMAP_H__

/* Bitmaps, but type-checked for xfs_fileoff_t */

struct xoff_bitmap {
	struct xbitmap64	offbitmap;
};

static inline void xoff_bitmap_init(struct xoff_bitmap *bitmap)
{
	xbitmap64_init(&bitmap->offbitmap);
}

static inline void xoff_bitmap_destroy(struct xoff_bitmap *bitmap)
{
	xbitmap64_destroy(&bitmap->offbitmap);
}

static inline int xoff_bitmap_set(struct xoff_bitmap *bitmap,
		xfs_fileoff_t off, xfs_filblks_t len)
{
	return xbitmap64_set(&bitmap->offbitmap, off, len);
}

static inline int xoff_bitmap_walk(struct xoff_bitmap *bitmap,
		xbitmap64_walk_fn fn, void *priv)
{
	return xbitmap64_walk(&bitmap->offbitmap, fn, priv);
}

#endif /* __XFS_SCRUB_OFF_BITMAP_H__ */
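The header above is an instance of a small type-safety idiom: wrapping a generic bitmap in a one-member struct gives each index space (file offsets, AG blocks, fs blocks) a distinct C type, so they cannot be mixed by accident. A minimal standalone illustration with invented names:

/*
 * Standalone illustration (invented names): wrapping the generic bitmap in
 * per-type structs means passing an off_bitmap where an fsb_bitmap is
 * expected becomes a compile error, even though both contain the same
 * machinery.
 */
struct bitmap64 { unsigned long long bits; };	/* stand-in for xbitmap64 */

struct off_bitmap { struct bitmap64 b; };	/* file offsets */
struct fsb_bitmap { struct bitmap64 b; };	/* fs block numbers */

static inline void off_bitmap_mark(struct off_bitmap *bm) { bm->b.bits = 1; }
static inline void fsb_bitmap_mark(struct fsb_bitmap *bm) { bm->b.bits = 1; }

/* off_bitmap_mark(&some_fsb_bitmap) now fails to compile. */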
@@ -156,6 +156,16 @@ xchk_parent_validate(
 		goto out_rele;
 	}

+	/*
+	 * We cannot yet validate this parent pointer if the directory looks as
+	 * though it has been zapped by the inode record repair code.
+	 */
+	if (xchk_dir_looks_zapped(dp)) {
+		error = -EBUSY;
+		xchk_set_incomplete(sc);
+		goto out_unlock;
+	}
+
 	/* Look for a directory entry in the parent pointing to the child. */
 	error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc);
 	if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
@@ -217,6 +227,13 @@ xchk_parent(
 		 */
 		error = xchk_parent_validate(sc, parent_ino);
 	} while (error == -EAGAIN);
+	if (error == -EBUSY) {
+		/*
+		 * We could not scan a directory, so we marked the check
+		 * incomplete.  No further error return is necessary.
+		 */
+		return 0;
+	}

 	return error;
 }
@@ -6,6 +6,7 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
+#include "xfs_bit.h"
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
@@ -17,9 +18,10 @@
 #include "xfs_bmap.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
+#include "scrub/quota.h"

 /* Convert a scrub type code to a DQ flag, or return 0 if error. */
-static inline xfs_dqtype_t
+xfs_dqtype_t
 xchk_quota_to_dqtype(
 	struct xfs_scrub	*sc)
 {
@@ -75,14 +77,70 @@ struct xchk_quota_info {
 	xfs_dqid_t		last_id;
 };

+/* There's a written block backing this dquot, right? */
+STATIC int
+xchk_quota_item_bmap(
+	struct xfs_scrub	*sc,
+	struct xfs_dquot	*dq,
+	xfs_fileoff_t		offset)
+{
+	struct xfs_bmbt_irec	irec;
+	struct xfs_mount	*mp = sc->mp;
+	int			nmaps = 1;
+	int			error;
+
+	if (!xfs_verify_fileoff(mp, offset)) {
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		return 0;
+	}
+
+	if (dq->q_fileoffset != offset) {
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		return 0;
+	}
+
+	error = xfs_bmapi_read(sc->ip, offset, 1, &irec, &nmaps, 0);
+	if (error)
+		return error;
+
+	if (nmaps != 1) {
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		return 0;
+	}
+
+	if (!xfs_verify_fsbno(mp, irec.br_startblock))
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+	if (XFS_FSB_TO_DADDR(mp, irec.br_startblock) != dq->q_blkno)
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+	if (!xfs_bmap_is_written_extent(&irec))
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+	return 0;
+}
+
+/* Complain if a quota timer is incorrectly set. */
+static inline void
+xchk_quota_item_timer(
+	struct xfs_scrub	*sc,
+	xfs_fileoff_t		offset,
+	const struct xfs_dquot_res *res)
+{
+	if ((res->softlimit && res->count > res->softlimit) ||
+	    (res->hardlimit && res->count > res->hardlimit)) {
+		if (!res->timer)
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+	} else {
+		if (res->timer)
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+	}
+}
+
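The timer rule checked above is symmetric: a timer must be armed exactly when usage exceeds an enforced soft or hard limit. A standalone sketch of the predicate (plain C, invented names; the kernel uses time64_t for the timer):

#include <stdbool.h>
#include <stdint.h>

/*
 * Mirror of the xchk_quota_item_timer() expectation: the timer must be set
 * iff usage exceeds an enforced soft or hard limit.  Returns true when the
 * combination is consistent.
 */
static bool quota_timer_ok(uint64_t soft, uint64_t hard, uint64_t count,
			   uint64_t timer)
{
	bool over = (soft && count > soft) || (hard && count > hard);

	return over ? timer != 0 : timer == 0;
}

/*
 * quota_timer_ok(100, 200, 150, t) demands t != 0 (soft limit exceeded);
 * quota_timer_ok(100, 200,  50, t) demands t == 0 (under both limits);
 * quota_timer_ok(0, 0, 12345, t) demands t == 0 (no limits enforced).
 */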
 /* Scrub the fields in an individual quota item. */
 STATIC int
 xchk_quota_item(
-	struct xfs_dquot	*dq,
-	xfs_dqtype_t		dqtype,
-	void			*priv)
+	struct xchk_quota_info	*sqi,
+	struct xfs_dquot	*dq)
 {
-	struct xchk_quota_info	*sqi = priv;
 	struct xfs_scrub	*sc = sqi->sc;
 	struct xfs_mount	*mp = sc->mp;
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
@@ -93,6 +151,17 @@ xchk_quota_item(
 	if (xchk_should_terminate(sc, &error))
 		return error;

+	/*
+	 * We want to validate the bmap record for the storage backing this
+	 * dquot, so we need to lock the dquot and the quota file.  For quota
+	 * operations, the locking order is first the ILOCK and then the dquot.
+	 * However, dqiterate gave us a locked dquot, so drop the dquot lock to
+	 * get the ILOCK.
+	 */
+	xfs_dqunlock(dq);
+	xchk_ilock(sc, XFS_ILOCK_SHARED);
+	xfs_dqlock(dq);
+
 	/*
 	 * Except for the root dquot, the actual dquot we got must either have
 	 * the same or higher id as we saw before.
@@ -103,6 +172,11 @@ xchk_quota_item(

 	sqi->last_id = dq->q_id;

+	error = xchk_quota_item_bmap(sc, dq, offset);
+	xchk_iunlock(sc, XFS_ILOCK_SHARED);
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
+		return error;
+
 	/*
 	 * Warn if the hard limits are larger than the fs.
 	 * Administrators can do this, though in production this seems
@@ -166,6 +240,10 @@ xchk_quota_item(
 	    dq->q_rtb.count > dq->q_rtb.hardlimit)
 		xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);

+	xchk_quota_item_timer(sc, offset, &dq->q_blk);
+	xchk_quota_item_timer(sc, offset, &dq->q_ino);
+	xchk_quota_item_timer(sc, offset, &dq->q_rtb);
+
 out:
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return -ECANCELED;
@@ -191,7 +269,7 @@ xchk_quota_data_fork(
 		return error;

 	/* Check for data fork problems that apply only to quota files. */
-	max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;
+	max_dqid_off = XFS_DQ_ID_MAX / qi->qi_dqperchunk;
 	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
 	for_each_xfs_iext(ifp, &icur, &irec) {
 		if (xchk_should_terminate(sc, &error))
@@ -218,9 +296,11 @@ int
 xchk_quota(
 	struct xfs_scrub	*sc)
 {
-	struct xchk_quota_info	sqi;
+	struct xchk_dqiter	cursor = { };
+	struct xchk_quota_info	sqi = { .sc = sc };
 	struct xfs_mount	*mp = sc->mp;
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
+	struct xfs_dquot	*dq;
 	xfs_dqtype_t		dqtype;
 	int			error = 0;

@@ -239,10 +319,15 @@ xchk_quota(
 	 * functions.
 	 */
 	xchk_iunlock(sc, sc->ilock_flags);
-	sqi.sc = sc;
-	sqi.last_id = 0;
-	error = xfs_qm_dqiterate(mp, dqtype, xchk_quota_item, &sqi);
-	xchk_ilock(sc, XFS_ILOCK_EXCL);

+	/* Now look for things that the quota verifiers won't complain about. */
+	xchk_dqiter_init(&cursor, sc, dqtype);
+	while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) {
+		error = xchk_quota_item(&sqi, dq);
+		xfs_qm_dqput(dq);
+		if (error)
+			break;
+	}
 	if (error == -ECANCELED)
 		error = 0;
 	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK,

fs/xfs/scrub/quota.h | 36 lines (new file):
@@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_QUOTA_H__
#define __XFS_SCRUB_QUOTA_H__

xfs_dqtype_t xchk_quota_to_dqtype(struct xfs_scrub *sc);

/* dquot iteration code */

struct xchk_dqiter {
	struct xfs_scrub	*sc;

	/* Quota file that we're walking. */
	struct xfs_inode	*quota_ip;

	/* Cached data fork mapping for the dquot. */
	struct xfs_bmbt_irec	bmap;

	/* The next dquot to scan. */
	uint64_t		id;

	/* Quota type (user/group/project). */
	xfs_dqtype_t		dqtype;

	/* Data fork sequence number to detect stale mappings. */
	unsigned int		if_seq;
};

void xchk_dqiter_init(struct xchk_dqiter *cursor, struct xfs_scrub *sc,
		xfs_dqtype_t dqtype);
int xchk_dquot_iter(struct xchk_dqiter *cursor, struct xfs_dquot **dqpp);

#endif /* __XFS_SCRUB_QUOTA_H__ */

fs/xfs/scrub/quota_repair.c | 575 lines (new file):
@@ -0,0 +1,575 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_dquot.h"
#include "xfs_dquot_item.h"
#include "xfs_reflink.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/quota.h"
#include "scrub/trace.h"
#include "scrub/repair.h"

/*
 * Quota Repair
 * ============
 *
 * Quota repairs are fairly simplistic; we fix everything that the dquot
 * verifiers complain about, cap any counters or limits that make no sense,
 * and schedule a quotacheck if we had to fix anything.  We also repair any
 * data fork extent records that don't apply to metadata files.
 */

struct xrep_quota_info {
	struct xfs_scrub	*sc;
	bool			need_quotacheck;
};

/*
 * Allocate a new block into a sparse hole in the quota file backing this
 * dquot, initialize the block, and commit the whole mess.
 */
STATIC int
xrep_quota_item_fill_bmap_hole(
	struct xfs_scrub	*sc,
	struct xfs_dquot	*dq,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_buf		*bp;
	struct xfs_mount	*mp = sc->mp;
	int			nmaps = 1;
	int			error;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Map a block into the file. */
	error = xfs_trans_reserve_more(sc->tp, XFS_QM_DQALLOC_SPACE_RES(mp),
			0);
	if (error)
		return error;

	error = xfs_bmapi_write(sc->tp, sc->ip, dq->q_fileoffset,
			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0,
			irec, &nmaps);
	if (error)
		return error;
	if (nmaps != 1)
		return -ENOSPC;

	dq->q_blkno = XFS_FSB_TO_DADDR(mp, irec->br_startblock);

	trace_xrep_dquot_item_fill_bmap_hole(sc->mp, dq->q_type, dq->q_id);

	/* Initialize the new block. */
	error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp, dq->q_blkno,
			mp->m_quotainfo->qi_dqchunklen, 0, &bp);
	if (error)
		return error;
	bp->b_ops = &xfs_dquot_buf_ops;

	xfs_qm_init_dquot_blk(sc->tp, dq->q_id, dq->q_type, bp);
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);

	/*
	 * Finish the mapping transactions and roll one more time to
	 * disconnect sc->ip from sc->tp.
	 */
	error = xrep_defer_finish(sc);
	if (error)
		return error;
	return xfs_trans_roll(&sc->tp);
}

/* Make sure there's a written block backing this dquot */
STATIC int
xrep_quota_item_bmap(
	struct xfs_scrub	*sc,
	struct xfs_dquot	*dq,
	bool			*dirty)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	xfs_fileoff_t		offset = dq->q_id / qi->qi_dqperchunk;
	int			nmaps = 1;
	int			error;

	/* The computed file offset should always be valid. */
	if (!xfs_verify_fileoff(mp, offset)) {
		ASSERT(xfs_verify_fileoff(mp, offset));
		return -EFSCORRUPTED;
	}
	dq->q_fileoffset = offset;

	error = xfs_bmapi_read(sc->ip, offset, 1, &irec, &nmaps, 0);
	if (error)
		return error;

	if (nmaps < 1 || !xfs_bmap_is_real_extent(&irec)) {
		/* Hole/delalloc extent; allocate a real block. */
		error = xrep_quota_item_fill_bmap_hole(sc, dq, &irec);
		if (error)
			return error;
	} else if (irec.br_state != XFS_EXT_NORM) {
		/* Unwritten extent, which we already took care of? */
		ASSERT(irec.br_state == XFS_EXT_NORM);
		return -EFSCORRUPTED;
	} else if (dq->q_blkno != XFS_FSB_TO_DADDR(mp, irec.br_startblock)) {
		/*
		 * If the cached daddr is incorrect, repair probably punched a
		 * hole out of the quota file and filled it back in with a new
		 * block.  Update the block mapping in the dquot.
		 */
		dq->q_blkno = XFS_FSB_TO_DADDR(mp, irec.br_startblock);
	}

	*dirty = true;
	return 0;
}

/* Reset quota timers if incorrectly set. */
static inline void
xrep_quota_item_timer(
	struct xfs_scrub	*sc,
	const struct xfs_dquot_res *res,
	bool			*dirty)
{
	if ((res->softlimit && res->count > res->softlimit) ||
	    (res->hardlimit && res->count > res->hardlimit)) {
		if (!res->timer)
			*dirty = true;
	} else {
		if (res->timer)
			*dirty = true;
	}
}

/* Repair the fields in an individual quota item. */
STATIC int
xrep_quota_item(
	struct xrep_quota_info	*rqi,
	struct xfs_dquot	*dq)
{
	struct xfs_scrub	*sc = rqi->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		fs_icount;
	bool			dirty = false;
	int			error = 0;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * We might need to fix holes in the bmap record for the storage
	 * backing this dquot, so we need to lock the dquot and the quota
	 * file.  dqiterate gave us a locked dquot, so drop the dquot lock to
	 * get the ILOCK_EXCL.
	 */
	xfs_dqunlock(dq);
	xchk_ilock(sc, XFS_ILOCK_EXCL);
	xfs_dqlock(dq);

	error = xrep_quota_item_bmap(sc, dq, &dirty);
	xchk_iunlock(sc, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/* Check the limits. */
	if (dq->q_blk.softlimit > dq->q_blk.hardlimit) {
		dq->q_blk.softlimit = dq->q_blk.hardlimit;
		dirty = true;
	}

	if (dq->q_ino.softlimit > dq->q_ino.hardlimit) {
		dq->q_ino.softlimit = dq->q_ino.hardlimit;
		dirty = true;
	}

	if (dq->q_rtb.softlimit > dq->q_rtb.hardlimit) {
		dq->q_rtb.softlimit = dq->q_rtb.hardlimit;
		dirty = true;
	}

	/*
	 * Check that usage doesn't exceed physical limits.  However, on
	 * a reflink filesystem we're allowed to exceed physical space
	 * if there are no quota limits.  We don't know what the real number
	 * is, but we can make quotacheck find out for us.
	 */
	if (!xfs_has_reflink(mp) && dq->q_blk.count > mp->m_sb.sb_dblocks) {
		dq->q_blk.reserved -= dq->q_blk.count;
		dq->q_blk.reserved += mp->m_sb.sb_dblocks;
		dq->q_blk.count = mp->m_sb.sb_dblocks;
		rqi->need_quotacheck = true;
		dirty = true;
	}
	fs_icount = percpu_counter_sum(&mp->m_icount);
	if (dq->q_ino.count > fs_icount) {
		dq->q_ino.reserved -= dq->q_ino.count;
		dq->q_ino.reserved += fs_icount;
		dq->q_ino.count = fs_icount;
		rqi->need_quotacheck = true;
		dirty = true;
	}
	if (dq->q_rtb.count > mp->m_sb.sb_rblocks) {
		dq->q_rtb.reserved -= dq->q_rtb.count;
		dq->q_rtb.reserved += mp->m_sb.sb_rblocks;
		dq->q_rtb.count = mp->m_sb.sb_rblocks;
		rqi->need_quotacheck = true;
		dirty = true;
	}

	xrep_quota_item_timer(sc, &dq->q_blk, &dirty);
	xrep_quota_item_timer(sc, &dq->q_ino, &dirty);
	xrep_quota_item_timer(sc, &dq->q_rtb, &dirty);

	if (!dirty)
		return 0;

	trace_xrep_dquot_item(sc->mp, dq->q_type, dq->q_id);

	dq->q_flags |= XFS_DQFLAG_DIRTY;
	xfs_trans_dqjoin(sc->tp, dq);
	if (dq->q_id) {
		xfs_qm_adjust_dqlimits(dq);
		xfs_qm_adjust_dqtimers(dq);
	}
	xfs_trans_log_dquot(sc->tp, dq);
	error = xfs_trans_roll(&sc->tp);
	xfs_dqlock(dq);
	return error;
}
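The capping logic in xrep_quota_item() subtracts the old count from reserved before adding the clamped count back, which preserves the in-flight delta (reserved - count) across the clamp. A tiny standalone check of that invariant (plain C, invented names; assumes reserved >= count, as the kernel maintains):

#include <assert.h>
#include <stdint.h>

struct res { uint64_t count, reserved; };

/* Clamp usage to @max while preserving reserved - count, as above. */
static void clamp_count(struct res *r, uint64_t max)
{
	if (r->count > max) {
		r->reserved -= r->count;
		r->reserved += max;
		r->count = max;
	}
}

int main(void)
{
	struct res r = { .count = 110, .reserved = 120 };

	clamp_count(&r, 100);
	assert(r.count == 100 && r.reserved == 110);	/* delta of 10 kept */
	return 0;
}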
/* Fix a quota timer so that we can pass the verifier. */
STATIC void
xrep_quota_fix_timer(
	struct xfs_mount	*mp,
	const struct xfs_disk_dquot *ddq,
	__be64			softlimit,
	__be64			countnow,
	__be32			*timer,
	time64_t		timelimit)
{
	uint64_t		soft = be64_to_cpu(softlimit);
	uint64_t		count = be64_to_cpu(countnow);
	time64_t		new_timer;
	uint32_t		t;

	if (!soft || count <= soft || *timer != 0)
		return;

	new_timer = xfs_dquot_set_timeout(mp,
			ktime_get_real_seconds() + timelimit);
	if (ddq->d_type & XFS_DQTYPE_BIGTIME)
		t = xfs_dq_unix_to_bigtime(new_timer);
	else
		t = new_timer;

	*timer = cpu_to_be32(t);
}

/* Fix anything the verifiers complain about. */
STATIC int
xrep_quota_block(
	struct xfs_scrub	*sc,
	xfs_daddr_t		daddr,
	xfs_dqtype_t		dqtype,
	xfs_dqid_t		id)
{
	struct xfs_dqblk	*dqblk;
	struct xfs_disk_dquot	*ddq;
	struct xfs_quotainfo	*qi = sc->mp->m_quotainfo;
	struct xfs_def_quota	*defq = xfs_get_defquota(qi, dqtype);
	struct xfs_buf		*bp = NULL;
	enum xfs_blft		buftype = 0;
	int			i;
	int			error;

	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp, daddr,
			qi->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops);
	switch (error) {
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Failed verifier, retry read with no ops. */
		error = xfs_trans_read_buf(sc->mp, sc->tp,
				sc->mp->m_ddev_targp, daddr, qi->qi_dqchunklen,
				0, &bp, NULL);
		if (error)
			return error;
		break;
	case 0:
		dqblk = bp->b_addr;
		ddq = &dqblk[0].dd_diskdq;

		/*
		 * If there's nothing that would impede a dqiterate, we're
		 * done.
		 */
		if ((ddq->d_type & XFS_DQTYPE_REC_MASK) != dqtype ||
		    id == be32_to_cpu(ddq->d_id)) {
			xfs_trans_brelse(sc->tp, bp);
			return 0;
		}
		break;
	default:
		return error;
	}

	/* Something's wrong with the block, fix the whole thing. */
	dqblk = bp->b_addr;
	bp->b_ops = &xfs_dquot_buf_ops;
	for (i = 0; i < qi->qi_dqperchunk; i++, dqblk++) {
		ddq = &dqblk->dd_diskdq;

		trace_xrep_disk_dquot(sc->mp, dqtype, id + i);

		ddq->d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
		ddq->d_version = XFS_DQUOT_VERSION;
		ddq->d_type = dqtype;
		ddq->d_id = cpu_to_be32(id + i);

		if (xfs_has_bigtime(sc->mp) && ddq->d_id)
			ddq->d_type |= XFS_DQTYPE_BIGTIME;

		xrep_quota_fix_timer(sc->mp, ddq, ddq->d_blk_softlimit,
				ddq->d_bcount, &ddq->d_btimer,
				defq->blk.time);

		xrep_quota_fix_timer(sc->mp, ddq, ddq->d_ino_softlimit,
				ddq->d_icount, &ddq->d_itimer,
				defq->ino.time);

		xrep_quota_fix_timer(sc->mp, ddq, ddq->d_rtb_softlimit,
				ddq->d_rtbcount, &ddq->d_rtbtimer,
				defq->rtb.time);

		/* We only support v5 filesystems so always set these. */
		uuid_copy(&dqblk->dd_uuid, &sc->mp->m_sb.sb_meta_uuid);
		xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk),
				XFS_DQUOT_CRC_OFF);
		dqblk->dd_lsn = 0;
	}
	switch (dqtype) {
	case XFS_DQTYPE_USER:
		buftype = XFS_BLFT_UDQUOT_BUF;
		break;
	case XFS_DQTYPE_GROUP:
		buftype = XFS_BLFT_GDQUOT_BUF;
		break;
	case XFS_DQTYPE_PROJ:
		buftype = XFS_BLFT_PDQUOT_BUF;
		break;
	}
	xfs_trans_buf_set_type(sc->tp, bp, buftype);
	xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
	return xrep_roll_trans(sc);
}

/*
 * Repair a quota file's data fork.  The function returns with the inode
 * joined.
 */
STATIC int
xrep_quota_data_fork(
	struct xfs_scrub	*sc,
	xfs_dqtype_t		dqtype)
{
	struct xfs_bmbt_irec	irec = { 0 };
	struct xfs_iext_cursor	icur;
	struct xfs_quotainfo	*qi = sc->mp->m_quotainfo;
	struct xfs_ifork	*ifp;
	xfs_fileoff_t		max_dqid_off;
	xfs_fileoff_t		off;
	xfs_fsblock_t		fsbno;
	bool			truncate = false;
	bool			joined = false;
	int			error = 0;

	error = xrep_metadata_inode_forks(sc);
	if (error)
		goto out;

	/* Check for data fork problems that apply only to quota files. */
	max_dqid_off = XFS_DQ_ID_MAX / qi->qi_dqperchunk;
	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
	for_each_xfs_iext(ifp, &icur, &irec) {
		if (isnullstartblock(irec.br_startblock)) {
			error = -EFSCORRUPTED;
			goto out;
		}

		if (irec.br_startoff > max_dqid_off ||
		    irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) {
			truncate = true;
			break;
		}

		/* Convert unwritten extents to real ones. */
		if (irec.br_state == XFS_EXT_UNWRITTEN) {
			struct xfs_bmbt_irec	nrec;
			int			nmap = 1;

			if (!joined) {
				xfs_trans_ijoin(sc->tp, sc->ip, 0);
				joined = true;
			}

			error = xfs_bmapi_write(sc->tp, sc->ip,
					irec.br_startoff, irec.br_blockcount,
					XFS_BMAPI_CONVERT, 0, &nrec, &nmap);
			if (error)
				goto out;
			if (nmap != 1) {
				error = -ENOSPC;
				goto out;
			}
			ASSERT(nrec.br_startoff == irec.br_startoff);
			ASSERT(nrec.br_blockcount == irec.br_blockcount);

			error = xfs_defer_finish(&sc->tp);
			if (error)
				goto out;
		}
	}

	if (!joined) {
		xfs_trans_ijoin(sc->tp, sc->ip, 0);
		joined = true;
	}

	if (truncate) {
		/* Erase everything after the block containing the max dquot */
		error = xfs_bunmapi_range(&sc->tp, sc->ip, 0,
				max_dqid_off * sc->mp->m_sb.sb_blocksize,
				XFS_MAX_FILEOFF);
		if (error)
			goto out;

		/* Remove all CoW reservations. */
		error = xfs_reflink_cancel_cow_blocks(sc->ip, &sc->tp, 0,
				XFS_MAX_FILEOFF, true);
		if (error)
			goto out;
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;

		/*
		 * Always re-log the inode so that our permanent transaction
		 * can keep on rolling it forward in the log.
		 */
		xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
	}

	/* Now go fix anything that fails the verifiers. */
	for_each_xfs_iext(ifp, &icur, &irec) {
		for (fsbno = irec.br_startblock, off = irec.br_startoff;
		     fsbno < irec.br_startblock + irec.br_blockcount;
		     fsbno += XFS_DQUOT_CLUSTER_SIZE_FSB,
		     off += XFS_DQUOT_CLUSTER_SIZE_FSB) {
			error = xrep_quota_block(sc,
					XFS_FSB_TO_DADDR(sc->mp, fsbno),
					dqtype, off * qi->qi_dqperchunk);
			if (error)
				goto out;
		}
	}

out:
	return error;
}

/*
 * Go fix anything in the quota items that we could have been mad about.  Now
 * that we've checked the quota inode data fork we have to drop ILOCK_EXCL to
 * use the regular dquot functions.
 */
STATIC int
xrep_quota_problems(
	struct xfs_scrub	*sc,
	xfs_dqtype_t		dqtype)
{
	struct xchk_dqiter	cursor = { };
	struct xrep_quota_info	rqi = { .sc = sc };
	struct xfs_dquot	*dq;
	int			error;

	xchk_dqiter_init(&cursor, sc, dqtype);
	while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) {
		error = xrep_quota_item(&rqi, dq);
		xfs_qm_dqput(dq);
		if (error)
			break;
	}
	if (error)
		return error;

	/* Make a quotacheck happen. */
	if (rqi.need_quotacheck)
		xrep_force_quotacheck(sc, dqtype);
	return 0;
}

/* Repair all of a quota type's items. */
int
xrep_quota(
	struct xfs_scrub	*sc)
{
	xfs_dqtype_t		dqtype;
	int			error;

	dqtype = xchk_quota_to_dqtype(sc);

	/*
	 * Re-take the ILOCK so that we can fix any problems that we found
	 * with the data fork mappings, or with the dquot bufs themselves.
	 */
	if (!(sc->ilock_flags & XFS_ILOCK_EXCL))
		xchk_ilock(sc, XFS_ILOCK_EXCL);
	error = xrep_quota_data_fork(sc, dqtype);
	if (error)
		return error;

	/*
	 * Finish deferred items and roll the transaction to unjoin the quota
	 * inode from the transaction so that we can unlock the quota inode;
	 * we play only with dquots from now on.
	 */
	error = xrep_defer_finish(sc);
	if (error)
		return error;
	error = xfs_trans_roll(&sc->tp);
	if (error)
		return error;
	xchk_iunlock(sc, sc->ilock_flags);

	/* Fix anything the dquot verifiers don't complain about. */
	error = xrep_quota_problems(sc, dqtype);
	if (error)
		return error;

	return xrep_trans_commit(sc);
}
@@ -36,16 +36,14 @@ xchk_dir_walk_sf(
 	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_da_geometry	*geo = mp->m_dir_geo;
 	struct xfs_dir2_sf_entry *sfep;
-	struct xfs_dir2_sf_hdr	*sfp;
+	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
 	xfs_ino_t		ino;
 	xfs_dir2_dataptr_t	dapos;
 	unsigned int		i;
 	int			error;

 	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
-	ASSERT(dp->i_df.if_u1.if_data != NULL);
-
-	sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;
+	ASSERT(sfp != NULL);

 	/* dot entry */
 	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
@@ -20,6 +20,7 @@
 #include "xfs_ialloc_btree.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_extent_busy.h"
 #include "xfs_ag.h"
@@ -31,11 +32,14 @@
 #include "xfs_da_btree.h"
 #include "xfs_attr.h"
 #include "xfs_attr_remote.h"
+#include "xfs_defer.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
 #include "scrub/repair.h"
 #include "scrub/bitmap.h"
+#include "scrub/agb_bitmap.h"
+#include "scrub/fsb_bitmap.h"
 #include "scrub/reap.h"

 /*
@@ -73,10 +77,10 @@
  * with only the same rmap owner but the block is not owned by something with
  * the same rmap owner, the block will be freed.
  *
- * The caller is responsible for locking the AG headers for the entire rebuild
- * operation so that nothing else can sneak in and change the AG state while
- * we're not looking.  We must also invalidate any buffers associated with
- * @bitmap.
+ * The caller is responsible for locking the AG headers/inode for the entire
+ * rebuild operation so that nothing else can sneak in and change the incore
+ * state while we're not looking.  We must also invalidate any buffers
+ * associated with @bitmap.
  */

 /* Information about reaping extents after a repair. */
@@ -247,7 +251,7 @@ xreap_agextent_binval(
 	max_fsbs = min_t(xfs_agblock_t, agbno_next - bno,
 			xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));

-	for (fsbcount = 1; fsbcount < max_fsbs; fsbcount++) {
+	for (fsbcount = 1; fsbcount <= max_fsbs; fsbcount++) {
 		struct xfs_buf	*bp = NULL;
 		xfs_daddr_t	daddr;
 		int		error;
@@ -377,6 +381,17 @@ xreap_agextent_iter(
 		trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);

 		rs->force_roll = true;
+
+		if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
+			/*
+			 * If we're unmapping CoW staging extents, remove the
+			 * records from the refcountbt, which will remove the
+			 * rmap record as well.
+			 */
+			xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
+			return 0;
+		}
+
 		return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
 				*aglenp, rs->oinfo);
 	}
@@ -395,6 +410,26 @@ xreap_agextent_iter(
 		return 0;
 	}

+	/*
+	 * If we're getting rid of CoW staging extents, use deferred work items
+	 * to remove the refcountbt records (which removes the rmap records)
+	 * and free the extent.  We're not worried about the system going down
+	 * here because log recovery walks the refcount btree to clean out the
+	 * CoW staging extents.
+	 */
+	if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
+		ASSERT(rs->resv == XFS_AG_RESV_NONE);
+
+		xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
+		error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
+				rs->resv, true);
+		if (error)
+			return error;
+
+		rs->force_roll = true;
+		return 0;
+	}
+
 	/* Put blocks back on the AGFL one at a time. */
 	if (rs->resv == XFS_AG_RESV_AGFL) {
 		ASSERT(*aglenp == 1);
@@ -409,13 +444,17 @@ xreap_agextent_iter(
 	/*
 	 * Use deferred frees to get rid of the old btree blocks to try to
 	 * minimize the window in which we could crash and lose the old blocks.
+	 * Add a defer ops barrier every other extent to avoid stressing the
+	 * system with large EFIs.
 	 */
-	error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
+	error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
 			rs->resv, true);
 	if (error)
 		return error;

 	rs->deferred++;
+	if (rs->deferred % 2 == 0)
+		xfs_defer_add_barrier(sc->tp);
 	return 0;
 }
@@ -425,13 +464,12 @@
  */
 STATIC int
 xreap_agmeta_extent(
-	uint64_t		fsbno,
-	uint64_t		len,
+	uint32_t		agbno,
+	uint32_t		len,
 	void			*priv)
 {
 	struct xreap_state	*rs = priv;
 	struct xfs_scrub	*sc = rs->sc;
-	xfs_agblock_t		agbno = fsbno;
 	xfs_agblock_t		agbno_next = agbno + len;
 	int			error = 0;

@@ -496,3 +534,115 @@ xrep_reap_agblocks(

 	return 0;
 }
+
+/*
+ * Break a file metadata extent into sub-extents by fate (crosslinked, not
+ * crosslinked), and dispose of each sub-extent separately.  The extent must
+ * not cross an AG boundary.
+ */
+STATIC int
+xreap_fsmeta_extent(
+	uint64_t		fsbno,
+	uint64_t		len,
+	void			*priv)
+{
+	struct xreap_state	*rs = priv;
+	struct xfs_scrub	*sc = rs->sc;
+	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
+	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
+	xfs_agblock_t		agbno_next = agbno + len;
+	int			error = 0;
+
+	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
+	ASSERT(sc->ip != NULL);
+	ASSERT(!sc->sa.pag);
+
+	/*
+	 * We're reaping blocks after repairing file metadata, which means that
+	 * we have to init the xchk_ag structure ourselves.
+	 */
+	sc->sa.pag = xfs_perag_get(sc->mp, agno);
+	if (!sc->sa.pag)
+		return -EFSCORRUPTED;
+
+	error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
+	if (error)
+		goto out_pag;
+
+	while (agbno < agbno_next) {
+		xfs_extlen_t	aglen;
+		bool		crosslinked;
+
+		error = xreap_agextent_select(rs, agbno, agbno_next,
+				&crosslinked, &aglen);
+		if (error)
+			goto out_agf;
+
+		error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
+		if (error)
+			goto out_agf;
+
+		if (xreap_want_defer_finish(rs)) {
+			/*
+			 * Holds the AGF buffer across the deferred chain
+			 * processing.
+			 */
+			error = xrep_defer_finish(sc);
+			if (error)
+				goto out_agf;
+			xreap_defer_finish_reset(rs);
+		} else if (xreap_want_roll(rs)) {
+			/*
+			 * Hold the AGF buffer across the transaction roll so
+			 * that we don't have to reattach it to the scrub
+			 * context.
+			 */
+			xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+			error = xfs_trans_roll_inode(&sc->tp, sc->ip);
+			xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
+			if (error)
+				goto out_agf;
+			xreap_reset(rs);
+		}
+
+		agbno += aglen;
+	}
+
+out_agf:
+	xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
+	sc->sa.agf_bp = NULL;
+out_pag:
+	xfs_perag_put(sc->sa.pag);
+	sc->sa.pag = NULL;
+	return error;
+}
+
+/*
+ * Dispose of every block of every fs metadata extent in the bitmap.
+ * Do not use this to dispose of the mappings in an ondisk inode fork.
+ */
+int
+xrep_reap_fsblocks(
+	struct xfs_scrub	*sc,
+	struct xfsb_bitmap	*bitmap,
+	const struct xfs_owner_info *oinfo)
+{
+	struct xreap_state	rs = {
+		.sc		= sc,
+		.oinfo		= oinfo,
+		.resv		= XFS_AG_RESV_NONE,
+	};
+	int			error;
+
+	ASSERT(xfs_has_rmapbt(sc->mp));
+	ASSERT(sc->ip != NULL);
+
+	error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
+	if (error)
+		return error;
+
+	if (xreap_dirty(&rs))
+		return xrep_defer_finish(sc);
+
+	return 0;
+}
@@ -6,7 +6,12 @@
 #ifndef __XFS_SCRUB_REAP_H__
 #define __XFS_SCRUB_REAP_H__

+struct xagb_bitmap;
+struct xfsb_bitmap;
+
 int xrep_reap_agblocks(struct xfs_scrub *sc, struct xagb_bitmap *bitmap,
 		const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
+int xrep_reap_fsblocks(struct xfs_scrub *sc, struct xfsb_bitmap *bitmap,
+		const struct xfs_owner_info *oinfo);

 #endif /* __XFS_SCRUB_REAP_H__ */
@@ -441,7 +441,7 @@ xchk_refcountbt_rec(
 	struct xchk_refcbt_records	*rrc = bs->private;

 	xfs_refcount_btrec_to_irec(rec, &irec);
-	if (xfs_refcount_check_irec(bs->cur, &irec) != NULL) {
+	if (xfs_refcount_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
 		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 		return 0;
 	}

fs/xfs/scrub/refcount_repair.c | 794 lines (new file):
@ -0,0 +1,794 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_error.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Rebuilding the Reference Count Btree
 * ====================================
 *
 * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
 * entries as rectangles representing extents of physical blocks, and
 * that the rectangles can be laid down to allow them to overlap each
 * other; then we know that we must emit a refcnt btree entry wherever
 * the amount of overlap changes, i.e. the emission stimulus is
 * level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----     ----------- ----     ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3 43 234  2123  1 01 2  3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time. Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change. This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * Like all the other repairers, we make a list of all the refcount
 * records we need, then reinitialize the refcount btree root and
 * insert all the records.
 */
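
/*
 * Illustrative walk-through of the algorithm above (editor's sketch, not
 * part of the original patch). Suppose the sorted rmap array holds three
 * (startblock, len) records: A=(10,10), B=(12,6), C=(15,10). Blocks
 * 10-11 are covered only by A, so nothing is emitted for them; crossing
 * block 12 raises the bag size to 2, crossing 15 raises it to 3, and the
 * bag shrinks as B and A end at blocks 18 and 20:
 *
 *	emit (12, 3, 2)		blocks 12-14 shared twice
 *	emit (15, 3, 3)		blocks 15-17 shared three times
 *	emit (18, 2, 2)		blocks 18-19 shared twice
 *
 * Blocks 20-24 are covered only by C, so the walk ends without emitting
 * a refcount < 2 record, exactly as described above.
 */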

/* The only parts of the rmap that we care about for computing refcounts. */
struct xrep_refc_rmap {
	xfs_agblock_t		startblock;
	xfs_extlen_t		blockcount;
} __packed;

struct xrep_refc {
	/* refcount extents */
	struct xfarray		*refcount_records;

	/* new refcountbt information */
	struct xrep_newbt	new_btree;

	/* old refcountbt blocks */
	struct xagb_bitmap	old_refcountbt_blocks;

	struct xfs_scrub	*sc;

	/* get_records()'s position in the refcount record array. */
	xfarray_idx_t		array_cur;

	/* # of refcountbt blocks */
	xfs_extlen_t		btblocks;
};

/* Check for any obvious conflicts with this shared/CoW staging extent. */
STATIC int
xrep_refc_check_ext(
	struct xfs_scrub		*sc,
	const struct xfs_refcount_irec	*rec)
{
	enum xbtree_recpacking		outcome;
	int				error;

	if (xfs_refcount_check_irec(sc->sa.pag, rec) != NULL)
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rc_startblock,
			rec->rc_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rc_startblock, rec->rc_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}

/* Record a reference count extent. */
STATIC int
xrep_refc_stash(
	struct xrep_refc		*rr,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len,
	uint64_t			refcount)
{
	struct xfs_refcount_irec	irec = {
		.rc_startblock		= agbno,
		.rc_blockcount		= len,
		.rc_domain		= domain,
	};
	struct xfs_scrub		*sc = rr->sc;
	int				error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	irec.rc_refcount = min_t(uint64_t, MAXREFCOUNT, refcount);

	error = xrep_refc_check_ext(rr->sc, &irec);
	if (error)
		return error;

	trace_xrep_refc_found(sc->sa.pag, &irec);

	return xfarray_append(rr->refcount_records, &irec);
}

/* Record a CoW staging extent. */
STATIC int
xrep_refc_stash_cow(
	struct xrep_refc		*rr,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	return xrep_refc_stash(rr, XFS_REFC_DOMAIN_COW, agbno, len, 1);
}

/* Decide if an rmap could describe a shared extent. */
static inline bool
xrep_refc_rmap_shareable(
	struct xfs_mount		*mp,
	const struct xfs_rmap_irec	*rmap)
{
	/* AG metadata are never sharable */
	if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
		return false;

	/* Metadata in files are never shareable */
	if (xfs_internal_inum(mp, rmap->rm_owner))
		return false;

	/* Metadata and unwritten file blocks are not shareable. */
	if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
			      XFS_RMAP_UNWRITTEN))
		return false;

	return true;
}

/*
 * Walk along the reverse mapping records until we find one that could describe
 * a shared extent.
 */
STATIC int
xrep_refc_walk_rmaps(
	struct xrep_refc	*rr,
	struct xrep_refc_rmap	*rrm,
	bool			*have_rec)
{
	struct xfs_rmap_irec	rmap;
	struct xfs_btree_cur	*cur = rr->sc->sa.rmap_cur;
	struct xfs_mount	*mp = cur->bc_mp;
	int			have_gt;
	int			error = 0;

	*have_rec = false;

	/*
	 * Loop through the remaining rmaps. Remember CoW staging
	 * extents and the refcountbt blocks from the old tree for later
	 * disposal. We can only share written data fork extents, so
	 * keep looping until we find an rmap for one.
	 */
	do {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfs_btree_increment(cur, 0, &have_gt);
		if (error)
			return error;
		if (!have_gt)
			return 0;

		error = xfs_rmap_get_rec(cur, &rmap, &have_gt);
		if (error)
			return error;
		if (XFS_IS_CORRUPT(mp, !have_gt))
			return -EFSCORRUPTED;

		if (rmap.rm_owner == XFS_RMAP_OWN_COW) {
			error = xrep_refc_stash_cow(rr, rmap.rm_startblock,
					rmap.rm_blockcount);
			if (error)
				return error;
		} else if (rmap.rm_owner == XFS_RMAP_OWN_REFC) {
			/* refcountbt block, dump it when we're done. */
			rr->btblocks += rmap.rm_blockcount;
			error = xagb_bitmap_set(&rr->old_refcountbt_blocks,
					rmap.rm_startblock, rmap.rm_blockcount);
			if (error)
				return error;
		}
	} while (!xrep_refc_rmap_shareable(mp, &rmap));

	rrm->startblock = rmap.rm_startblock;
	rrm->blockcount = rmap.rm_blockcount;
	*have_rec = true;
	return 0;
}

static inline uint32_t
xrep_refc_encode_startblock(
	const struct xfs_refcount_irec	*irec)
{
	uint32_t			start;

	start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
	if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
		start |= XFS_REFC_COWFLAG;

	return start;
}

/* Sort in the same order as the ondisk records. */
static int
xrep_refc_extent_cmp(
	const void			*a,
	const void			*b)
{
	const struct xfs_refcount_irec	*ap = a;
	const struct xfs_refcount_irec	*bp = b;
	uint32_t			sa, sb;

	sa = xrep_refc_encode_startblock(ap);
	sb = xrep_refc_encode_startblock(bp);

	if (sa > sb)
		return 1;
	if (sa < sb)
		return -1;
	return 0;
}
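
/*
 * Editor's note (illustrative, not part of the original patch): because
 * the encoding reuses bit 31 (XFS_REFC_COWFLAG) of the startblock, every
 * shared-domain record sorts before every CoW-domain record. A shared
 * record at agbno 100 encodes to 100, while a CoW record at agbno 100
 * encodes to (100 | XFS_REFC_COWFLAG) == 0x80000064. This is what lets
 * xrep_refc_sort_records() verify the sorted records as two contiguous,
 * ascending runs, one per domain.
 */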

/*
 * Sort the refcount extents by startblock or else the btree records will be in
 * the wrong order. Make sure the records do not overlap in physical space.
 */
STATIC int
xrep_refc_sort_records(
	struct xrep_refc		*rr)
{
	struct xfs_refcount_irec	irec;
	xfarray_idx_t			cur;
	enum xfs_refc_domain		dom = XFS_REFC_DOMAIN_SHARED;
	xfs_agblock_t			next_agbno = 0;
	int				error;

	error = xfarray_sort(rr->refcount_records, xrep_refc_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	foreach_xfarray_idx(rr->refcount_records, cur) {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfarray_load(rr->refcount_records, cur, &irec);
		if (error)
			return error;

		if (dom == XFS_REFC_DOMAIN_SHARED &&
		    irec.rc_domain == XFS_REFC_DOMAIN_COW) {
			dom = irec.rc_domain;
			next_agbno = 0;
		}

		if (dom != irec.rc_domain)
			return -EFSCORRUPTED;
		if (irec.rc_startblock < next_agbno)
			return -EFSCORRUPTED;

		next_agbno = irec.rc_startblock + irec.rc_blockcount;
	}

	return error;
}

#define RRM_NEXT(r)	((r).startblock + (r).blockcount)
/*
 * Find the next block where the refcount changes, given the next rmap we
 * looked at and the ones we're already tracking.
 */
static inline int
xrep_refc_next_edge(
	struct xfarray		*rmap_bag,
	struct xrep_refc_rmap	*next_rrm,
	bool			next_valid,
	xfs_agblock_t		*nbnop)
{
	struct xrep_refc_rmap	rrm;
	xfarray_idx_t		array_cur = XFARRAY_CURSOR_INIT;
	xfs_agblock_t		nbno = NULLAGBLOCK;
	int			error;

	if (next_valid)
		nbno = next_rrm->startblock;

	while ((error = xfarray_iter(rmap_bag, &array_cur, &rrm)) == 1)
		nbno = min_t(xfs_agblock_t, nbno, RRM_NEXT(rrm));

	if (error)
		return error;

	/*
	 * We should have found /something/ because either next_rrm is the next
	 * interesting rmap to look at after emitting this refcount extent, or
	 * there are other rmaps in rmap_bag contributing to the current
	 * sharing count. But if something is seriously wrong, bail out.
	 */
	if (nbno == NULLAGBLOCK)
		return -EFSCORRUPTED;

	*nbnop = nbno;
	return 0;
}
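
/*
 * Worked example (editor's sketch, not from the original patch): if the
 * bag holds rmaps (10,10) and (12,6), their RRM_NEXT values are 20 and
 * 18; if the next unprocessed rmap starts at block 15, the refcount next
 * changes at min(15, 20, 18) == 15, so *nbnop is set to 15.
 */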

/*
 * Walk forward through the rmap btree to collect all rmaps starting at
 * @bno in @rmap_bag. These represent the file(s) that share ownership of
 * the current block. Upon return, the rmap cursor points to the last record
 * satisfying the startblock constraint.
 */
static int
xrep_refc_push_rmaps_at(
	struct xrep_refc	*rr,
	struct xfarray		*rmap_bag,
	xfs_agblock_t		bno,
	struct xrep_refc_rmap	*rrm,
	bool			*have,
	uint64_t		*stack_sz)
{
	struct xfs_scrub	*sc = rr->sc;
	int			have_gt;
	int			error;

	while (*have && rrm->startblock == bno) {
		error = xfarray_store_anywhere(rmap_bag, rrm);
		if (error)
			return error;
		(*stack_sz)++;
		error = xrep_refc_walk_rmaps(rr, rrm, have);
		if (error)
			return error;
	}

	error = xfs_btree_decrement(sc->sa.rmap_cur, 0, &have_gt);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(sc->mp, !have_gt))
		return -EFSCORRUPTED;

	return 0;
}

/* Iterate all the rmap records to generate reference count data. */
STATIC int
xrep_refc_find_refcounts(
	struct xrep_refc	*rr)
{
	struct xrep_refc_rmap	rrm;
	struct xfs_scrub	*sc = rr->sc;
	struct xfarray		*rmap_bag;
	char			*descr;
	uint64_t		old_stack_sz;
	uint64_t		stack_sz = 0;
	xfs_agblock_t		sbno;
	xfs_agblock_t		cbno;
	xfs_agblock_t		nbno;
	bool			have;
	int			error;

	xrep_ag_btcur_init(sc, &sc->sa);

	/*
	 * Set up a sparse array to store all the rmap records that we're
	 * tracking to generate a reference count record. If this exceeds
	 * MAXREFCOUNT, we clamp rc_refcount.
	 */
	descr = xchk_xfile_ag_descr(sc, "rmap record bag");
	error = xfarray_create(descr, 0, sizeof(struct xrep_refc_rmap),
			&rmap_bag);
	kfree(descr);
	if (error)
		goto out_cur;

	/* Start the rmapbt cursor to the left of all records. */
	error = xfs_btree_goto_left_edge(sc->sa.rmap_cur);
	if (error)
		goto out_bag;

	/* Process reverse mappings into refcount data. */
	while (xfs_btree_has_more_records(sc->sa.rmap_cur)) {
		/* Push all rmaps with pblk == sbno onto the stack */
		error = xrep_refc_walk_rmaps(rr, &rrm, &have);
		if (error)
			goto out_bag;
		if (!have)
			break;
		sbno = cbno = rrm.startblock;
		error = xrep_refc_push_rmaps_at(rr, rmap_bag, sbno,
					&rrm, &have, &stack_sz);
		if (error)
			goto out_bag;

		/* Set nbno to the bno of the next refcount change */
		error = xrep_refc_next_edge(rmap_bag, &rrm, have, &nbno);
		if (error)
			goto out_bag;

		ASSERT(nbno > sbno);
		old_stack_sz = stack_sz;

		/* While stack isn't empty... */
		while (stack_sz) {
			xfarray_idx_t	array_cur = XFARRAY_CURSOR_INIT;

			/* Pop all rmaps that end at nbno */
			while ((error = xfarray_iter(rmap_bag, &array_cur,
								&rrm)) == 1) {
				if (RRM_NEXT(rrm) != nbno)
					continue;
				error = xfarray_unset(rmap_bag, array_cur - 1);
				if (error)
					goto out_bag;
				stack_sz--;
			}
			if (error)
				goto out_bag;

			/* Push array items that start at nbno */
			error = xrep_refc_walk_rmaps(rr, &rrm, &have);
			if (error)
				goto out_bag;
			if (have) {
				error = xrep_refc_push_rmaps_at(rr, rmap_bag,
						nbno, &rrm, &have, &stack_sz);
				if (error)
					goto out_bag;
			}

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (stack_sz != old_stack_sz) {
				if (old_stack_sz > 1) {
					error = xrep_refc_stash(rr,
							XFS_REFC_DOMAIN_SHARED,
							cbno, nbno - cbno,
							old_stack_sz);
					if (error)
						goto out_bag;
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (stack_sz == 0)
				break;
			old_stack_sz = stack_sz;
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			error = xrep_refc_next_edge(rmap_bag, &rrm, have,
					&nbno);
			if (error)
				goto out_bag;

			ASSERT(nbno > sbno);
		}
	}

	ASSERT(stack_sz == 0);
out_bag:
	xfarray_destroy(rmap_bag);
out_cur:
	xchk_ag_btcur_free(&sc->sa);
	return error;
}
#undef RRM_NEXT

/* Retrieve refcountbt data for bulk load. */
STATIC int
xrep_refc_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xfs_refcount_irec	*irec = &cur->bc_rec.rc;
	struct xrep_refc		*rr = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load(rr->refcount_records, rr->array_cur++,
				irec);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_refc_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_refc	*rr = priv;

	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
}

/* Update the AGF counters. */
STATIC int
xrep_refc_reset_counters(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem. If the new
	 * tree is shorter than the old one, the refcountbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the updated
	 * AGF to capture all the new values.
	 */
	pag->pagf_repair_refcount_level = pag->pagf_refcount_level;

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}

/*
 * Use the collected refcount information to stage a new refcount btree. If
 * this is successful we'll return with the new btree root information logged
 * to the repair transaction but not yet committed.
 */
STATIC int
xrep_refc_build_new_tree(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*refc_cur;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_fsblock_t		fsbno;
	int			error;

	error = xrep_refc_sort_records(rr);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, xfs_refc_block(sc->mp));
	xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_REFC, fsbno,
			XFS_AG_RESV_METADATA);
	rr->new_btree.bload.get_records = xrep_refc_get_records;
	rr->new_btree.bload.claim_block = xrep_refc_claim_block;

	/* Compute how many blocks we'll need. */
	refc_cur = xfs_refcountbt_stage_cursor(sc->mp, &rr->new_btree.afake,
			pag);
	error = xfs_btree_bload_compute_geometry(refc_cur,
			&rr->new_btree.bload,
			xfarray_length(rr->refcount_records));
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree. Update the incore btree height so
	 * that we don't trip the verifiers when writing the new btree blocks
	 * to disk.
	 */
	pag->pagf_repair_refcount_level = rr->new_btree.bload.btree_height;

	/* Add all observed refcount records. */
	rr->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_level;

	/*
	 * Install the new btree in the AG header. After this point the old
	 * btree is no longer accessible and the new tree is live.
	 */
	xfs_refcountbt_commit_staged_btree(refc_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(refc_cur, 0);

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_refc_reset_counters(rr);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_level:
	pag->pagf_repair_refcount_level = 0;
err_cur:
	xfs_btree_del_cursor(refc_cur, error);
err_newbt:
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_refc_remove_old_tree(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	int			error;

	/* Free the old refcountbt blocks if they're not in use. */
	error = xrep_reap_agblocks(sc, &rr->old_refcountbt_blocks,
			&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
	if (error)
		return error;

	/*
	 * Now that we've zapped all the old refcountbt blocks we can turn off
	 * the alternate height mechanism and reset the per-AG space
	 * reservations.
	 */
	pag->pagf_repair_refcount_level = 0;
	sc->flags |= XREP_RESET_PERAG_RESV;
	return 0;
}

/* Rebuild the refcount btree. */
int
xrep_refcountbt(
	struct xfs_scrub	*sc)
{
	struct xrep_refc	*rr;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	int			error;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	rr = kzalloc(sizeof(struct xrep_refc), XCHK_GFP_FLAGS);
	if (!rr)
		return -ENOMEM;
	rr->sc = sc;

	/* Set up enough storage to handle one refcount record per block. */
	descr = xchk_xfile_ag_descr(sc, "reference count records");
	error = xfarray_create(descr, mp->m_sb.sb_agblocks,
			sizeof(struct xfs_refcount_irec),
			&rr->refcount_records);
	kfree(descr);
	if (error)
		goto out_rr;

	/* Collect all reference counts. */
	xagb_bitmap_init(&rr->old_refcountbt_blocks);
	error = xrep_refc_find_refcounts(rr);
	if (error)
		goto out_bitmap;

	/* Rebuild the refcount information. */
	error = xrep_refc_build_new_tree(rr);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_refc_remove_old_tree(rr);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&rr->old_refcountbt_blocks);
	xfarray_destroy(rr->refcount_records);
out_rr:
	kfree(rr);
	return error;
}

@@ -27,6 +27,9 @@
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_defer.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_reflink.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -176,6 +179,16 @@ xrep_roll_ag_trans(
	return 0;
}

/* Roll the scrub transaction, holding the primary metadata locked. */
int
xrep_roll_trans(
	struct xfs_scrub	*sc)
{
	if (!sc->ip)
		return xrep_roll_ag_trans(sc);
	return xfs_trans_roll_inode(&sc->tp, sc->ip);
}

/* Finish all deferred work attached to the repair transaction. */
int
xrep_defer_finish(
@@ -673,6 +686,7 @@ xrep_find_ag_btree_roots(
	return error;
}

#ifdef CONFIG_XFS_QUOTA
/* Force a quotacheck the next time we mount. */
void
xrep_force_quotacheck(
@@ -699,10 +713,10 @@ xrep_force_quotacheck(
 *
 * This function ensures that the appropriate dquots are attached to an inode.
 * We cannot allow the dquot code to allocate an on-disk dquot block here
- * because we're already in transaction context with the inode locked. The
- * on-disk dquot should already exist anyway. If the quota code signals
- * corruption or missing quota information, schedule quotacheck, which will
- * repair corruptions in the quota metadata.
+ * because we're already in transaction context. The on-disk dquot should
+ * already exist anyway. If the quota code signals corruption or missing quota
+ * information, schedule quotacheck, which will repair corruptions in the quota
+ * metadata.
 */
int
xrep_ino_dqattach(
@@ -710,7 +724,10 @@ xrep_ino_dqattach(
{
	int			error;

-	error = xfs_qm_dqattach_locked(sc->ip, false);
+	ASSERT(sc->tp != NULL);
+	ASSERT(sc->ip != NULL);
+
+	error = xfs_qm_dqattach(sc->ip);
	switch (error) {
	case -EFSBADCRC:
	case -EFSCORRUPTED:
@@ -734,3 +751,367 @@ xrep_ino_dqattach(

	return error;
}
#endif /* CONFIG_XFS_QUOTA */

/*
 * Ensure that the inode being repaired is ready to handle a certain number of
 * extents, or return EFSCORRUPTED. Caller must hold the ILOCK of the inode
 * being repaired and have joined it to the scrub transaction.
 */
int
xrep_ino_ensure_extent_count(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_extnum_t		nextents)
{
	xfs_extnum_t		max_extents;
	bool			inode_has_nrext64;

	inode_has_nrext64 = xfs_inode_has_large_extent_counts(sc->ip);
	max_extents = xfs_iext_max_nextents(inode_has_nrext64, whichfork);
	if (nextents <= max_extents)
		return 0;
	if (inode_has_nrext64)
		return -EFSCORRUPTED;
	if (!xfs_has_large_extent_counts(sc->mp))
		return -EFSCORRUPTED;

	max_extents = xfs_iext_max_nextents(true, whichfork);
	if (nextents > max_extents)
		return -EFSCORRUPTED;

	sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
	return 0;
}
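
/*
 * Editor's note (assumed usage, not part of the original patch): a fork
 * repair that counted more mappings than the inode's current extent
 * counter can hold would call this before rebuilding the fork:
 *
 *	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
 *	if (error)
 *		return error;
 *
 * On filesystems with the large extent count feature enabled, this
 * upgrades the inode to 64-bit extent counters (XFS_DIFLAG2_NREXT64)
 * instead of failing with -EFSCORRUPTED.
 */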

/*
 * Initialize all the btree cursors for an AG repair except for the btree that
 * we're rebuilding.
 */
void
xrep_ag_btcur_init(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;

	/* Set up a bnobt cursor for cross-referencing. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_BNOBT &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_CNTBT) {
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sc->sa.pag, XFS_BTNUM_BNO);
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sc->sa.pag, XFS_BTNUM_CNT);
	}

	/* Set up a inobt cursor for cross-referencing. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_INOBT &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_FINOBT) {
		sa->ino_cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp,
				sa->agi_bp, XFS_BTNUM_INO);
		if (xfs_has_finobt(mp))
			sa->fino_cur = xfs_inobt_init_cursor(sc->sa.pag,
					sc->tp, sa->agi_bp, XFS_BTNUM_FINO);
	}

	/* Set up a rmapbt cursor for cross-referencing. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_RMAPBT &&
	    xfs_has_rmapbt(mp))
		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sc->sa.pag);

	/* Set up a refcountbt cursor for cross-referencing. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_REFCNTBT &&
	    xfs_has_reflink(mp))
		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
				sa->agf_bp, sc->sa.pag);
}

/*
 * Reinitialize the in-core AG state after a repair by rereading the AGF
 * buffer. We had better get the same AGF buffer as the one that's attached
 * to the scrub context.
 */
int
xrep_reinit_pagf(
	struct xfs_scrub	*sc)
{
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_buf		*bp;
	int			error;

	ASSERT(pag);
	ASSERT(xfs_perag_initialised_agf(pag));

	clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
	error = xfs_alloc_read_agf(pag, sc->tp, 0, &bp);
	if (error)
		return error;

	if (bp != sc->sa.agf_bp) {
		ASSERT(bp == sc->sa.agf_bp);
		return -EFSCORRUPTED;
	}

	return 0;
}

/*
 * Reinitialize the in-core AG state after a repair by rereading the AGI
 * buffer. We had better get the same AGI buffer as the one that's attached
 * to the scrub context.
 */
int
xrep_reinit_pagi(
	struct xfs_scrub	*sc)
{
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_buf		*bp;
	int			error;

	ASSERT(pag);
	ASSERT(xfs_perag_initialised_agi(pag));

	clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
	error = xfs_ialloc_read_agi(pag, sc->tp, &bp);
	if (error)
		return error;

	if (bp != sc->sa.agi_bp) {
		ASSERT(bp == sc->sa.agi_bp);
		return -EFSCORRUPTED;
	}

	return 0;
}

/*
 * Given an active reference to a perag structure, load AG headers and cursors.
 * This should only be called to scan an AG while repairing file-based metadata.
 */
int
xrep_ag_init(
	struct xfs_scrub	*sc,
	struct xfs_perag	*pag,
	struct xchk_ag		*sa)
{
	int			error;

	ASSERT(!sa->pag);

	error = xfs_ialloc_read_agi(pag, sc->tp, &sa->agi_bp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(pag, sc->tp, 0, &sa->agf_bp);
	if (error)
		return error;

	/* Grab our own passive reference from the caller's ref. */
	sa->pag = xfs_perag_hold(pag);
	xrep_ag_btcur_init(sc, sa);
	return 0;
}

/* Reinitialize the per-AG block reservation for the AG we just fixed. */
int
xrep_reset_perag_resv(
	struct xfs_scrub	*sc)
{
	int			error;

	if (!(sc->flags & XREP_RESET_PERAG_RESV))
		return 0;

	ASSERT(sc->sa.pag != NULL);
	ASSERT(sc->ops->type == ST_PERAG);
	ASSERT(sc->tp);

	sc->flags &= ~XREP_RESET_PERAG_RESV;
	error = xfs_ag_resv_free(sc->sa.pag);
	if (error)
		goto out;
	error = xfs_ag_resv_init(sc->sa.pag, sc->tp);
	if (error == -ENOSPC) {
		xfs_err(sc->mp,
"Insufficient free space to reset per-AG reservation for AG %u after repair.",
				sc->sa.pag->pag_agno);
		error = 0;
	}

out:
	return error;
}

/* Decide if we are going to call the repair function for a scrub type. */
bool
xrep_will_attempt(
	struct xfs_scrub	*sc)
{
	/* Userspace asked us to rebuild the structure regardless. */
	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
		return true;

	/* Let debug users force us into the repair routines. */
	if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
		return true;

	/* Metadata is corrupt or failed cross-referencing. */
	if (xchk_needs_repair(sc->sm))
		return true;

	return false;
}

/* Try to fix some part of a metadata inode by calling another scrubber. */
STATIC int
xrep_metadata_inode_subtype(
	struct xfs_scrub	*sc,
	unsigned int		scrub_type)
{
	__u32			smtype = sc->sm->sm_type;
	__u32			smflags = sc->sm->sm_flags;
	unsigned int		sick_mask = sc->sick_mask;
	int			error;

	/*
	 * Let's see if the inode needs repair. We're going to open-code calls
	 * to the scrub and repair functions so that we can hang on to the
	 * resources that we already acquired instead of using the standard
	 * setup/teardown routines.
	 */
	sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
	sc->sm->sm_type = scrub_type;

	switch (scrub_type) {
	case XFS_SCRUB_TYPE_INODE:
		error = xchk_inode(sc);
		break;
	case XFS_SCRUB_TYPE_BMBTD:
		error = xchk_bmap_data(sc);
		break;
	case XFS_SCRUB_TYPE_BMBTA:
		error = xchk_bmap_attr(sc);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (error)
		goto out;

	if (!xrep_will_attempt(sc))
		goto out;

	/*
	 * Repair some part of the inode. This will potentially join the inode
	 * to the transaction.
	 */
	switch (scrub_type) {
	case XFS_SCRUB_TYPE_INODE:
		error = xrep_inode(sc);
		break;
	case XFS_SCRUB_TYPE_BMBTD:
		error = xrep_bmap(sc, XFS_DATA_FORK, false);
		break;
	case XFS_SCRUB_TYPE_BMBTA:
		error = xrep_bmap(sc, XFS_ATTR_FORK, false);
		break;
	}
	if (error)
		goto out;

	/*
	 * Finish all deferred intent items and then roll the transaction so
	 * that the inode will not be joined to the transaction when we exit
	 * the function.
	 */
	error = xfs_defer_finish(&sc->tp);
	if (error)
		goto out;
	error = xfs_trans_roll(&sc->tp);
	if (error)
		goto out;

	/*
	 * Clear the corruption flags and re-check the metadata that we just
	 * repaired.
	 */
	sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;

	switch (scrub_type) {
	case XFS_SCRUB_TYPE_INODE:
		error = xchk_inode(sc);
		break;
	case XFS_SCRUB_TYPE_BMBTD:
		error = xchk_bmap_data(sc);
		break;
	case XFS_SCRUB_TYPE_BMBTA:
		error = xchk_bmap_attr(sc);
		break;
	}
	if (error)
		goto out;

	/* If corruption persists, the repair has failed. */
	if (xchk_needs_repair(sc->sm)) {
		error = -EFSCORRUPTED;
		goto out;
	}
out:
	sc->sick_mask = sick_mask;
	sc->sm->sm_type = smtype;
	sc->sm->sm_flags = smflags;
	return error;
}

/*
 * Repair the ondisk forks of a metadata inode. The caller must ensure that
 * sc->ip points to the metadata inode and the ILOCK is held on that inode.
 * The inode must not be joined to the transaction before the call, and will
 * not be afterwards.
 */
int
xrep_metadata_inode_forks(
	struct xfs_scrub	*sc)
{
	bool			dirty = false;
	int			error;

	/* Repair the inode record and the data fork. */
	error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
	if (error)
		return error;

	error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
	if (error)
		return error;

	/* Make sure the attr fork looks ok before we delete it. */
	error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
	if (error)
		return error;

	/* Clear the reflink flag since metadata never shares. */
	if (xfs_is_reflink_inode(sc->ip)) {
		dirty = true;
		xfs_trans_ijoin(sc->tp, sc->ip, 0);
		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
		if (error)
			return error;
	}

	/*
	 * If we modified the inode, roll the transaction but don't rejoin the
	 * inode to the new transaction because xrep_bmap_data can do that.
	 */
	if (dirty) {
		error = xfs_trans_roll(&sc->tp);
		if (error)
			return error;
		dirty = false;
	}

	return 0;
}

@@ -28,15 +28,28 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
/* Repair helpers */

int xrep_attempt(struct xfs_scrub *sc, struct xchk_stats_run *run);
bool xrep_will_attempt(struct xfs_scrub *sc);
void xrep_failure(struct xfs_mount *mp);
int xrep_roll_ag_trans(struct xfs_scrub *sc);
int xrep_roll_trans(struct xfs_scrub *sc);
int xrep_defer_finish(struct xfs_scrub *sc);
bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
		enum xfs_ag_resv_type type);
xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc);

static inline int
xrep_trans_commit(
	struct xfs_scrub	*sc)
{
	int error = xfs_trans_commit(sc->tp);

	sc->tp = NULL;
	return error;
}

struct xbitmap;
struct xagb_bitmap;
struct xfsb_bitmap;

int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);

@@ -57,8 +70,35 @@ struct xrep_find_ag_btree {

int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp,
		struct xrep_find_ag_btree *btree_info, struct xfs_buf *agfl_bp);

#ifdef CONFIG_XFS_QUOTA
void xrep_force_quotacheck(struct xfs_scrub *sc, xfs_dqtype_t type);
int xrep_ino_dqattach(struct xfs_scrub *sc);
#else
# define xrep_force_quotacheck(sc, type)	((void)0)
# define xrep_ino_dqattach(sc)			(0)
#endif /* CONFIG_XFS_QUOTA */

int xrep_ino_ensure_extent_count(struct xfs_scrub *sc, int whichfork,
		xfs_extnum_t nextents);
int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);

/* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc);

struct xfs_imap;
int xrep_setup_inode(struct xfs_scrub *sc, const struct xfs_imap *imap);

void xrep_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
int xrep_ag_init(struct xfs_scrub *sc, struct xfs_perag *pag,
		struct xchk_ag *sa);

/* Metadata revalidators */

int xrep_revalidate_allocbt(struct xfs_scrub *sc);
int xrep_revalidate_iallocbt(struct xfs_scrub *sc);

/* Metadata repairers */

@@ -67,9 +107,34 @@ int xrep_superblock(struct xfs_scrub *sc);
int xrep_agf(struct xfs_scrub *sc);
int xrep_agfl(struct xfs_scrub *sc);
int xrep_agi(struct xfs_scrub *sc);
int xrep_allocbt(struct xfs_scrub *sc);
int xrep_iallocbt(struct xfs_scrub *sc);
int xrep_refcountbt(struct xfs_scrub *sc);
int xrep_inode(struct xfs_scrub *sc);
int xrep_bmap_data(struct xfs_scrub *sc);
int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_bmap_cow(struct xfs_scrub *sc);

#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
#else
# define xrep_rtbitmap			xrep_notsupported
#endif /* CONFIG_XFS_RT */

#ifdef CONFIG_XFS_QUOTA
int xrep_quota(struct xfs_scrub *sc);
#else
# define xrep_quota			xrep_notsupported
#endif /* CONFIG_XFS_QUOTA */

int xrep_reinit_pagf(struct xfs_scrub *sc);
int xrep_reinit_pagi(struct xfs_scrub *sc);

#else

#define xrep_ino_dqattach(sc)	(0)
#define xrep_will_attempt(sc)	(false)

static inline int
xrep_attempt(
	struct xfs_scrub	*sc,
@@ -87,11 +152,45 @@ xrep_calc_ag_resblks(
	return 0;
}

static inline int
xrep_reset_perag_resv(
	struct xfs_scrub	*sc)
{
	if (!(sc->flags & XREP_RESET_PERAG_RESV))
		return 0;

	ASSERT(0);
	return -EOPNOTSUPP;
}

/* repair setup functions for no-repair */
static inline int
xrep_setup_nothing(
	struct xfs_scrub	*sc)
{
	return 0;
}
#define xrep_setup_ag_allocbt		xrep_setup_nothing

#define xrep_setup_inode(sc, imap)	((void)0)

#define xrep_revalidate_allocbt		(NULL)
#define xrep_revalidate_iallocbt	(NULL)

#define xrep_probe			xrep_notsupported
#define xrep_superblock			xrep_notsupported
#define xrep_agf			xrep_notsupported
#define xrep_agfl			xrep_notsupported
#define xrep_agi			xrep_notsupported
#define xrep_allocbt			xrep_notsupported
#define xrep_iallocbt			xrep_notsupported
#define xrep_refcountbt			xrep_notsupported
#define xrep_inode			xrep_notsupported
#define xrep_bmap_data			xrep_notsupported
#define xrep_bmap_attr			xrep_notsupported
#define xrep_bmap_cow			xrep_notsupported
#define xrep_rtbitmap			xrep_notsupported
#define xrep_quota			xrep_notsupported

#endif /* CONFIG_XFS_ONLINE_REPAIR */

@@ -24,6 +24,7 @@
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"

/*
 * Set us up to scrub reverse mapping btrees.

@@ -14,17 +14,33 @@
#include "xfs_rtbitmap.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bit.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/rtbitmap.h"

/* Set us up with the realtime metadata locked. */
int
xchk_setup_rtbitmap(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xchk_rtbitmap	*rtb;
	int			error;

-	error = xchk_trans_alloc(sc, 0);
+	rtb = kzalloc(sizeof(struct xchk_rtbitmap), XCHK_GFP_FLAGS);
+	if (!rtb)
+		return -ENOMEM;
+	sc->buf = rtb;
+
+	if (xchk_could_repair(sc)) {
+		error = xrep_setup_rtbitmap(sc, rtb);
+		if (error)
+			return error;
+	}
+
+	error = xchk_trans_alloc(sc, rtb->resblks);
	if (error)
		return error;

@@ -32,7 +48,22 @@ xchk_setup_rtbitmap(
	if (error)
		return error;

	error = xchk_ino_dqattach(sc);
	if (error)
		return error;

	xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);

	/*
	 * Now that we've locked the rtbitmap, we can't race with growfsrt
	 * trying to expand the bitmap or change the size of the rt volume.
	 * Hence it is safe to compute and check the geometry values.
	 */
	if (mp->m_sb.sb_rblocks) {
		rtb->rextents = xfs_rtb_to_rtx(mp, mp->m_sb.sb_rblocks);
		rtb->rextslog = xfs_compute_rextslog(rtb->rextents);
		rtb->rbmblocks = xfs_rtbitmap_blockcount(mp, rtb->rextents);
	}
	return 0;
}

@@ -63,21 +94,30 @@ STATIC int
xchk_rtbitmap_check_extents(
	struct xfs_scrub	*sc)
{
-	struct xfs_mount	*mp = sc->mp;
	struct xfs_bmbt_irec	map;
-	xfs_rtblock_t		off;
-	int			nmap;
+	struct xfs_iext_cursor	icur;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_inode	*ip = sc->ip;
+	xfs_fileoff_t		off = 0;
+	xfs_fileoff_t		endoff;
	int			error = 0;

-	for (off = 0; off < mp->m_sb.sb_rbmblocks;) {
+	/* Mappings may not cross or lie beyond EOF. */
+	endoff = XFS_B_TO_FSB(mp, ip->i_disk_size);
+	if (xfs_iext_lookup_extent(ip, &ip->i_df, endoff, &icur, &map)) {
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, endoff);
+		return 0;
+	}
+
+	while (off < endoff) {
+		int		nmap = 1;
+
		if (xchk_should_terminate(sc, &error) ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
			break;

		/* Make sure we have a written extent. */
-		nmap = 1;
-		error = xfs_bmapi_read(mp->m_rbmip, off,
-				mp->m_sb.sb_rbmblocks - off, &map, &nmap,
+		error = xfs_bmapi_read(ip, off, endoff - off, &map, &nmap,
				XFS_DATA_FORK);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
			break;
@@ -98,12 +138,48 @@ int
xchk_rtbitmap(
	struct xfs_scrub	*sc)
{
+	struct xfs_mount	*mp = sc->mp;
+	struct xchk_rtbitmap	*rtb = sc->buf;
	int			error;

-	/* Is the size of the rtbitmap correct? */
-	if (sc->mp->m_rbmip->i_disk_size !=
-	    XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks)) {
-		xchk_ino_set_corrupt(sc, sc->mp->m_rbmip->i_ino);
+	/* Is sb_rextents correct? */
+	if (mp->m_sb.sb_rextents != rtb->rextents) {
+		xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
		return 0;
	}
+
+	/* Is sb_rextslog correct? */
+	if (mp->m_sb.sb_rextslog != rtb->rextslog) {
+		xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+		return 0;
+	}
+
+	/*
+	 * Is sb_rbmblocks large enough to handle the current rt volume? In no
+	 * case can we exceed 4bn bitmap blocks since the super field is a u32.
+	 */
+	if (rtb->rbmblocks > U32_MAX) {
+		xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+		return 0;
+	}
+	if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks) {
+		xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+		return 0;
+	}
+
+	/* The bitmap file length must be aligned to an fsblock. */
+	if (mp->m_rbmip->i_disk_size & mp->m_blockmask) {
+		xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+		return 0;
+	}
+
+	/*
+	 * Is the bitmap file itself large enough to handle the rt volume?
+	 * growfsrt expands the bitmap file before updating sb_rextents, so the
+	 * file can be larger than sb_rbmblocks.
+	 */
+	if (mp->m_rbmip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks)) {
+		xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+		return 0;
+	}

@@ -116,12 +192,11 @@ xchk_rtbitmap(
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

-	error = xfs_rtalloc_query_all(sc->mp, sc->tp, xchk_rtbitmap_rec, sc);
+	error = xfs_rtalloc_query_all(mp, sc->tp, xchk_rtbitmap_rec, sc);
	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
-		goto out;
+		return error;

-out:
-	return error;
+	return 0;
}

/* xref check that the extent is not free in the rtbitmap */

fs/xfs/scrub/rtbitmap.h (new file, 22 lines)
@@ -0,0 +1,22 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_RTBITMAP_H__
#define __XFS_SCRUB_RTBITMAP_H__

struct xchk_rtbitmap {
	uint64_t		rextents;
	uint64_t		rbmblocks;
	unsigned int		rextslog;
	unsigned int		resblks;
};

#ifdef CONFIG_XFS_ONLINE_REPAIR
int xrep_setup_rtbitmap(struct xfs_scrub *sc, struct xchk_rtbitmap *rtb);
#else
# define xrep_setup_rtbitmap(sc, rtb)	(0)
#endif /* CONFIG_XFS_ONLINE_REPAIR */

#endif /* __XFS_SCRUB_RTBITMAP_H__ */

fs/xfs/scrub/rtbitmap_repair.c (new file, 202 lines)
@@ -0,0 +1,202 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/rtbitmap.h"

/* Set up to repair the realtime bitmap file metadata. */
int
xrep_setup_rtbitmap(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	unsigned long long	blocks = 0;

	/*
	 * Reserve enough blocks to write out a completely new bmbt for a
	 * maximally fragmented bitmap file. We do not hold the rtbitmap
	 * ILOCK yet, so this is entirely speculative.
	 */
	blocks = xfs_bmbt_calc_size(mp, mp->m_sb.sb_rbmblocks);
	if (blocks > UINT_MAX)
		return -EOPNOTSUPP;

	rtb->resblks += blocks;
	return 0;
}

/*
 * Make sure that the given range of the data fork of the realtime file is
 * mapped to written blocks. The caller must ensure that the inode is joined
 * to the transaction.
 */
STATIC int
xrep_rtbitmap_data_mappings(
	struct xfs_scrub	*sc,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		off = 0;
	int			error;

	ASSERT(sc->ip != NULL);

	while (off < len) {
		int		nmaps = 1;

		/*
		 * If we have a real extent mapping this block then we're
		 * in ok shape.
		 */
		error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps,
				XFS_DATA_FORK);
		if (error)
			return error;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		/*
		 * Written extents are ok. Holes are not filled because we
		 * do not know the freespace information.
		 */
		if (xfs_bmap_is_written_extent(&map) ||
		    map.br_startblock == HOLESTARTBLOCK) {
			off = map.br_startoff + map.br_blockcount;
			continue;
		}

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong. Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/* Make sure we're really converting an unwritten extent. */
		if (map.br_state != XFS_EXT_UNWRITTEN) {
			ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
			return -EFSCORRUPTED;
		}

		/* Make sure this block has a real zeroed extent mapped. */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
				map.br_blockcount,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
				0, &map, &nmaps);
		if (error)
			return error;
		if (nmaps != 1)
			return -EFSCORRUPTED;

		/* Commit new extent and all deferred work. */
		error = xrep_defer_finish(sc);
		if (error)
			return error;

		off = map.br_startoff + map.br_blockcount;
	}

	return 0;
}

/* Fix broken rt volume geometry. */
STATIC int
xrep_rtbitmap_geometry(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;

	/* Superblock fields */
	if (mp->m_sb.sb_rextents != rtb->rextents)
		xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
				rtb->rextents - mp->m_sb.sb_rextents);

	if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
				rtb->rbmblocks - mp->m_sb.sb_rbmblocks);

	if (mp->m_sb.sb_rextslog != rtb->rextslog)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
				rtb->rextslog - mp->m_sb.sb_rextslog);

	/* Fix broken isize */
	sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
			mp->m_sb.sb_blocksize);

	if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
		sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);

	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
	return xrep_roll_trans(sc);
}

/* Repair the realtime bitmap file metadata. */
int
xrep_rtbitmap(
	struct xfs_scrub	*sc)
{
	struct xchk_rtbitmap	*rtb = sc->buf;
	struct xfs_mount	*mp = sc->mp;
	unsigned long long	blocks = 0;
	int			error;

	/* Impossibly large rtbitmap means we can't touch the filesystem. */
	if (rtb->rbmblocks > U32_MAX)
		return 0;

	/*
	 * If the size of the rt bitmap file is larger than what we reserved,
	 * figure out if we need to adjust the block reservation in the
	 * transaction.
	 */
	blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
	if (blocks > UINT_MAX)
		return -EOPNOTSUPP;
	if (blocks > rtb->resblks) {
		error = xfs_trans_reserve_more(sc->tp, blocks, 0);
		if (error)
			return error;

		rtb->resblks += blocks;
	}

	/* Fix inode core and forks. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Ensure no unwritten extents. */
	error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
	if (error)
		return error;

	/* Fix inconsistent bitmap geometry */
	return xrep_rtbitmap_geometry(sc, rtb);
}

@ -31,6 +31,18 @@
|
||||
* (potentially large) amount of data in pageable memory.
|
||||
*/
|
||||
|
||||
struct xchk_rtsummary {
|
||||
struct xfs_rtalloc_args args;
|
||||
|
||||
uint64_t rextents;
|
||||
uint64_t rbmblocks;
|
||||
uint64_t rsumsize;
|
||||
unsigned int rsumlevels;
|
||||
|
||||
/* Memory buffer for the summary comparison. */
|
||||
union xfs_suminfo_raw words[];
|
||||
};

/* Set us up to check the rtsummary file. */
int
xchk_setup_rtsummary(
@@ -38,8 +50,15 @@ xchk_setup_rtsummary(
{
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
+	struct xchk_rtsummary	*rts;
	int			error;

+	rts = kvzalloc(struct_size(rts, words, mp->m_blockwsize),
+			XCHK_GFP_FLAGS);
+	if (!rts)
+		return -ENOMEM;
+	sc->buf = rts;
+
	/*
	 * Create an xfile to construct a new rtsummary file. The xfile allows
	 * us to avoid pinning kernel memory for this purpose.
@@ -54,15 +73,14 @@ xchk_setup_rtsummary(
	if (error)
		return error;

-	/* Allocate a memory buffer for the summary comparison. */
-	sc->buf = kvmalloc(mp->m_sb.sb_blocksize, XCHK_GFP_FLAGS);
-	if (!sc->buf)
-		return -ENOMEM;
-
	error = xchk_install_live_inode(sc, mp->m_rsumip);
	if (error)
		return error;

	error = xchk_ino_dqattach(sc);
	if (error)
		return error;

	/*
	 * Locking order requires us to take the rtbitmap first. We must be
	 * careful to unlock it ourselves when we are done with the rtbitmap
@@ -71,13 +89,29 @@ xchk_setup_rtsummary(
	 */
	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
	xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);

+	/*
+	 * Now that we've locked the rtbitmap and rtsummary, we can't race
+	 * with growfsrt trying to expand the summary or change the size of
+	 * the rt volume. Hence it is safe to compute and check the geometry
+	 * values.
+	 */
+	if (mp->m_sb.sb_rblocks) {
+		xfs_filblks_t	rsumblocks;
+		int		rextslog;
+
+		rts->rextents = xfs_rtb_to_rtx(mp, mp->m_sb.sb_rblocks);
+		rextslog = xfs_compute_rextslog(rts->rextents);
+		rts->rsumlevels = rextslog + 1;
+		rts->rbmblocks = xfs_rtbitmap_blockcount(mp, rts->rextents);
+		rsumblocks = xfs_rtsummary_blockcount(mp, rts->rsumlevels,
+				rts->rbmblocks);
+		rts->rsumsize = XFS_FSB_TO_B(mp, rsumblocks);
+	}
	return 0;
}
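
Aside (not part of this diff): the geometry block above encodes a few fixed relationships. A simplified userspace restatement follows; the stand-in helpers approximate libxfs's xfs_compute_rextslog(), xfs_rtbitmap_blockcount() and xfs_rtsummary_blockcount(), which handle more edge cases:

#include <stdint.h>

/* Highest set bit of the extent count (0 for an empty rt volume). */
static int compute_rextslog(uint64_t rextents)
{
        return rextents ? 63 - __builtin_clzll(rextents) : 0;
}

/* One bitmap bit per rt extent, 8 * blocksize bits per bitmap block. */
static uint64_t rtbitmap_blockcount(uint64_t rextents, unsigned int blocksize)
{
        uint64_t bits = 8ULL * blocksize;

        return (rextents + bits - 1) / bits;
}

/* One 32-bit counter per (summary level, bitmap block) pair. */
static uint64_t rtsummary_blockcount(unsigned int rsumlevels,
                uint64_t rbmblocks, unsigned int blocksize)
{
        uint64_t bytes = (uint64_t)rsumlevels * rbmblocks * sizeof(uint32_t);

        return (bytes + blocksize - 1) / blocksize;
}

For example, a volume with 1,000,000 rt extents gives rextslog = 19 and hence rsumlevels = 20, matching the rextslog + 1 computation above.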

/* Helper functions to record suminfo words in an xfile. */

typedef unsigned int xchk_rtsumoff_t;

static inline int
xfsum_load(
	struct xfs_scrub	*sc,
@@ -143,7 +177,7 @@ xchk_rtsum_record_free(

	/* Compute the relevant location in the rtsum file. */
	rbmoff = xfs_rtx_to_rbmblock(mp, rec->ar_startext);
-	lenlog = XFS_RTBLOCKLOG(rec->ar_extcount);
+	lenlog = xfs_highbit64(rec->ar_extcount);
	offs = xfs_rtsumoffs(mp, lenlog, rbmoff);

	rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
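
Aside (not part of this diff): the offset computed here indexes a two-dimensional array; the rtsummary file keeps one counter per (log2 of free-extent length, bitmap block) pair. A simplified stand-in for xfs_rtsumoffs():

/*
 * The summary file is laid out as rsumlevels rows of sb_rbmblocks
 * counters each, so a (lenlog, bitmap block) pair flattens to a single
 * word offset like this.
 */
static inline unsigned long rtsumoffs(unsigned int lenlog,
                unsigned long rbmblocks, unsigned long rbmblock)
{
        return lenlog * rbmblocks + rbmblock;
}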

@@ -188,19 +222,29 @@ STATIC int
xchk_rtsum_compare(
	struct xfs_scrub	*sc)
{
-	struct xfs_rtalloc_args	args = {
-		.mp		= sc->mp,
-		.tp		= sc->tp,
-	};
-	struct xfs_mount	*mp = sc->mp;
	struct xfs_bmbt_irec	map;
-	xfs_fileoff_t		off;
-	xchk_rtsumoff_t		sumoff = 0;
-	int			nmap;
+	struct xfs_iext_cursor	icur;

-	for (off = 0; off < XFS_B_TO_FSB(mp, mp->m_rsumsize); off++) {
-		union xfs_suminfo_raw *ondisk_info;
-		int error = 0;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_inode	*ip = sc->ip;
+	struct xchk_rtsummary	*rts = sc->buf;
+	xfs_fileoff_t		off = 0;
+	xfs_fileoff_t		endoff;
+	xfs_rtsumoff_t		sumoff = 0;
+	int			error = 0;
+
+	rts->args.mp = sc->mp;
+	rts->args.tp = sc->tp;
+
+	/* Mappings may not cross or lie beyond EOF. */
+	endoff = XFS_B_TO_FSB(mp, ip->i_disk_size);
+	if (xfs_iext_lookup_extent(ip, &ip->i_df, endoff, &icur, &map)) {
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, endoff);
+		return 0;
+	}
+
+	while (off < endoff) {
+		int nmap = 1;

		if (xchk_should_terminate(sc, &error))
			return error;
@@ -208,8 +252,7 @@ xchk_rtsum_compare(
			return 0;

		/* Make sure we have a written extent. */
-		nmap = 1;
-		error = xfs_bmapi_read(mp->m_rsumip, off, 1, &map, &nmap,
+		error = xfs_bmapi_read(ip, off, endoff - off, &map, &nmap,
				XFS_DATA_FORK);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
			return error;
@@ -219,24 +262,33 @@ xchk_rtsum_compare(
			return 0;
		}

+		off += map.br_blockcount;
+	}
+
+	for (off = 0; off < endoff; off++) {
+		union xfs_suminfo_raw	*ondisk_info;
+
		/* Read a block's worth of ondisk rtsummary file. */
-		error = xfs_rtsummary_read_buf(&args, off);
+		error = xfs_rtsummary_read_buf(&rts->args, off);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
			return error;

		/* Read a block's worth of computed rtsummary file. */
-		error = xfsum_copyout(sc, sumoff, sc->buf, mp->m_blockwsize);
+		error = xfsum_copyout(sc, sumoff, rts->words, mp->m_blockwsize);
		if (error) {
-			xfs_rtbuf_cache_relse(&args);
+			xfs_rtbuf_cache_relse(&rts->args);
			return error;
		}

-		ondisk_info = xfs_rsumblock_infoptr(&args, 0);
-		if (memcmp(ondisk_info, sc->buf,
-				mp->m_blockwsize << XFS_WORDLOG) != 0)
+		ondisk_info = xfs_rsumblock_infoptr(&rts->args, 0);
+		if (memcmp(ondisk_info, rts->words,
+				mp->m_blockwsize << XFS_WORDLOG) != 0) {
			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
+			xfs_rtbuf_cache_relse(&rts->args);
+			return error;
+		}

-		xfs_rtbuf_cache_relse(&args);
+		xfs_rtbuf_cache_relse(&rts->args);
		sumoff += mp->m_blockwsize;
	}

@@ -249,8 +301,43 @@ xchk_rtsummary(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
+	struct xchk_rtsummary	*rts = sc->buf;
	int			error = 0;

+	/* Is sb_rextents correct? */
+	if (mp->m_sb.sb_rextents != rts->rextents) {
+		xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+		goto out_rbm;
+	}
+
+	/* Is m_rsumlevels correct? */
+	if (mp->m_rsumlevels != rts->rsumlevels) {
+		xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
+		goto out_rbm;
+	}
+
+	/* Is m_rsumsize correct? */
+	if (mp->m_rsumsize != rts->rsumsize) {
+		xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
+		goto out_rbm;
+	}
+
+	/* The summary file length must be aligned to an fsblock. */
+	if (mp->m_rsumip->i_disk_size & mp->m_blockmask) {
+		xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
+		goto out_rbm;
+	}
+
+	/*
+	 * Is the summary file itself large enough to handle the rt volume?
+	 * growfsrt expands the summary file before updating sb_rextents, so
+	 * the file can be larger than rsumsize.
+	 */
+	if (mp->m_rsumip->i_disk_size < rts->rsumsize) {
+		xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
+		goto out_rbm;
+	}
+
	/* Invoke the fork scrubber. */
	error = xchk_metadata_inode_forks(sc);
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))

@@ -14,8 +14,6 @@
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
-#include "xfs_errortag.h"
-#include "xfs_error.h"
#include "xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -238,27 +236,31 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
	[XFS_SCRUB_TYPE_BNOBT] = {	/* bnobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_allocbt,
-		.scrub	= xchk_bnobt,
-		.repair	= xrep_notsupported,
+		.scrub	= xchk_allocbt,
+		.repair	= xrep_allocbt,
+		.repair_eval = xrep_revalidate_allocbt,
	},
	[XFS_SCRUB_TYPE_CNTBT] = {	/* cntbt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_allocbt,
-		.scrub	= xchk_cntbt,
-		.repair	= xrep_notsupported,
+		.scrub	= xchk_allocbt,
+		.repair	= xrep_allocbt,
+		.repair_eval = xrep_revalidate_allocbt,
	},
	[XFS_SCRUB_TYPE_INOBT] = {	/* inobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_iallocbt,
-		.scrub	= xchk_inobt,
-		.repair	= xrep_notsupported,
+		.scrub	= xchk_iallocbt,
+		.repair	= xrep_iallocbt,
+		.repair_eval = xrep_revalidate_iallocbt,
	},
	[XFS_SCRUB_TYPE_FINOBT] = {	/* finobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_iallocbt,
-		.scrub	= xchk_finobt,
+		.scrub	= xchk_iallocbt,
		.has	= xfs_has_finobt,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_iallocbt,
+		.repair_eval = xrep_revalidate_iallocbt,
	},
	[XFS_SCRUB_TYPE_RMAPBT] = {	/* rmapbt */
		.type	= ST_PERAG,
@@ -272,31 +274,31 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
		.setup	= xchk_setup_ag_refcountbt,
		.scrub	= xchk_refcountbt,
		.has	= xfs_has_reflink,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_refcountbt,
	},
	[XFS_SCRUB_TYPE_INODE] = {	/* inode record */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode,
		.scrub	= xchk_inode,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_inode,
	},
	[XFS_SCRUB_TYPE_BMBTD] = {	/* inode data fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_data,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_bmap_data,
	},
	[XFS_SCRUB_TYPE_BMBTA] = {	/* inode attr fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_attr,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_bmap_attr,
	},
	[XFS_SCRUB_TYPE_BMBTC] = {	/* inode CoW fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_cow,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_bmap_cow,
	},
	[XFS_SCRUB_TYPE_DIR] = {	/* directory */
		.type	= ST_INODE,
@@ -326,33 +328,31 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
		.type	= ST_FS,
		.setup	= xchk_setup_rtbitmap,
		.scrub	= xchk_rtbitmap,
		.has	= xfs_has_realtime,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_rtbitmap,
	},
	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
		.type	= ST_FS,
		.setup	= xchk_setup_rtsummary,
		.scrub	= xchk_rtsummary,
		.has	= xfs_has_realtime,
		.repair	= xrep_notsupported,
	},
	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_GQUOTA] = {	/* group quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_PQUOTA] = {	/* project quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_FSCOUNTERS] = {	/* fs summary counters */
		.type	= ST_FS,
@@ -531,7 +531,10 @@ xfs_scrub_metadata(

	/* Scrub for errors. */
	check_start = xchk_stats_now();
-	error = sc->ops->scrub(sc);
+	if ((sc->flags & XREP_ALREADY_FIXED) && sc->ops->repair_eval != NULL)
+		error = sc->ops->repair_eval(sc);
+	else
+		error = sc->ops->scrub(sc);
	run.scrub_ns += xchk_stats_elapsed_ns(check_start);
	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
		goto try_harder;
@@ -542,23 +545,12 @@ xfs_scrub_metadata(

	xchk_update_health(sc);

-	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
-	    !(sc->flags & XREP_ALREADY_FIXED)) {
-		bool needs_fix = xchk_needs_repair(sc->sm);
-
-		/* Userspace asked us to rebuild the structure regardless. */
-		if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
-			needs_fix = true;
-
-		/* Let debug users force us into the repair routines. */
-		if (XFS_TEST_ERROR(needs_fix, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
-			needs_fix = true;
-
+	if (xchk_could_repair(sc)) {
		/*
		 * If userspace asked for a repair but it wasn't necessary,
		 * report that back to userspace.
		 */
-		if (!needs_fix) {
+		if (!xrep_will_attempt(sc)) {
			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
			goto out_nofix;
		}
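
Aside (not part of this diff): the replacement condition and xrep_will_attempt() fold the removed open-coded checks into helpers. A sketch reconstructed from the removed lines above; the actual definitions live elsewhere in this series and may differ in detail:

/* What the new helpers boil down to, per the removed lines. */
static inline bool xchk_could_repair(const struct xfs_scrub *sc)
{
        return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
               !(sc->flags & XREP_ALREADY_FIXED);
}

bool
xrep_will_attempt(
        struct xfs_scrub        *sc)
{
        /* Userspace asked us to rebuild the structure regardless. */
        if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
                return true;

        /* Let debug users force us into the repair routines. */
        if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
                return true;

        return xchk_needs_repair(sc->sm);
}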
@@ -35,6 +35,14 @@ struct xchk_meta_ops {
	/* Repair or optimize the metadata. */
	int (*repair)(struct xfs_scrub *);

+	/*
+	 * Re-scrub the metadata we repaired, in case there's extra work that
+	 * we need to do to check our repair work. If this is NULL, we'll use
+	 * the ->scrub function pointer, assuming that the regular scrub is
+	 * sufficient.
+	 */
+	int (*repair_eval)(struct xfs_scrub *sc);
+
	/* Decide if we even have this piece of metadata. */
	bool (*has)(struct xfs_mount *);

@@ -113,6 +121,7 @@ struct xfs_scrub {
#define XCHK_HAVE_FREEZE_PROT	(1U << 1)  /* do we have freeze protection? */
#define XCHK_FSGATES_DRAIN	(1U << 2)  /* defer ops draining enabled */
#define XCHK_NEED_DRAIN		(1U << 3)  /* scrub needs to drain defer ops */
+#define XREP_RESET_PERAG_RESV	(1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED	(1U << 31) /* checking our repair work */

/*
@@ -129,10 +138,8 @@ int xchk_superblock(struct xfs_scrub *sc);
int xchk_agf(struct xfs_scrub *sc);
int xchk_agfl(struct xfs_scrub *sc);
int xchk_agi(struct xfs_scrub *sc);
-int xchk_bnobt(struct xfs_scrub *sc);
-int xchk_cntbt(struct xfs_scrub *sc);
-int xchk_inobt(struct xfs_scrub *sc);
-int xchk_finobt(struct xfs_scrub *sc);
+int xchk_allocbt(struct xfs_scrub *sc);
+int xchk_iallocbt(struct xfs_scrub *sc);
int xchk_rmapbt(struct xfs_scrub *sc);
int xchk_refcountbt(struct xfs_scrub *sc);
int xchk_inode(struct xfs_scrub *sc);