xfs: scrub should use ECHRNG to signal that the drain is needed

In the previous patch, we added jump labels to the intent drain code so
that regular filesystem operations need not pay the price of checking
for someone (scrub) waiting on intents to drain from some part of the
filesystem when that someone isn't running.

However, I observed that xfs/285 now spends a lot more time pushing the
AIL from the inode btree scrubber than it used to.  This is because the
inobt scrubber will try push the AIL to try to get logged inode cores
written to the filesystem when it sees a weird discrepancy between the
ondisk inode and the inobt records.  This AIL push is triggered when the
setup function sees TRY_HARDER is set; and the requisite EDEADLOCK
return is initiated when the discrepancy is seen.

The solution to this performance slow down is to use a different result
code (ECHRNG) for scrub code to signal that it needs to wait for
deferred intent work items to drain out of some part of the filesystem.
When this happens, set a new scrub state flag (XCHK_NEED_DRAIN) so that
setup functions will activate the jump label.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
This commit is contained in:
Darrick J. Wong 2023-04-11 19:00:00 -07:00
parent 466c525d6d
commit 88accf1722
8 changed files with 21 additions and 2 deletions

View File

@ -36,6 +36,7 @@ __xchk_btree_process_error(
switch (*error) {
case -EDEADLOCK:
case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;

View File

@ -75,6 +75,7 @@ __xchk_process_error(
case 0:
return true;
case -EDEADLOCK:
case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(
sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
@ -130,6 +131,7 @@ __xchk_fblock_process_error(
case 0:
return true;
case -EDEADLOCK:
case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;
@ -488,7 +490,7 @@ xchk_perag_drain_and_lock(
}
if (!(sc->flags & XCHK_FSGATES_DRAIN))
return -EDEADLOCK;
return -ECHRNG;
error = xfs_perag_intent_drain(sa->pag);
if (error == -ERESTARTSYS)
error = -EINTR;

View File

@ -161,7 +161,7 @@ void xchk_start_reaping(struct xfs_scrub *sc);
*/
static inline bool xchk_need_intent_drain(struct xfs_scrub *sc)
{
return sc->flags & XCHK_TRY_HARDER;
return sc->flags & XCHK_NEED_DRAIN;
}
void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);

View File

@ -39,6 +39,7 @@ xchk_da_process_error(
switch (*error) {
case -EDEADLOCK:
case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;

View File

@ -60,6 +60,9 @@ xrep_attempt(
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
sc->flags |= XREP_ALREADY_FIXED;
return -EAGAIN;
case -ECHRNG:
sc->flags |= XCHK_NEED_DRAIN;
return -EAGAIN;
case -EDEADLOCK:
/* Tell the caller to try again having grabbed all the locks. */
if (!(sc->flags & XCHK_TRY_HARDER)) {

View File

@ -510,6 +510,8 @@ xfs_scrub_metadata(
error = sc->ops->setup(sc);
if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
goto try_harder;
if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
goto need_drain;
if (error)
goto out_teardown;
@ -517,6 +519,8 @@ xfs_scrub_metadata(
error = sc->ops->scrub(sc);
if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
goto try_harder;
if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
goto need_drain;
if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
goto out_teardown;
@ -575,6 +579,12 @@ xfs_scrub_metadata(
error = 0;
}
return error;
need_drain:
error = xchk_teardown(sc, 0);
if (error)
goto out_sc;
sc->flags |= XCHK_NEED_DRAIN;
goto retry_op;
try_harder:
/*
* Scrubbers return -EDEADLOCK to mean 'try harder'. Tear down

View File

@ -98,6 +98,7 @@ struct xfs_scrub {
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
#define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */
#define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */
#define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
/*

View File

@ -100,6 +100,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
{ XCHK_TRY_HARDER, "try_harder" }, \
{ XCHK_REAPING_DISABLED, "reaping_disabled" }, \
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
{ XCHK_NEED_DRAIN, "need_drain" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }
DECLARE_EVENT_CLASS(xchk_class,