mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-08 14:13:53 +00:00
xfs: rework xfs_alloc_vextent()
It's a multiplexing mess that can be greatly simplified, and really needs to be simplified to allow active per-ag references to propagate from initial AG selection code the the bmapi code. This splits the code out into separate a parameter checking function, an iterator function, and allocation completion functions and then implements the individual policies using these functions. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
This commit is contained in:
parent
76257a1587
commit
ecd788a924
@ -3151,29 +3151,20 @@ xfs_alloc_read_agf(
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an extent (variable-size).
|
||||
* Depending on the allocation type, we either look in a single allocation
|
||||
* group or loop over the allocation groups to find the result.
|
||||
* Pre-proces allocation arguments to set initial state that we don't require
|
||||
* callers to set up correctly, as well as bounds check the allocation args
|
||||
* that are set up.
|
||||
*/
|
||||
int /* error */
|
||||
xfs_alloc_vextent(
|
||||
struct xfs_alloc_arg *args) /* allocation argument structure */
|
||||
static int
|
||||
xfs_alloc_vextent_check_args(
|
||||
struct xfs_alloc_arg *args)
|
||||
{
|
||||
xfs_agblock_t agsize; /* allocation group size */
|
||||
int error;
|
||||
int flags; /* XFS_ALLOC_FLAG_... locking flags */
|
||||
struct xfs_mount *mp; /* mount structure pointer */
|
||||
xfs_agnumber_t sagno; /* starting allocation group number */
|
||||
xfs_alloctype_t type; /* input allocation type */
|
||||
int bump_rotor = 0;
|
||||
xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
|
||||
xfs_agnumber_t minimum_agno = 0;
|
||||
struct xfs_mount *mp = args->mp;
|
||||
xfs_agblock_t agsize;
|
||||
|
||||
mp = args->mp;
|
||||
type = args->otype = args->type;
|
||||
args->otype = args->type;
|
||||
args->agbno = NULLAGBLOCK;
|
||||
if (args->tp->t_highest_agno != NULLAGNUMBER)
|
||||
minimum_agno = args->tp->t_highest_agno;
|
||||
|
||||
/*
|
||||
* Just fix this up, for the case where the last a.g. is shorter
|
||||
* (or there's only one a.g.) and the caller couldn't easily figure
|
||||
@ -3195,199 +3186,314 @@ xfs_alloc_vextent(
|
||||
args->mod >= args->prod) {
|
||||
args->fsbno = NULLFSBLOCK;
|
||||
trace_xfs_alloc_vextent_badargs(args);
|
||||
return 0;
|
||||
return -ENOSPC;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case XFS_ALLOCTYPE_THIS_AG:
|
||||
case XFS_ALLOCTYPE_NEAR_BNO:
|
||||
case XFS_ALLOCTYPE_THIS_BNO:
|
||||
/*
|
||||
* These three force us into a single a.g.
|
||||
*/
|
||||
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
|
||||
args->pag = xfs_perag_get(mp, args->agno);
|
||||
|
||||
if (minimum_agno > args->agno) {
|
||||
trace_xfs_alloc_vextent_skip_deadlock(args);
|
||||
error = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
error = xfs_alloc_fix_freelist(args, 0);
|
||||
if (error) {
|
||||
trace_xfs_alloc_vextent_nofix(args);
|
||||
goto error0;
|
||||
}
|
||||
if (!args->agbp) {
|
||||
trace_xfs_alloc_vextent_noagbp(args);
|
||||
break;
|
||||
}
|
||||
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
|
||||
if ((error = xfs_alloc_ag_vextent(args)))
|
||||
goto error0;
|
||||
break;
|
||||
case XFS_ALLOCTYPE_START_BNO:
|
||||
/*
|
||||
* Try near allocation first, then anywhere-in-ag after
|
||||
* the first a.g. fails.
|
||||
*/
|
||||
if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
|
||||
xfs_is_inode32(mp)) {
|
||||
args->fsbno = XFS_AGB_TO_FSB(mp,
|
||||
((mp->m_agfrotor / rotorstep) %
|
||||
mp->m_sb.sb_agcount), 0);
|
||||
bump_rotor = 1;
|
||||
}
|
||||
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
|
||||
args->type = XFS_ALLOCTYPE_NEAR_BNO;
|
||||
fallthrough;
|
||||
case XFS_ALLOCTYPE_FIRST_AG:
|
||||
/*
|
||||
* Rotate through the allocation groups looking for a winner.
|
||||
* If we are blocking, we must obey minimum_agno contraints for
|
||||
* avoiding ABBA deadlocks on AGF locking.
|
||||
*/
|
||||
if (type == XFS_ALLOCTYPE_FIRST_AG) {
|
||||
/*
|
||||
* Start with allocation group given by bno.
|
||||
*/
|
||||
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
|
||||
args->type = XFS_ALLOCTYPE_THIS_AG;
|
||||
sagno = minimum_agno;
|
||||
flags = 0;
|
||||
} else {
|
||||
/*
|
||||
* Start with the given allocation group.
|
||||
*/
|
||||
args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
|
||||
flags = XFS_ALLOC_FLAG_TRYLOCK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop over allocation groups twice; first time with
|
||||
* trylock set, second time without.
|
||||
*/
|
||||
for (;;) {
|
||||
args->pag = xfs_perag_get(mp, args->agno);
|
||||
error = xfs_alloc_fix_freelist(args, flags);
|
||||
if (error) {
|
||||
trace_xfs_alloc_vextent_nofix(args);
|
||||
goto error0;
|
||||
}
|
||||
/*
|
||||
* If we get a buffer back then the allocation will fly.
|
||||
*/
|
||||
if (args->agbp) {
|
||||
if ((error = xfs_alloc_ag_vextent(args)))
|
||||
goto error0;
|
||||
break;
|
||||
}
|
||||
|
||||
trace_xfs_alloc_vextent_loopfailed(args);
|
||||
|
||||
/*
|
||||
* Didn't work, figure out the next iteration.
|
||||
*/
|
||||
if (args->agno == sagno &&
|
||||
type == XFS_ALLOCTYPE_START_BNO)
|
||||
args->type = XFS_ALLOCTYPE_THIS_AG;
|
||||
|
||||
/*
|
||||
* If we are try-locking, we can't deadlock on AGF
|
||||
* locks, so we can wrap all the way back to the first
|
||||
* AG. Otherwise, wrap back to the start AG so we can't
|
||||
* deadlock, and let the end of scan handler decide what
|
||||
* to do next.
|
||||
*/
|
||||
if (++(args->agno) == mp->m_sb.sb_agcount) {
|
||||
if (flags & XFS_ALLOC_FLAG_TRYLOCK)
|
||||
args->agno = 0;
|
||||
else
|
||||
args->agno = sagno;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reached the starting a.g., must either be done
|
||||
* or switch to non-trylock mode.
|
||||
*/
|
||||
if (args->agno == sagno) {
|
||||
if (flags == 0) {
|
||||
args->agbno = NULLAGBLOCK;
|
||||
trace_xfs_alloc_vextent_allfailed(args);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Blocking pass next, so we must obey minimum
|
||||
* agno constraints to avoid ABBA AGF deadlocks.
|
||||
*/
|
||||
flags = 0;
|
||||
if (minimum_agno > sagno)
|
||||
sagno = minimum_agno;
|
||||
|
||||
if (type == XFS_ALLOCTYPE_START_BNO) {
|
||||
args->agbno = XFS_FSB_TO_AGBNO(mp,
|
||||
args->fsbno);
|
||||
args->type = XFS_ALLOCTYPE_NEAR_BNO;
|
||||
}
|
||||
}
|
||||
xfs_perag_put(args->pag);
|
||||
}
|
||||
if (bump_rotor) {
|
||||
if (args->agno == sagno)
|
||||
mp->m_agfrotor = (mp->m_agfrotor + 1) %
|
||||
(mp->m_sb.sb_agcount * rotorstep);
|
||||
else
|
||||
mp->m_agfrotor = (args->agno * rotorstep + 1) %
|
||||
(mp->m_sb.sb_agcount * rotorstep);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
/* NOTREACHED */
|
||||
}
|
||||
if (args->agbno == NULLAGBLOCK) {
|
||||
args->fsbno = NULLFSBLOCK;
|
||||
} else {
|
||||
args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
|
||||
#ifdef DEBUG
|
||||
ASSERT(args->len >= args->minlen);
|
||||
ASSERT(args->len <= args->maxlen);
|
||||
ASSERT(args->agbno % args->alignment == 0);
|
||||
XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
|
||||
args->len);
|
||||
#endif
|
||||
|
||||
}
|
||||
/*
|
||||
* Post-process allocation results to set the allocated block number correctly
|
||||
* for the caller.
|
||||
*
|
||||
* XXX: xfs_alloc_vextent() should really be returning ENOSPC for ENOSPC, not
|
||||
* hiding it behind a "successful" NULLFSBLOCK allocation.
|
||||
*/
|
||||
static void
|
||||
xfs_alloc_vextent_set_fsbno(
|
||||
struct xfs_alloc_arg *args,
|
||||
xfs_agnumber_t minimum_agno)
|
||||
{
|
||||
struct xfs_mount *mp = args->mp;
|
||||
|
||||
/*
|
||||
* We end up here with a locked AGF. If we failed, the caller is likely
|
||||
* going to try to allocate again with different parameters, and that
|
||||
* can widen the AGs that are searched for free space. If we have to do
|
||||
* BMBT block allocation, we have to do a new allocation.
|
||||
* We can end up here with a locked AGF. If we failed, the caller is
|
||||
* likely going to try to allocate again with different parameters, and
|
||||
* that can widen the AGs that are searched for free space. If we have
|
||||
* to do BMBT block allocation, we have to do a new allocation.
|
||||
*
|
||||
* Hence leaving this function with the AGF locked opens up potential
|
||||
* ABBA AGF deadlocks because a future allocation attempt in this
|
||||
* transaction may attempt to lock a lower number AGF.
|
||||
*
|
||||
* We can't release the AGF until the transaction is commited, so at
|
||||
* this point we must update the "firstblock" tracker to point at this
|
||||
* AG if the tracker is empty or points to a lower AG. This allows the
|
||||
* next allocation attempt to be modified appropriately to avoid
|
||||
* this point we must update the "first allocation" tracker to point at
|
||||
* this AG if the tracker is empty or points to a lower AG. This allows
|
||||
* the next allocation attempt to be modified appropriately to avoid
|
||||
* deadlocks.
|
||||
*/
|
||||
if (args->agbp &&
|
||||
(args->tp->t_highest_agno == NULLAGNUMBER ||
|
||||
args->pag->pag_agno > minimum_agno))
|
||||
args->tp->t_highest_agno = args->pag->pag_agno;
|
||||
xfs_perag_put(args->pag);
|
||||
return 0;
|
||||
error0:
|
||||
args->agno > minimum_agno))
|
||||
args->tp->t_highest_agno = args->agno;
|
||||
|
||||
/* Allocation failed with ENOSPC if NULLAGBLOCK was returned. */
|
||||
if (args->agbno == NULLAGBLOCK) {
|
||||
args->fsbno = NULLFSBLOCK;
|
||||
return;
|
||||
}
|
||||
|
||||
args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
|
||||
#ifdef DEBUG
|
||||
ASSERT(args->len >= args->minlen);
|
||||
ASSERT(args->len <= args->maxlen);
|
||||
ASSERT(args->agbno % args->alignment == 0);
|
||||
XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate within a single AG only.
|
||||
*/
|
||||
static int
|
||||
xfs_alloc_vextent_this_ag(
|
||||
struct xfs_alloc_arg *args,
|
||||
xfs_agnumber_t minimum_agno)
|
||||
{
|
||||
struct xfs_mount *mp = args->mp;
|
||||
int error;
|
||||
|
||||
error = xfs_alloc_vextent_check_args(args);
|
||||
if (error) {
|
||||
if (error == -ENOSPC)
|
||||
return 0;
|
||||
return error;
|
||||
}
|
||||
|
||||
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
|
||||
if (minimum_agno > args->agno) {
|
||||
trace_xfs_alloc_vextent_skip_deadlock(args);
|
||||
args->fsbno = NULLFSBLOCK;
|
||||
return 0;
|
||||
}
|
||||
|
||||
args->pag = xfs_perag_get(mp, args->agno);
|
||||
error = xfs_alloc_fix_freelist(args, 0);
|
||||
if (error) {
|
||||
trace_xfs_alloc_vextent_nofix(args);
|
||||
goto out_error;
|
||||
}
|
||||
if (!args->agbp) {
|
||||
trace_xfs_alloc_vextent_noagbp(args);
|
||||
args->fsbno = NULLFSBLOCK;
|
||||
goto out_error;
|
||||
}
|
||||
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
|
||||
error = xfs_alloc_ag_vextent(args);
|
||||
|
||||
xfs_alloc_vextent_set_fsbno(args, minimum_agno);
|
||||
out_error:
|
||||
xfs_perag_put(args->pag);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate all AGs trying to allocate an extent starting from @start_ag.
|
||||
*
|
||||
* If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
|
||||
* allocation attempts in @start_agno have locality information. If we fail to
|
||||
* allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
|
||||
* we attempt to allocation in as there is no locality optimisation possible for
|
||||
* those allocations.
|
||||
*
|
||||
* When we wrap the AG iteration at the end of the filesystem, we have to be
|
||||
* careful not to wrap into AGs below ones we already have locked in the
|
||||
* transaction if we are doing a blocking iteration. This will result in an
|
||||
* out-of-order locking of AGFs and hence can cause deadlocks.
|
||||
*/
|
||||
static int
|
||||
xfs_alloc_vextent_iterate_ags(
|
||||
struct xfs_alloc_arg *args,
|
||||
xfs_agnumber_t minimum_agno,
|
||||
xfs_agnumber_t start_agno,
|
||||
uint32_t flags)
|
||||
{
|
||||
struct xfs_mount *mp = args->mp;
|
||||
int error = 0;
|
||||
|
||||
ASSERT(start_agno >= minimum_agno);
|
||||
|
||||
/*
|
||||
* Loop over allocation groups twice; first time with
|
||||
* trylock set, second time without.
|
||||
*/
|
||||
args->agno = start_agno;
|
||||
for (;;) {
|
||||
args->pag = xfs_perag_get(mp, args->agno);
|
||||
error = xfs_alloc_fix_freelist(args, flags);
|
||||
if (error) {
|
||||
trace_xfs_alloc_vextent_nofix(args);
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If we get a buffer back then the allocation will fly.
|
||||
*/
|
||||
if (args->agbp) {
|
||||
error = xfs_alloc_ag_vextent(args);
|
||||
break;
|
||||
}
|
||||
|
||||
trace_xfs_alloc_vextent_loopfailed(args);
|
||||
|
||||
/*
|
||||
* Didn't work, figure out the next iteration.
|
||||
*/
|
||||
if (args->agno == start_agno &&
|
||||
args->otype == XFS_ALLOCTYPE_START_BNO)
|
||||
args->type = XFS_ALLOCTYPE_THIS_AG;
|
||||
|
||||
/*
|
||||
* If we are try-locking, we can't deadlock on AGF locks so we
|
||||
* can wrap all the way back to the first AG. Otherwise, wrap
|
||||
* back to the start AG so we can't deadlock and let the end of
|
||||
* scan handler decide what to do next.
|
||||
*/
|
||||
if (++(args->agno) == mp->m_sb.sb_agcount) {
|
||||
if (flags & XFS_ALLOC_FLAG_TRYLOCK)
|
||||
args->agno = 0;
|
||||
else
|
||||
args->agno = minimum_agno;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reached the starting a.g., must either be done
|
||||
* or switch to non-trylock mode.
|
||||
*/
|
||||
if (args->agno == start_agno) {
|
||||
if (flags == 0) {
|
||||
args->agbno = NULLAGBLOCK;
|
||||
trace_xfs_alloc_vextent_allfailed(args);
|
||||
break;
|
||||
}
|
||||
|
||||
flags = 0;
|
||||
if (args->otype == XFS_ALLOCTYPE_START_BNO) {
|
||||
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
|
||||
args->type = XFS_ALLOCTYPE_NEAR_BNO;
|
||||
}
|
||||
}
|
||||
xfs_perag_put(args->pag);
|
||||
args->pag = NULL;
|
||||
}
|
||||
if (args->pag) {
|
||||
xfs_perag_put(args->pag);
|
||||
args->pag = NULL;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate from the AGs from the start AG to the end of the filesystem, trying
|
||||
* to allocate blocks. It starts with a near allocation attempt in the initial
|
||||
* AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
|
||||
* back to zero if allowed by previous allocations in this transaction,
|
||||
* otherwise will wrap back to the start AG and run a second blocking pass to
|
||||
* the end of the filesystem.
|
||||
*/
|
||||
static int
|
||||
xfs_alloc_vextent_start_ag(
|
||||
struct xfs_alloc_arg *args,
|
||||
xfs_agnumber_t minimum_agno)
|
||||
{
|
||||
struct xfs_mount *mp = args->mp;
|
||||
xfs_agnumber_t start_agno;
|
||||
xfs_agnumber_t rotorstep = xfs_rotorstep;
|
||||
bool bump_rotor = false;
|
||||
int error;
|
||||
|
||||
error = xfs_alloc_vextent_check_args(args);
|
||||
if (error) {
|
||||
if (error == -ENOSPC)
|
||||
return 0;
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
|
||||
xfs_is_inode32(mp)) {
|
||||
args->fsbno = XFS_AGB_TO_FSB(mp,
|
||||
((mp->m_agfrotor / rotorstep) %
|
||||
mp->m_sb.sb_agcount), 0);
|
||||
bump_rotor = 1;
|
||||
}
|
||||
start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, args->fsbno));
|
||||
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
|
||||
args->type = XFS_ALLOCTYPE_NEAR_BNO;
|
||||
|
||||
error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
|
||||
XFS_ALLOC_FLAG_TRYLOCK);
|
||||
if (bump_rotor) {
|
||||
if (args->agno == start_agno)
|
||||
mp->m_agfrotor = (mp->m_agfrotor + 1) %
|
||||
(mp->m_sb.sb_agcount * rotorstep);
|
||||
else
|
||||
mp->m_agfrotor = (args->agno * rotorstep + 1) %
|
||||
(mp->m_sb.sb_agcount * rotorstep);
|
||||
}
|
||||
|
||||
xfs_alloc_vextent_set_fsbno(args, minimum_agno);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate from the agno indicated from args->fsbno through to the end of the
|
||||
* filesystem attempting blocking allocation. This does not wrap or try a second
|
||||
* pass, so will not recurse into AGs lower than indicated by fsbno.
|
||||
*/
|
||||
static int
|
||||
xfs_alloc_vextent_first_ag(
|
||||
struct xfs_alloc_arg *args,
|
||||
xfs_agnumber_t minimum_agno)
|
||||
{
|
||||
struct xfs_mount *mp = args->mp;
|
||||
xfs_agnumber_t start_agno;
|
||||
int error;
|
||||
|
||||
error = xfs_alloc_vextent_check_args(args);
|
||||
if (error) {
|
||||
if (error == -ENOSPC)
|
||||
return 0;
|
||||
return error;
|
||||
}
|
||||
|
||||
start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, args->fsbno));
|
||||
|
||||
args->type = XFS_ALLOCTYPE_THIS_AG;
|
||||
error = xfs_alloc_vextent_iterate_ags(args, minimum_agno,
|
||||
start_agno, 0);
|
||||
xfs_alloc_vextent_set_fsbno(args, minimum_agno);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an extent (variable-size).
|
||||
* Depending on the allocation type, we either look in a single allocation
|
||||
* group or loop over the allocation groups to find the result.
|
||||
*/
|
||||
int
|
||||
xfs_alloc_vextent(
|
||||
struct xfs_alloc_arg *args)
|
||||
{
|
||||
xfs_agnumber_t minimum_agno = 0;
|
||||
|
||||
if (args->tp->t_highest_agno != NULLAGNUMBER)
|
||||
minimum_agno = args->tp->t_highest_agno;
|
||||
|
||||
switch (args->type) {
|
||||
case XFS_ALLOCTYPE_THIS_AG:
|
||||
case XFS_ALLOCTYPE_NEAR_BNO:
|
||||
case XFS_ALLOCTYPE_THIS_BNO:
|
||||
return xfs_alloc_vextent_this_ag(args, minimum_agno);
|
||||
case XFS_ALLOCTYPE_START_BNO:
|
||||
return xfs_alloc_vextent_start_ag(args, minimum_agno);
|
||||
case XFS_ALLOCTYPE_FIRST_AG:
|
||||
return xfs_alloc_vextent_first_ag(args, minimum_agno);
|
||||
default:
|
||||
ASSERT(0);
|
||||
/* NOTREACHED */
|
||||
}
|
||||
/* Should never get here */
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
/* Ensure that the freelist is at full capacity. */
|
||||
int
|
||||
xfs_free_extent_fix_freelist(
|
||||
|
Loading…
Reference in New Issue
Block a user