mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-08 15:04:45 +00:00
More new code for 6.5:
* Fix some ordering problems with log items during log recovery. * Don't deadlock the system by trying to flush busy freed extents while holding on to busy freed extents. * Improve validation of log geometry parameters when reading the primary superblock. * Validate the length field in the AGF header. * Fix recordset filtering bugs when re-calling GETFSMAP to return more results when the resultset didn't previously fit in the caller's buffer. * Fix integer overflows in GETFSMAP when working with rt volumes larger than 2^32 fsblocks. * Fix GETFSMAP reporting the undefined space beyond the last rtextent. * Fix filtering bugs in GETFSMAP's log device backend if the log ever becomes longer than 2^32 fsblocks. * Improve validation of file offsets in the GETFSMAP range parameters. * Fix an off by one bug in the pmem media failure notification computation. * Validate the length field in the AGI header too. Signed-off-by: Darrick J. Wong <djwong@kernel.org> -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQQ2qTKExjcn+O1o2YRKO3ySh0YRpgUCZKL9IwAKCRBKO3ySh0YR prFLAQC+dp1bV5ShBPfYJMCSUS7gmZEge01QrLTqcpyu8mO5GgD/YLUdD2Iebc8t AS1Awj1iec7AFtCWcd3bTeNZD7vL9w0= =j/oi -----END PGP SIGNATURE----- Merge tag 'xfs-6.5-merge-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux Pull more xfs updates from Darrick Wong: - Fix some ordering problems with log items during log recovery - Don't deadlock the system by trying to flush busy freed extents while holding on to busy freed extents - Improve validation of log geometry parameters when reading the primary superblock - Validate the length field in the AGF header - Fix recordset filtering bugs when re-calling GETFSMAP to return more results when the resultset didn't previously fit in the caller's buffer - Fix integer overflows in GETFSMAP when working with rt volumes larger than 2^32 fsblocks - Fix GETFSMAP reporting the undefined space beyond the last rtextent - Fix filtering bugs in GETFSMAP's log device backend if the log ever becomes longer 
than 2^32 fsblocks - Improve validation of file offsets in the GETFSMAP range parameters - Fix an off by one bug in the pmem media failure notification computation - Validate the length field in the AGI header too * tag 'xfs-6.5-merge-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: Remove unneeded semicolon xfs: AGI length should be bounds checked xfs: fix the calculation for "end" and "length" xfs: fix xfs_btree_query_range callers to initialize btree rec fully xfs: validate fsmap offsets specified in the query keys xfs: fix logdev fsmap query result filtering xfs: clean up the rtbitmap fsmap backend xfs: fix getfsmap reporting past the last rt extent xfs: fix integer overflows in the fsmap rtbitmap and logdev backends xfs: fix interval filtering in multi-step fsmap queries xfs: fix bounds check in xfs_defer_agfl_block() xfs: AGF length has never been bounds checked xfs: journal geometry is not properly bounds checked xfs: don't block in busy flushing when freeing extents xfs: allow extent free intents to be retried xfs: pass alloc flags through to xfs_extent_busy_flush() xfs: use deferred frees for btree block freeing xfs: don't reverse order of items in bulk AIL insertion xfs: remove redundant initializations of pointers drop_leaf and save_leaf
This commit is contained in:
commit
bb8e7e9f0b
@ -985,7 +985,7 @@ xfs_ag_shrink_space(
|
||||
goto resv_err;
|
||||
|
||||
err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
|
||||
true);
|
||||
XFS_AG_RESV_NONE, true);
|
||||
if (err2)
|
||||
goto resv_err;
|
||||
|
||||
|
@ -1536,7 +1536,8 @@ xfs_alloc_ag_vextent_lastblock(
|
||||
*/
|
||||
STATIC int
|
||||
xfs_alloc_ag_vextent_near(
|
||||
struct xfs_alloc_arg *args)
|
||||
struct xfs_alloc_arg *args,
|
||||
uint32_t alloc_flags)
|
||||
{
|
||||
struct xfs_alloc_cur acur = {};
|
||||
int error; /* error code */
|
||||
@ -1555,6 +1556,8 @@ xfs_alloc_ag_vextent_near(
|
||||
if (args->agbno > args->max_agbno)
|
||||
args->agbno = args->max_agbno;
|
||||
|
||||
/* Retry once quickly if we find busy extents before blocking. */
|
||||
alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
|
||||
restart:
|
||||
len = 0;
|
||||
|
||||
@ -1610,9 +1613,20 @@ xfs_alloc_ag_vextent_near(
|
||||
*/
|
||||
if (!acur.len) {
|
||||
if (acur.busy) {
|
||||
/*
|
||||
* Our only valid extents must have been busy. Flush and
|
||||
* retry the allocation again. If we get an -EAGAIN
|
||||
* error, we're being told that a deadlock was avoided
|
||||
* and the current transaction needs committing before
|
||||
* the allocation can be retried.
|
||||
*/
|
||||
trace_xfs_alloc_near_busy(args);
|
||||
xfs_extent_busy_flush(args->mp, args->pag,
|
||||
acur.busy_gen);
|
||||
error = xfs_extent_busy_flush(args->tp, args->pag,
|
||||
acur.busy_gen, alloc_flags);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
|
||||
goto restart;
|
||||
}
|
||||
trace_xfs_alloc_size_neither(args);
|
||||
@ -1635,22 +1649,25 @@ xfs_alloc_ag_vextent_near(
|
||||
* and of the form k * prod + mod unless there's nothing that large.
|
||||
* Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
|
||||
*/
|
||||
STATIC int /* error */
|
||||
static int
|
||||
xfs_alloc_ag_vextent_size(
|
||||
xfs_alloc_arg_t *args) /* allocation argument structure */
|
||||
struct xfs_alloc_arg *args,
|
||||
uint32_t alloc_flags)
|
||||
{
|
||||
struct xfs_agf *agf = args->agbp->b_addr;
|
||||
struct xfs_btree_cur *bno_cur; /* cursor for bno btree */
|
||||
struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */
|
||||
int error; /* error result */
|
||||
xfs_agblock_t fbno; /* start of found freespace */
|
||||
xfs_extlen_t flen; /* length of found freespace */
|
||||
int i; /* temp status variable */
|
||||
xfs_agblock_t rbno; /* returned block number */
|
||||
xfs_extlen_t rlen; /* length of returned extent */
|
||||
bool busy;
|
||||
unsigned busy_gen;
|
||||
struct xfs_agf *agf = args->agbp->b_addr;
|
||||
struct xfs_btree_cur *bno_cur;
|
||||
struct xfs_btree_cur *cnt_cur;
|
||||
xfs_agblock_t fbno; /* start of found freespace */
|
||||
xfs_extlen_t flen; /* length of found freespace */
|
||||
xfs_agblock_t rbno; /* returned block number */
|
||||
xfs_extlen_t rlen; /* length of returned extent */
|
||||
bool busy;
|
||||
unsigned busy_gen;
|
||||
int error;
|
||||
int i;
|
||||
|
||||
/* Retry once quickly if we find busy extents before blocking. */
|
||||
alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
|
||||
restart:
|
||||
/*
|
||||
* Allocate and initialize a cursor for the by-size btree.
|
||||
@ -1708,19 +1725,25 @@ xfs_alloc_ag_vextent_size(
|
||||
error = xfs_btree_increment(cnt_cur, 0, &i);
|
||||
if (error)
|
||||
goto error0;
|
||||
if (i == 0) {
|
||||
/*
|
||||
* Our only valid extents must have been busy.
|
||||
* Make it unbusy by forcing the log out and
|
||||
* retrying.
|
||||
*/
|
||||
xfs_btree_del_cursor(cnt_cur,
|
||||
XFS_BTREE_NOERROR);
|
||||
trace_xfs_alloc_size_busy(args);
|
||||
xfs_extent_busy_flush(args->mp,
|
||||
args->pag, busy_gen);
|
||||
goto restart;
|
||||
}
|
||||
if (i)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Our only valid extents must have been busy. Flush and
|
||||
* retry the allocation again. If we get an -EAGAIN
|
||||
* error, we're being told that a deadlock was avoided
|
||||
* and the current transaction needs committing before
|
||||
* the allocation can be retried.
|
||||
*/
|
||||
trace_xfs_alloc_size_busy(args);
|
||||
error = xfs_extent_busy_flush(args->tp, args->pag,
|
||||
busy_gen, alloc_flags);
|
||||
if (error)
|
||||
goto error0;
|
||||
|
||||
alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
|
||||
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1800,9 +1823,21 @@ xfs_alloc_ag_vextent_size(
|
||||
args->len = rlen;
|
||||
if (rlen < args->minlen) {
|
||||
if (busy) {
|
||||
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
||||
/*
|
||||
* Our only valid extents must have been busy. Flush and
|
||||
* retry the allocation again. If we get an -EAGAIN
|
||||
* error, we're being told that a deadlock was avoided
|
||||
* and the current transaction needs committing before
|
||||
* the allocation can be retried.
|
||||
*/
|
||||
trace_xfs_alloc_size_busy(args);
|
||||
xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
|
||||
error = xfs_extent_busy_flush(args->tp, args->pag,
|
||||
busy_gen, alloc_flags);
|
||||
if (error)
|
||||
goto error0;
|
||||
|
||||
alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
|
||||
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
||||
goto restart;
|
||||
}
|
||||
goto out_nominleft;
|
||||
@ -2435,23 +2470,25 @@ static int
|
||||
xfs_defer_agfl_block(
|
||||
struct xfs_trans *tp,
|
||||
xfs_agnumber_t agno,
|
||||
xfs_fsblock_t agbno,
|
||||
xfs_agblock_t agbno,
|
||||
struct xfs_owner_info *oinfo)
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
struct xfs_extent_free_item *xefi;
|
||||
xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
|
||||
|
||||
ASSERT(xfs_extfree_item_cache != NULL);
|
||||
ASSERT(oinfo != NULL);
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
|
||||
GFP_KERNEL | __GFP_NOFAIL);
|
||||
xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
|
||||
xefi->xefi_startblock = fsbno;
|
||||
xefi->xefi_blockcount = 1;
|
||||
xefi->xefi_owner = oinfo->oi_owner;
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
|
||||
return -EFSCORRUPTED;
|
||||
xefi->xefi_agresv = XFS_AG_RESV_AGFL;
|
||||
|
||||
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
|
||||
|
||||
@ -2470,6 +2507,7 @@ __xfs_free_extent_later(
|
||||
xfs_fsblock_t bno,
|
||||
xfs_filblks_t len,
|
||||
const struct xfs_owner_info *oinfo,
|
||||
enum xfs_ag_resv_type type,
|
||||
bool skip_discard)
|
||||
{
|
||||
struct xfs_extent_free_item *xefi;
|
||||
@ -2490,6 +2528,7 @@ __xfs_free_extent_later(
|
||||
ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
|
||||
#endif
|
||||
ASSERT(xfs_extfree_item_cache != NULL);
|
||||
ASSERT(type != XFS_AG_RESV_AGFL);
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
|
||||
return -EFSCORRUPTED;
|
||||
@ -2498,6 +2537,7 @@ __xfs_free_extent_later(
|
||||
GFP_KERNEL | __GFP_NOFAIL);
|
||||
xefi->xefi_startblock = bno;
|
||||
xefi->xefi_blockcount = (xfs_extlen_t)len;
|
||||
xefi->xefi_agresv = type;
|
||||
if (skip_discard)
|
||||
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
|
||||
if (oinfo) {
|
||||
@ -2568,7 +2608,7 @@ xfs_exact_minlen_extent_available(
|
||||
int /* error */
|
||||
xfs_alloc_fix_freelist(
|
||||
struct xfs_alloc_arg *args, /* allocation argument structure */
|
||||
int flags) /* XFS_ALLOC_FLAG_... */
|
||||
uint32_t alloc_flags)
|
||||
{
|
||||
struct xfs_mount *mp = args->mp;
|
||||
struct xfs_perag *pag = args->pag;
|
||||
@ -2584,7 +2624,7 @@ xfs_alloc_fix_freelist(
|
||||
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
|
||||
|
||||
if (!xfs_perag_initialised_agf(pag)) {
|
||||
error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
|
||||
error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
|
||||
if (error) {
|
||||
/* Couldn't lock the AGF so skip this AG. */
|
||||
if (error == -EAGAIN)
|
||||
@ -2600,13 +2640,13 @@ xfs_alloc_fix_freelist(
|
||||
*/
|
||||
if (xfs_perag_prefers_metadata(pag) &&
|
||||
(args->datatype & XFS_ALLOC_USERDATA) &&
|
||||
(flags & XFS_ALLOC_FLAG_TRYLOCK)) {
|
||||
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
|
||||
(alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
|
||||
ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
|
||||
goto out_agbp_relse;
|
||||
}
|
||||
|
||||
need = xfs_alloc_min_freelist(mp, pag);
|
||||
if (!xfs_alloc_space_available(args, need, flags |
|
||||
if (!xfs_alloc_space_available(args, need, alloc_flags |
|
||||
XFS_ALLOC_FLAG_CHECK))
|
||||
goto out_agbp_relse;
|
||||
|
||||
@ -2615,7 +2655,7 @@ xfs_alloc_fix_freelist(
|
||||
* Can fail if we're not blocking on locks, and it's held.
|
||||
*/
|
||||
if (!agbp) {
|
||||
error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
|
||||
error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
|
||||
if (error) {
|
||||
/* Couldn't lock the AGF so skip this AG. */
|
||||
if (error == -EAGAIN)
|
||||
@ -2630,7 +2670,7 @@ xfs_alloc_fix_freelist(
|
||||
|
||||
/* If there isn't enough total space or single-extent, reject it. */
|
||||
need = xfs_alloc_min_freelist(mp, pag);
|
||||
if (!xfs_alloc_space_available(args, need, flags))
|
||||
if (!xfs_alloc_space_available(args, need, alloc_flags))
|
||||
goto out_agbp_relse;
|
||||
|
||||
#ifdef DEBUG
|
||||
@ -2668,11 +2708,12 @@ xfs_alloc_fix_freelist(
|
||||
*/
|
||||
memset(&targs, 0, sizeof(targs));
|
||||
/* struct copy below */
|
||||
if (flags & XFS_ALLOC_FLAG_NORMAP)
|
||||
if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
|
||||
targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
|
||||
else
|
||||
targs.oinfo = XFS_RMAP_OINFO_AG;
|
||||
while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
|
||||
while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
|
||||
pag->pagf_flcount > need) {
|
||||
error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
|
||||
if (error)
|
||||
goto out_agbp_relse;
|
||||
@ -2700,7 +2741,7 @@ xfs_alloc_fix_freelist(
|
||||
targs.resv = XFS_AG_RESV_AGFL;
|
||||
|
||||
/* Allocate as many blocks as possible at once. */
|
||||
error = xfs_alloc_ag_vextent_size(&targs);
|
||||
error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
|
||||
if (error)
|
||||
goto out_agflbp_relse;
|
||||
|
||||
@ -2710,7 +2751,7 @@ xfs_alloc_fix_freelist(
|
||||
* on a completely full ag.
|
||||
*/
|
||||
if (targs.agbno == NULLAGBLOCK) {
|
||||
if (flags & XFS_ALLOC_FLAG_FREEING)
|
||||
if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
|
||||
break;
|
||||
goto out_agflbp_relse;
|
||||
}
|
||||
@ -2915,6 +2956,47 @@ xfs_alloc_put_freelist(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that this AGF/AGI header's sequence number and length match the AG
|
||||
* number and size in fsblocks.
|
||||
*/
|
||||
xfs_failaddr_t
|
||||
xfs_validate_ag_length(
|
||||
struct xfs_buf *bp,
|
||||
uint32_t seqno,
|
||||
uint32_t length)
|
||||
{
|
||||
struct xfs_mount *mp = bp->b_mount;
|
||||
/*
|
||||
* During growfs operations, the perag is not fully initialised,
|
||||
* so we can't use it for any useful checking. growfs ensures we can't
|
||||
* use it by using uncached buffers that don't have the perag attached
|
||||
* so we can detect and avoid this problem.
|
||||
*/
|
||||
if (bp->b_pag && seqno != bp->b_pag->pag_agno)
|
||||
return __this_address;
|
||||
|
||||
/*
|
||||
* Only the last AG in the filesystem is allowed to be shorter
|
||||
* than the AG size recorded in the superblock.
|
||||
*/
|
||||
if (length != mp->m_sb.sb_agblocks) {
|
||||
/*
|
||||
* During growfs, the new last AG can get here before we
|
||||
* have updated the superblock. Give it a pass on the seqno
|
||||
* check.
|
||||
*/
|
||||
if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
|
||||
return __this_address;
|
||||
if (length < XFS_MIN_AG_BLOCKS)
|
||||
return __this_address;
|
||||
if (length > mp->m_sb.sb_agblocks)
|
||||
return __this_address;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify the AGF is consistent.
|
||||
*
|
||||
@ -2934,6 +3016,9 @@ xfs_agf_verify(
|
||||
{
|
||||
struct xfs_mount *mp = bp->b_mount;
|
||||
struct xfs_agf *agf = bp->b_addr;
|
||||
xfs_failaddr_t fa;
|
||||
uint32_t agf_seqno = be32_to_cpu(agf->agf_seqno);
|
||||
uint32_t agf_length = be32_to_cpu(agf->agf_length);
|
||||
|
||||
if (xfs_has_crc(mp)) {
|
||||
if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
|
||||
@ -2945,18 +3030,26 @@ xfs_agf_verify(
|
||||
if (!xfs_verify_magic(bp, agf->agf_magicnum))
|
||||
return __this_address;
|
||||
|
||||
if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
|
||||
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
|
||||
be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
|
||||
be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
|
||||
be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
|
||||
if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
|
||||
return __this_address;
|
||||
|
||||
if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
|
||||
/*
|
||||
* Both agf_seqno and agf_length need to be validated before anything else
|
||||
* block number related in the AGF or AGFL can be checked.
|
||||
*/
|
||||
fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
|
||||
if (fa)
|
||||
return fa;
|
||||
|
||||
if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
|
||||
return __this_address;
|
||||
if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
|
||||
return __this_address;
|
||||
if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
|
||||
return __this_address;
|
||||
|
||||
if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
|
||||
be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
|
||||
be32_to_cpu(agf->agf_freeblks) > agf_length)
|
||||
return __this_address;
|
||||
|
||||
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
|
||||
@ -2967,38 +3060,28 @@ xfs_agf_verify(
|
||||
mp->m_alloc_maxlevels)
|
||||
return __this_address;
|
||||
|
||||
if (xfs_has_rmapbt(mp) &&
|
||||
(be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
|
||||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
|
||||
mp->m_rmap_maxlevels))
|
||||
return __this_address;
|
||||
|
||||
if (xfs_has_rmapbt(mp) &&
|
||||
be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
|
||||
return __this_address;
|
||||
|
||||
/*
|
||||
* during growfs operations, the perag is not fully initialised,
|
||||
* so we can't use it for any useful checking. growfs ensures we can't
|
||||
* use it by using uncached buffers that don't have the perag attached
|
||||
* so we can detect and avoid this problem.
|
||||
*/
|
||||
if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
|
||||
return __this_address;
|
||||
|
||||
if (xfs_has_lazysbcount(mp) &&
|
||||
be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
|
||||
be32_to_cpu(agf->agf_btreeblks) > agf_length)
|
||||
return __this_address;
|
||||
|
||||
if (xfs_has_reflink(mp) &&
|
||||
be32_to_cpu(agf->agf_refcount_blocks) >
|
||||
be32_to_cpu(agf->agf_length))
|
||||
return __this_address;
|
||||
if (xfs_has_rmapbt(mp)) {
|
||||
if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
|
||||
return __this_address;
|
||||
|
||||
if (xfs_has_reflink(mp) &&
|
||||
(be32_to_cpu(agf->agf_refcount_level) < 1 ||
|
||||
be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
|
||||
return __this_address;
|
||||
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
|
||||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
|
||||
mp->m_rmap_maxlevels)
|
||||
return __this_address;
|
||||
}
|
||||
|
||||
if (xfs_has_reflink(mp)) {
|
||||
if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
|
||||
return __this_address;
|
||||
|
||||
if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
|
||||
be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
|
||||
return __this_address;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -3226,7 +3309,7 @@ xfs_alloc_vextent_check_args(
|
||||
static int
|
||||
xfs_alloc_vextent_prepare_ag(
|
||||
struct xfs_alloc_arg *args,
|
||||
uint32_t flags)
|
||||
uint32_t alloc_flags)
|
||||
{
|
||||
bool need_pag = !args->pag;
|
||||
int error;
|
||||
@ -3235,7 +3318,7 @@ xfs_alloc_vextent_prepare_ag(
|
||||
args->pag = xfs_perag_get(args->mp, args->agno);
|
||||
|
||||
args->agbp = NULL;
|
||||
error = xfs_alloc_fix_freelist(args, flags);
|
||||
error = xfs_alloc_fix_freelist(args, alloc_flags);
|
||||
if (error) {
|
||||
trace_xfs_alloc_vextent_nofix(args);
|
||||
if (need_pag)
|
||||
@ -3357,6 +3440,7 @@ xfs_alloc_vextent_this_ag(
|
||||
{
|
||||
struct xfs_mount *mp = args->mp;
|
||||
xfs_agnumber_t minimum_agno;
|
||||
uint32_t alloc_flags = 0;
|
||||
int error;
|
||||
|
||||
ASSERT(args->pag != NULL);
|
||||
@ -3375,9 +3459,9 @@ xfs_alloc_vextent_this_ag(
|
||||
return error;
|
||||
}
|
||||
|
||||
error = xfs_alloc_vextent_prepare_ag(args, 0);
|
||||
error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
|
||||
if (!error && args->agbp)
|
||||
error = xfs_alloc_ag_vextent_size(args);
|
||||
error = xfs_alloc_ag_vextent_size(args, alloc_flags);
|
||||
|
||||
return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
|
||||
}
|
||||
@ -3406,20 +3490,20 @@ xfs_alloc_vextent_iterate_ags(
|
||||
xfs_agnumber_t minimum_agno,
|
||||
xfs_agnumber_t start_agno,
|
||||
xfs_agblock_t target_agbno,
|
||||
uint32_t flags)
|
||||
uint32_t alloc_flags)
|
||||
{
|
||||
struct xfs_mount *mp = args->mp;
|
||||
xfs_agnumber_t restart_agno = minimum_agno;
|
||||
xfs_agnumber_t agno;
|
||||
int error = 0;
|
||||
|
||||
if (flags & XFS_ALLOC_FLAG_TRYLOCK)
|
||||
if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
|
||||
restart_agno = 0;
|
||||
restart:
|
||||
for_each_perag_wrap_range(mp, start_agno, restart_agno,
|
||||
mp->m_sb.sb_agcount, agno, args->pag) {
|
||||
args->agno = agno;
|
||||
error = xfs_alloc_vextent_prepare_ag(args, flags);
|
||||
error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
|
||||
if (error)
|
||||
break;
|
||||
if (!args->agbp) {
|
||||
@ -3433,10 +3517,10 @@ xfs_alloc_vextent_iterate_ags(
|
||||
*/
|
||||
if (args->agno == start_agno && target_agbno) {
|
||||
args->agbno = target_agbno;
|
||||
error = xfs_alloc_ag_vextent_near(args);
|
||||
error = xfs_alloc_ag_vextent_near(args, alloc_flags);
|
||||
} else {
|
||||
args->agbno = 0;
|
||||
error = xfs_alloc_ag_vextent_size(args);
|
||||
error = xfs_alloc_ag_vextent_size(args, alloc_flags);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -3453,8 +3537,8 @@ xfs_alloc_vextent_iterate_ags(
|
||||
* constraining flags by the caller, drop them and retry the allocation
|
||||
* without any constraints being set.
|
||||
*/
|
||||
if (flags) {
|
||||
flags = 0;
|
||||
if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
|
||||
alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
|
||||
restart_agno = minimum_agno;
|
||||
goto restart;
|
||||
}
|
||||
@ -3482,6 +3566,7 @@ xfs_alloc_vextent_start_ag(
|
||||
xfs_agnumber_t start_agno;
|
||||
xfs_agnumber_t rotorstep = xfs_rotorstep;
|
||||
bool bump_rotor = false;
|
||||
uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
|
||||
int error;
|
||||
|
||||
ASSERT(args->pag == NULL);
|
||||
@ -3508,7 +3593,7 @@ xfs_alloc_vextent_start_ag(
|
||||
|
||||
start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
|
||||
error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
|
||||
XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK);
|
||||
XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
|
||||
|
||||
if (bump_rotor) {
|
||||
if (args->agno == start_agno)
|
||||
@ -3535,6 +3620,7 @@ xfs_alloc_vextent_first_ag(
|
||||
struct xfs_mount *mp = args->mp;
|
||||
xfs_agnumber_t minimum_agno;
|
||||
xfs_agnumber_t start_agno;
|
||||
uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
|
||||
int error;
|
||||
|
||||
ASSERT(args->pag == NULL);
|
||||
@ -3553,7 +3639,7 @@ xfs_alloc_vextent_first_ag(
|
||||
|
||||
start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
|
||||
error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
|
||||
XFS_FSB_TO_AGBNO(mp, target), 0);
|
||||
XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
|
||||
return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
|
||||
}
|
||||
|
||||
@ -3606,6 +3692,7 @@ xfs_alloc_vextent_near_bno(
|
||||
struct xfs_mount *mp = args->mp;
|
||||
xfs_agnumber_t minimum_agno;
|
||||
bool needs_perag = args->pag == NULL;
|
||||
uint32_t alloc_flags = 0;
|
||||
int error;
|
||||
|
||||
if (!needs_perag)
|
||||
@ -3626,9 +3713,9 @@ xfs_alloc_vextent_near_bno(
|
||||
if (needs_perag)
|
||||
args->pag = xfs_perag_grab(mp, args->agno);
|
||||
|
||||
error = xfs_alloc_vextent_prepare_ag(args, 0);
|
||||
error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
|
||||
if (!error && args->agbp)
|
||||
error = xfs_alloc_ag_vextent_near(args);
|
||||
error = xfs_alloc_ag_vextent_near(args, alloc_flags);
|
||||
|
||||
return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
|
||||
}
|
||||
@ -3756,15 +3843,11 @@ xfs_alloc_query_range(
|
||||
xfs_alloc_query_range_fn fn,
|
||||
void *priv)
|
||||
{
|
||||
union xfs_btree_irec low_brec;
|
||||
union xfs_btree_irec high_brec;
|
||||
struct xfs_alloc_query_range_info query;
|
||||
union xfs_btree_irec low_brec = { .a = *low_rec };
|
||||
union xfs_btree_irec high_brec = { .a = *high_rec };
|
||||
struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
|
||||
|
||||
ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
|
||||
low_brec.a = *low_rec;
|
||||
high_brec.a = *high_rec;
|
||||
query.priv = priv;
|
||||
query.fn = fn;
|
||||
return xfs_btree_query_range(cur, &low_brec, &high_brec,
|
||||
xfs_alloc_query_range_helper, &query);
|
||||
}
|
||||
|
@ -19,11 +19,12 @@ unsigned int xfs_agfl_size(struct xfs_mount *mp);
|
||||
/*
|
||||
* Flags for xfs_alloc_fix_freelist.
|
||||
*/
|
||||
#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
|
||||
#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
|
||||
#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */
|
||||
#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */
|
||||
#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */
|
||||
#define XFS_ALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */
|
||||
#define XFS_ALLOC_FLAG_FREEING (1U << 1) /* indicate caller is freeing extents*/
|
||||
#define XFS_ALLOC_FLAG_NORMAP (1U << 2) /* don't modify the rmapbt */
|
||||
#define XFS_ALLOC_FLAG_NOSHRINK (1U << 3) /* don't shrink the freelist */
|
||||
#define XFS_ALLOC_FLAG_CHECK (1U << 4) /* test only, don't modify args */
|
||||
#define XFS_ALLOC_FLAG_TRYFLUSH (1U << 5) /* don't wait in busy extent flush */
|
||||
|
||||
/*
|
||||
* Argument structure for xfs_alloc routines.
|
||||
@ -195,7 +196,7 @@ int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp,
|
||||
struct xfs_buf **bpp);
|
||||
int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t,
|
||||
struct xfs_buf *, struct xfs_owner_info *);
|
||||
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
|
||||
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags);
|
||||
int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,
|
||||
struct xfs_buf **agbp);
|
||||
|
||||
@ -232,7 +233,7 @@ xfs_buf_to_agfl_bno(
|
||||
|
||||
int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
|
||||
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
|
||||
bool skip_discard);
|
||||
enum xfs_ag_resv_type type, bool skip_discard);
|
||||
|
||||
/*
|
||||
* List of extents to be free "later".
|
||||
@ -245,6 +246,7 @@ struct xfs_extent_free_item {
|
||||
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
|
||||
struct xfs_perag *xefi_pag;
|
||||
unsigned int xefi_flags;
|
||||
enum xfs_ag_resv_type xefi_agresv;
|
||||
};
|
||||
|
||||
void xfs_extent_free_get_group(struct xfs_mount *mp,
|
||||
@ -259,9 +261,10 @@ xfs_free_extent_later(
|
||||
struct xfs_trans *tp,
|
||||
xfs_fsblock_t bno,
|
||||
xfs_filblks_t len,
|
||||
const struct xfs_owner_info *oinfo)
|
||||
const struct xfs_owner_info *oinfo,
|
||||
enum xfs_ag_resv_type type)
|
||||
{
|
||||
return __xfs_free_extent_later(tp, bno, len, oinfo, false);
|
||||
return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
|
||||
}
|
||||
|
||||
|
||||
@ -270,4 +273,7 @@ extern struct kmem_cache *xfs_extfree_item_cache;
|
||||
int __init xfs_extfree_intent_init_cache(void);
|
||||
void xfs_extfree_intent_destroy_cache(void);
|
||||
|
||||
xfs_failaddr_t xfs_validate_ag_length(struct xfs_buf *bp, uint32_t seqno,
|
||||
uint32_t length);
|
||||
|
||||
#endif /* __XFS_ALLOC_H__ */
|
||||
|
@ -2293,8 +2293,6 @@ xfs_attr3_leaf_unbalance(
|
||||
|
||||
trace_xfs_attr_leaf_unbalance(state->args);
|
||||
|
||||
drop_leaf = drop_blk->bp->b_addr;
|
||||
save_leaf = save_blk->bp->b_addr;
|
||||
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);
|
||||
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);
|
||||
entry = xfs_attr3_leaf_entryp(drop_leaf);
|
||||
|
@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(
|
||||
return error;
|
||||
|
||||
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
|
||||
error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
|
||||
error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
|
||||
XFS_AG_RESV_NONE);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real(
|
||||
} else {
|
||||
error = __xfs_free_extent_later(tp, del->br_startblock,
|
||||
del->br_blockcount, NULL,
|
||||
(bflags & XFS_BMAPI_NODISCARD) ||
|
||||
del->br_state == XFS_EXT_UNWRITTEN);
|
||||
XFS_AG_RESV_NONE,
|
||||
((bflags & XFS_BMAPI_NODISCARD) ||
|
||||
del->br_state == XFS_EXT_UNWRITTEN));
|
||||
if (error)
|
||||
goto done;
|
||||
}
|
||||
|
@ -271,7 +271,8 @@ xfs_bmbt_free_block(
|
||||
int error;
|
||||
|
||||
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
|
||||
error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
|
||||
error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
|
||||
XFS_AG_RESV_NONE);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
|
@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk(
|
||||
/* not sparse, calculate extent info directly */
|
||||
return xfs_free_extent_later(tp,
|
||||
XFS_AGB_TO_FSB(mp, agno, sagbno),
|
||||
M_IGEO(mp)->ialloc_blks,
|
||||
&XFS_RMAP_OINFO_INODES);
|
||||
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
|
||||
XFS_AG_RESV_NONE);
|
||||
}
|
||||
|
||||
/* holemask is only 16-bits (fits in an unsigned long) */
|
||||
@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk(
|
||||
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
|
||||
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
|
||||
error = xfs_free_extent_later(tp,
|
||||
XFS_AGB_TO_FSB(mp, agno, agbno),
|
||||
contigblk, &XFS_RMAP_OINFO_INODES);
|
||||
XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
|
||||
&XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@ -2486,11 +2486,14 @@ xfs_ialloc_log_agi(
|
||||
|
||||
static xfs_failaddr_t
|
||||
xfs_agi_verify(
|
||||
struct xfs_buf *bp)
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
struct xfs_mount *mp = bp->b_mount;
|
||||
struct xfs_agi *agi = bp->b_addr;
|
||||
int i;
|
||||
struct xfs_mount *mp = bp->b_mount;
|
||||
struct xfs_agi *agi = bp->b_addr;
|
||||
xfs_failaddr_t fa;
|
||||
uint32_t agi_seqno = be32_to_cpu(agi->agi_seqno);
|
||||
uint32_t agi_length = be32_to_cpu(agi->agi_length);
|
||||
int i;
|
||||
|
||||
if (xfs_has_crc(mp)) {
|
||||
if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
|
||||
@ -2507,6 +2510,10 @@ xfs_agi_verify(
|
||||
if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
|
||||
return __this_address;
|
||||
|
||||
fa = xfs_validate_ag_length(bp, agi_seqno, agi_length);
|
||||
if (fa)
|
||||
return fa;
|
||||
|
||||
if (be32_to_cpu(agi->agi_level) < 1 ||
|
||||
be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
|
||||
return __this_address;
|
||||
@ -2516,15 +2523,6 @@ xfs_agi_verify(
|
||||
be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
|
||||
return __this_address;
|
||||
|
||||
/*
|
||||
* during growfs operations, the perag is not fully initialised,
|
||||
* so we can't use it for any useful checking. growfs ensures we can't
|
||||
* use it by using uncached buffers that don't have the perag attached
|
||||
* so we can detect and avoid this problem.
|
||||
*/
|
||||
if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
|
||||
return __this_address;
|
||||
|
||||
for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
|
||||
if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
|
||||
continue;
|
||||
|
@ -160,8 +160,7 @@ __xfs_inobt_free_block(
|
||||
|
||||
xfs_inobt_mod_blockcount(cur, -1);
|
||||
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
|
||||
return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
|
||||
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
|
||||
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
|
||||
&XFS_RMAP_OINFO_INOBT, resv);
|
||||
}
|
||||
|
||||
|
@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents(
|
||||
cur->bc_ag.pag->pag_agno,
|
||||
tmp.rc_startblock);
|
||||
error = xfs_free_extent_later(cur->bc_tp, fsbno,
|
||||
tmp.rc_blockcount, NULL);
|
||||
tmp.rc_blockcount, NULL,
|
||||
XFS_AG_RESV_NONE);
|
||||
if (error)
|
||||
goto out_error;
|
||||
}
|
||||
@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents(
|
||||
cur->bc_ag.pag->pag_agno,
|
||||
ext.rc_startblock);
|
||||
error = xfs_free_extent_later(cur->bc_tp, fsbno,
|
||||
ext.rc_blockcount, NULL);
|
||||
ext.rc_blockcount, NULL,
|
||||
XFS_AG_RESV_NONE);
|
||||
if (error)
|
||||
goto out_error;
|
||||
}
|
||||
@ -1919,8 +1921,13 @@ xfs_refcount_recover_cow_leftovers(
|
||||
struct xfs_buf *agbp;
|
||||
struct xfs_refcount_recovery *rr, *n;
|
||||
struct list_head debris;
|
||||
union xfs_btree_irec low;
|
||||
union xfs_btree_irec high;
|
||||
union xfs_btree_irec low = {
|
||||
.rc.rc_domain = XFS_REFC_DOMAIN_COW,
|
||||
};
|
||||
union xfs_btree_irec high = {
|
||||
.rc.rc_domain = XFS_REFC_DOMAIN_COW,
|
||||
.rc.rc_startblock = -1U,
|
||||
};
|
||||
xfs_fsblock_t fsb;
|
||||
int error;
|
||||
|
||||
@ -1951,10 +1958,6 @@ xfs_refcount_recover_cow_leftovers(
|
||||
cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
|
||||
|
||||
/* Find all the leftover CoW staging extents. */
|
||||
memset(&low, 0, sizeof(low));
|
||||
memset(&high, 0, sizeof(high));
|
||||
low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
|
||||
high.rc.rc_startblock = -1U;
|
||||
error = xfs_btree_query_range(cur, &low, &high,
|
||||
xfs_refcount_recover_extent, &debris);
|
||||
xfs_btree_del_cursor(cur, error);
|
||||
@ -1981,7 +1984,8 @@ xfs_refcount_recover_cow_leftovers(
|
||||
|
||||
/* Free the block. */
|
||||
error = xfs_free_extent_later(tp, fsb,
|
||||
rr->rr_rrec.rc_blockcount, NULL);
|
||||
rr->rr_rrec.rc_blockcount, NULL,
|
||||
XFS_AG_RESV_NONE);
|
||||
if (error)
|
||||
goto out_trans;
|
||||
|
||||
|
@ -106,19 +106,13 @@ xfs_refcountbt_free_block(
|
||||
struct xfs_buf *agbp = cur->bc_ag.agbp;
|
||||
struct xfs_agf *agf = agbp->b_addr;
|
||||
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
|
||||
int error;
|
||||
|
||||
trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
|
||||
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
|
||||
be32_add_cpu(&agf->agf_refcount_blocks, -1);
|
||||
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
|
||||
error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
|
||||
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
|
||||
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
|
||||
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
|
@ -2389,14 +2389,10 @@ xfs_rmap_query_range(
|
||||
xfs_rmap_query_range_fn fn,
|
||||
void *priv)
|
||||
{
|
||||
union xfs_btree_irec low_brec;
|
||||
union xfs_btree_irec high_brec;
|
||||
struct xfs_rmap_query_range_info query;
|
||||
union xfs_btree_irec low_brec = { .r = *low_rec };
|
||||
union xfs_btree_irec high_brec = { .r = *high_rec };
|
||||
struct xfs_rmap_query_range_info query = { .priv = priv, .fn = fn };
|
||||
|
||||
low_brec.r = *low_rec;
|
||||
high_brec.r = *high_rec;
|
||||
query.priv = priv;
|
||||
query.fn = fn;
|
||||
return xfs_btree_query_range(cur, &low_brec, &high_brec,
|
||||
xfs_rmap_query_range_helper, &query);
|
||||
}
|
||||
|
@ -412,7 +412,6 @@ xfs_validate_sb_common(
|
||||
sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
|
||||
sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
|
||||
sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
|
||||
sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE ||
|
||||
sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
|
||||
XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES ||
|
||||
XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES ||
|
||||
@ -430,6 +429,61 @@ xfs_validate_sb_common(
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Logs that are too large are not supported at all. Reject them
|
||||
* outright. Logs that are too small are tolerated on v4 filesystems,
|
||||
* but we can only check that when mounting the log. Hence we skip
|
||||
* those checks here.
|
||||
*/
|
||||
if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) {
|
||||
xfs_notice(mp,
|
||||
"Log size 0x%x blocks too large, maximum size is 0x%llx blocks",
|
||||
sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) {
|
||||
xfs_warn(mp,
|
||||
"log size 0x%llx bytes too large, maximum size is 0x%llx bytes",
|
||||
XFS_FSB_TO_B(mp, sbp->sb_logblocks),
|
||||
XFS_MAX_LOG_BYTES);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do not allow filesystems with corrupted log sector or stripe units to
|
||||
* be mounted. We cannot safely size the iclogs or write to the log if
|
||||
* the log stripe unit is not valid.
|
||||
*/
|
||||
if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) {
|
||||
if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) {
|
||||
xfs_notice(mp,
|
||||
"log sector size in bytes/log2 (0x%x/0x%x) must match",
|
||||
sbp->sb_logsectsize, 1U << sbp->sb_logsectlog);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
} else if (sbp->sb_logsectsize || sbp->sb_logsectlog) {
|
||||
xfs_notice(mp,
|
||||
"log sector size in bytes/log2 (0x%x/0x%x) are not zero",
|
||||
sbp->sb_logsectsize, sbp->sb_logsectlog);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
if (sbp->sb_logsunit > 1) {
|
||||
if (sbp->sb_logsunit % sbp->sb_blocksize) {
|
||||
xfs_notice(mp,
|
||||
"log stripe unit 0x%x bytes must be a multiple of block size",
|
||||
sbp->sb_logsunit);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) {
|
||||
xfs_notice(mp,
|
||||
"log stripe unit 0x%x bytes over maximum size (0x%x bytes)",
|
||||
sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate the realtime geometry; stolen from xfs_repair */
|
||||
if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
|
||||
sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) {
|
||||
|
@ -566,20 +566,45 @@ xfs_extent_busy_clear(
|
||||
|
||||
/*
|
||||
* Flush out all busy extents for this AG.
|
||||
*
|
||||
* If the current transaction is holding busy extents, the caller may not want
|
||||
* to wait for committed busy extents to resolve. If we are being told just to
|
||||
* try a flush or progress has been made since we last skipped a busy extent,
|
||||
* return immediately to allow the caller to try again.
|
||||
*
|
||||
* If we are freeing extents, we might actually be holding the only free extents
|
||||
* in the transaction busy list and the log force won't resolve that situation.
|
||||
* In this case, we must return -EAGAIN to avoid a deadlock by informing the
|
||||
* caller it needs to commit the busy extents it holds before retrying the
|
||||
* extent free operation.
|
||||
*/
|
||||
void
|
||||
int
|
||||
xfs_extent_busy_flush(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_perag *pag,
|
||||
unsigned busy_gen)
|
||||
unsigned busy_gen,
|
||||
uint32_t alloc_flags)
|
||||
{
|
||||
DEFINE_WAIT (wait);
|
||||
int error;
|
||||
|
||||
error = xfs_log_force(mp, XFS_LOG_SYNC);
|
||||
error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
|
||||
if (error)
|
||||
return;
|
||||
return error;
|
||||
|
||||
/* Avoid deadlocks on uncommitted busy extents. */
|
||||
if (!list_empty(&tp->t_busy)) {
|
||||
if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH)
|
||||
return 0;
|
||||
|
||||
if (busy_gen != READ_ONCE(pag->pagb_gen))
|
||||
return 0;
|
||||
|
||||
if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/* Wait for committed busy extents to resolve. */
|
||||
do {
|
||||
prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
|
||||
if (busy_gen != READ_ONCE(pag->pagb_gen))
|
||||
@ -588,6 +613,7 @@ xfs_extent_busy_flush(
|
||||
} while (1);
|
||||
|
||||
finish_wait(&pag->pagb_wait, &wait);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -51,9 +51,9 @@ bool
|
||||
xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
|
||||
xfs_extlen_t *len, unsigned *busy_gen);
|
||||
|
||||
void
|
||||
xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag,
|
||||
unsigned busy_gen);
|
||||
int
|
||||
xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag,
|
||||
unsigned busy_gen, uint32_t alloc_flags);
|
||||
|
||||
void
|
||||
xfs_extent_busy_wait_all(struct xfs_mount *mp);
|
||||
|
@ -336,6 +336,34 @@ xfs_trans_get_efd(
|
||||
return efdp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill the EFD with all extents from the EFI when we need to roll the
|
||||
* transaction and continue with a new EFI.
|
||||
*
|
||||
* This simply copies all the extents in the EFI to the EFD rather than make
|
||||
* assumptions about which extents in the EFI have already been processed. We
|
||||
* currently keep the xefi list in the same order as the EFI extent list, but
|
||||
* that may not always be the case. Copying everything avoids leaving a landmine
|
||||
* were we fail to cancel all the extents in an EFI if the xefi list is
|
||||
* processed in a different order to the extents in the EFI.
|
||||
*/
|
||||
static void
|
||||
xfs_efd_from_efi(
|
||||
struct xfs_efd_log_item *efdp)
|
||||
{
|
||||
struct xfs_efi_log_item *efip = efdp->efd_efip;
|
||||
uint i;
|
||||
|
||||
ASSERT(efip->efi_format.efi_nextents > 0);
|
||||
ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents);
|
||||
|
||||
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
|
||||
efdp->efd_format.efd_extents[i] =
|
||||
efip->efi_format.efi_extents[i];
|
||||
}
|
||||
efdp->efd_next_extent = efip->efi_format.efi_nextents;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an extent and log it to the EFD. Note that the transaction is marked
|
||||
* dirty regardless of whether the extent free succeeds or fails to support the
|
||||
@ -365,7 +393,7 @@ xfs_trans_free_extent(
|
||||
agbno, xefi->xefi_blockcount);
|
||||
|
||||
error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
|
||||
xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
|
||||
xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
|
||||
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
|
||||
|
||||
/*
|
||||
@ -378,6 +406,17 @@ xfs_trans_free_extent(
|
||||
tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
|
||||
set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
|
||||
|
||||
/*
|
||||
* If we need a new transaction to make progress, the caller will log a
|
||||
* new EFI with the current contents. It will also log an EFD to cancel
|
||||
* the existing EFI, and so we need to copy all the unprocessed extents
|
||||
* in this EFI to the EFD so this works correctly.
|
||||
*/
|
||||
if (error == -EAGAIN) {
|
||||
xfs_efd_from_efi(efdp);
|
||||
return error;
|
||||
}
|
||||
|
||||
next_extent = efdp->efd_next_extent;
|
||||
ASSERT(next_extent < efdp->efd_format.efd_nextents);
|
||||
extp = &(efdp->efd_format.efd_extents[next_extent]);
|
||||
@ -495,6 +534,13 @@ xfs_extent_free_finish_item(
|
||||
|
||||
error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi);
|
||||
|
||||
/*
|
||||
* Don't free the XEFI if we need a new transaction to complete
|
||||
* processing of it.
|
||||
*/
|
||||
if (error == -EAGAIN)
|
||||
return error;
|
||||
|
||||
xfs_extent_free_put_group(xefi);
|
||||
kmem_cache_free(xfs_extfree_item_cache, xefi);
|
||||
return error;
|
||||
@ -620,6 +666,7 @@ xfs_efi_item_recover(
|
||||
struct xfs_trans *tp;
|
||||
int i;
|
||||
int error = 0;
|
||||
bool requeue_only = false;
|
||||
|
||||
/*
|
||||
* First check the validity of the extents described by the
|
||||
@ -644,6 +691,7 @@ xfs_efi_item_recover(
|
||||
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
|
||||
struct xfs_extent_free_item fake = {
|
||||
.xefi_owner = XFS_RMAP_OWN_UNKNOWN,
|
||||
.xefi_agresv = XFS_AG_RESV_NONE,
|
||||
};
|
||||
struct xfs_extent *extp;
|
||||
|
||||
@ -652,9 +700,28 @@ xfs_efi_item_recover(
|
||||
fake.xefi_startblock = extp->ext_start;
|
||||
fake.xefi_blockcount = extp->ext_len;
|
||||
|
||||
xfs_extent_free_get_group(mp, &fake);
|
||||
error = xfs_trans_free_extent(tp, efdp, &fake);
|
||||
xfs_extent_free_put_group(&fake);
|
||||
if (!requeue_only) {
|
||||
xfs_extent_free_get_group(mp, &fake);
|
||||
error = xfs_trans_free_extent(tp, efdp, &fake);
|
||||
xfs_extent_free_put_group(&fake);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we can't free the extent without potentially deadlocking,
|
||||
* requeue the rest of the extents to a new EFI so that they get
|
||||
* run again later with a new transaction context.
|
||||
*/
|
||||
if (error == -EAGAIN || requeue_only) {
|
||||
error = xfs_free_extent_later(tp, fake.xefi_startblock,
|
||||
fake.xefi_blockcount,
|
||||
&XFS_RMAP_OINFO_ANY_OWNER,
|
||||
fake.xefi_agresv);
|
||||
if (!error) {
|
||||
requeue_only = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error == -EFSCORRUPTED)
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
extp, sizeof(*extp));
|
||||
|
@ -160,9 +160,18 @@ struct xfs_getfsmap_info {
|
||||
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
|
||||
struct xfs_perag *pag; /* AG info, if applicable */
|
||||
xfs_daddr_t next_daddr; /* next daddr we expect */
|
||||
/* daddr of low fsmap key when we're using the rtbitmap */
|
||||
xfs_daddr_t low_daddr;
|
||||
u64 missing_owner; /* owner of holes */
|
||||
u32 dev; /* device id */
|
||||
struct xfs_rmap_irec low; /* low rmap key */
|
||||
/*
|
||||
* Low rmap key for the query. If low.rm_blockcount is nonzero, this
|
||||
* is the second (or later) call to retrieve the recordset in pieces.
|
||||
* xfs_getfsmap_rec_before_start will compare all records retrieved
|
||||
* by the rmapbt query to filter out any records that start before
|
||||
* the last record.
|
||||
*/
|
||||
struct xfs_rmap_irec low;
|
||||
struct xfs_rmap_irec high; /* high rmap key */
|
||||
bool last; /* last extent? */
|
||||
};
|
||||
@ -237,16 +246,31 @@ xfs_getfsmap_format(
|
||||
xfs_fsmap_from_internal(rec, xfm);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
xfs_getfsmap_rec_before_start(
|
||||
struct xfs_getfsmap_info *info,
|
||||
const struct xfs_rmap_irec *rec,
|
||||
xfs_daddr_t rec_daddr)
|
||||
{
|
||||
if (info->low_daddr != -1ULL)
|
||||
return rec_daddr < info->low_daddr;
|
||||
if (info->low.rm_blockcount)
|
||||
return xfs_rmap_compare(rec, &info->low) < 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Format a reverse mapping for getfsmap, having translated rm_startblock
|
||||
* into the appropriate daddr units.
|
||||
* into the appropriate daddr units. Pass in a nonzero @len_daddr if the
|
||||
* length could be larger than rm_blockcount in struct xfs_rmap_irec.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_getfsmap_helper(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_getfsmap_info *info,
|
||||
const struct xfs_rmap_irec *rec,
|
||||
xfs_daddr_t rec_daddr)
|
||||
xfs_daddr_t rec_daddr,
|
||||
xfs_daddr_t len_daddr)
|
||||
{
|
||||
struct xfs_fsmap fmr;
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
@ -256,12 +280,15 @@ xfs_getfsmap_helper(
|
||||
if (fatal_signal_pending(current))
|
||||
return -EINTR;
|
||||
|
||||
if (len_daddr == 0)
|
||||
len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
|
||||
|
||||
/*
|
||||
* Filter out records that start before our startpoint, if the
|
||||
* caller requested that.
|
||||
*/
|
||||
if (xfs_rmap_compare(rec, &info->low) < 0) {
|
||||
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
|
||||
if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
|
||||
rec_daddr += len_daddr;
|
||||
if (info->next_daddr < rec_daddr)
|
||||
info->next_daddr = rec_daddr;
|
||||
return 0;
|
||||
@ -280,7 +307,7 @@ xfs_getfsmap_helper(
|
||||
|
||||
info->head->fmh_entries++;
|
||||
|
||||
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
|
||||
rec_daddr += len_daddr;
|
||||
if (info->next_daddr < rec_daddr)
|
||||
info->next_daddr = rec_daddr;
|
||||
return 0;
|
||||
@ -320,7 +347,7 @@ xfs_getfsmap_helper(
|
||||
if (error)
|
||||
return error;
|
||||
fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
|
||||
fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
|
||||
fmr.fmr_length = len_daddr;
|
||||
if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
|
||||
fmr.fmr_flags |= FMR_OF_PREALLOC;
|
||||
if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
|
||||
@ -337,7 +364,7 @@ xfs_getfsmap_helper(
|
||||
|
||||
xfs_getfsmap_format(mp, &fmr, info);
|
||||
out:
|
||||
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
|
||||
rec_daddr += len_daddr;
|
||||
if (info->next_daddr < rec_daddr)
|
||||
info->next_daddr = rec_daddr;
|
||||
return 0;
|
||||
@ -358,7 +385,7 @@ xfs_getfsmap_datadev_helper(
|
||||
fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
|
||||
rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
|
||||
|
||||
return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
|
||||
return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);
|
||||
}
|
||||
|
||||
/* Transform a bnobt irec into a fsmap */
|
||||
@ -382,7 +409,7 @@ xfs_getfsmap_datadev_bnobt_helper(
|
||||
irec.rm_offset = 0;
|
||||
irec.rm_flags = 0;
|
||||
|
||||
return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
|
||||
return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);
|
||||
}
|
||||
|
||||
/* Set rmap flags based on the getfsmap flags */
|
||||
@ -409,31 +436,25 @@ xfs_getfsmap_logdev(
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
struct xfs_rmap_irec rmap;
|
||||
int error;
|
||||
xfs_daddr_t rec_daddr, len_daddr;
|
||||
xfs_fsblock_t start_fsb, end_fsb;
|
||||
uint64_t eofs;
|
||||
|
||||
/* Set up search keys */
|
||||
info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
|
||||
info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
|
||||
error = xfs_fsmap_owner_to_rmap(&info->low, keys);
|
||||
if (error)
|
||||
return error;
|
||||
info->low.rm_blockcount = 0;
|
||||
xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
|
||||
eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
|
||||
if (keys[0].fmr_physical >= eofs)
|
||||
return 0;
|
||||
start_fsb = XFS_BB_TO_FSBT(mp,
|
||||
keys[0].fmr_physical + keys[0].fmr_length);
|
||||
end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
|
||||
|
||||
error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1);
|
||||
if (error)
|
||||
return error;
|
||||
info->high.rm_startblock = -1U;
|
||||
info->high.rm_owner = ULLONG_MAX;
|
||||
info->high.rm_offset = ULLONG_MAX;
|
||||
info->high.rm_blockcount = 0;
|
||||
info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
|
||||
info->missing_owner = XFS_FMR_OWN_FREE;
|
||||
/* Adjust the low key if we are continuing from where we left off. */
|
||||
if (keys[0].fmr_length > 0)
|
||||
info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
|
||||
|
||||
trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
|
||||
trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
|
||||
trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb);
|
||||
trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb);
|
||||
|
||||
if (keys[0].fmr_physical > 0)
|
||||
if (start_fsb > 0)
|
||||
return 0;
|
||||
|
||||
/* Fabricate an rmap entry for the external log device. */
|
||||
@ -443,7 +464,9 @@ xfs_getfsmap_logdev(
|
||||
rmap.rm_offset = 0;
|
||||
rmap.rm_flags = 0;
|
||||
|
||||
return xfs_getfsmap_helper(tp, info, &rmap, 0);
|
||||
rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock);
|
||||
len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount);
|
||||
return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_XFS_RT
|
||||
@ -457,81 +480,67 @@ xfs_getfsmap_rtdev_rtbitmap_helper(
|
||||
{
|
||||
struct xfs_getfsmap_info *info = priv;
|
||||
struct xfs_rmap_irec irec;
|
||||
xfs_daddr_t rec_daddr;
|
||||
xfs_rtblock_t rtbno;
|
||||
xfs_daddr_t rec_daddr, len_daddr;
|
||||
|
||||
rtbno = rec->ar_startext * mp->m_sb.sb_rextsize;
|
||||
rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
|
||||
irec.rm_startblock = rtbno;
|
||||
|
||||
rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize;
|
||||
len_daddr = XFS_FSB_TO_BB(mp, rtbno);
|
||||
irec.rm_blockcount = rtbno;
|
||||
|
||||
irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize;
|
||||
rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock);
|
||||
irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize;
|
||||
irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
|
||||
irec.rm_offset = 0;
|
||||
irec.rm_flags = 0;
|
||||
|
||||
return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
|
||||
return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);
|
||||
}
|
||||
|
||||
/* Execute a getfsmap query against the realtime device. */
|
||||
/* Execute a getfsmap query against the realtime device rtbitmap. */
|
||||
STATIC int
|
||||
__xfs_getfsmap_rtdev(
|
||||
xfs_getfsmap_rtdev_rtbitmap(
|
||||
struct xfs_trans *tp,
|
||||
const struct xfs_fsmap *keys,
|
||||
int (*query_fn)(struct xfs_trans *,
|
||||
struct xfs_getfsmap_info *),
|
||||
struct xfs_getfsmap_info *info)
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
xfs_fsblock_t start_fsb;
|
||||
xfs_fsblock_t end_fsb;
|
||||
uint64_t eofs;
|
||||
int error = 0;
|
||||
|
||||
eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
|
||||
if (keys[0].fmr_physical >= eofs)
|
||||
return 0;
|
||||
start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
|
||||
end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
|
||||
|
||||
/* Set up search keys */
|
||||
info->low.rm_startblock = start_fsb;
|
||||
error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
|
||||
if (error)
|
||||
return error;
|
||||
info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
|
||||
info->low.rm_blockcount = 0;
|
||||
xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
|
||||
|
||||
info->high.rm_startblock = end_fsb;
|
||||
error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
|
||||
if (error)
|
||||
return error;
|
||||
info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
|
||||
info->high.rm_blockcount = 0;
|
||||
xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
|
||||
|
||||
trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
|
||||
trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
|
||||
|
||||
return query_fn(tp, info);
|
||||
}
|
||||
|
||||
/* Actually query the realtime bitmap. */
|
||||
STATIC int
|
||||
xfs_getfsmap_rtdev_rtbitmap_query(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_getfsmap_info *info)
|
||||
{
|
||||
struct xfs_rtalloc_rec alow = { 0 };
|
||||
struct xfs_rtalloc_rec ahigh = { 0 };
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
xfs_rtblock_t start_rtb;
|
||||
xfs_rtblock_t end_rtb;
|
||||
uint64_t eofs;
|
||||
int error;
|
||||
|
||||
eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize);
|
||||
if (keys[0].fmr_physical >= eofs)
|
||||
return 0;
|
||||
start_rtb = XFS_BB_TO_FSBT(mp,
|
||||
keys[0].fmr_physical + keys[0].fmr_length);
|
||||
end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
|
||||
|
||||
info->missing_owner = XFS_FMR_OWN_UNKNOWN;
|
||||
|
||||
/* Adjust the low key if we are continuing from where we left off. */
|
||||
if (keys[0].fmr_length > 0) {
|
||||
info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
|
||||
if (info->low_daddr >= eofs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb);
|
||||
trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);
|
||||
|
||||
xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
|
||||
|
||||
/*
|
||||
* Set up query parameters to return free rtextents covering the range
|
||||
* we want.
|
||||
*/
|
||||
alow.ar_startext = info->low.rm_startblock;
|
||||
ahigh.ar_startext = info->high.rm_startblock;
|
||||
alow.ar_startext = start_rtb;
|
||||
ahigh.ar_startext = end_rtb;
|
||||
do_div(alow.ar_startext, mp->m_sb.sb_rextsize);
|
||||
if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))
|
||||
ahigh.ar_startext++;
|
||||
@ -554,18 +563,6 @@ xfs_getfsmap_rtdev_rtbitmap_query(
|
||||
xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Execute a getfsmap query against the realtime device rtbitmap. */
|
||||
STATIC int
|
||||
xfs_getfsmap_rtdev_rtbitmap(
|
||||
struct xfs_trans *tp,
|
||||
const struct xfs_fsmap *keys,
|
||||
struct xfs_getfsmap_info *info)
|
||||
{
|
||||
info->missing_owner = XFS_FMR_OWN_UNKNOWN;
|
||||
return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
|
||||
info);
|
||||
}
|
||||
#endif /* CONFIG_XFS_RT */
|
||||
|
||||
/* Execute a getfsmap query against the regular data device. */
|
||||
@ -606,9 +603,27 @@ __xfs_getfsmap_datadev(
|
||||
error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
|
||||
if (error)
|
||||
return error;
|
||||
info->low.rm_blockcount = 0;
|
||||
info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
|
||||
xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
|
||||
|
||||
/* Adjust the low key if we are continuing from where we left off. */
|
||||
if (info->low.rm_blockcount == 0) {
|
||||
/* empty */
|
||||
} else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) ||
|
||||
(info->low.rm_flags & (XFS_RMAP_ATTR_FORK |
|
||||
XFS_RMAP_BMBT_BLOCK |
|
||||
XFS_RMAP_UNWRITTEN))) {
|
||||
info->low.rm_startblock += info->low.rm_blockcount;
|
||||
info->low.rm_owner = 0;
|
||||
info->low.rm_offset = 0;
|
||||
|
||||
start_fsb += info->low.rm_blockcount;
|
||||
if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
|
||||
return 0;
|
||||
} else {
|
||||
info->low.rm_offset += info->low.rm_blockcount;
|
||||
}
|
||||
|
||||
info->high.rm_startblock = -1U;
|
||||
info->high.rm_owner = ULLONG_MAX;
|
||||
info->high.rm_offset = ULLONG_MAX;
|
||||
@ -659,12 +674,8 @@ __xfs_getfsmap_datadev(
|
||||
* Set the AG low key to the start of the AG prior to
|
||||
* moving on to the next AG.
|
||||
*/
|
||||
if (pag->pag_agno == start_ag) {
|
||||
info->low.rm_startblock = 0;
|
||||
info->low.rm_owner = 0;
|
||||
info->low.rm_offset = 0;
|
||||
info->low.rm_flags = 0;
|
||||
}
|
||||
if (pag->pag_agno == start_ag)
|
||||
memset(&info->low, 0, sizeof(info->low));
|
||||
|
||||
/*
|
||||
* If this is the last AG, report any gap at the end of it
|
||||
@ -791,6 +802,19 @@ xfs_getfsmap_check_keys(
|
||||
struct xfs_fsmap *low_key,
|
||||
struct xfs_fsmap *high_key)
|
||||
{
|
||||
if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
|
||||
if (low_key->fmr_offset)
|
||||
return false;
|
||||
}
|
||||
if (high_key->fmr_flags != -1U &&
|
||||
(high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
|
||||
FMR_OF_EXTENT_MAP))) {
|
||||
if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
|
||||
return false;
|
||||
}
|
||||
if (high_key->fmr_length && high_key->fmr_length != -1ULL)
|
||||
return false;
|
||||
|
||||
if (low_key->fmr_device > high_key->fmr_device)
|
||||
return false;
|
||||
if (low_key->fmr_device < high_key->fmr_device)
|
||||
@ -834,15 +858,15 @@ xfs_getfsmap_check_keys(
|
||||
* ----------------
|
||||
* There are multiple levels of keys and counters at work here:
|
||||
* xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in;
|
||||
* these reflect fs-wide sector addrs.
|
||||
* these reflect fs-wide sector addrs.
|
||||
* dkeys -- fmh_keys used to query each device;
|
||||
* these are fmh_keys but w/ the low key
|
||||
* bumped up by fmr_length.
|
||||
* these are fmh_keys but w/ the low key
|
||||
* bumped up by fmr_length.
|
||||
* xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
|
||||
* is how we detect gaps in the fsmap
|
||||
* records and report them.
|
||||
* xfs_getfsmap_info.low/high -- per-AG low/high keys computed from
|
||||
* dkeys; used to query the metadata.
|
||||
* dkeys; used to query the metadata.
|
||||
*/
|
||||
int
|
||||
xfs_getfsmap(
|
||||
@ -863,6 +887,8 @@ xfs_getfsmap(
|
||||
if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
|
||||
!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
|
||||
return -EINVAL;
|
||||
if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
|
||||
return -EINVAL;
|
||||
|
||||
use_rmap = xfs_has_rmapbt(mp) &&
|
||||
has_capability_noaudit(current, CAP_SYS_ADMIN);
|
||||
@ -901,26 +927,15 @@ xfs_getfsmap(
|
||||
* blocks could be mapped to several other files/offsets.
|
||||
* According to rmapbt record ordering, the minimal next
|
||||
* possible record for the block range is the next starting
|
||||
* offset in the same inode. Therefore, bump the file offset to
|
||||
* continue the search appropriately. For all other low key
|
||||
* mapping types (attr blocks, metadata), bump the physical
|
||||
* offset as there can be no other mapping for the same physical
|
||||
* block range.
|
||||
* offset in the same inode. Therefore, each fsmap backend bumps
|
||||
* the file offset to continue the search appropriately. For
|
||||
* all other low key mapping types (attr blocks, metadata), each
|
||||
* fsmap backend bumps the physical offset as there can be no
|
||||
* other mapping for the same physical block range.
|
||||
*/
|
||||
dkeys[0] = head->fmh_keys[0];
|
||||
if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
|
||||
dkeys[0].fmr_physical += dkeys[0].fmr_length;
|
||||
dkeys[0].fmr_owner = 0;
|
||||
if (dkeys[0].fmr_offset)
|
||||
return -EINVAL;
|
||||
} else
|
||||
dkeys[0].fmr_offset += dkeys[0].fmr_length;
|
||||
dkeys[0].fmr_length = 0;
|
||||
memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
|
||||
|
||||
if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1]))
|
||||
return -EINVAL;
|
||||
|
||||
info.next_daddr = head->fmh_keys[0].fmr_physical +
|
||||
head->fmh_keys[0].fmr_length;
|
||||
info.fsmap_recs = fsmap_recs;
|
||||
@ -960,6 +975,8 @@ xfs_getfsmap(
|
||||
info.dev = handlers[i].dev;
|
||||
info.last = false;
|
||||
info.pag = NULL;
|
||||
info.low_daddr = -1ULL;
|
||||
info.low.rm_blockcount = 0;
|
||||
error = handlers[i].fn(tp, dkeys, &info);
|
||||
if (error)
|
||||
break;
|
||||
|
@ -639,7 +639,6 @@ xfs_log_mount(
|
||||
int num_bblks)
|
||||
{
|
||||
struct xlog *log;
|
||||
bool fatal = xfs_has_crc(mp);
|
||||
int error = 0;
|
||||
int min_logfsbs;
|
||||
|
||||
@ -663,53 +662,37 @@ xfs_log_mount(
|
||||
mp->m_log = log;
|
||||
|
||||
/*
|
||||
* Validate the given log space and drop a critical message via syslog
|
||||
* if the log size is too small that would lead to some unexpected
|
||||
* situations in transaction log space reservation stage.
|
||||
* Now that we have set up the log and its internal geometry
|
||||
* parameters, we can validate the given log space and drop a critical
|
||||
* message via syslog if the log size is too small. A log that is too
|
||||
* small can lead to unexpected situations in transaction log space
|
||||
* reservation stage. The superblock verifier has already validated all
|
||||
* the other log geometry constraints, so we don't have to check those
|
||||
* here.
|
||||
*
|
||||
* Note: we can't just reject the mount if the validation fails. This
|
||||
* would mean that people would have to downgrade their kernel just to
|
||||
* remedy the situation as there is no way to grow the log (short of
|
||||
* black magic surgery with xfs_db).
|
||||
* Note: For v4 filesystems, we can't just reject the mount if the
|
||||
* validation fails. This would mean that people would have to
|
||||
* downgrade their kernel just to remedy the situation as there is no
|
||||
* way to grow the log (short of black magic surgery with xfs_db).
|
||||
*
|
||||
* We can, however, reject mounts for CRC format filesystems, as the
|
||||
* We can, however, reject mounts for V5 format filesystems, as the
|
||||
* mkfs binary being used to make the filesystem should never create a
|
||||
* filesystem with a log that is too small.
|
||||
*/
|
||||
min_logfsbs = xfs_log_calc_minimum_size(mp);
|
||||
|
||||
if (mp->m_sb.sb_logblocks < min_logfsbs) {
|
||||
xfs_warn(mp,
|
||||
"Log size %d blocks too small, minimum size is %d blocks",
|
||||
mp->m_sb.sb_logblocks, min_logfsbs);
|
||||
error = -EINVAL;
|
||||
} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
|
||||
xfs_warn(mp,
|
||||
"Log size %d blocks too large, maximum size is %lld blocks",
|
||||
mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
|
||||
error = -EINVAL;
|
||||
} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
|
||||
xfs_warn(mp,
|
||||
"log size %lld bytes too large, maximum size is %lld bytes",
|
||||
XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
|
||||
XFS_MAX_LOG_BYTES);
|
||||
error = -EINVAL;
|
||||
} else if (mp->m_sb.sb_logsunit > 1 &&
|
||||
mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
|
||||
xfs_warn(mp,
|
||||
"log stripe unit %u bytes must be a multiple of block size",
|
||||
mp->m_sb.sb_logsunit);
|
||||
error = -EINVAL;
|
||||
fatal = true;
|
||||
}
|
||||
if (error) {
|
||||
|
||||
/*
|
||||
* Log check errors are always fatal on v5; or whenever bad
|
||||
* metadata leads to a crash.
|
||||
*/
|
||||
if (fatal) {
|
||||
if (xfs_has_crc(mp)) {
|
||||
xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
|
||||
ASSERT(0);
|
||||
error = -EINVAL;
|
||||
goto out_free_log;
|
||||
}
|
||||
xfs_crit(mp, "Log size out of supported range.");
|
||||
|
@ -114,7 +114,8 @@ xfs_dax_notify_ddev_failure(
|
||||
int error = 0;
|
||||
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr);
|
||||
xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
|
||||
xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen);
|
||||
xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp,
|
||||
daddr + bblen - 1);
|
||||
xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
|
||||
|
||||
error = xfs_trans_alloc_empty(mp, &tp);
|
||||
@ -210,7 +211,7 @@ xfs_dax_notify_failure(
|
||||
ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
|
||||
|
||||
/* Ignore the range out of filesystem area */
|
||||
if (offset + len < ddev_start)
|
||||
if (offset + len - 1 < ddev_start)
|
||||
return -ENXIO;
|
||||
if (offset > ddev_end)
|
||||
return -ENXIO;
|
||||
@ -222,8 +223,8 @@ xfs_dax_notify_failure(
|
||||
len -= ddev_start - offset;
|
||||
offset = 0;
|
||||
}
|
||||
if (offset + len > ddev_end)
|
||||
len -= ddev_end - offset;
|
||||
if (offset + len - 1 > ddev_end)
|
||||
len = ddev_end - offset + 1;
|
||||
|
||||
return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
|
||||
mf_flags);
|
||||
|
@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks(
|
||||
del.br_blockcount);
|
||||
|
||||
error = xfs_free_extent_later(*tpp, del.br_startblock,
|
||||
del.br_blockcount, NULL);
|
||||
del.br_blockcount, NULL,
|
||||
XFS_AG_RESV_NONE);
|
||||
if (error)
|
||||
break;
|
||||
|
||||
|
@ -3623,6 +3623,31 @@ DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
|
||||
DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
|
||||
DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
|
||||
|
||||
/*
 * Trace event class for fsmap key events that carry one block number.
 *
 * Arguments: the mount (mp), a u32-encoded key device (keydev) and a
 * 64-bit block number (bno).
 *
 * Each record stores the device of the mount's superblock
 * (mp->m_super->s_dev), the key device after decoding it with
 * new_decode_dev(), and the block number.  The record is printed as two
 * major:minor pairs followed by the block number in hexadecimal.
 */
DECLARE_EVENT_CLASS(xfs_fsmap_linear_class,
|
||||
TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno),
|
||||
TP_ARGS(mp, keydev, bno),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(dev_t, keydev)
|
||||
__field(xfs_fsblock_t, bno)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->dev = mp->m_super->s_dev;
|
||||
__entry->keydev = new_decode_dev(keydev);
|
||||
__entry->bno = bno;
|
||||
),
|
||||
TP_printk("dev %d:%d keydev %d:%d bno 0x%llx",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
MAJOR(__entry->keydev), MINOR(__entry->keydev),
|
||||
__entry->bno)
|
||||
)
|
||||
/*
 * Define a tracepoint called "name" that belongs to
 * xfs_fsmap_linear_class and takes the same (mp, keydev, bno) arguments
 * as that class.
 */
#define DEFINE_FSMAP_LINEAR_EVENT(name) \
|
||||
DEFINE_EVENT(xfs_fsmap_linear_class, name, \
|
||||
TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \
|
||||
TP_ARGS(mp, keydev, bno))
|
||||
/* The two tracepoints instantiated from the class: low key and high key. */
DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear);
|
||||
DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear);
|
||||
|
||||
DECLARE_EVENT_CLASS(xfs_getfsmap_class,
|
||||
TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
|
||||
TP_ARGS(mp, fsmap),
|
||||
|
@ -823,7 +823,7 @@ xfs_trans_ail_update_bulk(
|
||||
trace_xfs_ail_insert(lip, 0, lsn);
|
||||
}
|
||||
lip->li_lsn = lsn;
|
||||
list_add(&lip->li_ail, &tmp);
|
||||
list_add_tail(&lip->li_ail, &tmp);
|
||||
}
|
||||
|
||||
if (!list_empty(&tmp))
|
||||
|
Loading…
Reference in New Issue
Block a user