mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 13:43:51 +00:00
94a0333b92
A concurrent file creation and little writing could unexpectedly return -ENOSPC error since there is a race window that the allocator could get the wrong agf->agf_longest. Write file process steps: 1) Find the entry that best meets the conditions, then calculate the start address and length of the remaining part of the entry after allocation. 2) Delete this entry and update the -current- agf->agf_longest. 3) Insert the remaining unused parts of this entry based on the calculations in 1), and update the agf->agf_longest again if necessary. Create file process steps: 1) Check whether there are free inodes in the inode chunk. 2) If there is no free inode, check whether there has space for creating inode chunks, perform the no-lock judgment first. 3) If the judgment succeeds, the judgment is performed again with agf lock held. Otherwire, an error is returned directly. If the write process is in step 2) but not go to 3) yet, the create file process goes to 2) at this time, it may be mistaken for no space, resulting in the file system still has space but the file creation fails. We have sent two different commits to the community in order to fix this problem[1][2]. Unfortunately, both solutions have flaws. In [2], I discussed with Dave and Darrick, realized that a better solution to this problem requires the "last cnt record tracking" to be ripped out of the generic btree code. And surprisingly, Dave directly provided his fix code. This patch includes appropriate modifications based on his tmp-code to address this issue. The entire fix can be roughly divided into two parts: 1) Delete the code related to lastrec-update in the generic btree code. 2) Place the process of updating longest freespace with cntbt separately to the end of the cntbt modifications. Move the cursor to the rightmost firstly, and update the longest free extent based on the record. Note that we can not update the longest with xfs_alloc_get_rec() after find the longest record, as xfs_verify_agbno() may not pass because pag->block_count is updated on the outside. Therefore, use xfs_btree_get_rec() as a replacement. [1] https://lore.kernel.org/all/20240419061848.1032366-2-yebin10@huawei.com [2] https://lore.kernel.org/all/20240604071121.3981686-1-wozizhi@huawei.com Reported by: Ye Bin <yebin10@huawei.com> Signed-off-by: Zizhi Wo <wozizhi@huawei.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
628 lines
15 KiB
C
628 lines
15 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_btree.h"
|
|
#include "xfs_btree_staging.h"
|
|
#include "xfs_alloc_btree.h"
|
|
#include "xfs_alloc.h"
|
|
#include "xfs_extent_busy.h"
|
|
#include "xfs_error.h"
|
|
#include "xfs_health.h"
|
|
#include "xfs_trace.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_ag.h"
|
|
|
|
static struct kmem_cache *xfs_allocbt_cur_cache;
|
|
|
|
STATIC struct xfs_btree_cur *
|
|
xfs_bnobt_dup_cursor(
|
|
struct xfs_btree_cur *cur)
|
|
{
|
|
return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
|
|
cur->bc_ag.pag);
|
|
}
|
|
|
|
STATIC struct xfs_btree_cur *
|
|
xfs_cntbt_dup_cursor(
|
|
struct xfs_btree_cur *cur)
|
|
{
|
|
return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
|
|
cur->bc_ag.pag);
|
|
}
|
|
|
|
|
|
STATIC void
|
|
xfs_allocbt_set_root(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_ptr *ptr,
|
|
int inc)
|
|
{
|
|
struct xfs_buf *agbp = cur->bc_ag.agbp;
|
|
struct xfs_agf *agf = agbp->b_addr;
|
|
|
|
ASSERT(ptr->s != 0);
|
|
|
|
if (xfs_btree_is_bno(cur->bc_ops)) {
|
|
agf->agf_bno_root = ptr->s;
|
|
be32_add_cpu(&agf->agf_bno_level, inc);
|
|
cur->bc_ag.pag->pagf_bno_level += inc;
|
|
} else {
|
|
agf->agf_cnt_root = ptr->s;
|
|
be32_add_cpu(&agf->agf_cnt_level, inc);
|
|
cur->bc_ag.pag->pagf_cnt_level += inc;
|
|
}
|
|
|
|
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
|
|
}
|
|
|
|
STATIC int
|
|
xfs_allocbt_alloc_block(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_ptr *start,
|
|
union xfs_btree_ptr *new,
|
|
int *stat)
|
|
{
|
|
int error;
|
|
xfs_agblock_t bno;
|
|
|
|
/* Allocate the new block from the freelist. If we can't, give up. */
|
|
error = xfs_alloc_get_freelist(cur->bc_ag.pag, cur->bc_tp,
|
|
cur->bc_ag.agbp, &bno, 1);
|
|
if (error)
|
|
return error;
|
|
|
|
if (bno == NULLAGBLOCK) {
|
|
*stat = 0;
|
|
return 0;
|
|
}
|
|
|
|
atomic64_inc(&cur->bc_mp->m_allocbt_blks);
|
|
xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.pag, bno, 1, false);
|
|
|
|
new->s = cpu_to_be32(bno);
|
|
|
|
*stat = 1;
|
|
return 0;
|
|
}
|
|
|
|
STATIC int
|
|
xfs_allocbt_free_block(
|
|
struct xfs_btree_cur *cur,
|
|
struct xfs_buf *bp)
|
|
{
|
|
struct xfs_buf *agbp = cur->bc_ag.agbp;
|
|
xfs_agblock_t bno;
|
|
int error;
|
|
|
|
bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
|
|
error = xfs_alloc_put_freelist(cur->bc_ag.pag, cur->bc_tp, agbp, NULL,
|
|
bno, 1);
|
|
if (error)
|
|
return error;
|
|
|
|
atomic64_dec(&cur->bc_mp->m_allocbt_blks);
|
|
xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1,
|
|
XFS_EXTENT_BUSY_SKIP_DISCARD);
|
|
return 0;
|
|
}
|
|
|
|
STATIC int
|
|
xfs_allocbt_get_minrecs(
|
|
struct xfs_btree_cur *cur,
|
|
int level)
|
|
{
|
|
return cur->bc_mp->m_alloc_mnr[level != 0];
|
|
}
|
|
|
|
STATIC int
|
|
xfs_allocbt_get_maxrecs(
|
|
struct xfs_btree_cur *cur,
|
|
int level)
|
|
{
|
|
return cur->bc_mp->m_alloc_mxr[level != 0];
|
|
}
|
|
|
|
STATIC void
|
|
xfs_allocbt_init_key_from_rec(
|
|
union xfs_btree_key *key,
|
|
const union xfs_btree_rec *rec)
|
|
{
|
|
key->alloc.ar_startblock = rec->alloc.ar_startblock;
|
|
key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
|
|
}
|
|
|
|
STATIC void
|
|
xfs_bnobt_init_high_key_from_rec(
|
|
union xfs_btree_key *key,
|
|
const union xfs_btree_rec *rec)
|
|
{
|
|
__u32 x;
|
|
|
|
x = be32_to_cpu(rec->alloc.ar_startblock);
|
|
x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
|
|
key->alloc.ar_startblock = cpu_to_be32(x);
|
|
key->alloc.ar_blockcount = 0;
|
|
}
|
|
|
|
STATIC void
|
|
xfs_cntbt_init_high_key_from_rec(
|
|
union xfs_btree_key *key,
|
|
const union xfs_btree_rec *rec)
|
|
{
|
|
key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
|
|
key->alloc.ar_startblock = 0;
|
|
}
|
|
|
|
STATIC void
|
|
xfs_allocbt_init_rec_from_cur(
|
|
struct xfs_btree_cur *cur,
|
|
union xfs_btree_rec *rec)
|
|
{
|
|
rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
|
|
rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
|
|
}
|
|
|
|
STATIC void
|
|
xfs_allocbt_init_ptr_from_cur(
|
|
struct xfs_btree_cur *cur,
|
|
union xfs_btree_ptr *ptr)
|
|
{
|
|
struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
|
|
|
|
ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
|
|
|
|
if (xfs_btree_is_bno(cur->bc_ops))
|
|
ptr->s = agf->agf_bno_root;
|
|
else
|
|
ptr->s = agf->agf_cnt_root;
|
|
}
|
|
|
|
STATIC int64_t
|
|
xfs_bnobt_key_diff(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *key)
|
|
{
|
|
struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a;
|
|
const struct xfs_alloc_rec *kp = &key->alloc;
|
|
|
|
return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
|
|
}
|
|
|
|
STATIC int64_t
|
|
xfs_cntbt_key_diff(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *key)
|
|
{
|
|
struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a;
|
|
const struct xfs_alloc_rec *kp = &key->alloc;
|
|
int64_t diff;
|
|
|
|
diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
|
|
if (diff)
|
|
return diff;
|
|
|
|
return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
|
|
}
|
|
|
|
STATIC int64_t
|
|
xfs_bnobt_diff_two_keys(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *k1,
|
|
const union xfs_btree_key *k2,
|
|
const union xfs_btree_key *mask)
|
|
{
|
|
ASSERT(!mask || mask->alloc.ar_startblock);
|
|
|
|
return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
|
|
be32_to_cpu(k2->alloc.ar_startblock);
|
|
}
|
|
|
|
STATIC int64_t
|
|
xfs_cntbt_diff_two_keys(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *k1,
|
|
const union xfs_btree_key *k2,
|
|
const union xfs_btree_key *mask)
|
|
{
|
|
int64_t diff;
|
|
|
|
ASSERT(!mask || (mask->alloc.ar_blockcount &&
|
|
mask->alloc.ar_startblock));
|
|
|
|
diff = be32_to_cpu(k1->alloc.ar_blockcount) -
|
|
be32_to_cpu(k2->alloc.ar_blockcount);
|
|
if (diff)
|
|
return diff;
|
|
|
|
return be32_to_cpu(k1->alloc.ar_startblock) -
|
|
be32_to_cpu(k2->alloc.ar_startblock);
|
|
}
|
|
|
|
static xfs_failaddr_t
|
|
xfs_allocbt_verify(
|
|
struct xfs_buf *bp)
|
|
{
|
|
struct xfs_mount *mp = bp->b_mount;
|
|
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
|
|
struct xfs_perag *pag = bp->b_pag;
|
|
xfs_failaddr_t fa;
|
|
unsigned int level;
|
|
|
|
if (!xfs_verify_magic(bp, block->bb_magic))
|
|
return __this_address;
|
|
|
|
if (xfs_has_crc(mp)) {
|
|
fa = xfs_btree_agblock_v5hdr_verify(bp);
|
|
if (fa)
|
|
return fa;
|
|
}
|
|
|
|
/*
|
|
* The perag may not be attached during grow operations or fully
|
|
* initialized from the AGF during log recovery. Therefore we can only
|
|
* check against maximum tree depth from those contexts.
|
|
*
|
|
* Otherwise check against the per-tree limit. Peek at one of the
|
|
* verifier magic values to determine the type of tree we're verifying
|
|
* against.
|
|
*/
|
|
level = be16_to_cpu(block->bb_level);
|
|
if (pag && xfs_perag_initialised_agf(pag)) {
|
|
unsigned int maxlevel, repair_maxlevel = 0;
|
|
|
|
/*
|
|
* Online repair could be rewriting the free space btrees, so
|
|
* we'll validate against the larger of either tree while this
|
|
* is going on.
|
|
*/
|
|
if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) {
|
|
maxlevel = pag->pagf_cnt_level;
|
|
#ifdef CONFIG_XFS_ONLINE_REPAIR
|
|
repair_maxlevel = pag->pagf_repair_cnt_level;
|
|
#endif
|
|
} else {
|
|
maxlevel = pag->pagf_bno_level;
|
|
#ifdef CONFIG_XFS_ONLINE_REPAIR
|
|
repair_maxlevel = pag->pagf_repair_bno_level;
|
|
#endif
|
|
}
|
|
|
|
if (level >= max(maxlevel, repair_maxlevel))
|
|
return __this_address;
|
|
} else if (level >= mp->m_alloc_maxlevels)
|
|
return __this_address;
|
|
|
|
return xfs_btree_agblock_verify(bp, mp->m_alloc_mxr[level != 0]);
|
|
}
|
|
|
|
static void
|
|
xfs_allocbt_read_verify(
|
|
struct xfs_buf *bp)
|
|
{
|
|
xfs_failaddr_t fa;
|
|
|
|
if (!xfs_btree_agblock_verify_crc(bp))
|
|
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
|
|
else {
|
|
fa = xfs_allocbt_verify(bp);
|
|
if (fa)
|
|
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
|
|
}
|
|
|
|
if (bp->b_error)
|
|
trace_xfs_btree_corrupt(bp, _RET_IP_);
|
|
}
|
|
|
|
static void
|
|
xfs_allocbt_write_verify(
|
|
struct xfs_buf *bp)
|
|
{
|
|
xfs_failaddr_t fa;
|
|
|
|
fa = xfs_allocbt_verify(bp);
|
|
if (fa) {
|
|
trace_xfs_btree_corrupt(bp, _RET_IP_);
|
|
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
|
|
return;
|
|
}
|
|
xfs_btree_agblock_calc_crc(bp);
|
|
|
|
}
|
|
|
|
const struct xfs_buf_ops xfs_bnobt_buf_ops = {
|
|
.name = "xfs_bnobt",
|
|
.magic = { cpu_to_be32(XFS_ABTB_MAGIC),
|
|
cpu_to_be32(XFS_ABTB_CRC_MAGIC) },
|
|
.verify_read = xfs_allocbt_read_verify,
|
|
.verify_write = xfs_allocbt_write_verify,
|
|
.verify_struct = xfs_allocbt_verify,
|
|
};
|
|
|
|
const struct xfs_buf_ops xfs_cntbt_buf_ops = {
|
|
.name = "xfs_cntbt",
|
|
.magic = { cpu_to_be32(XFS_ABTC_MAGIC),
|
|
cpu_to_be32(XFS_ABTC_CRC_MAGIC) },
|
|
.verify_read = xfs_allocbt_read_verify,
|
|
.verify_write = xfs_allocbt_write_verify,
|
|
.verify_struct = xfs_allocbt_verify,
|
|
};
|
|
|
|
STATIC int
|
|
xfs_bnobt_keys_inorder(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *k1,
|
|
const union xfs_btree_key *k2)
|
|
{
|
|
return be32_to_cpu(k1->alloc.ar_startblock) <
|
|
be32_to_cpu(k2->alloc.ar_startblock);
|
|
}
|
|
|
|
STATIC int
|
|
xfs_bnobt_recs_inorder(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_rec *r1,
|
|
const union xfs_btree_rec *r2)
|
|
{
|
|
return be32_to_cpu(r1->alloc.ar_startblock) +
|
|
be32_to_cpu(r1->alloc.ar_blockcount) <=
|
|
be32_to_cpu(r2->alloc.ar_startblock);
|
|
}
|
|
|
|
STATIC int
|
|
xfs_cntbt_keys_inorder(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *k1,
|
|
const union xfs_btree_key *k2)
|
|
{
|
|
return be32_to_cpu(k1->alloc.ar_blockcount) <
|
|
be32_to_cpu(k2->alloc.ar_blockcount) ||
|
|
(k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
|
|
be32_to_cpu(k1->alloc.ar_startblock) <
|
|
be32_to_cpu(k2->alloc.ar_startblock));
|
|
}
|
|
|
|
STATIC int
|
|
xfs_cntbt_recs_inorder(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_rec *r1,
|
|
const union xfs_btree_rec *r2)
|
|
{
|
|
return be32_to_cpu(r1->alloc.ar_blockcount) <
|
|
be32_to_cpu(r2->alloc.ar_blockcount) ||
|
|
(r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
|
|
be32_to_cpu(r1->alloc.ar_startblock) <
|
|
be32_to_cpu(r2->alloc.ar_startblock));
|
|
}
|
|
|
|
STATIC enum xbtree_key_contig
|
|
xfs_allocbt_keys_contiguous(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *key1,
|
|
const union xfs_btree_key *key2,
|
|
const union xfs_btree_key *mask)
|
|
{
|
|
ASSERT(!mask || mask->alloc.ar_startblock);
|
|
|
|
return xbtree_key_contig(be32_to_cpu(key1->alloc.ar_startblock),
|
|
be32_to_cpu(key2->alloc.ar_startblock));
|
|
}
|
|
|
|
const struct xfs_btree_ops xfs_bnobt_ops = {
|
|
.name = "bno",
|
|
.type = XFS_BTREE_TYPE_AG,
|
|
|
|
.rec_len = sizeof(xfs_alloc_rec_t),
|
|
.key_len = sizeof(xfs_alloc_key_t),
|
|
.ptr_len = XFS_BTREE_SHORT_PTR_LEN,
|
|
|
|
.lru_refs = XFS_ALLOC_BTREE_REF,
|
|
.statoff = XFS_STATS_CALC_INDEX(xs_abtb_2),
|
|
.sick_mask = XFS_SICK_AG_BNOBT,
|
|
|
|
.dup_cursor = xfs_bnobt_dup_cursor,
|
|
.set_root = xfs_allocbt_set_root,
|
|
.alloc_block = xfs_allocbt_alloc_block,
|
|
.free_block = xfs_allocbt_free_block,
|
|
.get_minrecs = xfs_allocbt_get_minrecs,
|
|
.get_maxrecs = xfs_allocbt_get_maxrecs,
|
|
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
|
|
.init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec,
|
|
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
|
|
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
|
|
.key_diff = xfs_bnobt_key_diff,
|
|
.buf_ops = &xfs_bnobt_buf_ops,
|
|
.diff_two_keys = xfs_bnobt_diff_two_keys,
|
|
.keys_inorder = xfs_bnobt_keys_inorder,
|
|
.recs_inorder = xfs_bnobt_recs_inorder,
|
|
.keys_contiguous = xfs_allocbt_keys_contiguous,
|
|
};
|
|
|
|
const struct xfs_btree_ops xfs_cntbt_ops = {
|
|
.name = "cnt",
|
|
.type = XFS_BTREE_TYPE_AG,
|
|
|
|
.rec_len = sizeof(xfs_alloc_rec_t),
|
|
.key_len = sizeof(xfs_alloc_key_t),
|
|
.ptr_len = XFS_BTREE_SHORT_PTR_LEN,
|
|
|
|
.lru_refs = XFS_ALLOC_BTREE_REF,
|
|
.statoff = XFS_STATS_CALC_INDEX(xs_abtc_2),
|
|
.sick_mask = XFS_SICK_AG_CNTBT,
|
|
|
|
.dup_cursor = xfs_cntbt_dup_cursor,
|
|
.set_root = xfs_allocbt_set_root,
|
|
.alloc_block = xfs_allocbt_alloc_block,
|
|
.free_block = xfs_allocbt_free_block,
|
|
.get_minrecs = xfs_allocbt_get_minrecs,
|
|
.get_maxrecs = xfs_allocbt_get_maxrecs,
|
|
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
|
|
.init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec,
|
|
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
|
|
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
|
|
.key_diff = xfs_cntbt_key_diff,
|
|
.buf_ops = &xfs_cntbt_buf_ops,
|
|
.diff_two_keys = xfs_cntbt_diff_two_keys,
|
|
.keys_inorder = xfs_cntbt_keys_inorder,
|
|
.recs_inorder = xfs_cntbt_recs_inorder,
|
|
.keys_contiguous = NULL, /* not needed right now */
|
|
};
|
|
|
|
/*
|
|
* Allocate a new bnobt cursor.
|
|
*
|
|
* For staging cursors tp and agbp are NULL.
|
|
*/
|
|
struct xfs_btree_cur *
|
|
xfs_bnobt_init_cursor(
|
|
struct xfs_mount *mp,
|
|
struct xfs_trans *tp,
|
|
struct xfs_buf *agbp,
|
|
struct xfs_perag *pag)
|
|
{
|
|
struct xfs_btree_cur *cur;
|
|
|
|
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops,
|
|
mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
|
|
cur->bc_ag.pag = xfs_perag_hold(pag);
|
|
cur->bc_ag.agbp = agbp;
|
|
if (agbp) {
|
|
struct xfs_agf *agf = agbp->b_addr;
|
|
|
|
cur->bc_nlevels = be32_to_cpu(agf->agf_bno_level);
|
|
}
|
|
return cur;
|
|
}
|
|
|
|
/*
|
|
* Allocate a new cntbt cursor.
|
|
*
|
|
* For staging cursors tp and agbp are NULL.
|
|
*/
|
|
struct xfs_btree_cur *
|
|
xfs_cntbt_init_cursor(
|
|
struct xfs_mount *mp,
|
|
struct xfs_trans *tp,
|
|
struct xfs_buf *agbp,
|
|
struct xfs_perag *pag)
|
|
{
|
|
struct xfs_btree_cur *cur;
|
|
|
|
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops,
|
|
mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
|
|
cur->bc_ag.pag = xfs_perag_hold(pag);
|
|
cur->bc_ag.agbp = agbp;
|
|
if (agbp) {
|
|
struct xfs_agf *agf = agbp->b_addr;
|
|
|
|
cur->bc_nlevels = be32_to_cpu(agf->agf_cnt_level);
|
|
}
|
|
return cur;
|
|
}
|
|
|
|
/*
|
|
* Install a new free space btree root. Caller is responsible for invalidating
|
|
* and freeing the old btree blocks.
|
|
*/
|
|
void
|
|
xfs_allocbt_commit_staged_btree(
|
|
struct xfs_btree_cur *cur,
|
|
struct xfs_trans *tp,
|
|
struct xfs_buf *agbp)
|
|
{
|
|
struct xfs_agf *agf = agbp->b_addr;
|
|
struct xbtree_afakeroot *afake = cur->bc_ag.afake;
|
|
|
|
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
|
|
|
|
if (xfs_btree_is_bno(cur->bc_ops)) {
|
|
agf->agf_bno_root = cpu_to_be32(afake->af_root);
|
|
agf->agf_bno_level = cpu_to_be32(afake->af_levels);
|
|
} else {
|
|
agf->agf_cnt_root = cpu_to_be32(afake->af_root);
|
|
agf->agf_cnt_level = cpu_to_be32(afake->af_levels);
|
|
}
|
|
xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
|
|
|
|
xfs_btree_commit_afakeroot(cur, tp, agbp);
|
|
}
|
|
|
|
/* Calculate number of records in an alloc btree block. */
|
|
static inline unsigned int
|
|
xfs_allocbt_block_maxrecs(
|
|
unsigned int blocklen,
|
|
bool leaf)
|
|
{
|
|
if (leaf)
|
|
return blocklen / sizeof(xfs_alloc_rec_t);
|
|
return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
|
|
}
|
|
|
|
/*
|
|
* Calculate number of records in an alloc btree block.
|
|
*/
|
|
int
|
|
xfs_allocbt_maxrecs(
|
|
struct xfs_mount *mp,
|
|
int blocklen,
|
|
int leaf)
|
|
{
|
|
blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
|
|
return xfs_allocbt_block_maxrecs(blocklen, leaf);
|
|
}
|
|
|
|
/* Free space btrees are at their largest when every other block is free. */
|
|
#define XFS_MAX_FREESP_RECORDS ((XFS_MAX_AG_BLOCKS + 1) / 2)
|
|
|
|
/* Compute the max possible height for free space btrees. */
|
|
unsigned int
|
|
xfs_allocbt_maxlevels_ondisk(void)
|
|
{
|
|
unsigned int minrecs[2];
|
|
unsigned int blocklen;
|
|
|
|
blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN,
|
|
XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN);
|
|
|
|
minrecs[0] = xfs_allocbt_block_maxrecs(blocklen, true) / 2;
|
|
minrecs[1] = xfs_allocbt_block_maxrecs(blocklen, false) / 2;
|
|
|
|
return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_FREESP_RECORDS);
|
|
}
|
|
|
|
/* Calculate the freespace btree size for some records. */
|
|
xfs_extlen_t
|
|
xfs_allocbt_calc_size(
|
|
struct xfs_mount *mp,
|
|
unsigned long long len)
|
|
{
|
|
return xfs_btree_calc_size(mp->m_alloc_mnr, len);
|
|
}
|
|
|
|
int __init
|
|
xfs_allocbt_init_cur_cache(void)
|
|
{
|
|
xfs_allocbt_cur_cache = kmem_cache_create("xfs_bnobt_cur",
|
|
xfs_btree_cur_sizeof(xfs_allocbt_maxlevels_ondisk()),
|
|
0, 0, NULL);
|
|
|
|
if (!xfs_allocbt_cur_cache)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
xfs_allocbt_destroy_cur_cache(void)
|
|
{
|
|
kmem_cache_destroy(xfs_allocbt_cur_cache);
|
|
xfs_allocbt_cur_cache = NULL;
|
|
}
|