xfs: rewrite getbmap using the xfs_iext_* helpers

Currently getbmap uses xfs_bmapi_read to query the extent map, and then
fixes up various bits that are eventually reported to userspace.

This patch instead rewrites it to use xfs_iext_lookup_extent and
xfs_iext_get_extent to iteratively process the extent map.  This not
only avoids the need to allocate a map for the returned xfs_bmbt_irec
structures but also greatly simplified the code.

There are two intentional behavior changes compared to the old code:

 - the current code reports unwritten extents that don't directly border
   a written one as unwritten even when not passing the BMV_IF_PREALLOC
   option, contrary to the documentation.  The new code requires the
   BMV_IF_PREALLOC flag to report the unwrittent extent bit.
 - The new code does never merges consecutive extents, unlike the old
   code that sometimes does it based on the boundaries of the
   xfs_bmapi_read calls.  Note that the extent merging behavior was
   entirely undocumented.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
This commit is contained in:
Christoph Hellwig 2017-10-17 14:16:18 -07:00 committed by Darrick J. Wong
parent 15f859ae5c
commit abbf9e8a45

View File

@ -405,127 +405,105 @@ xfs_bmap_count_blocks(
return 0; return 0;
} }
/* static int
* returns 1 for success, 0 if we failed to map the extent. xfs_getbmap_report_one(
*/ struct xfs_inode *ip,
STATIC int struct getbmapx *bmv,
xfs_getbmapx_fix_eof_hole( struct getbmapx *out,
xfs_inode_t *ip, /* xfs incore inode pointer */ int64_t bmv_end,
int whichfork, struct xfs_bmbt_irec *got)
struct getbmapx *out, /* output structure */
int prealloced, /* this is a file with
* preallocated data space */
int64_t end, /* last block requested */
xfs_fsblock_t startblock,
bool moretocome)
{ {
int64_t fixlen; struct getbmapx *p = out + bmv->bmv_entries;
xfs_mount_t *mp; /* file system mount point */ bool shared = false, trimmed = false;
xfs_ifork_t *ifp; /* inode fork pointer */ int error;
xfs_extnum_t lastx; /* last extent pointer */
xfs_fileoff_t fileblock;
if (startblock == HOLESTARTBLOCK) { error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
mp = ip->i_mount;
out->bmv_block = -1;
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
fixlen -= out->bmv_offset;
if (prealloced && out->bmv_offset + out->bmv_length == end) {
/* Came to hole at EOF. Trim it. */
if (fixlen <= 0)
return 0;
out->bmv_length = fixlen;
}
} else {
if (startblock == DELAYSTARTBLOCK)
out->bmv_block = -2;
else
out->bmv_block = xfs_fsb_to_db(ip, startblock);
fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
ifp = XFS_IFORK_PTR(ip, whichfork);
if (!moretocome &&
xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
(lastx == xfs_iext_count(ifp) - 1))
out->bmv_oflags |= BMV_OF_LAST;
}
return 1;
}
/* Adjust the reported bmap around shared/unshared extent transitions. */
STATIC int
xfs_getbmap_adjust_shared(
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *map,
struct getbmapx *out,
struct xfs_bmbt_irec *next_map)
{
struct xfs_mount *mp = ip->i_mount;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
xfs_agblock_t ebno;
xfs_extlen_t elen;
xfs_extlen_t nlen;
int error;
next_map->br_startblock = NULLFSBLOCK;
next_map->br_startoff = NULLFILEOFF;
next_map->br_blockcount = 0;
/* Only written data blocks can be shared. */
if (!xfs_is_reflink_inode(ip) ||
whichfork != XFS_DATA_FORK ||
!xfs_bmap_is_real_extent(map))
return 0;
agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
map->br_blockcount, &ebno, &elen, true);
if (error) if (error)
return error; return error;
if (ebno == NULLAGBLOCK) { if (isnullstartblock(got->br_startblock) ||
/* No shared blocks at all. */ got->br_startblock == DELAYSTARTBLOCK) {
return 0;
} else if (agbno == ebno) {
/* /*
* Shared extent at (agbno, elen). Shrink the reported * Delalloc extents that start beyond EOF can occur due to
* extent length and prepare to move the start of map[i] * speculative EOF allocation when the delalloc extent is larger
* to agbno+elen, with the aim of (re)formatting the new * than the largest freespace extent at conversion time. These
* map[i] the next time through the inner loop. * extents cannot be converted by data writeback, so can exist
* here even if we are not supposed to be finding delalloc
* extents.
*/ */
out->bmv_length = XFS_FSB_TO_BB(mp, elen); if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
out->bmv_oflags |= BMV_OF_SHARED; ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
if (elen != map->br_blockcount) {
*next_map = *map; p->bmv_oflags |= BMV_OF_DELALLOC;
next_map->br_startblock += elen; p->bmv_block = -2;
next_map->br_startoff += elen;
next_map->br_blockcount -= elen;
}
map->br_blockcount -= elen;
} else { } else {
/* p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
* There's an unshared extent (agbno, ebno - agbno)
* followed by shared extent at (ebno, elen). Shrink
* the reported extent length to cover only the unshared
* extent and prepare to move up the start of map[i] to
* ebno, with the aim of (re)formatting the new map[i]
* the next time through the inner loop.
*/
*next_map = *map;
nlen = ebno - agbno;
out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
next_map->br_startblock += nlen;
next_map->br_startoff += nlen;
next_map->br_blockcount -= nlen;
map->br_blockcount -= nlen;
} }
if (got->br_state == XFS_EXT_UNWRITTEN &&
(bmv->bmv_iflags & BMV_IF_PREALLOC))
p->bmv_oflags |= BMV_OF_PREALLOC;
if (shared)
p->bmv_oflags |= BMV_OF_SHARED;
p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
bmv->bmv_offset = p->bmv_offset + p->bmv_length;
bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
bmv->bmv_entries++;
return 0; return 0;
} }
static void
xfs_getbmap_report_hole(
struct xfs_inode *ip,
struct getbmapx *bmv,
struct getbmapx *out,
int64_t bmv_end,
xfs_fileoff_t bno,
xfs_fileoff_t end)
{
struct getbmapx *p = out + bmv->bmv_entries;
if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
return;
p->bmv_block = -1;
p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
bmv->bmv_offset = p->bmv_offset + p->bmv_length;
bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
bmv->bmv_entries++;
}
static inline bool
xfs_getbmap_full(
struct getbmapx *bmv)
{
return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}
static bool
xfs_getbmap_next_rec(
struct xfs_bmbt_irec *rec,
xfs_fileoff_t total_end)
{
xfs_fileoff_t end = rec->br_startoff + rec->br_blockcount;
if (end == total_end)
return false;
rec->br_startoff += rec->br_blockcount;
if (!isnullstartblock(rec->br_startblock) &&
rec->br_startblock != DELAYSTARTBLOCK)
rec->br_startblock += rec->br_blockcount;
rec->br_blockcount = total_end - end;
return true;
}
/* /*
* Get inode's extents as described in bmv, and format for output. * Get inode's extents as described in bmv, and format for output.
* Calls formatter to fill the user's buffer until all extents * Calls formatter to fill the user's buffer until all extents
@ -540,119 +518,72 @@ xfs_getbmap(
xfs_bmap_format_t formatter, /* format to user */ xfs_bmap_format_t formatter, /* format to user */
void *arg) /* formatter arg */ void *arg) /* formatter arg */
{ {
int64_t bmvend; /* last block requested */ struct xfs_mount *mp = ip->i_mount;
int error = 0; /* return value */ int iflags = bmv->bmv_iflags;
int64_t fixlen; /* length for -1 case */ int whichfork, lock, i, error = 0;
int i; /* extent number */ int64_t bmv_end, max_len;
int lock; /* lock state */ xfs_fileoff_t bno, first_bno;
xfs_bmbt_irec_t *map; /* buffer for user's data */ struct xfs_ifork *ifp;
xfs_mount_t *mp; /* file system mount point */ struct getbmapx *out;
int nex; /* # of user extents can do */ struct xfs_bmbt_irec got, rec;
int subnex; /* # of bmapi's can do */ xfs_filblks_t len;
int nmap; /* number of map entries */ xfs_extnum_t idx;
struct getbmapx *out; /* output structure */
int whichfork; /* data or attr fork */
int prealloced; /* this is a file with
* preallocated data space */
int iflags; /* interface flags */
int bmapi_flags; /* flags for xfs_bmapi */
int cur_ext = 0;
struct xfs_bmbt_irec inject_map;
mp = ip->i_mount;
iflags = bmv->bmv_iflags;
#ifndef DEBUG #ifndef DEBUG
/* Only allow CoW fork queries if we're debugging. */ /* Only allow CoW fork queries if we're debugging. */
if (iflags & BMV_IF_COWFORK) if (iflags & BMV_IF_COWFORK)
return -EINVAL; return -EINVAL;
#endif #endif
if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
return -EINVAL; return -EINVAL;
if (bmv->bmv_count <= 1)
return -EINVAL;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
return -ENOMEM;
if (bmv->bmv_length < -1)
return -EINVAL;
bmv->bmv_entries = 0;
if (bmv->bmv_length == 0)
return 0;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
return -ENOMEM;
if (iflags & BMV_IF_ATTRFORK) if (iflags & BMV_IF_ATTRFORK)
whichfork = XFS_ATTR_FORK; whichfork = XFS_ATTR_FORK;
else if (iflags & BMV_IF_COWFORK) else if (iflags & BMV_IF_COWFORK)
whichfork = XFS_COW_FORK; whichfork = XFS_COW_FORK;
else else
whichfork = XFS_DATA_FORK; whichfork = XFS_DATA_FORK;
ifp = XFS_IFORK_PTR(ip, whichfork);
switch (whichfork) {
case XFS_ATTR_FORK:
if (XFS_IFORK_Q(ip)) {
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
return -EINVAL;
} else if (unlikely(
ip->i_d.di_aformat != 0 &&
ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
ip->i_mount);
return -EFSCORRUPTED;
}
prealloced = 0;
fixlen = 1LL << 32;
break;
case XFS_COW_FORK:
if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
return -EINVAL;
if (xfs_get_cowextsz_hint(ip)) {
prealloced = 1;
fixlen = mp->m_super->s_maxbytes;
} else {
prealloced = 0;
fixlen = XFS_ISIZE(ip);
}
break;
default:
/* Local format data forks report no extents. */
if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
bmv->bmv_entries = 0;
return 0;
}
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
return -EINVAL;
if (xfs_get_extsz_hint(ip) ||
ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
prealloced = 1;
fixlen = mp->m_super->s_maxbytes;
} else {
prealloced = 0;
fixlen = XFS_ISIZE(ip);
}
break;
}
if (bmv->bmv_length == -1) {
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
bmv->bmv_length =
max_t(int64_t, fixlen - bmv->bmv_offset, 0);
} else if (bmv->bmv_length == 0) {
bmv->bmv_entries = 0;
return 0;
} else if (bmv->bmv_length < 0) {
return -EINVAL;
}
nex = bmv->bmv_count - 1;
if (nex <= 0)
return -EINVAL;
bmvend = bmv->bmv_offset + bmv->bmv_length;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
return -ENOMEM;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
return -ENOMEM;
xfs_ilock(ip, XFS_IOLOCK_SHARED); xfs_ilock(ip, XFS_IOLOCK_SHARED);
switch (whichfork) { switch (whichfork) {
case XFS_ATTR_FORK:
if (!XFS_IFORK_Q(ip))
goto out_unlock_iolock;
max_len = 1LL << 32;
lock = xfs_ilock_attr_map_shared(ip);
break;
case XFS_COW_FORK:
/* No CoW fork? Just return */
if (!ifp)
goto out_unlock_iolock;
if (xfs_get_cowextsz_hint(ip))
max_len = mp->m_super->s_maxbytes;
else
max_len = XFS_ISIZE(ip);
lock = XFS_ILOCK_SHARED;
xfs_ilock(ip, lock);
break;
case XFS_DATA_FORK: case XFS_DATA_FORK:
if (!(iflags & BMV_IF_DELALLOC) && if (!(iflags & BMV_IF_DELALLOC) &&
(ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
@ -670,147 +601,107 @@ xfs_getbmap(
*/ */
} }
if (xfs_get_extsz_hint(ip) ||
(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
max_len = mp->m_super->s_maxbytes;
else
max_len = XFS_ISIZE(ip);
lock = xfs_ilock_data_map_shared(ip); lock = xfs_ilock_data_map_shared(ip);
break; break;
case XFS_COW_FORK:
lock = XFS_ILOCK_SHARED;
xfs_ilock(ip, lock);
break;
case XFS_ATTR_FORK:
lock = xfs_ilock_attr_map_shared(ip);
break;
} }
/* switch (XFS_IFORK_FORMAT(ip, whichfork)) {
* Don't let nex be bigger than the number of extents case XFS_DINODE_FMT_EXTENTS:
* we can have assuming alternating holes and real extents. case XFS_DINODE_FMT_BTREE:
*/ break;
if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) case XFS_DINODE_FMT_LOCAL:
nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; /* Local format inode forks report no extents. */
goto out_unlock_ilock;
bmapi_flags = xfs_bmapi_aflag(whichfork); default:
if (!(iflags & BMV_IF_PREALLOC)) error = -EINVAL;
bmapi_flags |= XFS_BMAPI_IGSTATE;
/*
* Allocate enough space to handle "subnex" maps at a time.
*/
error = -ENOMEM;
subnex = 16;
map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
if (!map)
goto out_unlock_ilock; goto out_unlock_ilock;
bmv->bmv_entries = 0;
if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
(whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
error = 0;
goto out_free_map;
} }
do { if (bmv->bmv_length == -1) {
nmap = (nex> subnex) ? subnex : nex; max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
XFS_BB_TO_FSB(mp, bmv->bmv_length), }
map, &nmap, bmapi_flags);
bmv_end = bmv->bmv_offset + bmv->bmv_length;
first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(NULL, ip, whichfork);
if (error) if (error)
goto out_free_map; goto out_unlock_ilock;
ASSERT(nmap <= subnex); }
for (i = 0; i < nmap && bmv->bmv_length && if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
cur_ext < bmv->bmv_count - 1; i++) { /*
out[cur_ext].bmv_oflags = 0; * Report a whole-file hole if the delalloc flag is set to
if (map[i].br_state == XFS_EXT_UNWRITTEN) * stay compatible with the old implementation.
out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; */
else if (map[i].br_startblock == DELAYSTARTBLOCK) if (iflags & BMV_IF_DELALLOC)
out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
out[cur_ext].bmv_offset = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
XFS_FSB_TO_BB(mp, map[i].br_startoff); goto out_unlock_ilock;
out[cur_ext].bmv_length = }
XFS_FSB_TO_BB(mp, map[i].br_blockcount);
out[cur_ext].bmv_unused1 = 0;
out[cur_ext].bmv_unused2 = 0;
/* while (!xfs_getbmap_full(bmv)) {
* delayed allocation extents that start beyond EOF can xfs_trim_extent(&got, first_bno, len);
* occur due to speculative EOF allocation when the
* delalloc extent is larger than the largest freespace
* extent at conversion time. These extents cannot be
* converted by data writeback, so can exist here even
* if we are not supposed to be finding delalloc
* extents.
*/
if (map[i].br_startblock == DELAYSTARTBLOCK &&
map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
ASSERT((iflags & BMV_IF_DELALLOC) != 0);
if (map[i].br_startblock == HOLESTARTBLOCK && /*
whichfork == XFS_ATTR_FORK) { * Report an entry for a hole if this extent doesn't directly
/* came to the end of attribute fork */ * follow the previous one.
out[cur_ext].bmv_oflags |= BMV_OF_LAST; */
goto out_free_map; if (got.br_startoff > bno) {
} xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
got.br_startoff);
/* Is this a shared block? */ if (xfs_getbmap_full(bmv))
error = xfs_getbmap_adjust_shared(ip, whichfork, break;
&map[i], &out[cur_ext], &inject_map);
if (error)
goto out_free_map;
if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
&out[cur_ext], prealloced, bmvend,
map[i].br_startblock,
inject_map.br_startblock != NULLFSBLOCK))
goto out_free_map;
bmv->bmv_offset =
out[cur_ext].bmv_offset +
out[cur_ext].bmv_length;
bmv->bmv_length =
max_t(int64_t, 0, bmvend - bmv->bmv_offset);
/*
* In case we don't want to return the hole,
* don't increase cur_ext so that we can reuse
* it in the next loop.
*/
if ((iflags & BMV_IF_NO_HOLES) &&
map[i].br_startblock == HOLESTARTBLOCK) {
memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
continue;
}
/*
* In order to report shared extents accurately,
* we report each distinct shared/unshared part
* of a single bmbt record using multiple bmap
* extents. To make that happen, we iterate the
* same map array item multiple times, each
* time trimming out the subextent that we just
* reported.
*
* Because of this, we must check the out array
* index (cur_ext) directly against bmv_count-1
* to avoid overflows.
*/
if (inject_map.br_startblock != NULLFSBLOCK) {
map[i] = inject_map;
i--;
}
bmv->bmv_entries++;
cur_ext++;
} }
} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
out_free_map: /*
kmem_free(map); * In order to report shared extents accurately, we report each
out_unlock_ilock: * distinct shared / unshared part of a single bmbt record with
* an individual getbmapx record.
*/
bno = got.br_startoff + got.br_blockcount;
rec = got;
do {
error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
&rec);
if (error || xfs_getbmap_full(bmv))
goto out_unlock_ilock;
} while (xfs_getbmap_next_rec(&rec, bno));
if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
if (whichfork != XFS_ATTR_FORK && bno < end &&
!xfs_getbmap_full(bmv)) {
xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
bno, end);
}
break;
}
if (bno >= first_bno + len)
break;
}
out_unlock_ilock:
xfs_iunlock(ip, lock); xfs_iunlock(ip, lock);
out_unlock_iolock: out_unlock_iolock:
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
for (i = 0; i < cur_ext; i++) { for (i = 0; i < bmv->bmv_entries; i++) {
/* format results & advance arg */ /* format results & advance arg */
error = formatter(&arg, &out[i]); error = formatter(&arg, &out[i]);
if (error) if (error)