xfs: updates for 4.10-rc1

Contained in this update:
 - DAX PMD vaults via iomap infrastructure
 - Direct-io support in iomap infrastructure
 - removal of now-redundant XFS inode iolock, replaced with VFS i_rwsem
 - synchronisation with fixes and changes in userspace libxfs code
 - extent tree lookup helpers
 - lots of little corruption detection improvements to verifiers
 - optimised CRC calculations
 - faster buffer cache lookups
 - deprecation of barrier/nobarrier mount options - we always use
   REQ_FUA/REQ_FLUSH where appropriate for data integrity now
 - cleanups to speculative preallocation
 - miscellaneous minor bug fixes and cleanups
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJYUgqdAAoJEK3oKUf0dfodQgsP/1dJ4qUc6cRk8kL+f10FoIek
 oFzdViRHZj8cROGe2n2YTBJtPa9KjU5DNHnxaxWZBN4ZpItp/uN1sAQhgtNQ4/cN
 C3JF6B/+/dIbNSbd7DwvSl0dMWknzmrB+Myfs2ZPpMA1S4GInk1MOJSj7AQdYAvJ
 dS0dQWAuIB20cahwuGA4y7zUniYL1IcF/BH8hlmzpcUNUoJ9AkR1hTg5/aVfmga3
 w2p1vZyT2E4xs/Ff4FYW5MzPGxLVQMZVNIAXAcJl+c61z46ndXqidSmVHGvc+Tlt
 ouxftHy/7KqowZlCFss1pSXg9HlXHhjS+iLbZerfcjO2qldriZS+QqQyASmQzPAz
 +PpnMfVOj+yjsXKyIHWuS1G35aV16pPWwdA0ECeU6yv9iZ7tSz5rvSrsPZPLFz4x
 RVhcKbmXR3y8DugkmtznU5ozxPt5hbbstEV3leCzxJpZu5reRJThUW7nYkSd0CEJ
 ZyT/GP6Aq/MM8O/hOgVutAH409dsrYok8m/lq1J7VbNUt8inylcsMWsBeX/0/AHY
 aC7I2Vx8bnbfL+C8wYKYhuShOGSch93O5hDUXdH2K/Sm5cK4y2asWge6MfFsS6Lu
 waVYwd5aYBlNbzkvUMm2I5EV4cCCR3YwWYwfBEP7kPYUDxN14huOz6lVXnQPDLQ1
 qsV1aNfK9PPiw6Fcaop0
 =HwDG
 -----END PGP SIGNATURE-----

Merge tag 'xfs-for-linus-4.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs updates from Dave Chinner:
 "There is quite a varied bunch of stuff in this update, and some of it
  you will have already merged through the ext4 tree which imported the
  dax-4.10-iomap-pmd topic branch from the XFS tree.

  There is also a new direct IO implementation that uses the iomap
  infrastructure. It's much simpler, faster, and has lower IO latency
  than the existing direct IO infrastructure.

  Summary:
   - DAX PMD faults via iomap infrastructure
   - Direct-io support in iomap infrastructure
   - removal of now-redundant XFS inode iolock, replaced with VFS
     i_rwsem
   - synchronisation with fixes and changes in userspace libxfs code
   - extent tree lookup helpers
   - lots of little corruption detection improvements to verifiers
   - optimised CRC calculations
   - faster buffer cache lookups
   - deprecation of barrier/nobarrier mount options - we always use
     REQ_FUA/REQ_FLUSH where appropriate for data integrity now
   - cleanups to speculative preallocation
   - miscellaneous minor bug fixes and cleanups"

* tag 'xfs-for-linus-4.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (63 commits)
  xfs: nuke unused tracepoint definitions
  xfs: use GPF_NOFS when allocating btree cursors
  xfs: use xfs_vn_setattr_size to check on new size
  xfs: deprecate barrier/nobarrier mount option
  xfs: Always flush caches when integrity is required
  xfs: ignore leaf attr ichdr.count in verifier during log replay
  xfs: use rhashtable to track buffer cache
  xfs: optimise CRC updates
  xfs: make xfs btree stats less huge
  xfs: don't cap maximum dedupe request length
  xfs: don't allow di_size with high bit set
  xfs: error out if trying to add attrs and anextents > 0
  xfs: don't crash if reading a directory results in an unexpected hole
  xfs: complain if we don't get nextents bmap records
  xfs: check for bogus values in btree block headers
  xfs: forbid AG btrees with level == 0
  xfs: several xattr functions can be void
  xfs: handle cow fork in xfs_bmap_trace_exlist
  xfs: pass state not whichfork to trace_xfs_extlist
  xfs: Move AGI buffer type setting to xfs_read_agi
  ...
This commit is contained in:
Linus Torvalds 2016-12-14 21:35:31 -08:00
commit 5cc60aeedf
67 changed files with 1357 additions and 1464 deletions

View File

@ -51,13 +51,6 @@ default behaviour.
CRC enabled filesystems always use the attr2 format, and so
will reject the noattr2 mount option if it is set.
barrier (*)
nobarrier
Enables/disables the use of block layer write barriers for
writes into the journal and for data integrity operations.
This allows for drive level write caching to be enabled, for
devices that support write barriers.
discard
nodiscard (*)
Enable/disable the issuing of commands to let the block
@ -228,7 +221,10 @@ default behaviour.
Deprecated Mount Options
========================
None at present.
Name Removal Schedule
---- ----------------
barrier no earlier than v4.15
nobarrier no earlier than v4.15
Removed Mount Options

View File

@ -554,7 +554,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
* filesystems that don't need it and also allows us to create the workqueue
* late enough so the we can include s_id in the name of the workqueue.
*/
static int sb_init_dio_done_wq(struct super_block *sb)
int sb_init_dio_done_wq(struct super_block *sb)
{
struct workqueue_struct *old;
struct workqueue_struct *wq = alloc_workqueue("dio/%s",

View File

@ -184,3 +184,6 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap_ops *ops, void *data,
iomap_actor_t actor);
/* direct-io.c: */
int sb_init_dio_done_wq(struct super_block *sb);

View File

@ -24,6 +24,7 @@
#include <linux/uio.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/dax.h>
#include "internal.h"
@ -584,3 +585,375 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
return 0;
}
EXPORT_SYMBOL_GPL(iomap_fiemap);
/*
* Private flags for iomap_dio, must not overlap with the public ones in
* iomap.h:
*/
#define IOMAP_DIO_WRITE (1 << 30)
#define IOMAP_DIO_DIRTY (1 << 31)
struct iomap_dio {
struct kiocb *iocb;
iomap_dio_end_io_t *end_io;
loff_t i_size;
loff_t size;
atomic_t ref;
unsigned flags;
int error;
union {
/* used during submission and for synchronous completion: */
struct {
struct iov_iter *iter;
struct task_struct *waiter;
struct request_queue *last_queue;
blk_qc_t cookie;
} submit;
/* used for aio completion: */
struct {
struct work_struct work;
} aio;
};
};
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
struct kiocb *iocb = dio->iocb;
ssize_t ret;
if (dio->end_io) {
ret = dio->end_io(iocb,
dio->error ? dio->error : dio->size,
dio->flags);
} else {
ret = dio->error;
}
if (likely(!ret)) {
ret = dio->size;
/* check for short read */
if (iocb->ki_pos + ret > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
ret = dio->i_size - iocb->ki_pos;
iocb->ki_pos += ret;
}
inode_dio_end(file_inode(iocb->ki_filp));
kfree(dio);
return ret;
}
static void iomap_dio_complete_work(struct work_struct *work)
{
struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
struct kiocb *iocb = dio->iocb;
bool is_write = (dio->flags & IOMAP_DIO_WRITE);
ssize_t ret;
ret = iomap_dio_complete(dio);
if (is_write && ret > 0)
ret = generic_write_sync(iocb, ret);
iocb->ki_complete(iocb, ret, 0);
}
/*
* Set an error in the dio if none is set yet. We have to use cmpxchg
* as the submission context and the completion context(s) can race to
* update the error.
*/
static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
{
cmpxchg(&dio->error, 0, ret);
}
static void iomap_dio_bio_end_io(struct bio *bio)
{
struct iomap_dio *dio = bio->bi_private;
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
if (bio->bi_error)
iomap_dio_set_error(dio, bio->bi_error);
if (atomic_dec_and_test(&dio->ref)) {
if (is_sync_kiocb(dio->iocb)) {
struct task_struct *waiter = dio->submit.waiter;
WRITE_ONCE(dio->submit.waiter, NULL);
wake_up_process(waiter);
} else if (dio->flags & IOMAP_DIO_WRITE) {
struct inode *inode = file_inode(dio->iocb->ki_filp);
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
} else {
iomap_dio_complete_work(&dio->aio.work);
}
}
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
struct bio_vec *bvec;
int i;
bio_for_each_segment_all(bvec, bio, i)
put_page(bvec->bv_page);
bio_put(bio);
}
}
static blk_qc_t
iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
unsigned len)
{
struct page *page = ZERO_PAGE(0);
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 1);
bio->bi_bdev = iomap->bdev;
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
get_page(page);
if (bio_add_page(bio, page, len, 0) != len)
BUG();
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
atomic_inc(&dio->ref);
return submit_bio(bio);
}
static loff_t
iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap)
{
struct iomap_dio *dio = data;
unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
unsigned fs_block_size = (1 << inode->i_blkbits), pad;
unsigned align = iov_iter_alignment(dio->submit.iter);
struct iov_iter iter;
struct bio *bio;
bool need_zeroout = false;
int nr_pages, ret;
if ((pos | length | align) & ((1 << blkbits) - 1))
return -EINVAL;
switch (iomap->type) {
case IOMAP_HOLE:
if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
return -EIO;
/*FALLTHRU*/
case IOMAP_UNWRITTEN:
if (!(dio->flags & IOMAP_DIO_WRITE)) {
iov_iter_zero(length, dio->submit.iter);
dio->size += length;
return length;
}
dio->flags |= IOMAP_DIO_UNWRITTEN;
need_zeroout = true;
break;
case IOMAP_MAPPED:
if (iomap->flags & IOMAP_F_SHARED)
dio->flags |= IOMAP_DIO_COW;
if (iomap->flags & IOMAP_F_NEW)
need_zeroout = true;
break;
default:
WARN_ON_ONCE(1);
return -EIO;
}
/*
* Operate on a partial iter trimmed to the extent we were called for.
* We'll update the iter in the dio once we're done with this extent.
*/
iter = *dio->submit.iter;
iov_iter_truncate(&iter, length);
nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
if (nr_pages <= 0)
return nr_pages;
if (need_zeroout) {
/* zero out from the start of the block to the write offset */
pad = pos & (fs_block_size - 1);
if (pad)
iomap_dio_zero(dio, iomap, pos - pad, pad);
}
do {
if (dio->error)
return 0;
bio = bio_alloc(GFP_KERNEL, nr_pages);
bio->bi_bdev = iomap->bdev;
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
ret = bio_iov_iter_get_pages(bio, &iter);
if (unlikely(ret)) {
bio_put(bio);
return ret;
}
if (dio->flags & IOMAP_DIO_WRITE) {
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
task_io_account_write(bio->bi_iter.bi_size);
} else {
bio_set_op_attrs(bio, REQ_OP_READ, 0);
if (dio->flags & IOMAP_DIO_DIRTY)
bio_set_pages_dirty(bio);
}
dio->size += bio->bi_iter.bi_size;
pos += bio->bi_iter.bi_size;
nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
atomic_inc(&dio->ref);
dio->submit.last_queue = bdev_get_queue(iomap->bdev);
dio->submit.cookie = submit_bio(bio);
} while (nr_pages);
if (need_zeroout) {
/* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1);
if (pad)
iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
}
iov_iter_advance(dio->submit.iter, length);
return length;
}
ssize_t
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops,
iomap_dio_end_io_t end_io)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
size_t count = iov_iter_count(iter);
loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
unsigned int flags = IOMAP_DIRECT;
struct blk_plug plug;
struct iomap_dio *dio;
lockdep_assert_held(&inode->i_rwsem);
if (!count)
return 0;
dio = kmalloc(sizeof(*dio), GFP_KERNEL);
if (!dio)
return -ENOMEM;
dio->iocb = iocb;
atomic_set(&dio->ref, 1);
dio->size = 0;
dio->i_size = i_size_read(inode);
dio->end_io = end_io;
dio->error = 0;
dio->flags = 0;
dio->submit.iter = iter;
if (is_sync_kiocb(iocb)) {
dio->submit.waiter = current;
dio->submit.cookie = BLK_QC_T_NONE;
dio->submit.last_queue = NULL;
}
if (iov_iter_rw(iter) == READ) {
if (pos >= dio->i_size)
goto out_free_dio;
if (iter->type == ITER_IOVEC)
dio->flags |= IOMAP_DIO_DIRTY;
} else {
dio->flags |= IOMAP_DIO_WRITE;
flags |= IOMAP_WRITE;
}
if (mapping->nrpages) {
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
if (ret)
goto out_free_dio;
ret = invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
inode_dio_begin(inode);
blk_start_plug(&plug);
do {
ret = iomap_apply(inode, pos, count, flags, ops, dio,
iomap_dio_actor);
if (ret <= 0) {
/* magic error code to fall back to buffered I/O */
if (ret == -ENOTBLK)
ret = 0;
break;
}
pos += ret;
} while ((count = iov_iter_count(iter)) > 0);
blk_finish_plug(&plug);
if (ret < 0)
iomap_dio_set_error(dio, ret);
if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
!inode->i_sb->s_dio_done_wq) {
ret = sb_init_dio_done_wq(inode->i_sb);
if (ret < 0)
iomap_dio_set_error(dio, ret);
}
if (!atomic_dec_and_test(&dio->ref)) {
if (!is_sync_kiocb(iocb))
return -EIOCBQUEUED;
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(dio->submit.waiter))
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!dio->submit.last_queue ||
!blk_mq_poll(dio->submit.last_queue,
dio->submit.cookie))
io_schedule();
}
__set_current_state(TASK_RUNNING);
}
/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
* of the write was an mmap'ed region of the file we're writing. Either
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
*/
if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
ret = invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
}
return iomap_dio_complete(dio);
out_free_dio:
kfree(dio);
return ret;
}
EXPORT_SYMBOL_GPL(iomap_dio_rw);

View File

@ -2455,12 +2455,15 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
return false;
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
return false;
if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
(be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
return false;
/*
@ -2477,7 +2480,8 @@ xfs_agf_verify(
return false;
if (xfs_sb_version_hasreflink(&mp->m_sb) &&
be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)
(be32_to_cpu(agf->agf_refcount_level) < 1 ||
be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
return false;
return true;;

View File

@ -421,13 +421,17 @@ xfs_allocbt_init_cursor(
ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
cur->bc_tp = tp;
cur->bc_mp = mp;
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_allocbt_ops;
if (btnum == XFS_BTNUM_BNO)
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
else
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
if (btnum == XFS_BTNUM_CNT) {
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);

View File

@ -253,6 +253,7 @@ xfs_attr3_leaf_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_perag *pag = bp->b_pag;
struct xfs_attr3_icleaf_hdr ichdr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
@ -273,7 +274,12 @@ xfs_attr3_leaf_verify(
if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
return false;
}
if (ichdr.count == 0)
/*
* In recovery there is a transient state where count == 0 is valid
* because we may have transitioned an empty shortform attr to a leaf
* if the attr didn't fit in shortform.
*/
if (pag && pag->pagf_init && ichdr.count == 0)
return false;
/* XXX: need to range check rest of attr header values */

View File

@ -51,7 +51,7 @@ int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
int xfs_attr_shortform_remove(struct xfs_da_args *args);
int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
/*
@ -77,7 +77,7 @@ int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
void xfs_attr3_leaf_list_int(struct xfs_buf *bp,
struct xfs_attr_list_context *context);
/*

View File

@ -49,6 +49,8 @@
#include "xfs_rmap.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
#include "xfs_rmap_btree.h"
#include "xfs_icache.h"
kmem_zone_t *xfs_bmap_free_item_zone;
@ -190,8 +192,12 @@ xfs_bmap_worst_indlen(
int maxrecs; /* maximum record count at this level */
xfs_mount_t *mp; /* mount structure */
xfs_filblks_t rval; /* return value */
xfs_filblks_t orig_len;
mp = ip->i_mount;
/* Calculate the worst-case size of the bmbt. */
orig_len = len;
maxrecs = mp->m_bmap_dmxr[0];
for (level = 0, rval = 0;
level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@ -199,12 +205,20 @@ xfs_bmap_worst_indlen(
len += maxrecs - 1;
do_div(len, maxrecs);
rval += len;
if (len == 1)
return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
if (len == 1) {
rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
level - 1;
break;
}
if (level == 0)
maxrecs = mp->m_bmap_dmxr[1];
}
/* Calculate the worst-case size of the rmapbt. */
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
mp->m_rmap_maxlevels;
return rval;
}
@ -504,7 +518,7 @@ void
xfs_bmap_trace_exlist(
xfs_inode_t *ip, /* incore inode pointer */
xfs_extnum_t cnt, /* count of entries in the list */
int whichfork, /* data or attr fork */
int whichfork, /* data or attr or cow fork */
unsigned long caller_ip)
{
xfs_extnum_t idx; /* extent record index */
@ -513,11 +527,13 @@ xfs_bmap_trace_exlist(
if (whichfork == XFS_ATTR_FORK)
state |= BMAP_ATTRFORK;
else if (whichfork == XFS_COW_FORK)
state |= BMAP_COWFORK;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
ASSERT(cnt == xfs_iext_count(ifp));
for (idx = 0; idx < cnt; idx++)
trace_xfs_extlist(ip, idx, whichfork, caller_ip);
trace_xfs_extlist(ip, idx, state, caller_ip);
}
/*
@ -811,7 +827,7 @@ xfs_bmap_extents_to_btree(
XFS_BTREE_LONG_PTRS);
arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
for (cnt = i = 0; i < nextents; i++) {
ep = xfs_iext_get_ext(ifp, i);
if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
@ -1137,6 +1153,10 @@ xfs_bmap_add_attrfork(
goto trans_cancel;
if (XFS_IFORK_Q(ip))
goto trans_cancel;
if (ip->i_d.di_anextents != 0) {
error = -EFSCORRUPTED;
goto trans_cancel;
}
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
/*
* For inodes coming from pre-6.2 filesystems.
@ -1144,7 +1164,6 @@ xfs_bmap_add_attrfork(
ASSERT(ip->i_d.di_aformat == 0);
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
}
ASSERT(ip->i_d.di_anextents == 0);
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@ -1296,7 +1315,7 @@ xfs_bmap_read_extents(
/*
* Here with bp and block set to the leftmost leaf node in the tree.
*/
room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
room = xfs_iext_count(ifp);
i = 0;
/*
* Loop over all leaf nodes. Copy information to the extent records.
@ -1361,8 +1380,9 @@ xfs_bmap_read_extents(
return error;
block = XFS_BUF_TO_BLOCK(bp);
}
ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
if (i != XFS_IFORK_NEXTENTS(ip, whichfork))
return -EFSCORRUPTED;
ASSERT(i == xfs_iext_count(ifp));
XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
return 0;
error0:
@ -1370,97 +1390,6 @@ xfs_bmap_read_extents(
return -EFSCORRUPTED;
}
/*
* Search the extent records for the entry containing block bno.
* If bno lies in a hole, point to the next entry. If bno lies
* past eof, *eofp will be set, and *prevp will contain the last
* entry (null if none). Else, *lastxp will be set to the index
* of the found entry; *gotp will contain the entry.
*/
STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
xfs_bmap_search_multi_extents(
xfs_ifork_t *ifp, /* inode fork pointer */
xfs_fileoff_t bno, /* block number searched for */
int *eofp, /* out: end of file found */
xfs_extnum_t *lastxp, /* out: last extent index */
xfs_bmbt_irec_t *gotp, /* out: extent entry found */
xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
{
xfs_bmbt_rec_host_t *ep; /* extent record pointer */
xfs_extnum_t lastx; /* last extent index */
/*
* Initialize the extent entry structure to catch access to
* uninitialized br_startblock field.
*/
gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
gotp->br_state = XFS_EXT_INVALID;
gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
prevp->br_startoff = NULLFILEOFF;
ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
if (lastx > 0) {
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
}
if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
xfs_bmbt_get_all(ep, gotp);
*eofp = 0;
} else {
if (lastx > 0) {
*gotp = *prevp;
}
*eofp = 1;
ep = NULL;
}
*lastxp = lastx;
return ep;
}
/*
* Search the extents list for the inode, for the extent containing bno.
* If bno lies in a hole, point to the next entry. If bno lies past eof,
* *eofp will be set, and *prevp will contain the last entry (null if none).
* Else, *lastxp will be set to the index of the found
* entry; *gotp will contain the entry.
*/
xfs_bmbt_rec_host_t * /* pointer to found extent entry */
xfs_bmap_search_extents(
xfs_inode_t *ip, /* incore inode pointer */
xfs_fileoff_t bno, /* block number searched for */
int fork, /* data or attr fork */
int *eofp, /* out: end of file found */
xfs_extnum_t *lastxp, /* out: last extent index */
xfs_bmbt_irec_t *gotp, /* out: extent entry found */
xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
{
xfs_ifork_t *ifp; /* inode fork pointer */
xfs_bmbt_rec_host_t *ep; /* extent record pointer */
XFS_STATS_INC(ip->i_mount, xs_look_exlist);
ifp = XFS_IFORK_PTR(ip, fork);
ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
!(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
"Access to block zero in inode %llu "
"start_block: %llx start_off: %llx "
"blkcnt: %llx extent-state: %x lastx: %x",
(unsigned long long)ip->i_ino,
(unsigned long long)gotp->br_startblock,
(unsigned long long)gotp->br_startoff,
(unsigned long long)gotp->br_blockcount,
gotp->br_state, *lastxp);
*lastxp = NULLEXTNUM;
*eofp = 1;
return NULL;
}
return ep;
}
/*
* Returns the file-relative block number of the first unused block(s)
* in the file with at least "len" logically contiguous blocks free.
@ -1497,7 +1426,7 @@ xfs_bmap_first_unused(
(error = xfs_iread_extents(tp, ip, whichfork)))
return error;
lowest = *first_unused;
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
off = xfs_bmbt_get_startoff(ep);
@ -1523,44 +1452,44 @@ xfs_bmap_first_unused(
*/
int /* error */
xfs_bmap_last_before(
xfs_trans_t *tp, /* transaction pointer */
xfs_inode_t *ip, /* incore inode */
xfs_fileoff_t *last_block, /* last block */
int whichfork) /* data or attr fork */
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t *last_block, /* last block */
int whichfork) /* data or attr fork */
{
xfs_fileoff_t bno; /* input file offset */
int eof; /* hit end of file */
xfs_bmbt_rec_host_t *ep; /* pointer to last extent */
int error; /* error return value */
xfs_bmbt_irec_t got; /* current extent value */
xfs_ifork_t *ifp; /* inode fork pointer */
xfs_extnum_t lastx; /* last extent used */
xfs_bmbt_irec_t prev; /* previous extent value */
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec got;
xfs_extnum_t idx;
int error;
if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
return -EIO;
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
case XFS_DINODE_FMT_LOCAL:
*last_block = 0;
return 0;
case XFS_DINODE_FMT_BTREE:
case XFS_DINODE_FMT_EXTENTS:
break;
default:
return -EIO;
}
ifp = XFS_IFORK_PTR(ip, whichfork);
if (!(ifp->if_flags & XFS_IFEXTENTS) &&
(error = xfs_iread_extents(tp, ip, whichfork)))
return error;
bno = *last_block - 1;
ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
&prev);
if (eof || xfs_bmbt_get_startoff(ep) > bno) {
if (prev.br_startoff == NULLFILEOFF)
*last_block = 0;
else
*last_block = prev.br_startoff + prev.br_blockcount;
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
}
/*
* Otherwise *last_block is already the right answer.
*/
if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) {
if (got.br_startoff <= *last_block - 1)
return 0;
}
if (xfs_iext_get_extent(ifp, idx - 1, &got)) {
*last_block = got.br_startoff + got.br_blockcount;
return 0;
}
*last_block = 0;
return 0;
}
@ -1582,7 +1511,7 @@ xfs_bmap_last_extent(
return error;
}
nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
if (nextents == 0) {
*is_empty = 1;
return 0;
@ -1735,7 +1664,7 @@ xfs_bmap_add_extent_delay_real(
&bma->ip->i_d.di_nextents);
ASSERT(bma->idx >= 0);
ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
ASSERT(bma->idx <= xfs_iext_count(ifp));
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!bma->cur ||
(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
@ -1794,7 +1723,7 @@ xfs_bmap_add_extent_delay_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
if (bma->idx < xfs_iext_count(ifp) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
@ -2300,7 +2229,7 @@ xfs_bmap_add_extent_unwritten_real(
ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
ASSERT(*idx >= 0);
ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
ASSERT(*idx <= xfs_iext_count(ifp));
ASSERT(!isnullstartblock(new->br_startblock));
XFS_STATS_INC(mp, xs_add_exlist);
@ -2356,7 +2285,7 @@ xfs_bmap_add_extent_unwritten_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
if (*idx < xfs_iext_count(&ip->i_df) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
if (isnullstartblock(RIGHT.br_startblock))
@ -2836,7 +2765,7 @@ xfs_bmap_add_extent_hole_delay(
* Check and set flags if the current (right) segment exists.
* If it doesn't exist, we're converting the hole at end-of-file.
*/
if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
if (*idx < xfs_iext_count(ifp)) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
@ -2966,7 +2895,7 @@ xfs_bmap_add_extent_hole_real(
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
ASSERT(bma->idx <= xfs_iext_count(ifp));
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!bma->cur ||
!(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
@ -2992,7 +2921,7 @@ xfs_bmap_add_extent_hole_real(
* Check and set flags if this segment has a current value.
* Not true if we're inserting into the "hole" at eof.
*/
if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
if (bma->idx < xfs_iext_count(ifp)) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
if (isnullstartblock(right.br_startblock))
@ -4145,12 +4074,11 @@ xfs_bmapi_read(
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp;
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec prev;
xfs_fileoff_t obno;
xfs_fileoff_t end;
xfs_extnum_t lastx;
xfs_extnum_t idx;
int error;
int eof;
bool eof = false;
int n = 0;
int whichfork = xfs_bmapi_whichfork(flags);
@ -4190,7 +4118,8 @@ xfs_bmapi_read(
return error;
}
xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
eof = true;
end = bno + len;
obno = bno;
@ -4221,10 +4150,8 @@ xfs_bmapi_read(
break;
/* Else go on to the next record. */
if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
else
eof = 1;
if (!xfs_iext_get_extent(ifp, ++idx, &got))
eof = true;
}
*nmap = n;
return 0;
@ -4234,10 +4161,10 @@ int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
int whichfork,
xfs_fileoff_t aoff,
xfs_fileoff_t off,
xfs_filblks_t len,
xfs_filblks_t prealloc,
struct xfs_bmbt_irec *got,
struct xfs_bmbt_irec *prev,
xfs_extnum_t *lastx,
int eof)
{
@ -4248,10 +4175,17 @@ xfs_bmapi_reserve_delalloc(
char rt = XFS_IS_REALTIME_INODE(ip);
xfs_extlen_t extsz;
int error;
xfs_fileoff_t aoff = off;
alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
/*
* Cap the alloc length. Keep track of prealloc so we know whether to
* tag the inode before we return.
*/
alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
if (!eof)
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
if (prealloc && alen >= len)
prealloc = alen - len;
/* Figure out the extent size, adjust alen */
if (whichfork == XFS_COW_FORK)
@ -4259,7 +4193,12 @@ xfs_bmapi_reserve_delalloc(
else
extsz = xfs_get_extsz_hint(ip);
if (extsz) {
error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
struct xfs_bmbt_irec prev;
if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev))
prev.br_startoff = NULLFILEOFF;
error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
1, 0, &aoff, &alen);
ASSERT(!error);
}
@ -4312,6 +4251,16 @@ xfs_bmapi_reserve_delalloc(
*/
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
/*
* Tag the inode if blocks were preallocated. Note that COW fork
* preallocation can occur at the start or end of the extent, even when
* prealloc == 0, so we must also check the aligned offset and length.
*/
if (whichfork == XFS_DATA_FORK && prealloc)
xfs_inode_set_eofblocks_tag(ip);
if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
xfs_inode_set_cowblocks_tag(ip);
ASSERT(got->br_startoff <= aoff);
ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
ASSERT(isnullstartblock(got->br_startblock));
@ -4349,7 +4298,7 @@ xfs_bmapi_allocate(
if (bma->wasdel) {
bma->length = (xfs_extlen_t)bma->got.br_blockcount;
bma->offset = bma->got.br_startoff;
if (bma->idx != NULLEXTNUM && bma->idx) {
if (bma->idx) {
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
&bma->prev);
}
@ -4563,7 +4512,7 @@ xfs_bmapi_write(
struct xfs_ifork *ifp;
struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */
xfs_fileoff_t end; /* end of mapped file region */
int eof; /* after the end of extents */
bool eof = false; /* after the end of extents */
int error; /* error return */
int n; /* current extent index */
xfs_fileoff_t obno; /* old block number (offset) */
@ -4641,12 +4590,14 @@ xfs_bmapi_write(
goto error0;
}
xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
&bma.prev);
n = 0;
end = bno + len;
obno = bno;
if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got))
eof = true;
if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev))
bma.prev.br_startoff = NULLFILEOFF;
bma.tp = tp;
bma.ip = ip;
bma.total = total;
@ -4733,11 +4684,8 @@ xfs_bmapi_write(
/* Else go on to the next record. */
bma.prev = bma.got;
if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
&bma.got);
} else
eof = 1;
if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got))
eof = true;
}
*nmap = n;
@ -4885,7 +4833,7 @@ xfs_bmap_del_extent_delay(
da_new = 0;
ASSERT(*idx >= 0);
ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
ASSERT(*idx <= xfs_iext_count(ifp));
ASSERT(del->br_blockcount > 0);
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
@ -4902,8 +4850,11 @@ xfs_bmap_del_extent_delay(
* sb counters as we might have to borrow some blocks for the
* indirect block accounting.
*/
xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
error = xfs_trans_reserve_quota_nblks(NULL, ip,
-((long)del->br_blockcount), 0,
isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
if (error)
return error;
ip->i_delayed_blks -= del->br_blockcount;
if (whichfork == XFS_COW_FORK)
@ -5013,7 +4964,7 @@ xfs_bmap_del_extent_cow(
got_endoff = got->br_startoff + got->br_blockcount;
ASSERT(*idx >= 0);
ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
ASSERT(*idx <= xfs_iext_count(ifp));
ASSERT(del->br_blockcount > 0);
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
@ -5119,8 +5070,7 @@ xfs_bmap_del_extent(
state |= BMAP_COWFORK;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
(uint)sizeof(xfs_bmbt_rec_t)));
ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
ASSERT(del->br_blockcount > 0);
ep = xfs_iext_get_ext(ifp, *idx);
xfs_bmbt_get_all(ep, &got);
@ -5434,8 +5384,6 @@ __xfs_bunmapi(
{
xfs_btree_cur_t *cur; /* bmap btree cursor */
xfs_bmbt_irec_t del; /* extent being deleted */
int eof; /* is deleting at eof */
xfs_bmbt_rec_host_t *ep; /* extent record pointer */
int error; /* error return value */
xfs_extnum_t extno; /* extent number in list */
xfs_bmbt_irec_t got; /* current extent record */
@ -5445,8 +5393,6 @@ __xfs_bunmapi(
int logflags; /* transaction logging flags */
xfs_extlen_t mod; /* rt extent offset */
xfs_mount_t *mp; /* mount structure */
xfs_extnum_t nextents; /* number of file extents */
xfs_bmbt_irec_t prev; /* previous extent record */
xfs_fileoff_t start; /* first file offset deleted */
int tmp_logflags; /* partial logging flags */
int wasdel; /* was a delayed alloc extent */
@ -5477,8 +5423,7 @@ __xfs_bunmapi(
if (!(ifp->if_flags & XFS_IFEXTENTS) &&
(error = xfs_iread_extents(tp, ip, whichfork)))
return error;
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
if (nextents == 0) {
if (xfs_iext_count(ifp) == 0) {
*rlen = 0;
return 0;
}
@ -5486,18 +5431,17 @@ __xfs_bunmapi(
isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
start = bno;
bno = start + len - 1;
ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
&prev);
/*
* Check to see if the given block number is past the end of the
* file, back up to the last block if so...
*/
if (eof) {
ep = xfs_iext_get_ext(ifp, --lastx);
xfs_bmbt_get_all(ep, &got);
if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) {
ASSERT(lastx > 0);
xfs_iext_get_extent(ifp, --lastx, &got);
bno = got.br_startoff + got.br_blockcount - 1;
}
logflags = 0;
if (ifp->if_flags & XFS_IFBROOT) {
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
@ -5528,8 +5472,7 @@ __xfs_bunmapi(
if (got.br_startoff > bno) {
if (--lastx < 0)
break;
ep = xfs_iext_get_ext(ifp, lastx);
xfs_bmbt_get_all(ep, &got);
xfs_iext_get_extent(ifp, lastx, &got);
}
/*
* Is the last block of this extent before the range
@ -5543,7 +5486,6 @@ __xfs_bunmapi(
* Then deal with the (possibly delayed) allocated space
* we found.
*/
ASSERT(ep != NULL);
del = got;
wasdel = isnullstartblock(del.br_startblock);
if (got.br_startoff < start) {
@ -5624,15 +5566,12 @@ __xfs_bunmapi(
*/
ASSERT(bno >= del.br_blockcount);
bno -= del.br_blockcount;
if (got.br_startoff > bno) {
if (--lastx >= 0) {
ep = xfs_iext_get_ext(ifp,
lastx);
xfs_bmbt_get_all(ep, &got);
}
}
if (got.br_startoff > bno && --lastx >= 0)
xfs_iext_get_extent(ifp, lastx, &got);
continue;
} else if (del.br_state == XFS_EXT_UNWRITTEN) {
struct xfs_bmbt_irec prev;
/*
* This one is already unwritten.
* It must have a written left neighbor.
@ -5640,8 +5579,7 @@ __xfs_bunmapi(
* try again.
*/
ASSERT(lastx > 0);
xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
lastx - 1), &prev);
xfs_iext_get_extent(ifp, lastx - 1, &prev);
ASSERT(prev.br_state == XFS_EXT_NORM);
ASSERT(!isnullstartblock(prev.br_startblock));
ASSERT(del.br_startblock ==
@ -5739,13 +5677,9 @@ __xfs_bunmapi(
*/
if (bno != (xfs_fileoff_t)-1 && bno >= start) {
if (lastx >= 0) {
ep = xfs_iext_get_ext(ifp, lastx);
if (xfs_bmbt_get_startoff(ep) > bno) {
if (--lastx >= 0)
ep = xfs_iext_get_ext(ifp,
lastx);
}
xfs_bmbt_get_all(ep, &got);
xfs_iext_get_extent(ifp, lastx, &got);
if (got.br_startoff > bno && --lastx >= 0)
xfs_iext_get_extent(ifp, lastx, &got);
}
extno++;
}
@ -5963,7 +5897,7 @@ xfs_bmse_shift_one(
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
total_extents = xfs_iext_count(ifp);
xfs_bmbt_get_all(gotp, &got);
@ -6140,7 +6074,7 @@ xfs_bmap_shift_extents(
* are collapsing out, so we cannot use the count of real extents here.
* Instead we have to calculate it from the incore fork.
*/
total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
total_extents = xfs_iext_count(ifp);
if (total_extents == 0) {
*done = 1;
goto del_cursor;
@ -6200,7 +6134,7 @@ xfs_bmap_shift_extents(
* count can change. Update the total and grade the next record.
*/
if (direction == SHIFT_LEFT) {
total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
total_extents = xfs_iext_count(ifp);
stop_extent = total_extents;
}

View File

@ -237,14 +237,9 @@ int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
struct xfs_defer_ops *dfops, enum shift_direction direction,
int num_exts);
int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
struct xfs_bmbt_rec_host *
xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno,
int fork, int *eofp, xfs_extnum_t *lastxp,
struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
xfs_fileoff_t aoff, xfs_filblks_t len,
struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev,
xfs_extnum_t *lastx, int eof);
xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
enum xfs_bmap_intent_type {
XFS_BMAP_MAP = 1,

View File

@ -796,13 +796,14 @@ xfs_bmbt_init_cursor(
struct xfs_btree_cur *cur;
ASSERT(whichfork != XFS_COW_FORK);
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
cur->bc_tp = tp;
cur->bc_mp = mp;
cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
cur->bc_btnum = XFS_BTNUM_BMAP;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2);
cur->bc_ops = &xfs_bmbt_ops;
cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;

View File

@ -1769,8 +1769,28 @@ xfs_btree_lookup_get_block(
if (error)
return error;
/* Check the inode owner since the verifiers don't. */
if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
cur->bc_private.b.ip->i_ino)
goto out_bad;
/* Did we get the level we were looking for? */
if (be16_to_cpu((*blkp)->bb_level) != level)
goto out_bad;
/* Check that internal nodes have at least one record. */
if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
goto out_bad;
xfs_btree_setbuf(cur, level, bp);
return 0;
out_bad:
*blkp = NULL;
xfs_trans_brelse(cur->bc_tp, bp);
return -EFSCORRUPTED;
}
/*

View File

@ -96,46 +96,10 @@ union xfs_btree_rec {
/*
* Generic stats interface
*/
#define __XFS_BTREE_STATS_INC(mp, type, stat) \
XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat)
#define XFS_BTREE_STATS_INC(cur, stat) \
do { \
struct xfs_mount *__mp = cur->bc_mp; \
switch (cur->bc_btnum) { \
case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \
case XFS_BTNUM_REFC: __XFS_BTREE_STATS_INC(__mp, refcbt, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \
XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val)
#define XFS_BTREE_STATS_ADD(cur, stat, val) \
do { \
struct xfs_mount *__mp = cur->bc_mp; \
switch (cur->bc_btnum) { \
case XFS_BTNUM_BNO: \
__XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \
case XFS_BTNUM_CNT: \
__XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \
case XFS_BTNUM_BMAP: \
__XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \
case XFS_BTNUM_INO: \
__XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
case XFS_BTNUM_FINO: \
__XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
case XFS_BTNUM_RMAP: \
__XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \
case XFS_BTNUM_REFC: \
__XFS_BTREE_STATS_ADD(__mp, refcbt, stat, val); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
XFS_STATS_INC_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat)
#define XFS_BTREE_STATS_ADD(cur, stat, val) \
XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val)
#define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */
@ -253,6 +217,7 @@ typedef struct xfs_btree_cur
__uint8_t bc_nlevels; /* number of levels in the tree */
__uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
xfs_btnum_t bc_btnum; /* identifies which btree type */
int bc_statoff; /* offset of btre stats array */
union {
struct { /* needed for BNO, CNT, INO */
struct xfs_buf *agbp; /* agf/agi buffer pointer */

View File

@ -6,10 +6,11 @@
/*
* Calculate the intermediate checksum for a buffer that has the CRC field
* inside it. The offset of the 32bit crc fields is passed as the
* cksum_offset parameter.
* cksum_offset parameter. We do not modify the buffer during verification,
* hence we have to split the CRC calculation across the cksum_offset.
*/
static inline __uint32_t
xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset)
xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t zero = 0;
__uint32_t crc;
@ -25,6 +26,20 @@ xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset)
length - (cksum_offset + sizeof(__be32)));
}
/*
* Fast CRC method where the buffer is modified. Callers must have exclusive
* access to the buffer while the calculation takes place.
*/
static inline __uint32_t
xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
{
/* zero the CRC field */
*(__le32 *)(buffer + cksum_offset) = 0;
/* single pass CRC calculation for the entire buffer */
return crc32c(XFS_CRC_SEED, buffer, length);
}
/*
* Convert the intermediate checksum to the final ondisk format.
*
@ -40,11 +55,14 @@ xfs_end_cksum(__uint32_t crc)
/*
* Helper to generate the checksum for a buffer.
*
* This modifies the buffer temporarily - callers must have exclusive
* access to the buffer while the calculation takes place.
*/
static inline void
xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset);
__uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
*(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc);
}
@ -55,7 +73,7 @@ xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
static inline int
xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset);
__uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc);
}

View File

@ -93,7 +93,7 @@ xfs_ascii_ci_compname(
return result;
}
static struct xfs_nameops xfs_ascii_ci_nameops = {
static const struct xfs_nameops xfs_ascii_ci_nameops = {
.hashname = xfs_ascii_ci_hashname,
.compname = xfs_ascii_ci_compname,
};

View File

@ -157,6 +157,9 @@ extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
extern void xfs_dir2_data_freescan_int(struct xfs_da_geometry *geo,
const struct xfs_dir_ops *ops,
struct xfs_dir2_data_hdr *hdr, int *loghead);
extern void xfs_dir2_data_freescan(struct xfs_inode *dp,
struct xfs_dir2_data_hdr *hdr, int *loghead);
extern void xfs_dir2_data_log_entry(struct xfs_da_args *args,
@ -177,6 +180,8 @@ extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
struct xfs_dir2_data_unused *dup);
extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;

View File

@ -329,7 +329,7 @@ xfs_dir3_data_read(
err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
if (!err && tp)
if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
return err;
}
@ -505,8 +505,9 @@ xfs_dir2_data_freeremove(
* Given a data block, reconstruct its bestfree map.
*/
void
xfs_dir2_data_freescan(
struct xfs_inode *dp,
xfs_dir2_data_freescan_int(
struct xfs_da_geometry *geo,
const struct xfs_dir_ops *ops,
struct xfs_dir2_data_hdr *hdr,
int *loghead)
{
@ -516,7 +517,6 @@ xfs_dir2_data_freescan(
struct xfs_dir2_data_free *bf;
char *endp; /* end of block's data */
char *p; /* current entry pointer */
struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
@ -526,13 +526,13 @@ xfs_dir2_data_freescan(
/*
* Start by clearing the table.
*/
bf = dp->d_ops->data_bestfree_p(hdr);
bf = ops->data_bestfree_p(hdr);
memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
*loghead = 1;
/*
* Set up pointers.
*/
p = (char *)dp->d_ops->data_entry_p(hdr);
p = (char *)ops->data_entry_p(hdr);
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
btp = xfs_dir2_block_tail_p(geo, hdr);
@ -559,12 +559,22 @@ xfs_dir2_data_freescan(
else {
dep = (xfs_dir2_data_entry_t *)p;
ASSERT((char *)dep - (char *)hdr ==
be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep)));
p += dp->d_ops->data_entsize(dep->namelen);
be16_to_cpu(*ops->data_entry_tag_p(dep)));
p += ops->data_entsize(dep->namelen);
}
}
}
void
xfs_dir2_data_freescan(
struct xfs_inode *dp,
struct xfs_dir2_data_hdr *hdr,
int *loghead)
{
return xfs_dir2_data_freescan_int(dp->i_mount->m_dir_geo, dp->d_ops,
hdr, loghead);
}
/*
* Initialize a data block at the given block number in the directory.
* Give back the buffer for the created block.

View File

@ -21,7 +21,6 @@
struct dir_context;
/* xfs_dir2.c */
extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
xfs_dir2_db_t *dbp);
extern int xfs_dir_cilookup_result(struct xfs_da_args *args,

View File

@ -2344,7 +2344,8 @@ xfs_imap(
imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
imap->im_len = XFS_FSB_TO_BB(mp, 1);
imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
imap->im_boffset = (unsigned short)(offset <<
mp->m_sb.sb_inodelog);
return 0;
}
@ -2372,7 +2373,7 @@ xfs_imap(
imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
/*
* If the inode number maps to a block outside the bounds
@ -2450,8 +2451,6 @@ xfs_ialloc_log_agi(
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
#endif
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
/*
* Compute byte offsets for the first and last fields in the first
* region and log the agi buffer. This only logs up through
@ -2512,8 +2511,15 @@ xfs_agi_verify(
if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
return false;
if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
if (be32_to_cpu(agi->agi_level) < 1 ||
be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
return false;
if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
(be32_to_cpu(agi->agi_free_level) < 1 ||
be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS))
return false;
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
@ -2592,6 +2598,8 @@ xfs_read_agi(
XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
if (error)
return error;
if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF);
xfs_buf_set_ref(*bpp, XFS_AGI_REF);
return 0;

View File

@ -357,7 +357,7 @@ xfs_inobt_init_cursor(
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
struct xfs_btree_cur *cur;
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
cur->bc_tp = tp;
cur->bc_mp = mp;
@ -365,9 +365,11 @@ xfs_inobt_init_cursor(
if (btnum == XFS_BTNUM_INO) {
cur->bc_nlevels = be32_to_cpu(agi->agi_level);
cur->bc_ops = &xfs_inobt_ops;
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2);
} else {
cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
cur->bc_ops = &xfs_finobt_ops;
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2);
}
cur->bc_blocklog = mp->m_sb.sb_blocklog;

View File

@ -383,7 +383,7 @@ xfs_log_dinode_to_disk(
static bool
xfs_dinode_verify(
struct xfs_mount *mp,
struct xfs_inode *ip,
xfs_ino_t ino,
struct xfs_dinode *dip)
{
uint16_t flags;
@ -392,6 +392,14 @@ xfs_dinode_verify(
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
return false;
/* don't allow invalid i_size */
if (be64_to_cpu(dip->di_size) & (1ULL << 63))
return false;
/* No zero-length symlinks. */
if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
return false;
/* only version 3 or greater inodes are extensively verified here */
if (dip->di_version < 3)
return true;
@ -401,7 +409,7 @@ xfs_dinode_verify(
if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
XFS_DINODE_CRC_OFF))
return false;
if (be64_to_cpu(dip->di_ino) != ip->i_ino)
if (be64_to_cpu(dip->di_ino) != ino)
return false;
if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
return false;
@ -436,7 +444,7 @@ xfs_dinode_calc_crc(
return;
ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
XFS_DINODE_CRC_OFF);
dip->di_crc = xfs_end_cksum(crc);
}
@ -493,7 +501,7 @@ xfs_iread(
return error;
/* even unallocated inodes are verified */
if (!xfs_dinode_verify(mp, ip, dip)) {
if (!xfs_dinode_verify(mp, ip->i_ino, dip)) {
xfs_alert(mp, "%s: validation failed for inode %lld failed",
__func__, ip->i_ino);

View File

@ -58,8 +58,8 @@ struct xfs_icdinode {
*/
struct xfs_imap {
xfs_daddr_t im_blkno; /* starting BB of inode chunk */
ushort im_len; /* length in BBs of inode chunk */
ushort im_boffset; /* inode offset in block in bytes */
unsigned short im_len; /* length in BBs of inode chunk */
unsigned short im_boffset; /* inode offset in block in bytes */
};
int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,

View File

@ -775,6 +775,13 @@ xfs_idestroy_fork(
}
}
/* Count number of incore extents based on if_bytes */
xfs_extnum_t
xfs_iext_count(struct xfs_ifork *ifp)
{
return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
}
/*
* Convert in-core extents to on-disk form
*
@ -803,7 +810,7 @@ xfs_iextents_copy(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(ifp->if_bytes > 0);
nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nrecs = xfs_iext_count(ifp);
XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
ASSERT(nrecs > 0);
@ -941,7 +948,7 @@ xfs_iext_get_ext(
xfs_extnum_t idx) /* index of target extent */
{
ASSERT(idx >= 0);
ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
ASSERT(idx < xfs_iext_count(ifp));
if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
return ifp->if_u1.if_ext_irec->er_extbuf;
@ -1017,7 +1024,7 @@ xfs_iext_add(
int new_size; /* size of extents after adding */
xfs_extnum_t nextents; /* number of extents in file */
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
ASSERT((idx >= 0) && (idx <= nextents));
byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
new_size = ifp->if_bytes + byte_diff;
@ -1241,7 +1248,7 @@ xfs_iext_remove(
trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
ASSERT(ext_diff > 0);
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
if (new_size == 0) {
@ -1270,7 +1277,7 @@ xfs_iext_remove_inline(
ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
ASSERT(idx < XFS_INLINE_EXTS);
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
ASSERT(((nextents - ext_diff) > 0) &&
(nextents - ext_diff) < XFS_INLINE_EXTS);
@ -1309,7 +1316,7 @@ xfs_iext_remove_direct(
ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
new_size = ifp->if_bytes -
(ext_diff * sizeof(xfs_bmbt_rec_t));
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
if (new_size == 0) {
xfs_iext_destroy(ifp);
@ -1546,7 +1553,7 @@ xfs_iext_indirect_to_direct(
int size; /* size of file extents */
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
ASSERT(nextents <= XFS_LINEAR_EXTS);
size = nextents * sizeof(xfs_bmbt_rec_t);
@ -1620,7 +1627,7 @@ xfs_iext_bno_to_ext(
xfs_extnum_t nextents; /* number of file extents */
xfs_fileoff_t startoff = 0; /* start offset of extent */
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
if (nextents == 0) {
*idxp = 0;
return NULL;
@ -1733,8 +1740,8 @@ xfs_iext_idx_to_irec(
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
ASSERT(page_idx >= 0);
ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
ASSERT(page_idx <= xfs_iext_count(ifp));
ASSERT(page_idx < xfs_iext_count(ifp) || realloc);
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
erp_idx = 0;
@ -1782,7 +1789,7 @@ xfs_iext_irec_init(
xfs_extnum_t nextents; /* number of extents in file */
ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
ASSERT(nextents <= XFS_LINEAR_EXTS);
erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
@ -1906,7 +1913,7 @@ xfs_iext_irec_compact(
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
if (nextents == 0) {
xfs_iext_destroy(ifp);
@ -1996,3 +2003,49 @@ xfs_ifork_init_cow(
ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
ip->i_cnextents = 0;
}
/*
* Lookup the extent covering bno.
*
* If there is an extent covering bno return the extent index, and store the
* expanded extent structure in *gotp, and the extent index in *idx.
* If there is no extent covering bno, but there is an extent after it (e.g.
* it lies in a hole) return that extent in *gotp and its index in *idx
* instead.
* If bno is beyond the last extent return false, and return the index after
* the last valid index in *idxp.
*/
bool
xfs_iext_lookup_extent(
struct xfs_inode *ip,
struct xfs_ifork *ifp,
xfs_fileoff_t bno,
xfs_extnum_t *idxp,
struct xfs_bmbt_irec *gotp)
{
struct xfs_bmbt_rec_host *ep;
XFS_STATS_INC(ip->i_mount, xs_look_exlist);
ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
if (!ep)
return false;
xfs_bmbt_get_all(ep, gotp);
return true;
}
/*
* Return true if there is an extent at index idx, and return the expanded
* extent structure at idx in that case. Else return false.
*/
bool
xfs_iext_get_extent(
struct xfs_ifork *ifp,
xfs_extnum_t idx,
struct xfs_bmbt_irec *gotp)
{
if (idx < 0 || idx >= xfs_iext_count(ifp))
return false;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
return true;
}

View File

@ -152,6 +152,7 @@ void xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
struct xfs_bmbt_rec_host *
xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
xfs_extnum_t xfs_iext_count(struct xfs_ifork *);
void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
struct xfs_bmbt_irec *, int);
void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
@ -181,6 +182,12 @@ void xfs_iext_irec_compact_pages(struct xfs_ifork *);
void xfs_iext_irec_compact_full(struct xfs_ifork *);
void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
bool xfs_iext_lookup_extent(struct xfs_inode *ip,
struct xfs_ifork *ifp, xfs_fileoff_t bno,
xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
struct xfs_bmbt_irec *gotp);
extern struct kmem_zone *xfs_ifork_zone;
extern void xfs_ifork_init_cow(struct xfs_inode *ip);

View File

@ -481,8 +481,8 @@ static inline uint xfs_log_dinode_size(int version)
typedef struct xfs_buf_log_format {
unsigned short blf_type; /* buf log item type indicator */
unsigned short blf_size; /* size of this item */
ushort blf_flags; /* misc state */
ushort blf_len; /* number of blocks in this buf */
unsigned short blf_flags; /* misc state */
unsigned short blf_len; /* number of blocks in this buf */
__int64_t blf_blkno; /* starting blkno of this buf */
unsigned int blf_map_size; /* used size of data bitmap in words */
unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */

View File

@ -52,7 +52,7 @@ typedef struct xlog_recover {
struct list_head r_itemq; /* q for items */
} xlog_recover_t;
#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr)
#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr)
/*
* This is the number of entries in the l_buf_cancel_table used during

View File

@ -354,6 +354,7 @@ xfs_refcountbt_init_cursor(
cur->bc_btnum = XFS_BTNUM_REFC;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_refcountbt_ops;
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2);
cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level);

View File

@ -484,6 +484,7 @@ xfs_rmapbt_init_cursor(
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_rmapbt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;

View File

@ -1016,4 +1016,3 @@ xfs_rtfree_extent(
}
return 0;
}

View File

@ -262,6 +262,12 @@ xfs_mount_validate_sb(
return -EFSCORRUPTED;
}
if (xfs_sb_version_hascrc(&mp->m_sb) &&
sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) {
xfs_notice(mp, "v5 SB sanity check failed");
return -EFSCORRUPTED;
}
/*
* Until this is fixed only page-sized or smaller data blocks work.
*/
@ -338,13 +344,16 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp)
XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD;
sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
if (sbp->sb_qflags & XFS_PQUOTA_ACCT) {
if (sbp->sb_qflags & XFS_PQUOTA_ACCT &&
sbp->sb_gquotino != NULLFSINO) {
/*
* In older version of superblock, on-disk superblock only
* has sb_gquotino, and in-core superblock has both sb_gquotino
* and sb_pquotino. But, only one of them is supported at any
* point of time. So, if PQUOTA is set in disk superblock,
* copy over sb_gquotino to sb_pquotino.
* copy over sb_gquotino to sb_pquotino. The NULLFSINO test
* above is to make sure we don't do this twice and wipe them
* both out!
*/
sbp->sb_pquotino = sbp->sb_gquotino;
sbp->sb_gquotino = NULLFSINO;

View File

@ -57,7 +57,6 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
#define NULLAGBLOCK ((xfs_agblock_t)-1)
#define NULLAGNUMBER ((xfs_agnumber_t)-1)
#define NULLEXTNUM ((xfs_extnum_t)-1)
#define NULLCOMMITLSN ((xfs_lsn_t)-1)
@ -75,11 +74,14 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
* Minimum and maximum blocksize and sectorsize.
* The blocksize upper limit is pretty much arbitrary.
* The sectorsize upper limit is due to sizeof(sb_sectsize).
* CRC enable filesystems use 512 byte inodes, meaning 512 byte block sizes
* cannot be used.
*/
#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */
#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */
#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG)
#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG)
#define XFS_MIN_CRC_BLOCKSIZE (1 << (XFS_MIN_BLOCKSIZE_LOG + 1))
#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */
#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */
#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)

View File

@ -37,11 +37,6 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
/* flags for direct write completions */
#define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
#define XFS_DIO_FLAG_APPEND (1 << 1)
#define XFS_DIO_FLAG_COW (1 << 2)
/*
* structure owned by writepages passed to individual writepage calls
*/
@ -776,7 +771,7 @@ xfs_map_cow(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_bmbt_irec imap;
bool is_cow = false, need_alloc = false;
bool is_cow = false;
int error;
/*
@ -794,7 +789,7 @@ xfs_map_cow(
* Else we need to check if there is a COW mapping at this offset.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (!is_cow)
@ -804,7 +799,7 @@ xfs_map_cow(
* And if the COW mapping has a delayed extent here we need to
* allocate real space for it now.
*/
if (need_alloc) {
if (isnullstartblock(imap.br_startblock)) {
error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
&imap);
if (error)
@ -1174,45 +1169,6 @@ xfs_vm_releasepage(
return try_to_free_buffers(page);
}
/*
* When we map a DIO buffer, we may need to pass flags to
* xfs_end_io_direct_write to tell it what kind of write IO we are doing.
*
* Note that for DIO, an IO to the highest supported file block offset (i.e.
* 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
* bit variable. Hence if we see this overflow, we have to assume that the IO is
* extending the file size. We won't know for sure until IO completion is run
* and the actual max write offset is communicated to the IO completion
* routine.
*/
static void
xfs_map_direct(
struct inode *inode,
struct buffer_head *bh_result,
struct xfs_bmbt_irec *imap,
xfs_off_t offset,
bool is_cow)
{
uintptr_t *flags = (uintptr_t *)&bh_result->b_private;
xfs_off_t size = bh_result->b_size;
trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
XFS_IO_OVERWRITE, imap);
if (ISUNWRITTEN(imap)) {
*flags |= XFS_DIO_FLAG_UNWRITTEN;
set_buffer_defer_completion(bh_result);
} else if (is_cow) {
*flags |= XFS_DIO_FLAG_COW;
set_buffer_defer_completion(bh_result);
}
if (offset + size > i_size_read(inode) || offset + size < 0) {
*flags |= XFS_DIO_FLAG_APPEND;
set_buffer_defer_completion(bh_result);
}
}
/*
* If this is O_DIRECT or the mpage code calling tell them how large the mapping
* is, so that we can avoid repeated get_blocks calls.
@ -1253,51 +1209,12 @@ xfs_map_trim_size(
bh_result->b_size = mapping_size;
}
/* Bounce unaligned directio writes to the page cache. */
static int
xfs_bounce_unaligned_dio_write(
struct xfs_inode *ip,
xfs_fileoff_t offset_fsb,
struct xfs_bmbt_irec *imap)
{
struct xfs_bmbt_irec irec;
xfs_fileoff_t delta;
bool shared;
bool x;
int error;
irec = *imap;
if (offset_fsb > irec.br_startoff) {
delta = offset_fsb - irec.br_startoff;
irec.br_blockcount -= delta;
irec.br_startblock += delta;
irec.br_startoff = offset_fsb;
}
error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
if (error)
return error;
/*
* We're here because we're trying to do a directio write to a
* region that isn't aligned to a filesystem block. If any part
* of the extent is shared, fall back to buffered mode to handle
* the RMW. This is done by returning -EREMCHG ("remote addr
* changed"), which is caught further up the call stack.
*/
if (shared) {
trace_xfs_reflink_bounce_dio_write(ip, imap);
return -EREMCHG;
}
return 0;
}
STATIC int
__xfs_get_blocks(
xfs_get_blocks(
struct inode *inode,
sector_t iblock,
struct buffer_head *bh_result,
int create,
bool direct)
int create)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@ -1308,11 +1225,8 @@ __xfs_get_blocks(
int nimaps = 1;
xfs_off_t offset;
ssize_t size;
int new = 0;
bool is_cow = false;
bool need_alloc = false;
BUG_ON(create && !direct);
BUG_ON(create);
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@ -1321,7 +1235,7 @@ __xfs_get_blocks(
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
size = bh_result->b_size;
if (!create && offset >= i_size_read(inode))
if (offset >= i_size_read(inode))
return 0;
/*
@ -1336,52 +1250,12 @@ __xfs_get_blocks(
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
if (create && direct && xfs_is_reflink_inode(ip))
is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
&need_alloc);
if (!is_cow) {
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
&imap, &nimaps, XFS_BMAPI_ENTIRE);
/*
* Truncate an overwrite extent if there's a pending CoW
* reservation before the end of this extent. This
* forces us to come back to get_blocks to take care of
* the CoW.
*/
if (create && direct && nimaps &&
imap.br_startblock != HOLESTARTBLOCK &&
imap.br_startblock != DELAYSTARTBLOCK &&
!ISUNWRITTEN(&imap))
xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
&imap);
}
ASSERT(!need_alloc);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
&imap, &nimaps, XFS_BMAPI_ENTIRE);
if (error)
goto out_unlock;
/* for DAX, we convert unwritten extents directly */
if (create &&
(!nimaps ||
(imap.br_startblock == HOLESTARTBLOCK ||
imap.br_startblock == DELAYSTARTBLOCK) ||
(IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
/*
* xfs_iomap_write_direct() expects the shared lock. It
* is unlocked on return.
*/
if (lockmode == XFS_ILOCK_EXCL)
xfs_ilock_demote(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset, size,
&imap, nimaps);
if (error)
return error;
new = 1;
trace_xfs_get_blocks_alloc(ip, offset, size,
ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
: XFS_IO_DELALLOC, &imap);
} else if (nimaps) {
if (nimaps) {
trace_xfs_get_blocks_found(ip, offset, size,
ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
: XFS_IO_OVERWRITE, &imap);
@ -1391,12 +1265,6 @@ __xfs_get_blocks(
goto out_unlock;
}
if (IS_DAX(inode) && create) {
ASSERT(!ISUNWRITTEN(&imap));
/* zeroing is not needed at a higher layer */
new = 0;
}
/* trim mapping down to size requested */
xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
@ -1406,45 +1274,14 @@ __xfs_get_blocks(
*/
if (imap.br_startblock != HOLESTARTBLOCK &&
imap.br_startblock != DELAYSTARTBLOCK &&
(create || !ISUNWRITTEN(&imap))) {
if (create && direct && !is_cow) {
error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
&imap);
if (error)
return error;
}
!ISUNWRITTEN(&imap))
xfs_map_buffer(inode, bh_result, &imap, offset);
if (ISUNWRITTEN(&imap))
set_buffer_unwritten(bh_result);
/* direct IO needs special help */
if (create)
xfs_map_direct(inode, bh_result, &imap, offset, is_cow);
}
/*
* If this is a realtime file, data may be on a different device.
* to that pointed to from the buffer_head b_bdev currently.
*/
bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
/*
* If we previously allocated a block out beyond eof and we are now
* coming back to use it then we will need to flag it as new even if it
* has a disk address.
*
* With sub-block writes into unwritten extents we also need to mark
* the buffer as new so that the unwritten parts of the buffer gets
* correctly zeroed.
*/
if (create &&
((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
(offset >= i_size_read(inode)) ||
(new || ISUNWRITTEN(&imap))))
set_buffer_new(bh_result);
BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
return 0;
out_unlock:
@ -1452,100 +1289,6 @@ __xfs_get_blocks(
return error;
}
int
xfs_get_blocks(
struct inode *inode,
sector_t iblock,
struct buffer_head *bh_result,
int create)
{
return __xfs_get_blocks(inode, iblock, bh_result, create, false);
}
int
xfs_get_blocks_direct(
struct inode *inode,
sector_t iblock,
struct buffer_head *bh_result,
int create)
{
return __xfs_get_blocks(inode, iblock, bh_result, create, true);
}
/*
* Complete a direct I/O write request.
*
* xfs_map_direct passes us some flags in the private data to tell us what to
* do. If no flags are set, then the write IO is an overwrite wholly within
* the existing allocated file size and so there is nothing for us to do.
*
* Note that in this case the completion can be called in interrupt context,
* whereas if we have flags set we will always be called in task context
* (i.e. from a workqueue).
*/
int
xfs_end_io_direct_write(
struct kiocb *iocb,
loff_t offset,
ssize_t size,
void *private)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
uintptr_t flags = (uintptr_t)private;
int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
if (size <= 0)
return size;
/*
* The flags tell us whether we are doing unwritten extent conversions
* or an append transaction that updates the on-disk file size. These
* cases are the only cases where we should *potentially* be needing
* to update the VFS inode size.
*/
if (flags == 0) {
ASSERT(offset + size <= i_size_read(inode));
return 0;
}
/*
* We need to update the in-core inode size here so that we don't end up
* with the on-disk inode size being outside the in-core inode size. We
* have no other method of updating EOF for AIO, so always do it here
* if necessary.
*
* We need to lock the test/set EOF update as we can be racing with
* other IO completions here to update the EOF. Failing to serialise
* here can result in EOF moving backwards and Bad Things Happen when
* that occurs.
*/
spin_lock(&ip->i_flags_lock);
if (offset + size > i_size_read(inode))
i_size_write(inode, offset + size);
spin_unlock(&ip->i_flags_lock);
if (flags & XFS_DIO_FLAG_COW)
error = xfs_reflink_end_cow(ip, offset, size);
if (flags & XFS_DIO_FLAG_UNWRITTEN) {
trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
error = xfs_iomap_write_unwritten(ip, offset, size);
}
if (flags & XFS_DIO_FLAG_APPEND) {
trace_xfs_end_io_direct_write_append(ip, offset, size);
error = xfs_setfilesize(ip, offset, size);
}
return error;
}
STATIC ssize_t
xfs_vm_direct_IO(
struct kiocb *iocb,
@ -1566,7 +1309,6 @@ xfs_vm_bmap(
struct xfs_inode *ip = XFS_I(inode);
trace_xfs_vm_bmap(XFS_I(inode));
xfs_ilock(ip, XFS_IOLOCK_SHARED);
/*
* The swap code (ab-)uses ->bmap to get a block mapping and then
@ -1574,12 +1316,10 @@ xfs_vm_bmap(
* that on reflinks inodes, so we have to skip out here. And yes,
* 0 is the magic code for a bmap error..
*/
if (xfs_is_reflink_inode(ip)) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
if (xfs_is_reflink_inode(ip))
return 0;
}
filemap_write_and_wait(mapping);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return generic_block_bmap(mapping, block, xfs_get_blocks);
}

View File

@ -55,12 +55,6 @@ struct xfs_ioend {
extern const struct address_space_operations xfs_address_space_operations;
int xfs_get_blocks(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
int xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private);
int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
extern void xfs_count_page_state(struct page *, int *, int *);

View File

@ -112,8 +112,8 @@ typedef struct attrlist_cursor_kern {
*========================================================================*/
/* Return 0 on success, or -errno; other state communicated via *context */
typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
/* void; state communicated via *context */
typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int);
typedef struct xfs_attr_list_context {

View File

@ -74,7 +74,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
xfs_attr_sf_entry_t *sfe;
xfs_inode_t *dp;
int sbsize, nsbuf, count, i;
int error;
ASSERT(context != NULL);
dp = context->dp;
@ -102,13 +101,11 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
(XFS_ISRESET_CURSOR(cursor) &&
(dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
error = context->put_listent(context,
sfe->flags,
sfe->nameval,
(int)sfe->namelen,
(int)sfe->valuelen);
if (error)
return error;
context->put_listent(context,
sfe->flags,
sfe->nameval,
(int)sfe->namelen,
(int)sfe->valuelen);
/*
* Either search callback finished early or
* didn't fit it all in the buffer after all.
@ -193,15 +190,11 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
cursor->hashval = sbp->hash;
cursor->offset = 0;
}
error = context->put_listent(context,
sbp->flags,
sbp->name,
sbp->namelen,
sbp->valuelen);
if (error) {
kmem_free(sbuf);
return error;
}
context->put_listent(context,
sbp->flags,
sbp->name,
sbp->namelen,
sbp->valuelen);
if (context->seen_enough)
break;
cursor->offset++;
@ -335,11 +328,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
for (;;) {
leaf = bp->b_addr;
error = xfs_attr3_leaf_list_int(bp, context);
if (error) {
xfs_trans_brelse(NULL, bp);
return error;
}
xfs_attr3_leaf_list_int(bp, context);
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
if (context->seen_enough || leafhdr.forw == 0)
break;
@ -356,7 +345,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
/*
* Copy out attribute list entries for attr_list(), for leaf attribute lists.
*/
int
void
xfs_attr3_leaf_list_int(
struct xfs_buf *bp,
struct xfs_attr_list_context *context)
@ -366,7 +355,6 @@ xfs_attr3_leaf_list_int(
struct xfs_attr3_icleaf_hdr ichdr;
struct xfs_attr_leaf_entry *entries;
struct xfs_attr_leaf_entry *entry;
int retval;
int i;
struct xfs_mount *mp = context->dp->i_mount;
@ -399,7 +387,7 @@ xfs_attr3_leaf_list_int(
}
if (i == ichdr.count) {
trace_xfs_attr_list_notfound(context);
return 0;
return;
}
} else {
entry = &entries[0];
@ -410,7 +398,6 @@ xfs_attr3_leaf_list_int(
/*
* We have found our place, start copying out the new attributes.
*/
retval = 0;
for (; i < ichdr.count; entry++, i++) {
char *name;
int namelen, valuelen;
@ -439,16 +426,14 @@ xfs_attr3_leaf_list_int(
valuelen = be32_to_cpu(name_rmt->valuelen);
}
retval = context->put_listent(context, entry->flags,
context->put_listent(context, entry->flags,
name, namelen, valuelen);
if (retval)
break;
if (context->seen_enough)
break;
cursor->offset++;
}
trace_xfs_attr_list_leaf_end(context);
return retval;
return;
}
/*
@ -467,9 +452,9 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
if (error)
return error;
error = xfs_attr3_leaf_list_int(bp, context);
xfs_attr3_leaf_list_int(bp, context);
xfs_trans_brelse(NULL, bp);
return error;
return 0;
}
int
@ -513,7 +498,7 @@ xfs_attr_list_int(
* Take care to check values and protect against them changing later,
* we may be reading them directly out of a user buffer.
*/
STATIC int
STATIC void
xfs_attr_put_listent(
xfs_attr_list_context_t *context,
int flags,
@ -536,10 +521,10 @@ xfs_attr_put_listent(
*/
if (((context->flags & ATTR_SECURE) == 0) !=
((flags & XFS_ATTR_SECURE) == 0))
return 0;
return;
if (((context->flags & ATTR_ROOT) == 0) !=
((flags & XFS_ATTR_ROOT) == 0))
return 0;
return;
arraytop = sizeof(*alist) +
context->count * sizeof(alist->al_offset[0]);
@ -548,7 +533,7 @@ xfs_attr_put_listent(
trace_xfs_attr_list_full(context);
alist->al_more = 1;
context->seen_enough = 1;
return 0;
return;
}
aep = (attrlist_ent_t *)&context->alist[context->firstu];
@ -558,7 +543,7 @@ xfs_attr_put_listent(
alist->al_offset[context->count++] = context->firstu;
alist->al_count = context->count;
trace_xfs_attr_list_add(context);
return 0;
return;
}
/*

View File

@ -359,9 +359,7 @@ xfs_bmap_count_blocks(
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
xfs_bmap_count_leaves(ifp, 0,
ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
count);
xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
return 0;
}
@ -426,7 +424,7 @@ xfs_getbmapx_fix_eof_hole(
ifp = XFS_IFORK_PTR(ip, whichfork);
if (!moretocome &&
xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
(lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
(lastx == xfs_iext_count(ifp) - 1))
out->bmv_oflags |= BMV_OF_LAST;
}
@ -1792,6 +1790,7 @@ xfs_swap_extent_forks(
struct xfs_ifork tempifp, *ifp, *tifp;
int aforkblks = 0;
int taforkblks = 0;
xfs_extnum_t nextents;
__uint64_t tmp;
int error;
@ -1877,14 +1876,13 @@ xfs_swap_extent_forks(
switch (ip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
/* If the extents fit in the inode, fix the
* pointer. Otherwise it's already NULL or
* pointing to the extent.
/*
* If the extents fit in the inode, fix the pointer. Otherwise
* it's already NULL or pointing to the extent.
*/
if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
ifp->if_u1.if_extents =
ifp->if_u2.if_inline_ext;
}
nextents = xfs_iext_count(&ip->i_df);
if (nextents <= XFS_INLINE_EXTS)
ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
(*src_log_flags) |= XFS_ILOG_DEXT;
break;
case XFS_DINODE_FMT_BTREE:
@ -1896,14 +1894,13 @@ xfs_swap_extent_forks(
switch (tip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
/* If the extents fit in the inode, fix the
* pointer. Otherwise it's already NULL or
* pointing to the extent.
/*
* If the extents fit in the inode, fix the pointer. Otherwise
* it's already NULL or pointing to the extent.
*/
if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
tifp->if_u1.if_extents =
tifp->if_u2.if_inline_ext;
}
nextents = xfs_iext_count(&tip->i_df);
if (nextents <= XFS_INLINE_EXTS)
tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext;
(*target_log_flags) |= XFS_ILOG_DEXT;
break;
case XFS_DINODE_FMT_BTREE:
@ -1938,8 +1935,8 @@ xfs_swap_extents(
* page cache safely. Once we have done this we can take the ilocks and
* do the rest of the checks.
*/
lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
lock_flags = XFS_MMAPLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
/* Verify that both files have the same format */
@ -2079,15 +2076,13 @@ xfs_swap_extents(
trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1);
out_unlock:
xfs_iunlock(ip, lock_flags);
xfs_iunlock(tip, lock_flags);
unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
return error;
out_trans_cancel:
xfs_trans_cancel(tp);
out_unlock:
xfs_iunlock(ip, lock_flags);
xfs_iunlock(tip, lock_flags);
return error;
goto out_unlock;
}

View File

@ -219,7 +219,6 @@ _xfs_buf_alloc(
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_lru);
INIT_LIST_HEAD(&bp->b_list);
RB_CLEAR_NODE(&bp->b_rbnode);
sema_init(&bp->b_sema, 0); /* held, no waiters */
spin_lock_init(&bp->b_lock);
XB_SET_OWNER(bp);
@ -473,6 +472,62 @@ _xfs_buf_map_pages(
/*
* Finding and Reading Buffers
*/
static int
_xfs_buf_obj_cmp(
struct rhashtable_compare_arg *arg,
const void *obj)
{
const struct xfs_buf_map *map = arg->key;
const struct xfs_buf *bp = obj;
/*
* The key hashing in the lookup path depends on the key being the
* first element of the compare_arg, make sure to assert this.
*/
BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
if (bp->b_bn != map->bm_bn)
return 1;
if (unlikely(bp->b_length != map->bm_len)) {
/*
* found a block number match. If the range doesn't
* match, the only way this is allowed is if the buffer
* in the cache is stale and the transaction that made
* it stale has not yet committed. i.e. we are
* reallocating a busy extent. Skip this buffer and
* continue searching for an exact match.
*/
ASSERT(bp->b_flags & XBF_STALE);
return 1;
}
return 0;
}
static const struct rhashtable_params xfs_buf_hash_params = {
.min_size = 32, /* empty AGs have minimal footprint */
.nelem_hint = 16,
.key_len = sizeof(xfs_daddr_t),
.key_offset = offsetof(struct xfs_buf, b_bn),
.head_offset = offsetof(struct xfs_buf, b_rhash_head),
.automatic_shrinking = true,
.obj_cmpfn = _xfs_buf_obj_cmp,
};
int
xfs_buf_hash_init(
struct xfs_perag *pag)
{
spin_lock_init(&pag->pag_buf_lock);
return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
}
void
xfs_buf_hash_destroy(
struct xfs_perag *pag)
{
rhashtable_destroy(&pag->pag_buf_hash);
}
/*
* Look up, and creates if absent, a lockable buffer for
@ -488,27 +543,24 @@ _xfs_buf_find(
xfs_buf_t *new_bp)
{
struct xfs_perag *pag;
struct rb_node **rbp;
struct rb_node *parent;
xfs_buf_t *bp;
xfs_daddr_t blkno = map[0].bm_bn;
struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
xfs_daddr_t eofs;
int numblks = 0;
int i;
for (i = 0; i < nmaps; i++)
numblks += map[i].bm_len;
cmap.bm_len += map[i].bm_len;
/* Check for IOs smaller than the sector size / not sector aligned */
ASSERT(!(BBTOB(numblks) < btp->bt_meta_sectorsize));
ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask));
ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
/*
* Corrupted block numbers can get through to here, unfortunately, so we
* have to check that the buffer falls within the filesystem bounds.
*/
eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
if (blkno < 0 || blkno >= eofs) {
if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
/*
* XXX (dgc): we should really be returning -EFSCORRUPTED here,
* but none of the higher level infrastructure supports
@ -516,53 +568,29 @@ _xfs_buf_find(
*/
xfs_alert(btp->bt_mount,
"%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
__func__, blkno, eofs);
__func__, cmap.bm_bn, eofs);
WARN_ON(1);
return NULL;
}
/* get tree root */
pag = xfs_perag_get(btp->bt_mount,
xfs_daddr_to_agno(btp->bt_mount, blkno));
xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
/* walk tree */
spin_lock(&pag->pag_buf_lock);
rbp = &pag->pag_buf_tree.rb_node;
parent = NULL;
bp = NULL;
while (*rbp) {
parent = *rbp;
bp = rb_entry(parent, struct xfs_buf, b_rbnode);
if (blkno < bp->b_bn)
rbp = &(*rbp)->rb_left;
else if (blkno > bp->b_bn)
rbp = &(*rbp)->rb_right;
else {
/*
* found a block number match. If the range doesn't
* match, the only way this is allowed is if the buffer
* in the cache is stale and the transaction that made
* it stale has not yet committed. i.e. we are
* reallocating a busy extent. Skip this buffer and
* continue searching to the right for an exact match.
*/
if (bp->b_length != numblks) {
ASSERT(bp->b_flags & XBF_STALE);
rbp = &(*rbp)->rb_right;
continue;
}
atomic_inc(&bp->b_hold);
goto found;
}
bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
xfs_buf_hash_params);
if (bp) {
atomic_inc(&bp->b_hold);
goto found;
}
/* No match found */
if (new_bp) {
rb_link_node(&new_bp->b_rbnode, parent, rbp);
rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
/* the buffer keeps the perag reference until it is freed */
new_bp->b_pag = pag;
rhashtable_insert_fast(&pag->pag_buf_hash,
&new_bp->b_rhash_head,
xfs_buf_hash_params);
spin_unlock(&pag->pag_buf_lock);
} else {
XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
@ -930,7 +958,6 @@ xfs_buf_rele(
if (!pag) {
ASSERT(list_empty(&bp->b_lru));
ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold)) {
xfs_buf_ioacct_dec(bp);
xfs_buf_free(bp);
@ -938,8 +965,6 @@ xfs_buf_rele(
return;
}
ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
ASSERT(atomic_read(&bp->b_hold) > 0);
release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
@ -983,7 +1008,8 @@ xfs_buf_rele(
}
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
xfs_buf_hash_params);
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
freebuf = true;
@ -1711,8 +1737,7 @@ xfs_free_buftarg(
percpu_counter_destroy(&btp->bt_io_count);
list_lru_destroy(&btp->bt_lru);
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_blkdev_issue_flush(btp);
xfs_blkdev_issue_flush(btp);
kmem_free(btp);
}

View File

@ -71,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_READ, "READ" }, \
{ XBF_WRITE, "WRITE" }, \
{ XBF_READ_AHEAD, "READ_AHEAD" }, \
{ XBF_NO_IOACCT, "NO_IOACCT" }, \
{ XBF_ASYNC, "ASYNC" }, \
{ XBF_DONE, "DONE" }, \
{ XBF_STALE, "STALE" }, \
@ -150,7 +151,7 @@ typedef struct xfs_buf {
* which is the only bit that is touched if we hit the semaphore
* fast-path on locking.
*/
struct rb_node b_rbnode; /* rbtree node */
struct rhash_head b_rhash_head; /* pag buffer hash node */
xfs_daddr_t b_bn; /* block number of buffer */
int b_length; /* size of buffer in BBs */
atomic_t b_hold; /* reference count */

View File

@ -677,7 +677,6 @@ xfs_readdir(
args.dp = dp;
args.geo = dp->i_mount->m_dir_geo;
xfs_ilock(dp, XFS_IOLOCK_SHARED);
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_getdents(&args, ctx);
else if ((rval = xfs_dir2_isblock(&args, &v)))
@ -686,7 +685,6 @@ xfs_readdir(
rval = xfs_dir2_block_getdents(&args, ctx);
else
rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
xfs_iunlock(dp, XFS_IOLOCK_SHARED);
return rval;
}

View File

@ -47,40 +47,6 @@
static const struct vm_operations_struct xfs_file_vm_ops;
/*
* Locking primitives for read and write IO paths to ensure we consistently use
* and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
*/
static inline void
xfs_rw_ilock(
struct xfs_inode *ip,
int type)
{
if (type & XFS_IOLOCK_EXCL)
inode_lock(VFS_I(ip));
xfs_ilock(ip, type);
}
static inline void
xfs_rw_iunlock(
struct xfs_inode *ip,
int type)
{
xfs_iunlock(ip, type);
if (type & XFS_IOLOCK_EXCL)
inode_unlock(VFS_I(ip));
}
static inline void
xfs_rw_ilock_demote(
struct xfs_inode *ip,
int type)
{
xfs_ilock_demote(ip, type);
if (type & XFS_IOLOCK_EXCL)
inode_unlock(VFS_I(ip));
}
/*
* Clear the specified ranges to zero through either the pagecache or DAX.
* Holes and unwritten extents will be left as-is as they already are zeroed.
@ -183,19 +149,16 @@ xfs_file_fsync(
xfs_iflags_clear(ip, XFS_ITRUNCATED);
if (mp->m_flags & XFS_MOUNT_BARRIER) {
/*
* If we have an RT and/or log subvolume we need to make sure
* to flush the write cache the device used for file data
* first. This is to ensure newly written file data make
* it to disk before logging the new inode size in case of
* an extending write.
*/
if (XFS_IS_REALTIME_INODE(ip))
xfs_blkdev_issue_flush(mp->m_rtdev_targp);
else if (mp->m_logdev_targp != mp->m_ddev_targp)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
}
/*
* If we have an RT and/or log subvolume we need to make sure to flush
* the write cache the device used for file data first. This is to
* ensure newly written file data make it to disk before logging the new
* inode size in case of an extending write.
*/
if (XFS_IS_REALTIME_INODE(ip))
xfs_blkdev_issue_flush(mp->m_rtdev_targp);
else if (mp->m_logdev_targp != mp->m_ddev_targp)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
/*
* All metadata updates are logged, which means that we just have to
@ -230,10 +193,8 @@ xfs_file_fsync(
* an already allocated file and thus do not have any metadata to
* commit.
*/
if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
mp->m_logdev_targp == mp->m_ddev_targp &&
!XFS_IS_REALTIME_INODE(ip) &&
!log_flushed)
if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
mp->m_logdev_targp == mp->m_ddev_targp)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
return error;
@ -244,62 +205,21 @@ xfs_file_dio_aio_read(
struct kiocb *iocb,
struct iov_iter *to)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
loff_t isize = i_size_read(inode);
struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
size_t count = iov_iter_count(to);
loff_t end = iocb->ki_pos + count - 1;
struct iov_iter data;
struct xfs_buftarg *target;
ssize_t ret = 0;
ssize_t ret;
trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
if (!count)
return 0; /* skip atime */
if (XFS_IS_REALTIME_INODE(ip))
target = ip->i_mount->m_rtdev_targp;
else
target = ip->i_mount->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
if (iocb->ki_pos == isize)
return 0;
return -EINVAL;
}
file_accessed(iocb->ki_filp);
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
if (mapping->nrpages) {
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
if (ret)
goto out_unlock;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
/*
* Invalidate whole pages. This can return an error if we fail
* to invalidate a page, but this should never happen on XFS.
* Warn if it does fail.
*/
ret = invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
data = *to;
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
xfs_get_blocks_direct, NULL, NULL, 0);
if (ret >= 0) {
iocb->ki_pos += ret;
iov_iter_advance(to, ret);
}
out_unlock:
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
@ -317,9 +237,9 @@ xfs_file_dax_read(
if (!count)
return 0; /* skip atime */
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
file_accessed(iocb->ki_filp);
return ret;
@ -335,9 +255,9 @@ xfs_file_buffered_aio_read(
trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
ret = generic_file_read_iter(iocb, to);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
@ -418,15 +338,18 @@ xfs_file_aio_write_checks(
if (error <= 0)
return error;
error = xfs_break_layouts(inode, iolock, true);
error = xfs_break_layouts(inode, iolock);
if (error)
return error;
/* For changing security info in file_remove_privs() we need i_mutex */
/*
* For changing security info in file_remove_privs() we need i_rwsem
* exclusively.
*/
if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
xfs_rw_iunlock(ip, *iolock);
xfs_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
xfs_ilock(ip, *iolock);
goto restart;
}
/*
@ -451,9 +374,9 @@ xfs_file_aio_write_checks(
spin_unlock(&ip->i_flags_lock);
if (!drained_dio) {
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
xfs_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
xfs_ilock(ip, *iolock);
iov_iter_reexpand(from, count);
}
/*
@ -496,6 +419,58 @@ xfs_file_aio_write_checks(
return 0;
}
static int
xfs_dio_write_end_io(
struct kiocb *iocb,
ssize_t size,
unsigned flags)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
bool update_size = false;
int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
if (size <= 0)
return size;
/*
* We need to update the in-core inode size here so that we don't end up
* with the on-disk inode size being outside the in-core inode size. We
* have no other method of updating EOF for AIO, so always do it here
* if necessary.
*
* We need to lock the test/set EOF update as we can be racing with
* other IO completions here to update the EOF. Failing to serialise
* here can result in EOF moving backwards and Bad Things Happen when
* that occurs.
*/
spin_lock(&ip->i_flags_lock);
if (offset + size > i_size_read(inode)) {
i_size_write(inode, offset + size);
update_size = true;
}
spin_unlock(&ip->i_flags_lock);
if (flags & IOMAP_DIO_COW) {
error = xfs_reflink_end_cow(ip, offset, size);
if (error)
return error;
}
if (flags & IOMAP_DIO_UNWRITTEN)
error = xfs_iomap_write_unwritten(ip, offset, size);
else if (update_size)
error = xfs_setfilesize(ip, offset, size);
return error;
}
/*
* xfs_file_dio_aio_write - handle direct IO writes
*
@ -535,9 +510,7 @@ xfs_file_dio_aio_write(
int unaligned_io = 0;
int iolock;
size_t count = iov_iter_count(from);
loff_t end;
struct iov_iter data;
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
@ -559,29 +532,12 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
xfs_rw_ilock(ip, iolock);
xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
count = iov_iter_count(from);
end = iocb->ki_pos + count - 1;
if (mapping->nrpages) {
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
if (ret)
goto out;
/*
* Invalidate whole pages. This can return an error if we fail
* to invalidate a page, but this should never happen on XFS.
* Warn if it does fail.
*/
ret = invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
/*
* If we are doing unaligned IO, wait for all other IO to drain,
@ -591,7 +547,7 @@ xfs_file_dio_aio_write(
if (unaligned_io)
inode_dio_wait(inode);
else if (iolock == XFS_IOLOCK_EXCL) {
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
@ -604,24 +560,9 @@ xfs_file_dio_aio_write(
goto out;
}
data = *from;
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
xfs_get_blocks_direct, xfs_end_io_direct_write,
NULL, DIO_ASYNC_EXTEND);
/* see generic_file_direct_write() for why this is necessary */
if (mapping->nrpages) {
invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT,
end >> PAGE_SHIFT);
}
if (ret > 0) {
iocb->ki_pos += ret;
iov_iter_advance(from, ret);
}
ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
xfs_rw_iunlock(ip, iolock);
xfs_iunlock(ip, iolock);
/*
* No fallback to buffered IO on errors for XFS, direct IO will either
@ -643,7 +584,7 @@ xfs_file_dax_write(
size_t count;
loff_t pos;
xfs_rw_ilock(ip, iolock);
xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@ -652,15 +593,13 @@ xfs_file_dax_write(
count = iov_iter_count(from);
trace_xfs_file_dax_write(ip, count, pos);
ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
i_size_write(inode, iocb->ki_pos);
error = xfs_setfilesize(ip, pos, ret);
}
out:
xfs_rw_iunlock(ip, iolock);
xfs_iunlock(ip, iolock);
return error ? error : ret;
}
@ -677,7 +616,7 @@ xfs_file_buffered_aio_write(
int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, iolock);
xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
@ -721,7 +660,7 @@ xfs_file_buffered_aio_write(
current->backing_dev_info = NULL;
out:
xfs_rw_iunlock(ip, iolock);
xfs_iunlock(ip, iolock);
return ret;
}
@ -797,7 +736,7 @@ xfs_file_fallocate(
return -EOPNOTSUPP;
xfs_ilock(ip, iolock);
error = xfs_break_layouts(inode, &iolock, false);
error = xfs_break_layouts(inode, &iolock);
if (error)
goto out_unlock;
@ -939,7 +878,6 @@ xfs_file_clone_range(
len, false);
}
#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
STATIC ssize_t
xfs_file_dedupe_range(
struct file *src_file,
@ -950,14 +888,6 @@ xfs_file_dedupe_range(
{
int error;
/*
* Limit the total length we will dedupe for each operation.
* This is intended to bound the total time spent in this
* ioctl to something sane.
*/
if (len > XFS_MAX_DEDUPE_LEN)
len = XFS_MAX_DEDUPE_LEN;
error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
@ -1501,15 +1431,9 @@ xfs_filemap_fault(
return xfs_filemap_page_mkwrite(vma, vmf);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
/*
* we do not want to trigger unwritten extent conversion on read
* faults - that is unnecessary overhead and would also require
* changes to xfs_get_blocks_direct() to map unwritten extent
* ioend for conversion on read-only mappings.
*/
if (IS_DAX(inode))
ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
} else
else
ret = filemap_fault(vma, vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);

View File

@ -70,8 +70,6 @@ xfs_inode_alloc(
ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
/* initialise the xfs inode */
ip->i_ino = ino;
ip->i_mount = mp;
@ -123,7 +121,6 @@ __xfs_inode_free(
{
/* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!xfs_isiflocked(ip));
XFS_STATS_DEC(ip->i_mount, vn_active);
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
@ -133,6 +130,8 @@ void
xfs_inode_free(
struct xfs_inode *ip)
{
ASSERT(!xfs_isiflocked(ip));
/*
* Because we use RCU freeing we need to ensure the inode always
* appears to be reclaimed with an invalid inode number when in the
@ -393,8 +392,8 @@ xfs_iget_cache_hit(
xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
inode->i_state = I_NEW;
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
ASSERT(!rwsem_is_locked(&inode->i_rwsem));
init_rwsem(&inode->i_rwsem);
spin_unlock(&ip->i_flags_lock);
spin_unlock(&pag->pag_ici_lock);
@ -981,6 +980,7 @@ xfs_reclaim_inode(
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_iunpin_wait(ip);
/* xfs_iflush_abort() drops the flush lock */
xfs_iflush_abort(ip, false);
goto reclaim;
}
@ -989,10 +989,10 @@ xfs_reclaim_inode(
goto out_ifunlock;
xfs_iunpin_wait(ip);
}
if (xfs_iflags_test(ip, XFS_ISTALE))
goto reclaim;
if (xfs_inode_clean(ip))
if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
goto reclaim;
}
/*
* Never flush out dirty data during non-blocking reclaim, as it would
@ -1030,25 +1030,24 @@ xfs_reclaim_inode(
xfs_buf_relse(bp);
}
xfs_iflock(ip);
reclaim:
ASSERT(!xfs_isiflocked(ip));
/*
* Because we use RCU freeing we need to ensure the inode always appears
* to be reclaimed with an invalid inode number when in the free state.
* We do this as early as possible under the ILOCK and flush lock so
* that xfs_iflush_cluster() can be guaranteed to detect races with us
* here. By doing this, we guarantee that once xfs_iflush_cluster has
* locked both the XFS_ILOCK and the flush lock that it will see either
* a valid, flushable inode that will serialise correctly against the
* locks below, or it will see a clean (and invalid) inode that it can
* skip.
* We do this as early as possible under the ILOCK so that
* xfs_iflush_cluster() can be guaranteed to detect races with us here.
* By doing this, we guarantee that once xfs_iflush_cluster has locked
* XFS_ILOCK that it will see either a valid, flushable inode that will
* serialise correctly, or it will see a clean (and invalid) inode that
* it can skip.
*/
spin_lock(&ip->i_flags_lock);
ip->i_flags = XFS_IRECLAIM;
ip->i_ino = 0;
spin_unlock(&ip->i_flags_lock);
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
@ -1580,10 +1579,15 @@ xfs_inode_free_cowblocks(
struct xfs_eofblocks *eofb = args;
bool need_iolock = true;
int match;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
if (!xfs_reflink_has_real_cow_blocks(ip)) {
/*
* Just clear the tag if we have an empty cow fork or none at all. It's
* possible the inode was fully unshared since it was originally tagged.
*/
if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
trace_xfs_inode_free_cowblocks_invalid(ip);
xfs_inode_clear_cowblocks_tag(ip);
return 0;

View File

@ -133,7 +133,7 @@ xfs_icreate_item_committing(
/*
* This is the ops vector shared by all buf log items.
*/
static struct xfs_item_ops xfs_icreate_item_ops = {
static const struct xfs_item_ops xfs_icreate_item_ops = {
.iop_size = xfs_icreate_item_size,
.iop_format = xfs_icreate_item_format,
.iop_pin = xfs_icreate_item_pin,

View File

@ -142,31 +142,31 @@ xfs_ilock_attr_map_shared(
}
/*
* The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and
* the i_lock. This routine allows various combinations of the locks to be
* obtained.
* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
* multi-reader locks: i_mmap_lock and the i_lock. This routine allows
* various combinations of the locks to be obtained.
*
* The 3 locks should always be ordered so that the IO lock is obtained first,
* the mmap lock second and the ilock last in order to prevent deadlock.
*
* Basic locking order:
*
* i_iolock -> i_mmap_lock -> page_lock -> i_ilock
* i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
*
* mmap_sem locking order:
*
* i_iolock -> page lock -> mmap_sem
* i_rwsem -> page lock -> mmap_sem
* mmap_sem -> i_mmap_lock -> page_lock
*
* The difference in mmap_sem locking order mean that we cannot hold the
* i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
* fault in pages during copy in/out (for buffered IO) or require the mmap_sem
* in get_user_pages() to map the user pages into the kernel address space for
* direct IO. Similarly the i_iolock cannot be taken inside a page fault because
* direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
* page faults already hold the mmap_sem.
*
* Hence to serialise fully against both syscall and mmap based IO, we need to
* take both the i_iolock and the i_mmap_lock. These locks should *only* be both
* take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
* taken in places where we need to invalidate the page cache in a race
* free manner (e.g. truncate, hole punch and other extent manipulation
* functions).
@ -191,10 +191,13 @@ xfs_ilock(
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL)
mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
else if (lock_flags & XFS_IOLOCK_SHARED)
mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
if (lock_flags & XFS_IOLOCK_EXCL) {
down_write_nested(&VFS_I(ip)->i_rwsem,
XFS_IOLOCK_DEP(lock_flags));
} else if (lock_flags & XFS_IOLOCK_SHARED) {
down_read_nested(&VFS_I(ip)->i_rwsem,
XFS_IOLOCK_DEP(lock_flags));
}
if (lock_flags & XFS_MMAPLOCK_EXCL)
mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
@ -240,10 +243,10 @@ xfs_ilock_nowait(
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL) {
if (!mrtryupdate(&ip->i_iolock))
if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
goto out;
} else if (lock_flags & XFS_IOLOCK_SHARED) {
if (!mrtryaccess(&ip->i_iolock))
if (!down_read_trylock(&VFS_I(ip)->i_rwsem))
goto out;
}
@ -271,9 +274,9 @@ xfs_ilock_nowait(
mrunlock_shared(&ip->i_mmaplock);
out_undo_iolock:
if (lock_flags & XFS_IOLOCK_EXCL)
mrunlock_excl(&ip->i_iolock);
up_write(&VFS_I(ip)->i_rwsem);
else if (lock_flags & XFS_IOLOCK_SHARED)
mrunlock_shared(&ip->i_iolock);
up_read(&VFS_I(ip)->i_rwsem);
out:
return 0;
}
@ -310,9 +313,9 @@ xfs_iunlock(
ASSERT(lock_flags != 0);
if (lock_flags & XFS_IOLOCK_EXCL)
mrunlock_excl(&ip->i_iolock);
up_write(&VFS_I(ip)->i_rwsem);
else if (lock_flags & XFS_IOLOCK_SHARED)
mrunlock_shared(&ip->i_iolock);
up_read(&VFS_I(ip)->i_rwsem);
if (lock_flags & XFS_MMAPLOCK_EXCL)
mrunlock_excl(&ip->i_mmaplock);
@ -345,7 +348,7 @@ xfs_ilock_demote(
if (lock_flags & XFS_MMAPLOCK_EXCL)
mrdemote(&ip->i_mmaplock);
if (lock_flags & XFS_IOLOCK_EXCL)
mrdemote(&ip->i_iolock);
downgrade_write(&VFS_I(ip)->i_rwsem);
trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
}
@ -370,8 +373,9 @@ xfs_isilocked(
if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
if (!(lock_flags & XFS_IOLOCK_SHARED))
return !!ip->i_iolock.mr_writer;
return rwsem_is_locked(&ip->i_iolock.mr_lock);
return !debug_locks ||
lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
}
ASSERT(0);
@ -421,11 +425,7 @@ xfs_lock_inumorder(int lock_mode, int subclass)
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
ASSERT(xfs_lockdep_subclass_ok(subclass +
XFS_IOLOCK_PARENT_VAL));
class += subclass << XFS_IOLOCK_SHIFT;
if (lock_mode & XFS_IOLOCK_PARENT)
class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
}
if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
@ -477,8 +477,6 @@ xfs_lock_inodes(
XFS_ILOCK_EXCL));
ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
XFS_ILOCK_SHARED)));
ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
@ -581,10 +579,8 @@ xfs_lock_two_inodes(
int attempts = 0;
xfs_log_item_t *lp;
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
} else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
ASSERT(ip0->i_ino != ip1->i_ino);
@ -715,7 +711,6 @@ xfs_lookup(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
xfs_ilock(dp, XFS_IOLOCK_SHARED);
error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
if (error)
goto out_unlock;
@ -724,14 +719,12 @@ xfs_lookup(
if (error)
goto out_free_name;
xfs_iunlock(dp, XFS_IOLOCK_SHARED);
return 0;
out_free_name:
if (ci_name)
kmem_free(ci_name->name);
out_unlock:
xfs_iunlock(dp, XFS_IOLOCK_SHARED);
*ipp = NULL;
return error;
}
@ -1215,8 +1208,7 @@ xfs_create(
if (error)
goto out_release_inode;
xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
xfs_defer_init(&dfops, &first_block);
@ -1252,7 +1244,7 @@ xfs_create(
* the transaction cancel unlocking dp so don't do it explicitly in the
* error path.
*/
xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
unlock_dp_on_error = false;
error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@ -1325,7 +1317,7 @@ xfs_create(
xfs_qm_dqrele(pdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
@ -1466,11 +1458,10 @@ xfs_link(
if (error)
goto std_return;
xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
/*
* If we are using project inheritance, we only allow hard link
@ -2041,7 +2032,6 @@ xfs_iunlink(
agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
offset = offsetof(xfs_agi_t, agi_unlinked) +
(sizeof(xfs_agino_t) * bucket_index);
xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
return 0;
@ -2133,7 +2123,6 @@ xfs_iunlink_remove(
agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
offset = offsetof(xfs_agi_t, agi_unlinked) +
(sizeof(xfs_agino_t) * bucket_index);
xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
} else {
@ -2579,10 +2568,9 @@ xfs_remove(
goto std_return;
}
xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
@ -2963,12 +2951,6 @@ xfs_rename(
* whether the target directory is the same as the source
* directory, we can lock from 2 to 4 inodes.
*/
if (!new_parent)
xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
else
xfs_lock_two_inodes(src_dp, target_dp,
XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
/*
@ -2976,9 +2958,9 @@ xfs_rename(
* we can rely on either trans_commit or trans_cancel to unlock
* them.
*/
xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
if (new_parent)
xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
if (target_ip)
xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);

View File

@ -56,7 +56,6 @@ typedef struct xfs_inode {
/* Transaction and locking information. */
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
mrlock_t i_mmaplock; /* inode mmap IO lock */
atomic_t i_pincount; /* inode pin count */
spinlock_t i_flags_lock; /* inode i_flags lock */
@ -246,6 +245,11 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
* Synchronize processes attempting to flush the in-core inode back to disk.
*/
static inline int xfs_isiflocked(struct xfs_inode *ip)
{
return xfs_iflags_test(ip, XFS_IFLOCK);
}
extern void __xfs_iflock(struct xfs_inode *ip);
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
@ -261,16 +265,12 @@ static inline void xfs_iflock(struct xfs_inode *ip)
static inline void xfs_ifunlock(struct xfs_inode *ip)
{
ASSERT(xfs_isiflocked(ip));
xfs_iflags_clear(ip, XFS_IFLOCK);
smp_mb();
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
}
static inline int xfs_isiflocked(struct xfs_inode *ip)
{
return xfs_iflags_test(ip, XFS_IFLOCK);
}
/*
* Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
@ -332,7 +332,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
* IOLOCK values
*
* 0-3 subclass value
* 4-7 PARENT subclass values
* 4-7 unused
*
* MMAPLOCK values
*
@ -347,10 +347,8 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
*
*/
#define XFS_IOLOCK_SHIFT 16
#define XFS_IOLOCK_PARENT_VAL 4
#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1)
#define XFS_IOLOCK_MAX_SUBCLASS 3
#define XFS_IOLOCK_DEP_MASK 0x000f0000
#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
#define XFS_MMAPLOCK_SHIFT 20
#define XFS_MMAPLOCK_NUMORDER 0

View File

@ -164,7 +164,7 @@ xfs_inode_item_format_data_fork(
struct xfs_bmbt_rec *p;
ASSERT(ip->i_df.if_u1.if_extents != NULL);
ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
ASSERT(xfs_iext_count(&ip->i_df) > 0);
p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
@ -261,7 +261,7 @@ xfs_inode_item_format_attr_fork(
ip->i_afp->if_bytes > 0) {
struct xfs_bmbt_rec *p;
ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
ASSERT(xfs_iext_count(ip->i_afp) ==
ip->i_d.di_anextents);
ASSERT(ip->i_afp->if_u1.if_extents != NULL);

View File

@ -639,7 +639,7 @@ xfs_ioc_space(
return error;
xfs_ilock(ip, iolock);
error = xfs_break_layouts(inode, &iolock, false);
error = xfs_break_layouts(inode, &iolock);
if (error)
goto out_unlock;
@ -910,16 +910,14 @@ xfs_ioc_fsgetxattr(
if (attr) {
if (ip->i_afp) {
if (ip->i_afp->if_flags & XFS_IFEXTENTS)
fa.fsx_nextents = ip->i_afp->if_bytes /
sizeof(xfs_bmbt_rec_t);
fa.fsx_nextents = xfs_iext_count(ip->i_afp);
else
fa.fsx_nextents = ip->i_d.di_anextents;
} else
fa.fsx_nextents = 0;
} else {
if (ip->i_df.if_flags & XFS_IFEXTENTS)
fa.fsx_nextents = ip->i_df.if_bytes /
sizeof(xfs_bmbt_rec_t);
fa.fsx_nextents = xfs_iext_count(&ip->i_df);
else
fa.fsx_nextents = ip->i_d.di_nextents;
}

View File

@ -395,11 +395,12 @@ xfs_iomap_prealloc_size(
struct xfs_inode *ip,
loff_t offset,
loff_t count,
xfs_extnum_t idx,
struct xfs_bmbt_irec *prev)
xfs_extnum_t idx)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
struct xfs_bmbt_irec prev;
int shift = 0;
int64_t freesp;
xfs_fsblock_t qblocks;
@ -419,8 +420,8 @@ xfs_iomap_prealloc_size(
*/
if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
idx == 0 ||
prev->br_startoff + prev->br_blockcount < offset_fsb)
!xfs_iext_get_extent(ifp, idx - 1, &prev) ||
prev.br_startoff + prev.br_blockcount < offset_fsb)
return mp->m_writeio_blocks;
/*
@ -439,8 +440,8 @@ xfs_iomap_prealloc_size(
* always extends to MAXEXTLEN rather than falling short due to things
* like stripe unit/width alignment of real extents.
*/
if (prev->br_blockcount <= (MAXEXTLEN >> 1))
alloc_blocks = prev->br_blockcount << 1;
if (prev.br_blockcount <= (MAXEXTLEN >> 1))
alloc_blocks = prev.br_blockcount << 1;
else
alloc_blocks = XFS_B_TO_FSB(mp, offset);
if (!alloc_blocks)
@ -535,11 +536,11 @@ xfs_file_iomap_begin_delay(
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t maxbytes_fsb =
XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
xfs_fileoff_t end_fsb, orig_end_fsb;
xfs_fileoff_t end_fsb;
int error = 0, eof = 0;
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec prev;
xfs_extnum_t idx;
xfs_fsblock_t prealloc_blocks = 0;
ASSERT(!XFS_IS_REALTIME_INODE(ip));
ASSERT(!xfs_get_extsz_hint(ip));
@ -563,8 +564,7 @@ xfs_file_iomap_begin_delay(
goto out_unlock;
}
xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
&got, &prev);
eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
if (!eof && got.br_startoff <= offset_fsb) {
if (xfs_is_reflink_inode(ip)) {
bool shared;
@ -595,35 +595,32 @@ xfs_file_iomap_begin_delay(
* the lower level functions are updated.
*/
count = min_t(loff_t, count, 1024 * PAGE_SIZE);
end_fsb = orig_end_fsb =
min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
if (eof) {
xfs_fsblock_t prealloc_blocks;
prealloc_blocks =
xfs_iomap_prealloc_size(ip, offset, count, idx, &prev);
prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx);
if (prealloc_blocks) {
xfs_extlen_t align;
xfs_off_t end_offset;
xfs_fileoff_t p_end_fsb;
end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
prealloc_blocks;
p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
prealloc_blocks;
align = xfs_eof_alignment(ip, 0);
if (align)
end_fsb = roundup_64(end_fsb, align);
p_end_fsb = roundup_64(p_end_fsb, align);
end_fsb = min(end_fsb, maxbytes_fsb);
ASSERT(end_fsb > offset_fsb);
p_end_fsb = min(p_end_fsb, maxbytes_fsb);
ASSERT(p_end_fsb > offset_fsb);
prealloc_blocks = p_end_fsb - end_fsb;
}
}
retry:
error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
end_fsb - offset_fsb, &got,
&prev, &idx, eof);
end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof);
switch (error) {
case 0:
break;
@ -631,8 +628,8 @@ xfs_file_iomap_begin_delay(
case -EDQUOT:
/* retry without any preallocation */
trace_xfs_delalloc_enospc(ip, offset, count);
if (end_fsb != orig_end_fsb) {
end_fsb = orig_end_fsb;
if (prealloc_blocks) {
prealloc_blocks = 0;
goto retry;
}
/*FALLTHRU*/
@ -640,13 +637,6 @@ xfs_file_iomap_begin_delay(
goto out_unlock;
}
/*
* Tag the inode as speculatively preallocated so we can reclaim this
* space on demand, if necessary.
*/
if (end_fsb != orig_end_fsb)
xfs_inode_set_eofblocks_tag(ip);
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@ -960,6 +950,19 @@ static inline bool imap_needs_alloc(struct inode *inode,
(IS_DAX(inode) && ISUNWRITTEN(imap));
}
static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
{
/*
* COW writes will allocate delalloc space, so we need to make sure
* to take the lock exclusively here.
*/
if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
return true;
if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE))
return true;
return false;
}
static int
xfs_file_iomap_begin(
struct inode *inode,
@ -979,18 +982,14 @@ xfs_file_iomap_begin(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
!xfs_get_extsz_hint(ip)) {
if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */
return xfs_file_iomap_begin_delay(inode, offset, length, flags,
iomap);
}
/*
* COW writes will allocate delalloc space, so we need to make sure
* to take the lock exclusively here.
*/
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
if (need_excl_ilock(ip, flags)) {
lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, XFS_ILOCK_EXCL);
} else {
@ -1003,17 +1002,41 @@ xfs_file_iomap_begin(
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
if (xfs_is_reflink_inode(ip) &&
(flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
if (shared) {
xfs_iunlock(ip, lockmode);
goto alloc_done;
}
ASSERT(!isnullstartblock(imap.br_startblock));
}
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
if (error)
goto out_unlock;
if (flags & IOMAP_REPORT) {
if ((flags & IOMAP_REPORT) ||
(xfs_is_reflink_inode(ip) &&
(flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
/* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
&trimmed);
if (error)
goto out_unlock;
/*
* We're here because we're trying to do a directio write to a
* region that isn't aligned to a filesystem block. If the
* extent is shared, fall back to buffered mode to handle the
* RMW.
*/
if (!(flags & IOMAP_REPORT) && shared) {
trace_xfs_reflink_bounce_dio_write(ip, &imap);
error = -EREMCHG;
goto out_unlock;
}
}
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
@ -1048,6 +1071,7 @@ xfs_file_iomap_begin(
if (error)
return error;
alloc_done:
iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
} else {

View File

@ -983,15 +983,13 @@ xfs_vn_setattr(
struct xfs_inode *ip = XFS_I(d_inode(dentry));
uint iolock = XFS_IOLOCK_EXCL;
xfs_ilock(ip, iolock);
error = xfs_break_layouts(d_inode(dentry), &iolock, true);
if (!error) {
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
iolock |= XFS_MMAPLOCK_EXCL;
error = xfs_break_layouts(d_inode(dentry), &iolock);
if (error)
return error;
error = xfs_vn_setattr_size(dentry, iattr);
}
xfs_iunlock(ip, iolock);
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
error = xfs_vn_setattr_size(dentry, iattr);
xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
} else {
error = xfs_vn_setattr_nonsize(dentry, iattr);
}

View File

@ -78,6 +78,7 @@ typedef __u32 xfs_nlink_t;
#include <linux/freezer.h>
#include <linux/list_sort.h>
#include <linux/ratelimit.h>
#include <linux/rhashtable.h>
#include <asm/page.h>
#include <asm/div64.h>

View File

@ -1668,7 +1668,7 @@ xlog_cksum(
__uint32_t crc;
/* first generate the crc for the record header ... */
crc = xfs_start_cksum((char *)rhead,
crc = xfs_start_cksum_update((char *)rhead,
sizeof(struct xlog_rec_header),
offsetof(struct xlog_rec_header, h_crc));
@ -1862,26 +1862,21 @@ xlog_sync(
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
bp->b_flags &= ~XBF_FLUSH;
bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
bp->b_flags |= XBF_FUA;
/*
* Flush the data device before flushing the log to make
* sure all meta data written back from the AIL actually made
* it to disk before stamping the new log tail LSN into the
* log buffer. For an external log we need to issue the
* flush explicitly, and unfortunately synchronously here;
* for an internal log we can simply use the block layer
* state machine for preflushes.
*/
if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
else
bp->b_flags |= XBF_FLUSH;
}
/*
* Flush the data device before flushing the log to make sure all meta
* data written back from the AIL actually made it to disk before
* stamping the new log tail LSN into the log buffer. For an external
* log we need to issue the flush explicitly, and unfortunately
* synchronously here; for an internal log we can simply use the block
* layer state machine for preflushes.
*/
if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
else
bp->b_flags |= XBF_FLUSH;
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
@ -1906,10 +1901,8 @@ xlog_sync(
xfs_buf_associate_memory(bp,
(char *)&iclog->ic_header + count, split);
bp->b_fspriv = iclog;
bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
bp->b_flags |= XBF_FUA;
bp->b_flags &= ~XBF_FLUSH;
bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);

View File

@ -2025,7 +2025,7 @@ xlog_peek_buffer_cancelled(
struct xlog *log,
xfs_daddr_t blkno,
uint len,
ushort flags)
unsigned short flags)
{
struct list_head *bucket;
struct xfs_buf_cancel *bcp;
@ -2065,7 +2065,7 @@ xlog_check_buffer_cancelled(
struct xlog *log,
xfs_daddr_t blkno,
uint len,
ushort flags)
unsigned short flags)
{
struct xfs_buf_cancel *bcp;
@ -5113,19 +5113,21 @@ xlog_recover_process(
struct list_head *buffer_list)
{
int error;
__le32 old_crc = rhead->h_crc;
__le32 crc;
crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
/*
* Nothing else to do if this is a CRC verification pass. Just return
* if this a record with a non-zero crc. Unfortunately, mkfs always
* sets h_crc to 0 so we must consider this valid even on v5 supers.
* sets old_crc to 0 so we must consider this valid even on v5 supers.
* Otherwise, return EFSBADCRC on failure so the callers up the stack
* know precisely what failed.
*/
if (pass == XLOG_RECOVER_CRCPASS) {
if (rhead->h_crc && crc != rhead->h_crc)
if (old_crc && crc != old_crc)
return -EFSBADCRC;
return 0;
}
@ -5136,11 +5138,11 @@ xlog_recover_process(
* zero CRC check prevents warnings from being emitted when upgrading
* the kernel from one that does not add CRCs by default.
*/
if (crc != rhead->h_crc) {
if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
if (crc != old_crc) {
if (old_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
xfs_alert(log->l_mp,
"log record CRC mismatch: found 0x%x, expected 0x%x.",
le32_to_cpu(rhead->h_crc),
le32_to_cpu(old_crc),
le32_to_cpu(crc));
xfs_hex_dump(dp, 32);
}

View File

@ -157,6 +157,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
xfs_buf_hash_destroy(pag);
call_rcu(&pag->rcu_head, __xfs_free_perag);
}
}
@ -212,8 +213,8 @@ xfs_initialize_perag(
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
spin_lock_init(&pag->pag_buf_lock);
pag->pag_buf_tree = RB_ROOT;
if (xfs_buf_hash_init(pag))
goto out_unwind;
if (radix_tree_preload(GFP_NOFS))
goto out_unwind;
@ -239,9 +240,11 @@ xfs_initialize_perag(
return 0;
out_unwind:
xfs_buf_hash_destroy(pag);
kmem_free(pag);
for (; index > first_initialised; index--) {
pag = radix_tree_delete(&mp->m_perag_tree, index);
xfs_buf_hash_destroy(pag);
kmem_free(pag);
}
return error;

View File

@ -393,8 +393,8 @@ typedef struct xfs_perag {
unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
/* buffer cache index */
spinlock_t pag_buf_lock; /* lock for pag_buf_tree */
struct rb_root pag_buf_tree; /* ordered tree of active buffers */
spinlock_t pag_buf_lock; /* lock for pag_buf_hash */
struct rhashtable pag_buf_hash;
/* for rcu-safe freeing */
struct rcu_head rcu_head;
@ -424,6 +424,9 @@ xfs_perag_resv(
}
}
int xfs_buf_hash_init(xfs_perag_t *pag);
void xfs_buf_hash_destroy(xfs_perag_t *pag);
extern void xfs_uuid_table_free(void);
extern int xfs_log_sbcount(xfs_mount_t *);
extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);

View File

@ -32,8 +32,7 @@
int
xfs_break_layouts(
struct inode *inode,
uint *iolock,
bool with_imutex)
uint *iolock)
{
struct xfs_inode *ip = XFS_I(inode);
int error;
@ -42,12 +41,8 @@ xfs_break_layouts(
while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
xfs_iunlock(ip, *iolock);
if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
inode_unlock(inode);
error = break_layout(inode, true);
*iolock = XFS_IOLOCK_EXCL;
if (with_imutex)
inode_lock(inode);
xfs_ilock(ip, *iolock);
}

View File

@ -8,10 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
struct iattr *iattr);
int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex);
int xfs_break_layouts(struct inode *inode, uint *iolock);
#else
static inline int
xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
xfs_break_layouts(struct inode *inode, uint *iolock)
{
return 0;
}

View File

@ -1135,7 +1135,7 @@ xfs_qm_get_rtblks(
return error;
}
rtblks = 0;
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
nextents = xfs_iext_count(ifp);
for (idx = 0; idx < nextents; idx++)
rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
*O_rtblks = (xfs_qcnt_t)rtblks;

View File

@ -243,12 +243,11 @@ xfs_reflink_reserve_cow(
struct xfs_bmbt_irec *imap,
bool *shared)
{
struct xfs_bmbt_irec got, prev;
xfs_fileoff_t end_fsb, orig_end_fsb;
int eof = 0, error = 0;
bool trimmed;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got;
int error = 0;
bool eof = false, trimmed;
xfs_extnum_t idx;
xfs_extlen_t align;
/*
* Search the COW fork extent list first. This serves two purposes:
@ -258,8 +257,9 @@ xfs_reflink_reserve_cow(
* extent list is generally faster than going out to the shared extent
* tree.
*/
xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
&got, &prev);
if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
eof = true;
if (!eof && got.br_startoff <= imap->br_startoff) {
trace_xfs_reflink_cow_found(ip, imap);
xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
@ -285,33 +285,12 @@ xfs_reflink_reserve_cow(
if (error)
return error;
end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
if (align)
end_fsb = roundup_64(end_fsb, align);
retry:
error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
switch (error) {
case 0:
break;
case -ENOSPC:
case -EDQUOT:
/* retry without any preallocation */
imap->br_blockcount, 0, &got, &idx, eof);
if (error == -ENOSPC || error == -EDQUOT)
trace_xfs_reflink_cow_enospc(ip, imap);
if (end_fsb != orig_end_fsb) {
end_fsb = orig_end_fsb;
goto retry;
}
/*FALLTHRU*/
default:
if (error)
return error;
}
if (end_fsb != orig_end_fsb)
xfs_inode_set_cowblocks_tag(ip);
trace_xfs_reflink_cow_alloc(ip, &got);
return 0;
@ -418,87 +397,65 @@ xfs_reflink_allocate_cow_range(
}
/*
* Find the CoW reservation (and whether or not it needs block allocation)
* for a given byte offset of a file.
* Find the CoW reservation for a given byte offset of a file.
*/
bool
xfs_reflink_find_cow_mapping(
struct xfs_inode *ip,
xfs_off_t offset,
struct xfs_bmbt_irec *imap,
bool *need_alloc)
struct xfs_bmbt_irec *imap)
{
struct xfs_bmbt_irec irec;
struct xfs_ifork *ifp;
struct xfs_bmbt_rec_host *gotp;
xfs_fileoff_t bno;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
xfs_fileoff_t offset_fsb;
struct xfs_bmbt_irec got;
xfs_extnum_t idx;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
ASSERT(xfs_is_reflink_inode(ip));
/* Find the extent in the CoW fork. */
ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
bno = XFS_B_TO_FSBT(ip->i_mount, offset);
gotp = xfs_iext_bno_to_ext(ifp, bno, &idx);
if (!gotp)
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
return false;
xfs_bmbt_get_all(gotp, &irec);
if (bno >= irec.br_startoff + irec.br_blockcount ||
bno < irec.br_startoff)
if (got.br_startoff > offset_fsb)
return false;
trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
&irec);
/* If it's still delalloc, we must allocate later. */
*imap = irec;
*need_alloc = !!(isnullstartblock(irec.br_startblock));
&got);
*imap = got;
return true;
}
/*
* Trim an extent to end at the next CoW reservation past offset_fsb.
*/
int
void
xfs_reflink_trim_irec_to_next_cow(
struct xfs_inode *ip,
xfs_fileoff_t offset_fsb,
struct xfs_bmbt_irec *imap)
{
struct xfs_bmbt_irec irec;
struct xfs_ifork *ifp;
struct xfs_bmbt_rec_host *gotp;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got;
xfs_extnum_t idx;
if (!xfs_is_reflink_inode(ip))
return 0;
return;
/* Find the extent in the CoW fork. */
ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx);
if (!gotp)
return 0;
xfs_bmbt_get_all(gotp, &irec);
if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
return;
/* This is the extent before; try sliding up one. */
if (irec.br_startoff < offset_fsb) {
idx++;
if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
return 0;
gotp = xfs_iext_get_ext(ifp, idx);
xfs_bmbt_get_all(gotp, &irec);
if (got.br_startoff < offset_fsb) {
if (!xfs_iext_get_extent(ifp, idx + 1, &got))
return;
}
if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount)
return 0;
if (got.br_startoff >= imap->br_startoff + imap->br_blockcount)
return;
imap->br_blockcount = irec.br_startoff - imap->br_startoff;
imap->br_blockcount = got.br_startoff - imap->br_startoff;
trace_xfs_reflink_trim_irec(ip, imap);
return 0;
}
/*
@ -512,18 +469,15 @@ xfs_reflink_cancel_cow_blocks(
xfs_fileoff_t end_fsb)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, prev, del;
struct xfs_bmbt_irec got, del;
xfs_extnum_t idx;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
int error = 0, eof = 0;
int error = 0;
if (!xfs_is_reflink_inode(ip))
return 0;
xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
&got, &prev);
if (eof)
if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
return 0;
while (got.br_startoff < end_fsb) {
@ -566,9 +520,8 @@ xfs_reflink_cancel_cow_blocks(
xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
}
if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
if (!xfs_iext_get_extent(ifp, ++idx, &got))
break;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
/* clear tag if cow fork is emptied */
@ -638,13 +591,13 @@ xfs_reflink_end_cow(
xfs_off_t count)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, prev, del;
struct xfs_bmbt_irec got, del;
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t end_fsb;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
int error, eof = 0;
int error;
unsigned int resblks;
xfs_filblks_t rlen;
xfs_extnum_t idx;
@ -668,13 +621,11 @@ xfs_reflink_end_cow(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
&got, &prev);
/* If there is a hole at end_fsb - 1 go to the previous extent */
if (eof || got.br_startoff > end_fsb) {
if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
got.br_startoff > end_fsb) {
ASSERT(idx > 0);
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
xfs_iext_get_extent(ifp, --idx, &got);
}
/* Walk backwards until we're out of the I/O range... */
@ -722,11 +673,9 @@ xfs_reflink_end_cow(
error = xfs_defer_finish(&tp, &dfops, ip);
if (error)
goto out_defer;
next_extent:
if (idx < 0)
if (!xfs_iext_get_extent(ifp, idx, &got))
break;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
error = xfs_trans_commit(tp);
@ -1302,13 +1251,11 @@ xfs_reflink_remap_range(
return -EIO;
/* Lock both files against IO */
if (same_inode) {
xfs_ilock(src, XFS_IOLOCK_EXCL);
lock_two_nondirectories(inode_in, inode_out);
if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
} else {
xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL);
else
xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
}
/* Don't touch certain kinds of inodes */
ret = -EPERM;
@ -1345,8 +1292,14 @@ xfs_reflink_remap_range(
goto out_unlock;
}
if (len == 0)
/* Zero length dedupe exits immediately; reflink goes to EOF. */
if (len == 0) {
if (is_dedupe) {
ret = 0;
goto out_unlock;
}
len = isize - pos_in;
}
/* Ensure offsets don't wrap and the input is inside i_size */
if (pos_in + len < pos_in || pos_out + len < pos_out ||
@ -1447,11 +1400,9 @@ xfs_reflink_remap_range(
out_unlock:
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
xfs_iunlock(src, XFS_IOLOCK_EXCL);
if (src->i_ino != dest->i_ino) {
if (!same_inode)
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
xfs_iunlock(dest, XFS_IOLOCK_EXCL);
}
unlock_two_nondirectories(inode_in, inode_out);
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
return ret;
@ -1697,37 +1648,3 @@ xfs_reflink_unshare(
trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
return error;
}
/*
* Does this inode have any real CoW reservations?
*/
bool
xfs_reflink_has_real_cow_blocks(
struct xfs_inode *ip)
{
struct xfs_bmbt_irec irec;
struct xfs_ifork *ifp;
struct xfs_bmbt_rec_host *gotp;
xfs_extnum_t idx;
if (!xfs_is_reflink_inode(ip))
return false;
/* Go find the old extent in the CoW fork. */
ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
gotp = xfs_iext_bno_to_ext(ifp, 0, &idx);
while (gotp) {
xfs_bmbt_get_all(gotp, &irec);
if (!isnullstartblock(irec.br_startblock))
return true;
/* Roll on... */
idx++;
if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
break;
gotp = xfs_iext_get_ext(ifp, idx);
}
return false;
}

View File

@ -31,8 +31,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
struct xfs_bmbt_irec *imap, bool *need_alloc);
extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
struct xfs_bmbt_irec *imap);
extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap);
extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
@ -50,6 +50,4 @@ extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip);
#endif /* __XFS_REFLINK_H */

View File

@ -80,9 +80,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
}
/* extra precision counters */
for_each_possible_cpu(i) {
xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes;
xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes;
xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes;
xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes;
xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes;
xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
}
len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
@ -106,9 +106,9 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
for_each_possible_cpu(c) {
preempt_disable();
/* save vn_active, it's a universal truth! */
vn_active = per_cpu_ptr(stats, c)->vn_active;
vn_active = per_cpu_ptr(stats, c)->s.vn_active;
memset(per_cpu_ptr(stats, c), 0, sizeof(*stats));
per_cpu_ptr(stats, c)->vn_active = vn_active;
per_cpu_ptr(stats, c)->s.vn_active = vn_active;
preempt_enable();
}
}

View File

@ -21,10 +21,38 @@
#include <linux/percpu.h>
/*
* The btree stats arrays have fixed offsets for the different stats. We
* store the base index in the btree cursor via XFS_STATS_CALC_INDEX() and
* that allows us to use fixed offsets into the stats array for each btree
* stat. These index offsets are defined in the order they will be emitted
* in the stats files, so it is possible to add new btree stat types by
* appending to the enum list below.
*/
enum {
__XBTS_lookup = 0,
__XBTS_compare = 1,
__XBTS_insrec = 2,
__XBTS_delrec = 3,
__XBTS_newroot = 4,
__XBTS_killroot = 5,
__XBTS_increment = 6,
__XBTS_decrement = 7,
__XBTS_lshift = 8,
__XBTS_rshift = 9,
__XBTS_split = 10,
__XBTS_join = 11,
__XBTS_alloc = 12,
__XBTS_free = 13,
__XBTS_moves = 14,
__XBTS_MAX = 15,
};
/*
* XFS global statistics
*/
struct xfsstats {
struct __xfsstats {
# define XFSSTAT_END_EXTENT_ALLOC 4
__uint32_t xs_allocx;
__uint32_t xs_allocb;
@ -117,118 +145,20 @@ struct xfsstats {
__uint32_t xb_page_found;
__uint32_t xb_get_read;
/* Version 2 btree counters */
#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15)
__uint32_t xs_abtb_2_lookup;
__uint32_t xs_abtb_2_compare;
__uint32_t xs_abtb_2_insrec;
__uint32_t xs_abtb_2_delrec;
__uint32_t xs_abtb_2_newroot;
__uint32_t xs_abtb_2_killroot;
__uint32_t xs_abtb_2_increment;
__uint32_t xs_abtb_2_decrement;
__uint32_t xs_abtb_2_lshift;
__uint32_t xs_abtb_2_rshift;
__uint32_t xs_abtb_2_split;
__uint32_t xs_abtb_2_join;
__uint32_t xs_abtb_2_alloc;
__uint32_t xs_abtb_2_free;
__uint32_t xs_abtb_2_moves;
#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15)
__uint32_t xs_abtc_2_lookup;
__uint32_t xs_abtc_2_compare;
__uint32_t xs_abtc_2_insrec;
__uint32_t xs_abtc_2_delrec;
__uint32_t xs_abtc_2_newroot;
__uint32_t xs_abtc_2_killroot;
__uint32_t xs_abtc_2_increment;
__uint32_t xs_abtc_2_decrement;
__uint32_t xs_abtc_2_lshift;
__uint32_t xs_abtc_2_rshift;
__uint32_t xs_abtc_2_split;
__uint32_t xs_abtc_2_join;
__uint32_t xs_abtc_2_alloc;
__uint32_t xs_abtc_2_free;
__uint32_t xs_abtc_2_moves;
#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15)
__uint32_t xs_bmbt_2_lookup;
__uint32_t xs_bmbt_2_compare;
__uint32_t xs_bmbt_2_insrec;
__uint32_t xs_bmbt_2_delrec;
__uint32_t xs_bmbt_2_newroot;
__uint32_t xs_bmbt_2_killroot;
__uint32_t xs_bmbt_2_increment;
__uint32_t xs_bmbt_2_decrement;
__uint32_t xs_bmbt_2_lshift;
__uint32_t xs_bmbt_2_rshift;
__uint32_t xs_bmbt_2_split;
__uint32_t xs_bmbt_2_join;
__uint32_t xs_bmbt_2_alloc;
__uint32_t xs_bmbt_2_free;
__uint32_t xs_bmbt_2_moves;
#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15)
__uint32_t xs_ibt_2_lookup;
__uint32_t xs_ibt_2_compare;
__uint32_t xs_ibt_2_insrec;
__uint32_t xs_ibt_2_delrec;
__uint32_t xs_ibt_2_newroot;
__uint32_t xs_ibt_2_killroot;
__uint32_t xs_ibt_2_increment;
__uint32_t xs_ibt_2_decrement;
__uint32_t xs_ibt_2_lshift;
__uint32_t xs_ibt_2_rshift;
__uint32_t xs_ibt_2_split;
__uint32_t xs_ibt_2_join;
__uint32_t xs_ibt_2_alloc;
__uint32_t xs_ibt_2_free;
__uint32_t xs_ibt_2_moves;
#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15)
__uint32_t xs_fibt_2_lookup;
__uint32_t xs_fibt_2_compare;
__uint32_t xs_fibt_2_insrec;
__uint32_t xs_fibt_2_delrec;
__uint32_t xs_fibt_2_newroot;
__uint32_t xs_fibt_2_killroot;
__uint32_t xs_fibt_2_increment;
__uint32_t xs_fibt_2_decrement;
__uint32_t xs_fibt_2_lshift;
__uint32_t xs_fibt_2_rshift;
__uint32_t xs_fibt_2_split;
__uint32_t xs_fibt_2_join;
__uint32_t xs_fibt_2_alloc;
__uint32_t xs_fibt_2_free;
__uint32_t xs_fibt_2_moves;
#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2+15)
__uint32_t xs_rmap_2_lookup;
__uint32_t xs_rmap_2_compare;
__uint32_t xs_rmap_2_insrec;
__uint32_t xs_rmap_2_delrec;
__uint32_t xs_rmap_2_newroot;
__uint32_t xs_rmap_2_killroot;
__uint32_t xs_rmap_2_increment;
__uint32_t xs_rmap_2_decrement;
__uint32_t xs_rmap_2_lshift;
__uint32_t xs_rmap_2_rshift;
__uint32_t xs_rmap_2_split;
__uint32_t xs_rmap_2_join;
__uint32_t xs_rmap_2_alloc;
__uint32_t xs_rmap_2_free;
__uint32_t xs_rmap_2_moves;
#define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + 15)
__uint32_t xs_refcbt_2_lookup;
__uint32_t xs_refcbt_2_compare;
__uint32_t xs_refcbt_2_insrec;
__uint32_t xs_refcbt_2_delrec;
__uint32_t xs_refcbt_2_newroot;
__uint32_t xs_refcbt_2_killroot;
__uint32_t xs_refcbt_2_increment;
__uint32_t xs_refcbt_2_decrement;
__uint32_t xs_refcbt_2_lshift;
__uint32_t xs_refcbt_2_rshift;
__uint32_t xs_refcbt_2_split;
__uint32_t xs_refcbt_2_join;
__uint32_t xs_refcbt_2_alloc;
__uint32_t xs_refcbt_2_free;
__uint32_t xs_refcbt_2_moves;
#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF + __XBTS_MAX)
__uint32_t xs_abtb_2[__XBTS_MAX];
#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2 + __XBTS_MAX)
__uint32_t xs_abtc_2[__XBTS_MAX];
#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2 + __XBTS_MAX)
__uint32_t xs_bmbt_2[__XBTS_MAX];
#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2 + __XBTS_MAX)
__uint32_t xs_ibt_2[__XBTS_MAX];
#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2 + __XBTS_MAX)
__uint32_t xs_fibt_2[__XBTS_MAX];
#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2 + __XBTS_MAX)
__uint32_t xs_rmap_2[__XBTS_MAX];
#define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + __XBTS_MAX)
__uint32_t xs_refcbt_2[__XBTS_MAX];
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_REFCOUNT + 6)
__uint32_t xs_qm_dqreclaims;
__uint32_t xs_qm_dqreclaim_misses;
@ -245,26 +175,58 @@ struct xfsstats {
__uint64_t xs_read_bytes;
};
struct xfsstats {
union {
struct __xfsstats s;
uint32_t a[XFSSTAT_END_XQMSTAT];
};
};
/*
* simple wrapper for getting the array index of s struct member offset
*/
#define XFS_STATS_CALC_INDEX(member) \
(offsetof(struct __xfsstats, member) / (int)sizeof(__uint32_t))
int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
void xfs_stats_clearall(struct xfsstats __percpu *stats);
extern struct xstats xfsstats;
#define XFS_STATS_INC(mp, v) \
do { \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v++; \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v++; \
} while (0)
#define XFS_STATS_DEC(mp, v) \
do { \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v--; \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v--; \
} while (0)
#define XFS_STATS_ADD(mp, v, inc) \
do { \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v += (inc); \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v += (inc); \
} while (0)
#define XFS_STATS_INC_OFF(mp, off) \
do { \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off]++; \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off]++; \
} while (0)
#define XFS_STATS_DEC_OFF(mp, off) \
do { \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off]; \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off]; \
} while (0)
#define XFS_STATS_ADD_OFF(mp, off, inc) \
do { \
per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off] += (inc); \
per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off] += (inc); \
} while (0)
#if defined(CONFIG_PROC_FS)

View File

@ -104,9 +104,6 @@ static const match_table_t tokens = {
{Opt_sysvgroups,"sysvgroups"}, /* group-ID from current process */
{Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
{Opt_norecovery,"norecovery"}, /* don't run XFS recovery */
{Opt_barrier, "barrier"}, /* use writer barriers for log write and
* unwritten extent conversion */
{Opt_nobarrier, "nobarrier"}, /* .. disable */
{Opt_inode64, "inode64"}, /* inodes can be allocated anywhere */
{Opt_inode32, "inode32"}, /* inode allocation limited to
* XFS_MAXINUMBER_32 */
@ -134,6 +131,12 @@ static const match_table_t tokens = {
{Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */
{Opt_dax, "dax"}, /* Enable direct access to bdev pages */
/* Deprecated mount options scheduled for removal */
{Opt_barrier, "barrier"}, /* use writer barriers for log write and
* unwritten extent conversion */
{Opt_nobarrier, "nobarrier"}, /* .. disable */
{Opt_err, NULL},
};
@ -301,12 +304,6 @@ xfs_parseargs(
case Opt_nouuid:
mp->m_flags |= XFS_MOUNT_NOUUID;
break;
case Opt_barrier:
mp->m_flags |= XFS_MOUNT_BARRIER;
break;
case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
case Opt_ikeep:
mp->m_flags |= XFS_MOUNT_IKEEP;
break;
@ -374,6 +371,14 @@ xfs_parseargs(
mp->m_flags |= XFS_MOUNT_DAX;
break;
#endif
case Opt_barrier:
xfs_warn(mp, "%s option is deprecated, ignoring.", p);
mp->m_flags |= XFS_MOUNT_BARRIER;
break;
case Opt_nobarrier:
xfs_warn(mp, "%s option is deprecated, ignoring.", p);
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
default:
xfs_warn(mp, "unknown mount option [%s].", p);
return -EINVAL;
@ -943,7 +948,7 @@ xfs_fs_destroy_inode(
trace_xfs_destroy_inode(ip);
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
ASSERT(!rwsem_is_locked(&inode->i_rwsem));
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
@ -1238,9 +1243,11 @@ xfs_fs_remount(
token = match_token(p, tokens, args);
switch (token) {
case Opt_barrier:
xfs_warn(mp, "%s option is deprecated, ignoring.", p);
mp->m_flags |= XFS_MOUNT_BARRIER;
break;
case Opt_nobarrier:
xfs_warn(mp, "%s option is deprecated, ignoring.", p);
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
case Opt_inode64:

View File

@ -238,8 +238,7 @@ xfs_symlink(
if (error)
goto out_release_inode;
xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
/*
@ -287,7 +286,7 @@ xfs_symlink(
* the transaction cancel unlocking dp so don't do it explicitly in the
* error path.
*/
xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
unlock_dp_on_error = false;
/*
@ -412,7 +411,7 @@ xfs_symlink(
xfs_qm_dqrele(pdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}

View File

@ -355,7 +355,6 @@ DEFINE_BUF_EVENT(xfs_buf_rele);
DEFINE_BUF_EVENT(xfs_buf_iodone);
DEFINE_BUF_EVENT(xfs_buf_submit);
DEFINE_BUF_EVENT(xfs_buf_submit_wait);
DEFINE_BUF_EVENT(xfs_buf_bawrite);
DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
DEFINE_BUF_EVENT(xfs_buf_trylock_fail);
@ -367,19 +366,15 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_bdstrat_shut);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse);
DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
/* not really buffer traces, but the buf provides useful information */
DEFINE_BUF_EVENT(xfs_btree_corrupt);
DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
DEFINE_BUF_EVENT(xfs_reset_dqcounts);
DEFINE_BUF_EVENT(xfs_inode_item_push);
/* pass flags explicitly */
DECLARE_EVENT_CLASS(xfs_buf_flags_class,
@ -541,7 +536,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered);
DECLARE_EVENT_CLASS(xfs_filestream_class,
TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
@ -680,7 +674,6 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
DEFINE_INODE_EVENT(xfs_dir_fsync);
DEFINE_INODE_EVENT(xfs_file_fsync);
DEFINE_INODE_EVENT(xfs_destroy_inode);
DEFINE_INODE_EVENT(xfs_evict_inode);
DEFINE_INODE_EVENT(xfs_update_time);
DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
@ -798,7 +791,6 @@ TRACE_EVENT(xfs_irec_merge_post,
DEFINE_EVENT(xfs_iref_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
TP_ARGS(ip, caller_ip))
DEFINE_IREF_EVENT(xfs_ihold);
DEFINE_IREF_EVENT(xfs_irele);
DEFINE_IREF_EVENT(xfs_inode_pin);
DEFINE_IREF_EVENT(xfs_inode_unpin);
@ -939,7 +931,6 @@ DEFINE_DQUOT_EVENT(xfs_dqget_miss);
DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
DEFINE_DQUOT_EVENT(xfs_dqget_dup);
DEFINE_DQUOT_EVENT(xfs_dqput);
DEFINE_DQUOT_EVENT(xfs_dqput_wait);
DEFINE_DQUOT_EVENT(xfs_dqput_free);
DEFINE_DQUOT_EVENT(xfs_dqrele);
DEFINE_DQUOT_EVENT(xfs_dqflush);
@ -1815,7 +1806,6 @@ DEFINE_ATTR_EVENT(xfs_attr_sf_addname);
DEFINE_ATTR_EVENT(xfs_attr_sf_create);
DEFINE_ATTR_EVENT(xfs_attr_sf_lookup);
DEFINE_ATTR_EVENT(xfs_attr_sf_remove);
DEFINE_ATTR_EVENT(xfs_attr_sf_removename);
DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf);
DEFINE_ATTR_EVENT(xfs_attr_leaf_add);
@ -1844,7 +1834,6 @@ DEFINE_ATTR_EVENT(xfs_attr_leaf_toosmall);
DEFINE_ATTR_EVENT(xfs_attr_node_addname);
DEFINE_ATTR_EVENT(xfs_attr_node_get);
DEFINE_ATTR_EVENT(xfs_attr_node_lookup);
DEFINE_ATTR_EVENT(xfs_attr_node_replace);
DEFINE_ATTR_EVENT(xfs_attr_node_removename);
@ -2440,11 +2429,9 @@ DEFINE_DEFER_EVENT(xfs_defer_finish_done);
DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error);
DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error);
DEFINE_DEFER_ERROR_EVENT(xfs_defer_op_finish_error);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_commit);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
@ -3092,87 +3079,6 @@ DEFINE_EVENT(xfs_double_io_class, name, \
struct xfs_inode *dest, xfs_off_t doffset), \
TP_ARGS(src, soffset, len, dest, doffset))
/* two-file vfs io tracepoint class */
DECLARE_EVENT_CLASS(xfs_double_vfs_io_class,
TP_PROTO(struct inode *src, u64 soffset, u64 len,
struct inode *dest, u64 doffset),
TP_ARGS(src, soffset, len, dest, doffset),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, src_ino)
__field(loff_t, src_isize)
__field(loff_t, src_offset)
__field(size_t, len)
__field(unsigned long, dest_ino)
__field(loff_t, dest_isize)
__field(loff_t, dest_offset)
),
TP_fast_assign(
__entry->dev = src->i_sb->s_dev;
__entry->src_ino = src->i_ino;
__entry->src_isize = i_size_read(src);
__entry->src_offset = soffset;
__entry->len = len;
__entry->dest_ino = dest->i_ino;
__entry->dest_isize = i_size_read(dest);
__entry->dest_offset = doffset;
),
TP_printk("dev %d:%d count %zd "
"ino 0x%lx isize 0x%llx offset 0x%llx -> "
"ino 0x%lx isize 0x%llx offset 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->len,
__entry->src_ino,
__entry->src_isize,
__entry->src_offset,
__entry->dest_ino,
__entry->dest_isize,
__entry->dest_offset)
)
#define DEFINE_DOUBLE_VFS_IO_EVENT(name) \
DEFINE_EVENT(xfs_double_vfs_io_class, name, \
TP_PROTO(struct inode *src, u64 soffset, u64 len, \
struct inode *dest, u64 doffset), \
TP_ARGS(src, soffset, len, dest, doffset))
/* CoW write tracepoint */
DECLARE_EVENT_CLASS(xfs_copy_on_write_class,
TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk,
xfs_extlen_t len, xfs_fsblock_t new_pblk),
TP_ARGS(ip, lblk, pblk, len, new_pblk),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_fileoff_t, lblk)
__field(xfs_fsblock_t, pblk)
__field(xfs_extlen_t, len)
__field(xfs_fsblock_t, new_pblk)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->lblk = lblk;
__entry->pblk = pblk;
__entry->len = len;
__entry->new_pblk = new_pblk;
),
TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx pblk 0x%llx "
"len 0x%x new_pblk %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->lblk,
__entry->pblk,
__entry->len,
__entry->new_pblk)
)
#define DEFINE_COW_EVENT(name) \
DEFINE_EVENT(xfs_copy_on_write_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk, \
xfs_extlen_t len, xfs_fsblock_t new_pblk), \
TP_ARGS(ip, lblk, pblk, len, new_pblk))
/* inode/irec events */
DECLARE_EVENT_CLASS(xfs_inode_irec_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec),
@ -3292,8 +3198,6 @@ DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_reflink_main_loop_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_read_iomap_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error);
@ -3302,9 +3206,6 @@ DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error);
/* ioctl tracepoints */
DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_reflink);
DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_clone_range);
DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_file_extent_same);
TRACE_EVENT(xfs_ioctl_clone,
TP_PROTO(struct inode *src, struct inode *dest),
TP_ARGS(src, dest),
@ -3334,11 +3235,7 @@ TRACE_EVENT(xfs_ioctl_clone,
/* unshare tracepoints */
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cow_eof_block);
DEFINE_PAGE_EVENT(xfs_reflink_unshare_page);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cow_eof_block_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_dirty_page_error);
/* copy on write */
DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
@ -3361,14 +3258,8 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
DEFINE_COW_EVENT(xfs_reflink_fork_buf);
DEFINE_COW_EVENT(xfs_reflink_finish_fork_buf);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_fork_buf_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_finish_fork_buf_error);
DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error);
/* rmap swapext tracepoints */
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);

View File

@ -130,7 +130,7 @@ const struct xattr_handler *xfs_xattr_handlers[] = {
NULL
};
static int
static void
__xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
char *prefix,
@ -148,7 +148,7 @@ __xfs_xattr_put_listent(
if (arraytop > context->firstu) {
context->count = -1; /* insufficient space */
context->seen_enough = 1;
return 0;
return;
}
offset = (char *)context->alist + context->count;
strncpy(offset, prefix, prefix_len);
@ -159,10 +159,10 @@ __xfs_xattr_put_listent(
compute_size:
context->count += prefix_len + namelen + 1;
return 0;
return;
}
static int
static void
xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
int flags,
@ -180,23 +180,19 @@ xfs_xattr_put_listent(
if (namelen == SGI_ACL_FILE_SIZE &&
strncmp(name, SGI_ACL_FILE,
SGI_ACL_FILE_SIZE) == 0) {
int ret = __xfs_xattr_put_listent(
__xfs_xattr_put_listent(
context, XATTR_SYSTEM_PREFIX,
XATTR_SYSTEM_PREFIX_LEN,
XATTR_POSIX_ACL_ACCESS,
strlen(XATTR_POSIX_ACL_ACCESS));
if (ret)
return ret;
} else if (namelen == SGI_ACL_DEFAULT_SIZE &&
strncmp(name, SGI_ACL_DEFAULT,
SGI_ACL_DEFAULT_SIZE) == 0) {
int ret = __xfs_xattr_put_listent(
__xfs_xattr_put_listent(
context, XATTR_SYSTEM_PREFIX,
XATTR_SYSTEM_PREFIX_LEN,
XATTR_POSIX_ACL_DEFAULT,
strlen(XATTR_POSIX_ACL_DEFAULT));
if (ret)
return ret;
}
#endif
@ -205,7 +201,7 @@ xfs_xattr_put_listent(
* see them.
*/
if (!capable(CAP_SYS_ADMIN))
return 0;
return;
prefix = XATTR_TRUSTED_PREFIX;
prefix_len = XATTR_TRUSTED_PREFIX_LEN;
@ -217,8 +213,9 @@ xfs_xattr_put_listent(
prefix_len = XATTR_USER_PREFIX_LEN;
}
return __xfs_xattr_put_listent(context, prefix, prefix_len, name,
namelen);
__xfs_xattr_put_listent(context, prefix, prefix_len, name,
namelen);
return;
}
ssize_t

View File

@ -50,6 +50,7 @@ struct iomap {
#define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */
#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
#define IOMAP_FAULT (1 << 3) /* mapping for page fault */
#define IOMAP_DIRECT (1 << 4) /* direct I/O */
struct iomap_ops {
/*
@ -83,4 +84,14 @@ int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len, struct iomap_ops *ops);
/*
* Flags for direct I/O ->end_io:
*/
#define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */
#define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */
typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
unsigned flags);
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
struct iomap_ops *ops, iomap_dio_end_io_t end_io);
#endif /* LINUX_IOMAP_H */

View File

@ -338,9 +338,18 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
extern void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip);
#define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map)
/*
* Same "read" as for lock_acquire(), except -1 means any.
*/
extern int lock_is_held_type(struct lockdep_map *lock, int read);
extern int lock_is_held(struct lockdep_map *lock);
static inline int lock_is_held(struct lockdep_map *lock)
{
return lock_is_held_type(lock, -1);
}
#define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map)
#define lockdep_is_held_type(lock, r) lock_is_held_type(&(lock)->dep_map, (r))
extern void lock_set_class(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, unsigned int subclass,
@ -372,6 +381,14 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
WARN_ON(debug_locks && !lockdep_is_held(l)); \
} while (0)
#define lockdep_assert_held_exclusive(l) do { \
WARN_ON(debug_locks && !lockdep_is_held_type(l, 0)); \
} while (0)
#define lockdep_assert_held_read(l) do { \
WARN_ON(debug_locks && !lockdep_is_held_type(l, 1)); \
} while (0)
#define lockdep_assert_held_once(l) do { \
WARN_ON_ONCE(debug_locks && !lockdep_is_held(l)); \
} while (0)
@ -428,7 +445,11 @@ struct lock_class_key { };
#define lockdep_depth(tsk) (0)
#define lockdep_is_held_type(l, r) (1)
#define lockdep_assert_held(l) do { (void)(l); } while (0)
#define lockdep_assert_held_exclusive(l) do { (void)(l); } while (0)
#define lockdep_assert_held_read(l) do { (void)(l); } while (0)
#define lockdep_assert_held_once(l) do { (void)(l); } while (0)
#define lockdep_recursing(tsk) (0)

View File

@ -3191,7 +3191,7 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
return 0;
}
static int __lock_is_held(struct lockdep_map *lock);
static int __lock_is_held(struct lockdep_map *lock, int read);
/*
* This gets called for every mutex_lock*()/spin_lock*() operation.
@ -3332,7 +3332,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
}
chain_key = iterate_chain_key(chain_key, class_idx);
if (nest_lock && !__lock_is_held(nest_lock))
if (nest_lock && !__lock_is_held(nest_lock, -1))
return print_lock_nested_lock_not_held(curr, hlock, ip);
if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
@ -3579,7 +3579,7 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
return 1;
}
static int __lock_is_held(struct lockdep_map *lock)
static int __lock_is_held(struct lockdep_map *lock, int read)
{
struct task_struct *curr = current;
int i;
@ -3587,8 +3587,12 @@ static int __lock_is_held(struct lockdep_map *lock)
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock))
return 1;
if (match_held_lock(hlock, lock)) {
if (read == -1 || hlock->read == read)
return 1;
return 0;
}
}
return 0;
@ -3772,7 +3776,7 @@ void lock_release(struct lockdep_map *lock, int nested,
}
EXPORT_SYMBOL_GPL(lock_release);
int lock_is_held(struct lockdep_map *lock)
int lock_is_held_type(struct lockdep_map *lock, int read)
{
unsigned long flags;
int ret = 0;
@ -3784,13 +3788,13 @@ int lock_is_held(struct lockdep_map *lock)
check_flags(flags);
current->lockdep_recursion = 1;
ret = __lock_is_held(lock);
ret = __lock_is_held(lock, read);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
return ret;
}
EXPORT_SYMBOL_GPL(lock_is_held);
EXPORT_SYMBOL_GPL(lock_is_held_type);
struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
{