mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-12 16:11:04 +00:00
298f7bec50
When we dirty an inode, we are going to have to write it to disk at some point in the near future. This requires the inode cluster backing buffer to be present in memory. Unfortunately, under severe memory pressure we can reclaim the inode backing buffer while the inode is dirty in memory, resulting in stalling the AIL pushing because it has to do a read-modify-write cycle on the cluster buffer. When we have no memory available, the read of the cluster buffer blocks the AIL pushing process, and this causes all sorts of issues for memory reclaim as it requires inode writeback to make forwards progress. Allocating a cluster buffer causes more memory pressure, and results in more cluster buffers to be reclaimed, resulting in more RMW cycles to be done in the AIL context and everything then backs up on AIL progress. Only the synchronous inode cluster writeback in the inode reclaim code provides some level of forwards progress guarantees that prevent OOM-killer rampages in this situation. Fix this by pinning the inode backing buffer to the inode log item when the inode is first dirtied (i.e. in xfs_trans_log_inode()). This may mean the first modification of an inode that has been held in cache for a long time may block on a cluster buffer read, but we can do that in transaction context and block safely until the buffer has been allocated and read. Once we have the cluster buffer, the inode log item takes a reference to it, pinning it in memory, and attaches it to the log item for future reference. This means we can always grab the cluster buffer from the inode log item when we need it. When the inode is finally cleaned and removed from the AIL, we can drop the reference the inode log item holds on the cluster buffer. Once all inodes on the cluster buffer are clean, the cluster buffer will be unpinned and it will be available for memory reclaim to reclaim again. 
This avoids the issues with needing to do RMW cycles in the AIL pushing context, and hence allows complete non-blocking inode flushing to be performed by the AIL pushing context. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
1224 lines
32 KiB
C
1224 lines
32 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_bit.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_trans_priv.h"
|
|
#include "xfs_buf_item.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_inode_item.h"
|
|
#include "xfs_quota.h"
|
|
#include "xfs_dquot_item.h"
|
|
#include "xfs_dquot.h"
|
|
#include "xfs_trans_priv.h"
|
|
#include "xfs_trace.h"
|
|
#include "xfs_log.h"
|
|
|
|
|
|
/* Allocation cache that buffer log items are allocated from and freed back to. */
kmem_zone_t *xfs_buf_item_zone;
|
|
|
|
static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
|
|
{
|
|
return container_of(lip, struct xfs_buf_log_item, bli_item);
|
|
}
|
|
|
|
/* Forward declaration so that xfs_buf_item_unpin() below can call it. */
static void xfs_buf_item_done(struct xfs_buf *bp);
|
|
|
|
/* Is this log iovec plausibly large enough to contain the buffer log format? */
|
|
bool
|
|
xfs_buf_log_check_iovec(
|
|
struct xfs_log_iovec *iovec)
|
|
{
|
|
struct xfs_buf_log_format *blfp = iovec->i_addr;
|
|
char *bmp_end;
|
|
char *item_end;
|
|
|
|
if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len)
|
|
return false;
|
|
|
|
item_end = (char *)iovec->i_addr + iovec->i_len;
|
|
bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size];
|
|
return bmp_end <= item_end;
|
|
}
|
|
|
|
static inline int
|
|
xfs_buf_log_format_size(
|
|
struct xfs_buf_log_format *blfp)
|
|
{
|
|
return offsetof(struct xfs_buf_log_format, blf_data_map) +
|
|
(blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
|
|
}
|
|
|
|
/*
|
|
* This returns the number of log iovecs needed to log the
|
|
* given buf log item.
|
|
*
|
|
* It calculates this as 1 iovec for the buf log format structure
|
|
* and 1 for each stretch of non-contiguous chunks to be logged.
|
|
* Contiguous chunks are logged in a single iovec.
|
|
*
|
|
* If the XFS_BLI_STALE flag has been set, then log nothing.
|
|
*/
|
|
STATIC void
|
|
xfs_buf_item_size_segment(
|
|
struct xfs_buf_log_item *bip,
|
|
struct xfs_buf_log_format *blfp,
|
|
int *nvecs,
|
|
int *nbytes)
|
|
{
|
|
struct xfs_buf *bp = bip->bli_buf;
|
|
int next_bit;
|
|
int last_bit;
|
|
|
|
last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
|
|
if (last_bit == -1)
|
|
return;
|
|
|
|
/*
|
|
* initial count for a dirty buffer is 2 vectors - the format structure
|
|
* and the first dirty region.
|
|
*/
|
|
*nvecs += 2;
|
|
*nbytes += xfs_buf_log_format_size(blfp) + XFS_BLF_CHUNK;
|
|
|
|
while (last_bit != -1) {
|
|
/*
|
|
* This takes the bit number to start looking from and
|
|
* returns the next set bit from there. It returns -1
|
|
* if there are no more bits set or the start bit is
|
|
* beyond the end of the bitmap.
|
|
*/
|
|
next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
|
|
last_bit + 1);
|
|
/*
|
|
* If we run out of bits, leave the loop,
|
|
* else if we find a new set of bits bump the number of vecs,
|
|
* else keep scanning the current set of bits.
|
|
*/
|
|
if (next_bit == -1) {
|
|
break;
|
|
} else if (next_bit != last_bit + 1) {
|
|
last_bit = next_bit;
|
|
(*nvecs)++;
|
|
} else if (xfs_buf_offset(bp, next_bit * XFS_BLF_CHUNK) !=
|
|
(xfs_buf_offset(bp, last_bit * XFS_BLF_CHUNK) +
|
|
XFS_BLF_CHUNK)) {
|
|
last_bit = next_bit;
|
|
(*nvecs)++;
|
|
} else {
|
|
last_bit++;
|
|
}
|
|
*nbytes += XFS_BLF_CHUNK;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This returns the number of log iovecs needed to log the given buf log item.
|
|
*
|
|
* It calculates this as 1 iovec for the buf log format structure and 1 for each
|
|
* stretch of non-contiguous chunks to be logged. Contiguous chunks are logged
|
|
* in a single iovec.
|
|
*
|
|
* Discontiguous buffers need a format structure per region that that is being
|
|
* logged. This makes the changes in the buffer appear to log recovery as though
|
|
* they came from separate buffers, just like would occur if multiple buffers
|
|
* were used instead of a single discontiguous buffer. This enables
|
|
* discontiguous buffers to be in-memory constructs, completely transparent to
|
|
* what ends up on disk.
|
|
*
|
|
* If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
|
|
* format structures.
|
|
*/
|
|
STATIC void
|
|
xfs_buf_item_size(
|
|
struct xfs_log_item *lip,
|
|
int *nvecs,
|
|
int *nbytes)
|
|
{
|
|
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
|
int i;
|
|
|
|
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
|
if (bip->bli_flags & XFS_BLI_STALE) {
|
|
/*
|
|
* The buffer is stale, so all we need to log
|
|
* is the buf log format structure with the
|
|
* cancel flag in it.
|
|
*/
|
|
trace_xfs_buf_item_size_stale(bip);
|
|
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
|
|
*nvecs += bip->bli_format_count;
|
|
for (i = 0; i < bip->bli_format_count; i++) {
|
|
*nbytes += xfs_buf_log_format_size(&bip->bli_formats[i]);
|
|
}
|
|
return;
|
|
}
|
|
|
|
ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
|
|
|
|
if (bip->bli_flags & XFS_BLI_ORDERED) {
|
|
/*
|
|
* The buffer has been logged just to order it.
|
|
* It is not being included in the transaction
|
|
* commit, so no vectors are used at all.
|
|
*/
|
|
trace_xfs_buf_item_size_ordered(bip);
|
|
*nvecs = XFS_LOG_VEC_ORDERED;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* the vector count is based on the number of buffer vectors we have
|
|
* dirty bits in. This will only be greater than one when we have a
|
|
* compound buffer with more than one segment dirty. Hence for compound
|
|
* buffers we need to track which segment the dirty bits correspond to,
|
|
* and when we move from one segment to the next increment the vector
|
|
* count for the extra buf log format structure that will need to be
|
|
* written.
|
|
*/
|
|
for (i = 0; i < bip->bli_format_count; i++) {
|
|
xfs_buf_item_size_segment(bip, &bip->bli_formats[i],
|
|
nvecs, nbytes);
|
|
}
|
|
trace_xfs_buf_item_size(bip);
|
|
}
|
|
|
|
static inline void
|
|
xfs_buf_item_copy_iovec(
|
|
struct xfs_log_vec *lv,
|
|
struct xfs_log_iovec **vecp,
|
|
struct xfs_buf *bp,
|
|
uint offset,
|
|
int first_bit,
|
|
uint nbits)
|
|
{
|
|
offset += first_bit * XFS_BLF_CHUNK;
|
|
xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK,
|
|
xfs_buf_offset(bp, offset),
|
|
nbits * XFS_BLF_CHUNK);
|
|
}
|
|
|
|
static inline bool
|
|
xfs_buf_item_straddle(
|
|
struct xfs_buf *bp,
|
|
uint offset,
|
|
int next_bit,
|
|
int last_bit)
|
|
{
|
|
return xfs_buf_offset(bp, offset + (next_bit << XFS_BLF_SHIFT)) !=
|
|
(xfs_buf_offset(bp, offset + (last_bit << XFS_BLF_SHIFT)) +
|
|
XFS_BLF_CHUNK);
|
|
}
|
|
|
|
static void
|
|
xfs_buf_item_format_segment(
|
|
struct xfs_buf_log_item *bip,
|
|
struct xfs_log_vec *lv,
|
|
struct xfs_log_iovec **vecp,
|
|
uint offset,
|
|
struct xfs_buf_log_format *blfp)
|
|
{
|
|
struct xfs_buf *bp = bip->bli_buf;
|
|
uint base_size;
|
|
int first_bit;
|
|
int last_bit;
|
|
int next_bit;
|
|
uint nbits;
|
|
|
|
/* copy the flags across from the base format item */
|
|
blfp->blf_flags = bip->__bli_format.blf_flags;
|
|
|
|
/*
|
|
* Base size is the actual size of the ondisk structure - it reflects
|
|
* the actual size of the dirty bitmap rather than the size of the in
|
|
* memory structure.
|
|
*/
|
|
base_size = xfs_buf_log_format_size(blfp);
|
|
|
|
first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
|
|
if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
|
|
/*
|
|
* If the map is not be dirty in the transaction, mark
|
|
* the size as zero and do not advance the vector pointer.
|
|
*/
|
|
return;
|
|
}
|
|
|
|
blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size);
|
|
blfp->blf_size = 1;
|
|
|
|
if (bip->bli_flags & XFS_BLI_STALE) {
|
|
/*
|
|
* The buffer is stale, so all we need to log
|
|
* is the buf log format structure with the
|
|
* cancel flag in it.
|
|
*/
|
|
trace_xfs_buf_item_format_stale(bip);
|
|
ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
* Fill in an iovec for each set of contiguous chunks.
|
|
*/
|
|
last_bit = first_bit;
|
|
nbits = 1;
|
|
for (;;) {
|
|
/*
|
|
* This takes the bit number to start looking from and
|
|
* returns the next set bit from there. It returns -1
|
|
* if there are no more bits set or the start bit is
|
|
* beyond the end of the bitmap.
|
|
*/
|
|
next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
|
|
(uint)last_bit + 1);
|
|
/*
|
|
* If we run out of bits fill in the last iovec and get out of
|
|
* the loop. Else if we start a new set of bits then fill in
|
|
* the iovec for the series we were looking at and start
|
|
* counting the bits in the new one. Else we're still in the
|
|
* same set of bits so just keep counting and scanning.
|
|
*/
|
|
if (next_bit == -1) {
|
|
xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
|
|
first_bit, nbits);
|
|
blfp->blf_size++;
|
|
break;
|
|
} else if (next_bit != last_bit + 1 ||
|
|
xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) {
|
|
xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
|
|
first_bit, nbits);
|
|
blfp->blf_size++;
|
|
first_bit = next_bit;
|
|
last_bit = next_bit;
|
|
nbits = 1;
|
|
} else {
|
|
last_bit++;
|
|
nbits++;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This is called to fill in the vector of log iovecs for the
|
|
* given log buf item. It fills the first entry with a buf log
|
|
* format structure, and the rest point to contiguous chunks
|
|
* within the buffer.
|
|
*/
|
|
STATIC void
|
|
xfs_buf_item_format(
|
|
struct xfs_log_item *lip,
|
|
struct xfs_log_vec *lv)
|
|
{
|
|
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
|
struct xfs_buf *bp = bip->bli_buf;
|
|
struct xfs_log_iovec *vecp = NULL;
|
|
uint offset = 0;
|
|
int i;
|
|
|
|
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
|
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
|
|
(bip->bli_flags & XFS_BLI_STALE));
|
|
ASSERT((bip->bli_flags & XFS_BLI_STALE) ||
|
|
(xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF
|
|
&& xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF));
|
|
ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED) ||
|
|
(bip->bli_flags & XFS_BLI_STALE));
|
|
|
|
|
|
/*
|
|
* If it is an inode buffer, transfer the in-memory state to the
|
|
* format flags and clear the in-memory state.
|
|
*
|
|
* For buffer based inode allocation, we do not transfer
|
|
* this state if the inode buffer allocation has not yet been committed
|
|
* to the log as setting the XFS_BLI_INODE_BUF flag will prevent
|
|
* correct replay of the inode allocation.
|
|
*
|
|
* For icreate item based inode allocation, the buffers aren't written
|
|
* to the journal during allocation, and hence we should always tag the
|
|
* buffer as an inode buffer so that the correct unlinked list replay
|
|
* occurs during recovery.
|
|
*/
|
|
if (bip->bli_flags & XFS_BLI_INODE_BUF) {
|
|
if (xfs_sb_version_has_v3inode(&lip->li_mountp->m_sb) ||
|
|
!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
|
|
xfs_log_item_in_current_chkpt(lip)))
|
|
bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
|
|
bip->bli_flags &= ~XFS_BLI_INODE_BUF;
|
|
}
|
|
|
|
for (i = 0; i < bip->bli_format_count; i++) {
|
|
xfs_buf_item_format_segment(bip, lv, &vecp, offset,
|
|
&bip->bli_formats[i]);
|
|
offset += BBTOB(bp->b_maps[i].bm_len);
|
|
}
|
|
|
|
/*
|
|
* Check to make sure everything is consistent.
|
|
*/
|
|
trace_xfs_buf_item_format(bip);
|
|
}
|
|
|
|
/*
|
|
* This is called to pin the buffer associated with the buf log item in memory
|
|
* so it cannot be written out.
|
|
*
|
|
* We also always take a reference to the buffer log item here so that the bli
|
|
* is held while the item is pinned in memory. This means that we can
|
|
* unconditionally drop the reference count a transaction holds when the
|
|
* transaction is completed.
|
|
*/
|
|
STATIC void
|
|
xfs_buf_item_pin(
|
|
struct xfs_log_item *lip)
|
|
{
|
|
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
|
|
|
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
|
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
|
|
(bip->bli_flags & XFS_BLI_ORDERED) ||
|
|
(bip->bli_flags & XFS_BLI_STALE));
|
|
|
|
trace_xfs_buf_item_pin(bip);
|
|
|
|
atomic_inc(&bip->bli_refcount);
|
|
atomic_inc(&bip->bli_buf->b_pin_count);
|
|
}
|
|
|
|
/*
|
|
* This is called to unpin the buffer associated with the buf log
|
|
* item which was previously pinned with a call to xfs_buf_item_pin().
|
|
*
|
|
* Also drop the reference to the buf item for the current transaction.
|
|
* If the XFS_BLI_STALE flag is set and we are the last reference,
|
|
* then free up the buf log item and unlock the buffer.
|
|
*
|
|
* If the remove flag is set we are called from uncommit in the
|
|
* forced-shutdown path. If that is true and the reference count on
|
|
* the log item is going to drop to zero we need to free the item's
|
|
* descriptor in the transaction.
|
|
*/
|
|
STATIC void
|
|
xfs_buf_item_unpin(
|
|
struct xfs_log_item *lip,
|
|
int remove)
|
|
{
|
|
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
|
xfs_buf_t *bp = bip->bli_buf;
|
|
int stale = bip->bli_flags & XFS_BLI_STALE;
|
|
int freed;
|
|
|
|
ASSERT(bp->b_log_item == bip);
|
|
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
|
|
|
trace_xfs_buf_item_unpin(bip);
|
|
|
|
freed = atomic_dec_and_test(&bip->bli_refcount);
|
|
|
|
if (atomic_dec_and_test(&bp->b_pin_count))
|
|
wake_up_all(&bp->b_waiters);
|
|
|
|
if (freed && stale) {
|
|
ASSERT(bip->bli_flags & XFS_BLI_STALE);
|
|
ASSERT(xfs_buf_islocked(bp));
|
|
ASSERT(bp->b_flags & XBF_STALE);
|
|
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
|
|
|
|
trace_xfs_buf_item_unpin_stale(bip);
|
|
|
|
if (remove) {
|
|
/*
|
|
* If we are in a transaction context, we have to
|
|
* remove the log item from the transaction as we are
|
|
* about to release our reference to the buffer. If we
|
|
* don't, the unlock that occurs later in
|
|
* xfs_trans_uncommit() will try to reference the
|
|
* buffer which we no longer have a hold on.
|
|
*/
|
|
if (!list_empty(&lip->li_trans))
|
|
xfs_trans_del_item(lip);
|
|
|
|
/*
|
|
* Since the transaction no longer refers to the buffer,
|
|
* the buffer should no longer refer to the transaction.
|
|
*/
|
|
bp->b_transp = NULL;
|
|
}
|
|
|
|
/*
|
|
* If we get called here because of an IO error, we may or may
|
|
* not have the item on the AIL. xfs_trans_ail_delete() will
|
|
* take care of that situation. xfs_trans_ail_delete() drops
|
|
* the AIL lock.
|
|
*/
|
|
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
|
|
xfs_buf_item_done(bp);
|
|
xfs_iflush_done(bp);
|
|
} else {
|
|
xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
|
|
xfs_buf_item_relse(bp);
|
|
ASSERT(bp->b_log_item == NULL);
|
|
}
|
|
xfs_buf_relse(bp);
|
|
} else if (freed && remove) {
|
|
/*
|
|
* The buffer must be locked and held by the caller to simulate
|
|
* an async I/O failure.
|
|
*/
|
|
xfs_buf_lock(bp);
|
|
xfs_buf_hold(bp);
|
|
bp->b_flags |= XBF_ASYNC;
|
|
xfs_buf_ioend_fail(bp);
|
|
}
|
|
}
|
|
|
|
STATIC uint
|
|
xfs_buf_item_push(
|
|
struct xfs_log_item *lip,
|
|
struct list_head *buffer_list)
|
|
{
|
|
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
|
struct xfs_buf *bp = bip->bli_buf;
|
|
uint rval = XFS_ITEM_SUCCESS;
|
|
|
|
if (xfs_buf_ispinned(bp))
|
|
return XFS_ITEM_PINNED;
|
|
if (!xfs_buf_trylock(bp)) {
|
|
/*
|
|
* If we have just raced with a buffer being pinned and it has
|
|
* been marked stale, we could end up stalling until someone else
|
|
* issues a log force to unpin the stale buffer. Check for the
|
|
* race condition here so xfsaild recognizes the buffer is pinned
|
|
* and queues a log force to move it along.
|
|
*/
|
|
if (xfs_buf_ispinned(bp))
|
|
return XFS_ITEM_PINNED;
|
|
return XFS_ITEM_LOCKED;
|
|
}
|
|
|
|
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
|
|
|
trace_xfs_buf_item_push(bip);
|
|
|
|
/* has a previous flush failed due to IO errors? */
|
|
if (bp->b_flags & XBF_WRITE_FAIL) {
|
|
xfs_buf_alert_ratelimited(bp, "XFS: Failing async write",
|
|
"Failing async write on buffer block 0x%llx. Retrying async write.",
|
|
(long long)bp->b_bn);
|
|
}
|
|
|
|
if (!xfs_buf_delwri_queue(bp, buffer_list))
|
|
rval = XFS_ITEM_FLUSHING;
|
|
xfs_buf_unlock(bp);
|
|
return rval;
|
|
}
|
|
|
|
/*
|
|
* Drop the buffer log item refcount and take appropriate action. This helper
|
|
* determines whether the bli must be freed or not, since a decrement to zero
|
|
* does not necessarily mean the bli is unused.
|
|
*
|
|
* Return true if the bli is freed, false otherwise.
|
|
*/
|
|
bool
|
|
xfs_buf_item_put(
|
|
struct xfs_buf_log_item *bip)
|
|
{
|
|
struct xfs_log_item *lip = &bip->bli_item;
|
|
bool aborted;
|
|
bool dirty;
|
|
|
|
/* drop the bli ref and return if it wasn't the last one */
|
|
if (!atomic_dec_and_test(&bip->bli_refcount))
|
|
return false;
|
|
|
|
/*
|
|
* We dropped the last ref and must free the item if clean or aborted.
|
|
* If the bli is dirty and non-aborted, the buffer was clean in the
|
|
* transaction but still awaiting writeback from previous changes. In
|
|
* that case, the bli is freed on buffer writeback completion.
|
|
*/
|
|
aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
|
|
XFS_FORCED_SHUTDOWN(lip->li_mountp);
|
|
dirty = bip->bli_flags & XFS_BLI_DIRTY;
|
|
if (dirty && !aborted)
|
|
return false;
|
|
|
|
/*
|
|
* The bli is aborted or clean. An aborted item may be in the AIL
|
|
* regardless of dirty state. For example, consider an aborted
|
|
* transaction that invalidated a dirty bli and cleared the dirty
|
|
* state.
|
|
*/
|
|
if (aborted)
|
|
xfs_trans_ail_delete(lip, 0);
|
|
xfs_buf_item_relse(bip->bli_buf);
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Release the buffer associated with the buf log item. If there is no dirty
|
|
* logged data associated with the buffer recorded in the buf log item, then
|
|
* free the buf log item and remove the reference to it in the buffer.
|
|
*
|
|
* This call ignores the recursion count. It is only called when the buffer
|
|
* should REALLY be unlocked, regardless of the recursion count.
|
|
*
|
|
* We unconditionally drop the transaction's reference to the log item. If the
|
|
* item was logged, then another reference was taken when it was pinned, so we
|
|
* can safely drop the transaction reference now. This also allows us to avoid
|
|
* potential races with the unpin code freeing the bli by not referencing the
|
|
* bli after we've dropped the reference count.
|
|
*
|
|
* If the XFS_BLI_HOLD flag is set in the buf log item, then free the log item
|
|
* if necessary but do not unlock the buffer. This is for support of
|
|
* xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
|
|
* free the item.
|
|
*/
|
|
STATIC void
|
|
xfs_buf_item_release(
|
|
struct xfs_log_item *lip)
|
|
{
|
|
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
|
struct xfs_buf *bp = bip->bli_buf;
|
|
bool released;
|
|
bool hold = bip->bli_flags & XFS_BLI_HOLD;
|
|
bool stale = bip->bli_flags & XFS_BLI_STALE;
|
|
#if defined(DEBUG) || defined(XFS_WARN)
|
|
bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
|
|
bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
|
|
bool aborted = test_bit(XFS_LI_ABORTED,
|
|
&lip->li_flags);
|
|
#endif
|
|
|
|
trace_xfs_buf_item_release(bip);
|
|
|
|
/*
|
|
* The bli dirty state should match whether the blf has logged segments
|
|
* except for ordered buffers, where only the bli should be dirty.
|
|
*/
|
|
ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) ||
|
|
(ordered && dirty && !xfs_buf_item_dirty_format(bip)));
|
|
ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
|
|
|
|
/*
|
|
* Clear the buffer's association with this transaction and
|
|
* per-transaction state from the bli, which has been copied above.
|
|
*/
|
|
bp->b_transp = NULL;
|
|
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
|
|
|
|
/*
|
|
* Unref the item and unlock the buffer unless held or stale. Stale
|
|
* buffers remain locked until final unpin unless the bli is freed by
|
|
* the unref call. The latter implies shutdown because buffer
|
|
* invalidation dirties the bli and transaction.
|
|
*/
|
|
released = xfs_buf_item_put(bip);
|
|
if (hold || (stale && !released))
|
|
return;
|
|
ASSERT(!stale || aborted);
|
|
xfs_buf_relse(bp);
|
|
}
|
|
|
|
STATIC void
|
|
xfs_buf_item_committing(
|
|
struct xfs_log_item *lip,
|
|
xfs_lsn_t commit_lsn)
|
|
{
|
|
return xfs_buf_item_release(lip);
|
|
}
|
|
|
|
/*
|
|
* This is called to find out where the oldest active copy of the
|
|
* buf log item in the on disk log resides now that the last log
|
|
* write of it completed at the given lsn.
|
|
* We always re-log all the dirty data in a buffer, so usually the
|
|
* latest copy in the on disk log is the only one that matters. For
|
|
* those cases we simply return the given lsn.
|
|
*
|
|
* The one exception to this is for buffers full of newly allocated
|
|
* inodes. These buffers are only relogged with the XFS_BLI_INODE_BUF
|
|
* flag set, indicating that only the di_next_unlinked fields from the
|
|
* inodes in the buffers will be replayed during recovery. If the
|
|
* original newly allocated inode images have not yet been flushed
|
|
* when the buffer is so relogged, then we need to make sure that we
|
|
* keep the old images in the 'active' portion of the log. We do this
|
|
* by returning the original lsn of that transaction here rather than
|
|
* the current one.
|
|
*/
|
|
STATIC xfs_lsn_t
|
|
xfs_buf_item_committed(
|
|
struct xfs_log_item *lip,
|
|
xfs_lsn_t lsn)
|
|
{
|
|
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
|
|
|
|
trace_xfs_buf_item_committed(bip);
|
|
|
|
if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0)
|
|
return lip->li_lsn;
|
|
return lsn;
|
|
}
|
|
|
|
static const struct xfs_item_ops xfs_buf_item_ops = {
|
|
.iop_size = xfs_buf_item_size,
|
|
.iop_format = xfs_buf_item_format,
|
|
.iop_pin = xfs_buf_item_pin,
|
|
.iop_unpin = xfs_buf_item_unpin,
|
|
.iop_release = xfs_buf_item_release,
|
|
.iop_committing = xfs_buf_item_committing,
|
|
.iop_committed = xfs_buf_item_committed,
|
|
.iop_push = xfs_buf_item_push,
|
|
};
|
|
|
|
STATIC void
|
|
xfs_buf_item_get_format(
|
|
struct xfs_buf_log_item *bip,
|
|
int count)
|
|
{
|
|
ASSERT(bip->bli_formats == NULL);
|
|
bip->bli_format_count = count;
|
|
|
|
if (count == 1) {
|
|
bip->bli_formats = &bip->__bli_format;
|
|
return;
|
|
}
|
|
|
|
bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
|
|
0);
|
|
}
|
|
|
|
STATIC void
|
|
xfs_buf_item_free_format(
|
|
struct xfs_buf_log_item *bip)
|
|
{
|
|
if (bip->bli_formats != &bip->__bli_format) {
|
|
kmem_free(bip->bli_formats);
|
|
bip->bli_formats = NULL;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Allocate a new buf log item to go with the given buffer.
|
|
* Set the buffer's b_log_item field to point to the new
|
|
* buf log item.
|
|
*/
|
|
int
|
|
xfs_buf_item_init(
|
|
struct xfs_buf *bp,
|
|
struct xfs_mount *mp)
|
|
{
|
|
struct xfs_buf_log_item *bip = bp->b_log_item;
|
|
int chunks;
|
|
int map_size;
|
|
int i;
|
|
|
|
/*
|
|
* Check to see if there is already a buf log item for
|
|
* this buffer. If we do already have one, there is
|
|
* nothing to do here so return.
|
|
*/
|
|
ASSERT(bp->b_mount == mp);
|
|
if (bip) {
|
|
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
|
|
ASSERT(!bp->b_transp);
|
|
ASSERT(bip->bli_buf == bp);
|
|
return 0;
|
|
}
|
|
|
|
bip = kmem_zone_zalloc(xfs_buf_item_zone, 0);
|
|
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
|
|
bip->bli_buf = bp;
|
|
|
|
/*
|
|
* chunks is the number of XFS_BLF_CHUNK size pieces the buffer
|
|
* can be divided into. Make sure not to truncate any pieces.
|
|
* map_size is the size of the bitmap needed to describe the
|
|
* chunks of the buffer.
|
|
*
|
|
* Discontiguous buffer support follows the layout of the underlying
|
|
* buffer. This makes the implementation as simple as possible.
|
|
*/
|
|
xfs_buf_item_get_format(bip, bp->b_map_count);
|
|
|
|
for (i = 0; i < bip->bli_format_count; i++) {
|
|
chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
|
|
XFS_BLF_CHUNK);
|
|
map_size = DIV_ROUND_UP(chunks, NBWORD);
|
|
|
|
if (map_size > XFS_BLF_DATAMAP_SIZE) {
|
|
kmem_cache_free(xfs_buf_item_zone, bip);
|
|
xfs_err(mp,
|
|
"buffer item dirty bitmap (%u uints) too small to reflect %u bytes!",
|
|
map_size,
|
|
BBTOB(bp->b_maps[i].bm_len));
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
bip->bli_formats[i].blf_type = XFS_LI_BUF;
|
|
bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
|
|
bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
|
|
bip->bli_formats[i].blf_map_size = map_size;
|
|
}
|
|
|
|
bp->b_log_item = bip;
|
|
xfs_buf_hold(bp);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Mark bytes first through last inclusive as dirty in the buf
|
|
* item's bitmap.
|
|
*/
|
|
static void
|
|
xfs_buf_item_log_segment(
|
|
uint first,
|
|
uint last,
|
|
uint *map)
|
|
{
|
|
uint first_bit;
|
|
uint last_bit;
|
|
uint bits_to_set;
|
|
uint bits_set;
|
|
uint word_num;
|
|
uint *wordp;
|
|
uint bit;
|
|
uint end_bit;
|
|
uint mask;
|
|
|
|
ASSERT(first < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD);
|
|
ASSERT(last < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD);
|
|
|
|
/*
|
|
* Convert byte offsets to bit numbers.
|
|
*/
|
|
first_bit = first >> XFS_BLF_SHIFT;
|
|
last_bit = last >> XFS_BLF_SHIFT;
|
|
|
|
/*
|
|
* Calculate the total number of bits to be set.
|
|
*/
|
|
bits_to_set = last_bit - first_bit + 1;
|
|
|
|
/*
|
|
* Get a pointer to the first word in the bitmap
|
|
* to set a bit in.
|
|
*/
|
|
word_num = first_bit >> BIT_TO_WORD_SHIFT;
|
|
wordp = &map[word_num];
|
|
|
|
/*
|
|
* Calculate the starting bit in the first word.
|
|
*/
|
|
bit = first_bit & (uint)(NBWORD - 1);
|
|
|
|
/*
|
|
* First set any bits in the first word of our range.
|
|
* If it starts at bit 0 of the word, it will be
|
|
* set below rather than here. That is what the variable
|
|
* bit tells us. The variable bits_set tracks the number
|
|
* of bits that have been set so far. End_bit is the number
|
|
* of the last bit to be set in this word plus one.
|
|
*/
|
|
if (bit) {
|
|
end_bit = min(bit + bits_to_set, (uint)NBWORD);
|
|
mask = ((1U << (end_bit - bit)) - 1) << bit;
|
|
*wordp |= mask;
|
|
wordp++;
|
|
bits_set = end_bit - bit;
|
|
} else {
|
|
bits_set = 0;
|
|
}
|
|
|
|
/*
|
|
* Now set bits a whole word at a time that are between
|
|
* first_bit and last_bit.
|
|
*/
|
|
while ((bits_to_set - bits_set) >= NBWORD) {
|
|
*wordp = 0xffffffff;
|
|
bits_set += NBWORD;
|
|
wordp++;
|
|
}
|
|
|
|
/*
|
|
* Finally, set any bits left to be set in one last partial word.
|
|
*/
|
|
end_bit = bits_to_set - bits_set;
|
|
if (end_bit) {
|
|
mask = (1U << end_bit) - 1;
|
|
*wordp |= mask;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Mark bytes first through last inclusive as dirty in the buf
|
|
* item's bitmap.
|
|
*/
|
|
void
|
|
xfs_buf_item_log(
|
|
struct xfs_buf_log_item *bip,
|
|
uint first,
|
|
uint last)
|
|
{
|
|
int i;
|
|
uint start;
|
|
uint end;
|
|
struct xfs_buf *bp = bip->bli_buf;
|
|
|
|
/*
|
|
* walk each buffer segment and mark them dirty appropriately.
|
|
*/
|
|
start = 0;
|
|
for (i = 0; i < bip->bli_format_count; i++) {
|
|
if (start > last)
|
|
break;
|
|
end = start + BBTOB(bp->b_maps[i].bm_len) - 1;
|
|
|
|
/* skip to the map that includes the first byte to log */
|
|
if (first > end) {
|
|
start += BBTOB(bp->b_maps[i].bm_len);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Trim the range to this segment and mark it in the bitmap.
|
|
* Note that we must convert buffer offsets to segment relative
|
|
* offsets (e.g., the first byte of each segment is byte 0 of
|
|
* that segment).
|
|
*/
|
|
if (first < start)
|
|
first = start;
|
|
if (end > last)
|
|
end = last;
|
|
xfs_buf_item_log_segment(first - start, end - start,
|
|
&bip->bli_formats[i].blf_data_map[0]);
|
|
|
|
start += BBTOB(bp->b_maps[i].bm_len);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Return true if the buffer has any ranges logged/dirtied by a transaction,
|
|
* false otherwise.
|
|
*/
|
|
bool
|
|
xfs_buf_item_dirty_format(
|
|
struct xfs_buf_log_item *bip)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < bip->bli_format_count; i++) {
|
|
if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
|
|
bip->bli_formats[i].blf_map_size))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
STATIC void
|
|
xfs_buf_item_free(
|
|
struct xfs_buf_log_item *bip)
|
|
{
|
|
xfs_buf_item_free_format(bip);
|
|
kmem_free(bip->bli_item.li_lv_shadow);
|
|
kmem_cache_free(xfs_buf_item_zone, bip);
|
|
}
|
|
|
|
/*
|
|
* xfs_buf_item_relse() is called when the buf log item is no longer needed.
|
|
*/
|
|
void
|
|
xfs_buf_item_relse(
|
|
xfs_buf_t *bp)
|
|
{
|
|
struct xfs_buf_log_item *bip = bp->b_log_item;
|
|
|
|
trace_xfs_buf_item_relse(bp, _RET_IP_);
|
|
ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
|
|
|
|
bp->b_log_item = NULL;
|
|
xfs_buf_rele(bp);
|
|
xfs_buf_item_free(bip);
|
|
}
|
|
|
|
/*
|
|
* Decide if we're going to retry the write after a failure, and prepare
|
|
* the buffer for retrying the write.
|
|
*/
|
|
static bool
|
|
xfs_buf_ioerror_fail_without_retry(
|
|
struct xfs_buf *bp)
|
|
{
|
|
struct xfs_mount *mp = bp->b_mount;
|
|
static ulong lasttime;
|
|
static xfs_buftarg_t *lasttarg;
|
|
|
|
/*
|
|
* If we've already decided to shutdown the filesystem because of
|
|
* I/O errors, there's no point in giving this a retry.
|
|
*/
|
|
if (XFS_FORCED_SHUTDOWN(mp))
|
|
return true;
|
|
|
|
if (bp->b_target != lasttarg ||
|
|
time_after(jiffies, (lasttime + 5*HZ))) {
|
|
lasttime = jiffies;
|
|
xfs_buf_ioerror_alert(bp, __this_address);
|
|
}
|
|
lasttarg = bp->b_target;
|
|
|
|
/* synchronous writes will have callers process the error */
|
|
if (!(bp->b_flags & XBF_ASYNC))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
xfs_buf_ioerror_retry(
|
|
struct xfs_buf *bp,
|
|
struct xfs_error_cfg *cfg)
|
|
{
|
|
if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
|
|
bp->b_last_error == bp->b_error)
|
|
return false;
|
|
|
|
bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
|
|
bp->b_last_error = bp->b_error;
|
|
if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
|
|
!bp->b_first_retry_time)
|
|
bp->b_first_retry_time = jiffies;
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Account for this latest trip around the retry handler, and decide if
|
|
* we've failed enough times to constitute a permanent failure.
|
|
*/
|
|
static bool
|
|
xfs_buf_ioerror_permanent(
|
|
struct xfs_buf *bp,
|
|
struct xfs_error_cfg *cfg)
|
|
{
|
|
struct xfs_mount *mp = bp->b_mount;
|
|
|
|
if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
|
|
++bp->b_retries > cfg->max_retries)
|
|
return true;
|
|
if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
|
|
time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
|
|
return true;
|
|
|
|
/* At unmount we may treat errors differently */
|
|
if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
 * Buffer write error handling.
 *
 * For a synchronous write, or once the filesystem is shut down, the buffer is
 * simply marked stale and the caller is left to deal with the error held in
 * bp->b_error.
 *
 * Nobody is waiting on the error of an asynchronous write. On the first
 * failure of a given type the error state is cleared and the buffer is written
 * out again, so an async write failure is always retried at least once. The
 * buffer is also set up so that repeated failures are then handled correctly.
 *
 * Repeated async write failures are handled according to the error
 * configuration that has been set up.
 *
 * The handler reports one of the following states:
 *
 * XBF_IOERROR_FINISH: clear IO error retry state and run callback completions
 * XBF_IOERROR_DONE: resubmitted immediately, do not run any completions
 * XBF_IOERROR_FAIL: transient error, run failure callback completions and then
 *	release the buffer
 */
enum {
	XBF_IOERROR_FINISH	= 0,
	XBF_IOERROR_DONE	= 1,
	XBF_IOERROR_FAIL	= 2,
};
|
|
|
|
static int
|
|
xfs_buf_iodone_error(
|
|
struct xfs_buf *bp)
|
|
{
|
|
struct xfs_mount *mp = bp->b_mount;
|
|
struct xfs_error_cfg *cfg;
|
|
|
|
if (xfs_buf_ioerror_fail_without_retry(bp))
|
|
goto out_stale;
|
|
|
|
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
|
|
|
|
cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
|
|
if (xfs_buf_ioerror_retry(bp, cfg)) {
|
|
xfs_buf_ioerror(bp, 0);
|
|
xfs_buf_submit(bp);
|
|
return XBF_IOERROR_DONE;
|
|
}
|
|
|
|
/*
|
|
* Permanent error - we need to trigger a shutdown if we haven't already
|
|
* to indicate that inconsistency will result from this action.
|
|
*/
|
|
if (xfs_buf_ioerror_permanent(bp, cfg)) {
|
|
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
|
|
goto out_stale;
|
|
}
|
|
|
|
/* Still considered a transient error. Caller will schedule retries. */
|
|
return XBF_IOERROR_FAIL;
|
|
|
|
out_stale:
|
|
xfs_buf_stale(bp);
|
|
bp->b_flags |= XBF_DONE;
|
|
trace_xfs_buf_error_relse(bp, _RET_IP_);
|
|
return XBF_IOERROR_FINISH;
|
|
}
|
|
|
|
static void
|
|
xfs_buf_item_done(
|
|
struct xfs_buf *bp)
|
|
{
|
|
struct xfs_buf_log_item *bip = bp->b_log_item;
|
|
|
|
if (!bip)
|
|
return;
|
|
|
|
/*
|
|
* If we are forcibly shutting down, this may well be off the AIL
|
|
* already. That's because we simulate the log-committed callbacks to
|
|
* unpin these buffers. Or we may never have put this item on AIL
|
|
* because of the transaction was aborted forcibly.
|
|
* xfs_trans_ail_delete() takes care of these.
|
|
*
|
|
* Either way, AIL is useless if we're forcing a shutdown.
|
|
*/
|
|
xfs_trans_ail_delete(&bip->bli_item, SHUTDOWN_CORRUPT_INCORE);
|
|
bp->b_log_item = NULL;
|
|
xfs_buf_item_free(bip);
|
|
xfs_buf_rele(bp);
|
|
}
|
|
|
|
static inline void
|
|
xfs_buf_clear_ioerror_retry_state(
|
|
struct xfs_buf *bp)
|
|
{
|
|
bp->b_last_error = 0;
|
|
bp->b_retries = 0;
|
|
bp->b_first_retry_time = 0;
|
|
}
|
|
|
|
/*
|
|
* Inode buffer iodone callback function.
|
|
*/
|
|
void
|
|
xfs_buf_inode_iodone(
|
|
struct xfs_buf *bp)
|
|
{
|
|
if (bp->b_error) {
|
|
struct xfs_log_item *lip;
|
|
int ret = xfs_buf_iodone_error(bp);
|
|
|
|
if (ret == XBF_IOERROR_FINISH)
|
|
goto finish_iodone;
|
|
if (ret == XBF_IOERROR_DONE)
|
|
return;
|
|
ASSERT(ret == XBF_IOERROR_FAIL);
|
|
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
|
|
set_bit(XFS_LI_FAILED, &lip->li_flags);
|
|
}
|
|
xfs_buf_ioerror(bp, 0);
|
|
xfs_buf_relse(bp);
|
|
return;
|
|
}
|
|
|
|
finish_iodone:
|
|
xfs_buf_clear_ioerror_retry_state(bp);
|
|
xfs_buf_item_done(bp);
|
|
xfs_iflush_done(bp);
|
|
xfs_buf_ioend_finish(bp);
|
|
}
|
|
|
|
/*
|
|
* Dquot buffer iodone callback function.
|
|
*/
|
|
void
|
|
xfs_buf_dquot_iodone(
|
|
struct xfs_buf *bp)
|
|
{
|
|
if (bp->b_error) {
|
|
struct xfs_log_item *lip;
|
|
int ret = xfs_buf_iodone_error(bp);
|
|
|
|
if (ret == XBF_IOERROR_FINISH)
|
|
goto finish_iodone;
|
|
if (ret == XBF_IOERROR_DONE)
|
|
return;
|
|
ASSERT(ret == XBF_IOERROR_FAIL);
|
|
spin_lock(&bp->b_mount->m_ail->ail_lock);
|
|
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
|
|
xfs_set_li_failed(lip, bp);
|
|
}
|
|
spin_unlock(&bp->b_mount->m_ail->ail_lock);
|
|
xfs_buf_ioerror(bp, 0);
|
|
xfs_buf_relse(bp);
|
|
return;
|
|
}
|
|
|
|
finish_iodone:
|
|
xfs_buf_clear_ioerror_retry_state(bp);
|
|
/* a newly allocated dquot buffer might have a log item attached */
|
|
xfs_buf_item_done(bp);
|
|
xfs_dquot_done(bp);
|
|
xfs_buf_ioend_finish(bp);
|
|
}
|
|
|
|
/*
|
|
* Dirty buffer iodone callback function.
|
|
*
|
|
* Note that for things like remote attribute buffers, there may not be a buffer
|
|
* log item here, so processing the buffer log item must remain be optional.
|
|
*/
|
|
void
|
|
xfs_buf_iodone(
|
|
struct xfs_buf *bp)
|
|
{
|
|
if (bp->b_error) {
|
|
int ret = xfs_buf_iodone_error(bp);
|
|
|
|
if (ret == XBF_IOERROR_FINISH)
|
|
goto finish_iodone;
|
|
if (ret == XBF_IOERROR_DONE)
|
|
return;
|
|
ASSERT(ret == XBF_IOERROR_FAIL);
|
|
ASSERT(list_empty(&bp->b_li_list));
|
|
xfs_buf_ioerror(bp, 0);
|
|
xfs_buf_relse(bp);
|
|
return;
|
|
}
|
|
|
|
finish_iodone:
|
|
xfs_buf_clear_ioerror_retry_state(bp);
|
|
xfs_buf_item_done(bp);
|
|
xfs_buf_ioend_finish(bp);
|
|
}
|