mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-14 09:47:20 +00:00
Merge branch 'for-2.6.34-rc1-batch2' into for-linus
This commit is contained in:
commit
9b1f56d60a
@ -105,7 +105,6 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
|
||||
xfs_globals.o \
|
||||
xfs_ioctl.o \
|
||||
xfs_iops.o \
|
||||
xfs_lrw.o \
|
||||
xfs_super.o \
|
||||
xfs_sync.o \
|
||||
xfs_xattr.o)
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "xfs_iomap.h"
|
||||
#include "xfs_vnodeops.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_bmap.h"
|
||||
#include <linux/mpage.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/writeback.h>
|
||||
@ -163,14 +164,17 @@ xfs_ioend_new_eof(
|
||||
}
|
||||
|
||||
/*
|
||||
* Update on-disk file size now that data has been written to disk.
|
||||
* The current in-memory file size is i_size. If a write is beyond
|
||||
* eof i_new_size will be the intended file size until i_size is
|
||||
* updated. If this write does not extend all the way to the valid
|
||||
* file size then restrict this update to the end of the write.
|
||||
* Update on-disk file size now that data has been written to disk. The
|
||||
* current in-memory file size is i_size. If a write is beyond eof i_new_size
|
||||
* will be the intended file size until i_size is updated. If this write does
|
||||
* not extend all the way to the valid file size then restrict this update to
|
||||
* the end of the write.
|
||||
*
|
||||
* This function does not block as blocking on the inode lock in IO completion
|
||||
* can lead to IO completion order dependency deadlocks. If it can't get the
|
||||
* inode ilock it will return EAGAIN. Callers must handle this.
|
||||
*/
|
||||
|
||||
STATIC void
|
||||
STATIC int
|
||||
xfs_setfilesize(
|
||||
xfs_ioend_t *ioend)
|
||||
{
|
||||
@ -181,50 +185,19 @@ xfs_setfilesize(
|
||||
ASSERT(ioend->io_type != IOMAP_READ);
|
||||
|
||||
if (unlikely(ioend->io_error))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
|
||||
return EAGAIN;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
isize = xfs_ioend_new_eof(ioend);
|
||||
if (isize) {
|
||||
ip->i_d.di_size = isize;
|
||||
xfs_mark_inode_dirty_sync(ip);
|
||||
xfs_mark_inode_dirty(ip);
|
||||
}
|
||||
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
/*
|
||||
* IO write completion.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_end_io(
|
||||
struct work_struct *work)
|
||||
{
|
||||
xfs_ioend_t *ioend =
|
||||
container_of(work, xfs_ioend_t, io_work);
|
||||
struct xfs_inode *ip = XFS_I(ioend->io_inode);
|
||||
|
||||
/*
|
||||
* For unwritten extents we need to issue transactions to convert a
|
||||
* range to normal written extents after the data I/O has finished.
|
||||
*/
|
||||
if (ioend->io_type == IOMAP_UNWRITTEN &&
|
||||
likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
|
||||
int error;
|
||||
|
||||
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
|
||||
ioend->io_size);
|
||||
if (error)
|
||||
ioend->io_error = error;
|
||||
}
|
||||
|
||||
/*
|
||||
* We might have to update the on-disk file size after extending
|
||||
* writes.
|
||||
*/
|
||||
if (ioend->io_type != IOMAP_READ)
|
||||
xfs_setfilesize(ioend);
|
||||
xfs_destroy_ioend(ioend);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -248,6 +221,53 @@ xfs_finish_ioend(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* IO write completion.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_end_io(
|
||||
struct work_struct *work)
|
||||
{
|
||||
xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
|
||||
struct xfs_inode *ip = XFS_I(ioend->io_inode);
|
||||
int error = 0;
|
||||
|
||||
/*
|
||||
* For unwritten extents we need to issue transactions to convert a
|
||||
* range to normal written extents after the data I/O has finished.
|
||||
*/
|
||||
if (ioend->io_type == IOMAP_UNWRITTEN &&
|
||||
likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
|
||||
|
||||
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
|
||||
ioend->io_size);
|
||||
if (error)
|
||||
ioend->io_error = error;
|
||||
}
|
||||
|
||||
/*
|
||||
* We might have to update the on-disk file size after extending
|
||||
* writes.
|
||||
*/
|
||||
if (ioend->io_type != IOMAP_READ) {
|
||||
error = xfs_setfilesize(ioend);
|
||||
ASSERT(!error || error == EAGAIN);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we didn't complete processing of the ioend, requeue it to the
|
||||
* tail of the workqueue for another attempt later. Otherwise destroy
|
||||
* it.
|
||||
*/
|
||||
if (error == EAGAIN) {
|
||||
atomic_inc(&ioend->io_remaining);
|
||||
xfs_finish_ioend(ioend, 0);
|
||||
/* ensure we don't spin on blocked ioends */
|
||||
delay(1);
|
||||
} else
|
||||
xfs_destroy_ioend(ioend);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate and initialise an IO completion structure.
|
||||
* We need to track unwritten extent write completion here initially.
|
||||
@ -341,7 +361,7 @@ xfs_submit_ioend_bio(
|
||||
* but don't update the inode size until I/O completion.
|
||||
*/
|
||||
if (xfs_ioend_new_eof(ioend))
|
||||
xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
|
||||
xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
|
||||
|
||||
submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
|
||||
WRITE_SYNC_PLUG : WRITE, bio);
|
||||
@ -874,6 +894,118 @@ xfs_cluster_write(
|
||||
}
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_vm_invalidatepage(
|
||||
struct page *page,
|
||||
unsigned long offset)
|
||||
{
|
||||
trace_xfs_invalidatepage(page->mapping->host, page, offset);
|
||||
block_invalidatepage(page, offset);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the page has delalloc buffers on it, we need to punch them out before we
|
||||
* invalidate the page. If we don't, we leave a stale delalloc mapping on the
|
||||
* inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
|
||||
* is done on that same region - the delalloc extent is returned when none is
|
||||
* supposed to be there.
|
||||
*
|
||||
* We prevent this by truncating away the delalloc regions on the page before
|
||||
* invalidating it. Because they are delalloc, we can do this without needing a
|
||||
* transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
|
||||
* truncation without a transaction as there is no space left for block
|
||||
* reservation (typically why we see an ENOSPC in writeback).
|
||||
*
|
||||
* This is not a performance critical path, so for now just do the punching a
|
||||
* buffer head at a time.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_aops_discard_page(
|
||||
struct page *page)
|
||||
{
|
||||
struct inode *inode = page->mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct buffer_head *bh, *head;
|
||||
loff_t offset = page_offset(page);
|
||||
ssize_t len = 1 << inode->i_blkbits;
|
||||
|
||||
if (!xfs_is_delayed_page(page, IOMAP_DELAY))
|
||||
goto out_invalidate;
|
||||
|
||||
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
|
||||
"page discard on page %p, inode 0x%llx, offset %llu.",
|
||||
page, ip->i_ino, offset);
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
bh = head = page_buffers(page);
|
||||
do {
|
||||
int done;
|
||||
xfs_fileoff_t offset_fsb;
|
||||
xfs_bmbt_irec_t imap;
|
||||
int nimaps = 1;
|
||||
int error;
|
||||
xfs_fsblock_t firstblock;
|
||||
xfs_bmap_free_t flist;
|
||||
|
||||
if (!buffer_delay(bh))
|
||||
goto next_buffer;
|
||||
|
||||
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
|
||||
|
||||
/*
|
||||
* Map the range first and check that it is a delalloc extent
|
||||
* before trying to unmap the range. Otherwise we will be
|
||||
* trying to remove a real extent (which requires a
|
||||
* transaction) or a hole, which is probably a bad idea...
|
||||
*/
|
||||
error = xfs_bmapi(NULL, ip, offset_fsb, 1,
|
||||
XFS_BMAPI_ENTIRE, NULL, 0, &imap,
|
||||
&nimaps, NULL, NULL);
|
||||
|
||||
if (error) {
|
||||
/* something screwed, just bail */
|
||||
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
|
||||
"page discard failed delalloc mapping lookup.");
|
||||
break;
|
||||
}
|
||||
if (!nimaps) {
|
||||
/* nothing there */
|
||||
goto next_buffer;
|
||||
}
|
||||
if (imap.br_startblock != DELAYSTARTBLOCK) {
|
||||
/* been converted, ignore */
|
||||
goto next_buffer;
|
||||
}
|
||||
WARN_ON(imap.br_blockcount == 0);
|
||||
|
||||
/*
|
||||
* Note: while we initialise the firstblock/flist pair, they
|
||||
* should never be used because blocks should never be
|
||||
* allocated or freed for a delalloc extent and hence we don't
|
||||
* need to cancel or finish them after the xfs_bunmapi() call.
|
||||
*/
|
||||
xfs_bmap_init(&flist, &firstblock);
|
||||
error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
|
||||
&flist, NULL, &done);
|
||||
|
||||
ASSERT(!flist.xbf_count && !flist.xbf_first);
|
||||
if (error) {
|
||||
/* something screwed, just bail */
|
||||
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
|
||||
"page discard unable to remove delalloc mapping.");
|
||||
break;
|
||||
}
|
||||
next_buffer:
|
||||
offset += len;
|
||||
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
out_invalidate:
|
||||
xfs_vm_invalidatepage(page, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calling this without startio set means we are being asked to make a dirty
|
||||
* page ready for freeing its buffers. When called with startio set then
|
||||
@ -1125,7 +1257,7 @@ error:
|
||||
*/
|
||||
if (err != -EAGAIN) {
|
||||
if (!unmapped)
|
||||
block_invalidatepage(page, 0);
|
||||
xfs_aops_discard_page(page);
|
||||
ClearPageUptodate(page);
|
||||
}
|
||||
return err;
|
||||
@ -1535,15 +1667,6 @@ xfs_vm_readpages(
|
||||
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_vm_invalidatepage(
|
||||
struct page *page,
|
||||
unsigned long offset)
|
||||
{
|
||||
trace_xfs_invalidatepage(page->mapping->host, page, offset);
|
||||
block_invalidatepage(page, offset);
|
||||
}
|
||||
|
||||
const struct address_space_operations xfs_address_space_operations = {
|
||||
.readpage = xfs_vm_readpage,
|
||||
.readpages = xfs_vm_readpages,
|
||||
|
@ -16,6 +16,7 @@
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
@ -34,52 +35,279 @@
|
||||
#include "xfs_dir2_sf.h"
|
||||
#include "xfs_dinode.h"
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_inode_item.h"
|
||||
#include "xfs_bmap.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_vnodeops.h"
|
||||
#include "xfs_da_btree.h"
|
||||
#include "xfs_ioctl.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
#include <linux/dcache.h>
|
||||
|
||||
static const struct vm_operations_struct xfs_file_vm_ops;
|
||||
|
||||
STATIC ssize_t
|
||||
xfs_file_aio_read(
|
||||
struct kiocb *iocb,
|
||||
const struct iovec *iov,
|
||||
unsigned long nr_segs,
|
||||
loff_t pos)
|
||||
/*
|
||||
* xfs_iozero
|
||||
*
|
||||
* xfs_iozero clears the specified range of buffer supplied,
|
||||
* and marks all the affected blocks as valid and modified. If
|
||||
* an affected block is not allocated, it will be allocated. If
|
||||
* an affected block is not completely overwritten, and is not
|
||||
* valid before the operation, it will be read from disk before
|
||||
* being partially zeroed.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_iozero(
|
||||
struct xfs_inode *ip, /* inode */
|
||||
loff_t pos, /* offset in file */
|
||||
size_t count) /* size of data to zero */
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
int ioflags = 0;
|
||||
struct page *page;
|
||||
struct address_space *mapping;
|
||||
int status;
|
||||
|
||||
BUG_ON(iocb->ki_pos != pos);
|
||||
if (unlikely(file->f_flags & O_DIRECT))
|
||||
ioflags |= IO_ISDIRECT;
|
||||
if (file->f_mode & FMODE_NOCMTIME)
|
||||
ioflags |= IO_INVIS;
|
||||
return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
|
||||
nr_segs, &iocb->ki_pos, ioflags);
|
||||
mapping = VFS_I(ip)->i_mapping;
|
||||
do {
|
||||
unsigned offset, bytes;
|
||||
void *fsdata;
|
||||
|
||||
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
|
||||
bytes = PAGE_CACHE_SIZE - offset;
|
||||
if (bytes > count)
|
||||
bytes = count;
|
||||
|
||||
status = pagecache_write_begin(NULL, mapping, pos, bytes,
|
||||
AOP_FLAG_UNINTERRUPTIBLE,
|
||||
&page, &fsdata);
|
||||
if (status)
|
||||
break;
|
||||
|
||||
zero_user(page, offset, bytes);
|
||||
|
||||
status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
|
||||
page, fsdata);
|
||||
WARN_ON(status <= 0); /* can't return less than zero! */
|
||||
pos += bytes;
|
||||
count -= bytes;
|
||||
status = 0;
|
||||
} while (count);
|
||||
|
||||
return (-status);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_file_fsync(
|
||||
struct file *file,
|
||||
struct dentry *dentry,
|
||||
int datasync)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(dentry->d_inode);
|
||||
struct xfs_trans *tp;
|
||||
int error = 0;
|
||||
int log_flushed = 0;
|
||||
|
||||
xfs_itrace_entry(ip);
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return -XFS_ERROR(EIO);
|
||||
|
||||
xfs_iflags_clear(ip, XFS_ITRUNCATED);
|
||||
|
||||
/*
|
||||
* We always need to make sure that the required inode state is safe on
|
||||
* disk. The inode might be clean but we still might need to force the
|
||||
* log because of committed transactions that haven't hit the disk yet.
|
||||
* Likewise, there could be unflushed non-transactional changes to the
|
||||
* inode core that have to go to disk and this requires us to issue
|
||||
* a synchronous transaction to capture these changes correctly.
|
||||
*
|
||||
* This code relies on the assumption that if the i_update_core field
|
||||
* of the inode is clear and the inode is unpinned then it is clean
|
||||
* and no action is required.
|
||||
*/
|
||||
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||
|
||||
/*
|
||||
* First check if the VFS inode is marked dirty. All the dirtying
|
||||
* of non-transactional updates now goes through mark_inode_dirty*,
|
||||
* which allows us to distinguish between pure timestamp updates
|
||||
* and i_size updates which need to be caught for fdatasync.
|
||||
* After that also check for the dirty state in the XFS inode, which
|
||||
* might get cleared when the inode gets written out via the AIL
|
||||
* or xfs_iflush_cluster.
|
||||
*/
|
||||
if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
|
||||
((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
|
||||
ip->i_update_core) {
|
||||
/*
|
||||
* Kick off a transaction to log the inode core to get the
|
||||
* updates. The sync transaction will also force the log.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
|
||||
error = xfs_trans_reserve(tp, 0,
|
||||
XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
|
||||
if (error) {
|
||||
xfs_trans_cancel(tp, 0);
|
||||
return -error;
|
||||
}
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
/*
|
||||
* Note - it's possible that we might have pushed ourselves out
|
||||
* of the way during trans_reserve which would flush the inode.
|
||||
* But there's no guarantee that the inode buffer has actually
|
||||
* gone out yet (it's delwri). Plus the buffer could be pinned
|
||||
* anyway if it's part of an inode in another recent
|
||||
* transaction. So we play it safe and fire off the
|
||||
* transaction anyway.
|
||||
*/
|
||||
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
|
||||
xfs_trans_ihold(tp, ip);
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
xfs_trans_set_sync(tp);
|
||||
error = _xfs_trans_commit(tp, 0, &log_flushed);
|
||||
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
} else {
|
||||
/*
|
||||
* Timestamps/size haven't changed since last inode flush or
|
||||
* inode transaction commit. That means either nothing got
|
||||
* written or a transaction committed which caught the updates.
|
||||
* If the latter happened and the transaction hasn't hit the
|
||||
* disk yet, the inode will still be pinned. If it is,
|
||||
* force the log.
|
||||
*/
|
||||
if (xfs_ipincount(ip)) {
|
||||
error = _xfs_log_force_lsn(ip->i_mount,
|
||||
ip->i_itemp->ili_last_lsn,
|
||||
XFS_LOG_SYNC, &log_flushed);
|
||||
}
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
}
|
||||
|
||||
if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
|
||||
/*
|
||||
* If the log write didn't issue an ordered tag we need
|
||||
* to flush the disk cache for the data device now.
|
||||
*/
|
||||
if (!log_flushed)
|
||||
xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
|
||||
|
||||
/*
|
||||
* If this inode is on the RT dev we need to flush that
|
||||
* cache as well.
|
||||
*/
|
||||
if (XFS_IS_REALTIME_INODE(ip))
|
||||
xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
|
||||
}
|
||||
|
||||
return -error;
|
||||
}
|
||||
|
||||
STATIC ssize_t
|
||||
xfs_file_aio_write(
|
||||
xfs_file_aio_read(
|
||||
struct kiocb *iocb,
|
||||
const struct iovec *iov,
|
||||
const struct iovec *iovp,
|
||||
unsigned long nr_segs,
|
||||
loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
size_t size = 0;
|
||||
ssize_t ret = 0;
|
||||
int ioflags = 0;
|
||||
xfs_fsize_t n;
|
||||
unsigned long seg;
|
||||
|
||||
XFS_STATS_INC(xs_read_calls);
|
||||
|
||||
BUG_ON(iocb->ki_pos != pos);
|
||||
|
||||
if (unlikely(file->f_flags & O_DIRECT))
|
||||
ioflags |= IO_ISDIRECT;
|
||||
if (file->f_mode & FMODE_NOCMTIME)
|
||||
ioflags |= IO_INVIS;
|
||||
return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
|
||||
&iocb->ki_pos, ioflags);
|
||||
|
||||
/* START copy & waste from filemap.c */
|
||||
for (seg = 0; seg < nr_segs; seg++) {
|
||||
const struct iovec *iv = &iovp[seg];
|
||||
|
||||
/*
|
||||
* If any segment has a negative length, or the cumulative
|
||||
* length ever wraps negative then return -EINVAL.
|
||||
*/
|
||||
size += iv->iov_len;
|
||||
if (unlikely((ssize_t)(size|iv->iov_len) < 0))
|
||||
return XFS_ERROR(-EINVAL);
|
||||
}
|
||||
/* END copy & waste from filemap.c */
|
||||
|
||||
if (unlikely(ioflags & IO_ISDIRECT)) {
|
||||
xfs_buftarg_t *target =
|
||||
XFS_IS_REALTIME_INODE(ip) ?
|
||||
mp->m_rtdev_targp : mp->m_ddev_targp;
|
||||
if ((iocb->ki_pos & target->bt_smask) ||
|
||||
(size & target->bt_smask)) {
|
||||
if (iocb->ki_pos == ip->i_size)
|
||||
return 0;
|
||||
return -XFS_ERROR(EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
|
||||
if (n <= 0 || size == 0)
|
||||
return 0;
|
||||
|
||||
if (n < size)
|
||||
size = n;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return -EIO;
|
||||
|
||||
if (unlikely(ioflags & IO_ISDIRECT))
|
||||
mutex_lock(&inode->i_mutex);
|
||||
xfs_ilock(ip, XFS_IOLOCK_SHARED);
|
||||
|
||||
if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
|
||||
int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
|
||||
int iolock = XFS_IOLOCK_SHARED;
|
||||
|
||||
ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
|
||||
dmflags, &iolock);
|
||||
if (ret) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
if (unlikely(ioflags & IO_ISDIRECT))
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(ioflags & IO_ISDIRECT)) {
|
||||
if (inode->i_mapping->nrpages) {
|
||||
ret = -xfs_flushinval_pages(ip,
|
||||
(iocb->ki_pos & PAGE_CACHE_MASK),
|
||||
-1, FI_REMAPF_LOCKED);
|
||||
}
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
if (ret) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
|
||||
|
||||
ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
|
||||
if (ret > 0)
|
||||
XFS_STATS_ADD(xs_read_bytes, ret);
|
||||
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
|
||||
STATIC ssize_t
|
||||
@ -87,16 +315,44 @@ xfs_file_splice_read(
|
||||
struct file *infilp,
|
||||
loff_t *ppos,
|
||||
struct pipe_inode_info *pipe,
|
||||
size_t len,
|
||||
size_t count,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
int ioflags = 0;
|
||||
ssize_t ret;
|
||||
|
||||
XFS_STATS_INC(xs_read_calls);
|
||||
|
||||
if (infilp->f_mode & FMODE_NOCMTIME)
|
||||
ioflags |= IO_INVIS;
|
||||
|
||||
return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
|
||||
infilp, ppos, pipe, len, flags, ioflags);
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return -EIO;
|
||||
|
||||
xfs_ilock(ip, XFS_IOLOCK_SHARED);
|
||||
|
||||
if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
|
||||
int iolock = XFS_IOLOCK_SHARED;
|
||||
int error;
|
||||
|
||||
error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
|
||||
FILP_DELAY_FLAG(infilp), &iolock);
|
||||
if (error) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
return -error;
|
||||
}
|
||||
}
|
||||
|
||||
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
|
||||
|
||||
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
|
||||
if (ret > 0)
|
||||
XFS_STATS_ADD(xs_read_bytes, ret);
|
||||
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
|
||||
STATIC ssize_t
|
||||
@ -104,16 +360,538 @@ xfs_file_splice_write(
|
||||
struct pipe_inode_info *pipe,
|
||||
struct file *outfilp,
|
||||
loff_t *ppos,
|
||||
size_t len,
|
||||
size_t count,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct inode *inode = outfilp->f_mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
xfs_fsize_t isize, new_size;
|
||||
int ioflags = 0;
|
||||
ssize_t ret;
|
||||
|
||||
XFS_STATS_INC(xs_write_calls);
|
||||
|
||||
if (outfilp->f_mode & FMODE_NOCMTIME)
|
||||
ioflags |= IO_INVIS;
|
||||
|
||||
return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
|
||||
pipe, outfilp, ppos, len, flags, ioflags);
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return -EIO;
|
||||
|
||||
xfs_ilock(ip, XFS_IOLOCK_EXCL);
|
||||
|
||||
if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
|
||||
int iolock = XFS_IOLOCK_EXCL;
|
||||
int error;
|
||||
|
||||
error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
|
||||
FILP_DELAY_FLAG(outfilp), &iolock);
|
||||
if (error) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
||||
return -error;
|
||||
}
|
||||
}
|
||||
|
||||
new_size = *ppos + count;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (new_size > ip->i_size)
|
||||
ip->i_new_size = new_size;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
|
||||
|
||||
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
|
||||
if (ret > 0)
|
||||
XFS_STATS_ADD(xs_write_bytes, ret);
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
|
||||
*ppos = isize;
|
||||
|
||||
if (*ppos > ip->i_size) {
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (*ppos > ip->i_size)
|
||||
ip->i_size = *ppos;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
if (ip->i_new_size) {
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ip->i_new_size = 0;
|
||||
if (ip->i_d.di_size > ip->i_size)
|
||||
ip->i_d.di_size = ip->i_size;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is called to handle zeroing any space in the last
|
||||
* block of the file that is beyond the EOF. We do this since the
|
||||
* size is being increased without writing anything to that block
|
||||
* and we don't want anyone to read the garbage on the disk.
|
||||
*/
|
||||
STATIC int /* error (positive) */
|
||||
xfs_zero_last_block(
|
||||
xfs_inode_t *ip,
|
||||
xfs_fsize_t offset,
|
||||
xfs_fsize_t isize)
|
||||
{
|
||||
xfs_fileoff_t last_fsb;
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
int nimaps;
|
||||
int zero_offset;
|
||||
int zero_len;
|
||||
int error = 0;
|
||||
xfs_bmbt_irec_t imap;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
||||
|
||||
zero_offset = XFS_B_FSB_OFFSET(mp, isize);
|
||||
if (zero_offset == 0) {
|
||||
/*
|
||||
* There are no extra bytes in the last block on disk to
|
||||
* zero, so return.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
last_fsb = XFS_B_TO_FSBT(mp, isize);
|
||||
nimaps = 1;
|
||||
error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
|
||||
&nimaps, NULL, NULL);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
ASSERT(nimaps > 0);
|
||||
/*
|
||||
* If the block underlying isize is just a hole, then there
|
||||
* is nothing to zero.
|
||||
*/
|
||||
if (imap.br_startblock == HOLESTARTBLOCK) {
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Zero the part of the last block beyond the EOF, and write it
|
||||
* out sync. We need to drop the ilock while we do this so we
|
||||
* don't deadlock when the buffer cache calls back to us.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
zero_len = mp->m_sb.sb_blocksize - zero_offset;
|
||||
if (isize + zero_len > offset)
|
||||
zero_len = offset - isize;
|
||||
error = xfs_iozero(ip, isize, zero_len);
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ASSERT(error >= 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero any on disk space between the current EOF and the new,
|
||||
* larger EOF. This handles the normal case of zeroing the remainder
|
||||
* of the last block in the file and the unusual case of zeroing blocks
|
||||
* out beyond the size of the file. This second case only happens
|
||||
* with fixed size extents and when the system crashes before the inode
|
||||
* size was updated but after blocks were allocated. If fill is set,
|
||||
* then any holes in the range are filled and zeroed. If not, the holes
|
||||
* are left alone as holes.
|
||||
*/
|
||||
|
||||
int /* error (positive) */
|
||||
xfs_zero_eof(
|
||||
xfs_inode_t *ip,
|
||||
xfs_off_t offset, /* starting I/O offset */
|
||||
xfs_fsize_t isize) /* current inode size */
|
||||
{
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
xfs_fileoff_t start_zero_fsb;
|
||||
xfs_fileoff_t end_zero_fsb;
|
||||
xfs_fileoff_t zero_count_fsb;
|
||||
xfs_fileoff_t last_fsb;
|
||||
xfs_fileoff_t zero_off;
|
||||
xfs_fsize_t zero_len;
|
||||
int nimaps;
|
||||
int error = 0;
|
||||
xfs_bmbt_irec_t imap;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
ASSERT(offset > isize);
|
||||
|
||||
/*
|
||||
* First handle zeroing the block on which isize resides.
|
||||
* We only zero a part of that block so it is handled specially.
|
||||
*/
|
||||
error = xfs_zero_last_block(ip, offset, isize);
|
||||
if (error) {
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the range between the new size and the old
|
||||
* where blocks needing to be zeroed may exist. To get the
|
||||
* block where the last byte in the file currently resides,
|
||||
* we need to subtract one from the size and truncate back
|
||||
* to a block boundary. We subtract 1 in case the size is
|
||||
* exactly on a block boundary.
|
||||
*/
|
||||
last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
|
||||
start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
|
||||
end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
|
||||
ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
|
||||
if (last_fsb == end_zero_fsb) {
|
||||
/*
|
||||
* The size was only incremented on its last block.
|
||||
* We took care of that above, so just return.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
ASSERT(start_zero_fsb <= end_zero_fsb);
|
||||
while (start_zero_fsb <= end_zero_fsb) {
|
||||
nimaps = 1;
|
||||
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
|
||||
error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
|
||||
0, NULL, 0, &imap, &nimaps, NULL, NULL);
|
||||
if (error) {
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
return error;
|
||||
}
|
||||
ASSERT(nimaps > 0);
|
||||
|
||||
if (imap.br_state == XFS_EXT_UNWRITTEN ||
|
||||
imap.br_startblock == HOLESTARTBLOCK) {
|
||||
/*
|
||||
* This loop handles initializing pages that were
|
||||
* partially initialized by the code below this
|
||||
* loop. It basically zeroes the part of the page
|
||||
* that sits on a hole and sets the page as P_HOLE
|
||||
* and calls remapf if it is a mapped file.
|
||||
*/
|
||||
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
|
||||
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* There are blocks we need to zero.
|
||||
* Drop the inode lock while we're doing the I/O.
|
||||
* We'll still have the iolock to protect us.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
|
||||
zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
|
||||
|
||||
if ((zero_off + zero_len) > offset)
|
||||
zero_len = offset - zero_off;
|
||||
|
||||
error = xfs_iozero(ip, zero_off, zero_len);
|
||||
if (error) {
|
||||
goto out_lock;
|
||||
}
|
||||
|
||||
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
|
||||
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_lock:
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ASSERT(error >= 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
STATIC ssize_t
|
||||
xfs_file_aio_write(
|
||||
struct kiocb *iocb,
|
||||
const struct iovec *iovp,
|
||||
unsigned long nr_segs,
|
||||
loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
ssize_t ret = 0, error = 0;
|
||||
int ioflags = 0;
|
||||
xfs_fsize_t isize, new_size;
|
||||
int iolock;
|
||||
int eventsent = 0;
|
||||
size_t ocount = 0, count;
|
||||
int need_i_mutex;
|
||||
|
||||
XFS_STATS_INC(xs_write_calls);
|
||||
|
||||
BUG_ON(iocb->ki_pos != pos);
|
||||
|
||||
if (unlikely(file->f_flags & O_DIRECT))
|
||||
ioflags |= IO_ISDIRECT;
|
||||
if (file->f_mode & FMODE_NOCMTIME)
|
||||
ioflags |= IO_INVIS;
|
||||
|
||||
error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
count = ocount;
|
||||
if (count == 0)
|
||||
return 0;
|
||||
|
||||
xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return -EIO;
|
||||
|
||||
relock:
|
||||
if (ioflags & IO_ISDIRECT) {
|
||||
iolock = XFS_IOLOCK_SHARED;
|
||||
need_i_mutex = 0;
|
||||
} else {
|
||||
iolock = XFS_IOLOCK_EXCL;
|
||||
need_i_mutex = 1;
|
||||
mutex_lock(&inode->i_mutex);
|
||||
}
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
|
||||
|
||||
start:
|
||||
error = -generic_write_checks(file, &pos, &count,
|
||||
S_ISBLK(inode->i_mode));
|
||||
if (error) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
|
||||
goto out_unlock_mutex;
|
||||
}
|
||||
|
||||
if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
|
||||
!(ioflags & IO_INVIS) && !eventsent)) {
|
||||
int dmflags = FILP_DELAY_FLAG(file);
|
||||
|
||||
if (need_i_mutex)
|
||||
dmflags |= DM_FLAGS_IMUX;
|
||||
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
|
||||
pos, count, dmflags, &iolock);
|
||||
if (error) {
|
||||
goto out_unlock_internal;
|
||||
}
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
eventsent = 1;
|
||||
|
||||
/*
|
||||
* The iolock was dropped and reacquired in XFS_SEND_DATA
|
||||
* so we have to recheck the size when appending.
|
||||
* We will only "goto start;" once, since having sent the
|
||||
* event prevents another call to XFS_SEND_DATA, which is
|
||||
* what allows the size to change in the first place.
|
||||
*/
|
||||
if ((file->f_flags & O_APPEND) && pos != ip->i_size)
|
||||
goto start;
|
||||
}
|
||||
|
||||
if (ioflags & IO_ISDIRECT) {
|
||||
xfs_buftarg_t *target =
|
||||
XFS_IS_REALTIME_INODE(ip) ?
|
||||
mp->m_rtdev_targp : mp->m_ddev_targp;
|
||||
|
||||
if ((pos & target->bt_smask) || (count & target->bt_smask)) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
|
||||
return XFS_ERROR(-EINVAL);
|
||||
}
|
||||
|
||||
if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
|
||||
iolock = XFS_IOLOCK_EXCL;
|
||||
need_i_mutex = 1;
|
||||
mutex_lock(&inode->i_mutex);
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
|
||||
goto start;
|
||||
}
|
||||
}
|
||||
|
||||
new_size = pos + count;
|
||||
if (new_size > ip->i_size)
|
||||
ip->i_new_size = new_size;
|
||||
|
||||
if (likely(!(ioflags & IO_INVIS)))
|
||||
file_update_time(file);
|
||||
|
||||
/*
|
||||
* If the offset is beyond the size of the file, we have a couple
|
||||
* of things to do. First, if there is already space allocated
|
||||
* we need to either create holes or zero the disk or ...
|
||||
*
|
||||
* If there is a page where the previous size lands, we need
|
||||
* to zero it out up to the new size.
|
||||
*/
|
||||
|
||||
if (pos > ip->i_size) {
|
||||
error = xfs_zero_eof(ip, pos, ip->i_size);
|
||||
if (error) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
goto out_unlock_internal;
|
||||
}
|
||||
}
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
/*
|
||||
* If we're writing the file then make sure to clear the
|
||||
* setuid and setgid bits if the process is not being run
|
||||
* by root. This keeps people from modifying setuid and
|
||||
* setgid binaries.
|
||||
*/
|
||||
error = -file_remove_suid(file);
|
||||
if (unlikely(error))
|
||||
goto out_unlock_internal;
|
||||
|
||||
/* We can write back this queue in page reclaim */
|
||||
current->backing_dev_info = mapping->backing_dev_info;
|
||||
|
||||
if ((ioflags & IO_ISDIRECT)) {
|
||||
if (mapping->nrpages) {
|
||||
WARN_ON(need_i_mutex == 0);
|
||||
error = xfs_flushinval_pages(ip,
|
||||
(pos & PAGE_CACHE_MASK),
|
||||
-1, FI_REMAPF_LOCKED);
|
||||
if (error)
|
||||
goto out_unlock_internal;
|
||||
}
|
||||
|
||||
if (need_i_mutex) {
|
||||
/* demote the lock now the cached pages are gone */
|
||||
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
iolock = XFS_IOLOCK_SHARED;
|
||||
need_i_mutex = 0;
|
||||
}
|
||||
|
||||
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
|
||||
ret = generic_file_direct_write(iocb, iovp,
|
||||
&nr_segs, pos, &iocb->ki_pos, count, ocount);
|
||||
|
||||
/*
|
||||
* direct-io write to a hole: fall through to buffered I/O
|
||||
* for completing the rest of the request.
|
||||
*/
|
||||
if (ret >= 0 && ret != count) {
|
||||
XFS_STATS_ADD(xs_write_bytes, ret);
|
||||
|
||||
pos += ret;
|
||||
count -= ret;
|
||||
|
||||
ioflags &= ~IO_ISDIRECT;
|
||||
xfs_iunlock(ip, iolock);
|
||||
goto relock;
|
||||
}
|
||||
} else {
|
||||
int enospc = 0;
|
||||
ssize_t ret2 = 0;
|
||||
|
||||
write_retry:
|
||||
trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
|
||||
ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
|
||||
pos, &iocb->ki_pos, count, ret);
|
||||
/*
|
||||
* if we just got an ENOSPC, flush the inode now we
|
||||
* aren't holding any page locks and retry *once*
|
||||
*/
|
||||
if (ret2 == -ENOSPC && !enospc) {
|
||||
error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
|
||||
if (error)
|
||||
goto out_unlock_internal;
|
||||
enospc = 1;
|
||||
goto write_retry;
|
||||
}
|
||||
ret = ret2;
|
||||
}
|
||||
|
||||
current->backing_dev_info = NULL;
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
|
||||
iocb->ki_pos = isize;
|
||||
|
||||
if (iocb->ki_pos > ip->i_size) {
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (iocb->ki_pos > ip->i_size)
|
||||
ip->i_size = iocb->ki_pos;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
if (ret == -ENOSPC &&
|
||||
DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
|
||||
xfs_iunlock(ip, iolock);
|
||||
if (need_i_mutex)
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
|
||||
DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
|
||||
0, 0, 0); /* Delay flag intentionally unused */
|
||||
if (need_i_mutex)
|
||||
mutex_lock(&inode->i_mutex);
|
||||
xfs_ilock(ip, iolock);
|
||||
if (error)
|
||||
goto out_unlock_internal;
|
||||
goto start;
|
||||
}
|
||||
|
||||
error = -ret;
|
||||
if (ret <= 0)
|
||||
goto out_unlock_internal;
|
||||
|
||||
XFS_STATS_ADD(xs_write_bytes, ret);
|
||||
|
||||
/* Handle various SYNC-type writes */
|
||||
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
|
||||
loff_t end = pos + ret - 1;
|
||||
int error2;
|
||||
|
||||
xfs_iunlock(ip, iolock);
|
||||
if (need_i_mutex)
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
error2 = filemap_write_and_wait_range(mapping, pos, end);
|
||||
if (!error)
|
||||
error = error2;
|
||||
if (need_i_mutex)
|
||||
mutex_lock(&inode->i_mutex);
|
||||
xfs_ilock(ip, iolock);
|
||||
|
||||
error2 = -xfs_file_fsync(file, file->f_path.dentry,
|
||||
(file->f_flags & __O_SYNC) ? 0 : 1);
|
||||
if (!error)
|
||||
error = error2;
|
||||
}
|
||||
|
||||
out_unlock_internal:
|
||||
if (ip->i_new_size) {
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ip->i_new_size = 0;
|
||||
/*
|
||||
* If this was a direct or synchronous I/O that failed (such
|
||||
* as ENOSPC) then part of the I/O may have been written to
|
||||
* disk before the error occured. In this case the on-disk
|
||||
* file size may have been adjusted beyond the in-memory file
|
||||
* size and now needs to be truncated back.
|
||||
*/
|
||||
if (ip->i_d.di_size > ip->i_size)
|
||||
ip->i_d.di_size = ip->i_size;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
xfs_iunlock(ip, iolock);
|
||||
out_unlock_mutex:
|
||||
if (need_i_mutex)
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return -error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
@ -160,28 +938,6 @@ xfs_file_release(
|
||||
return -xfs_release(XFS_I(inode));
|
||||
}
|
||||
|
||||
/*
|
||||
* We ignore the datasync flag here because a datasync is effectively
|
||||
* identical to an fsync. That is, datasync implies that we need to write
|
||||
* only the metadata needed to be able to access the data that is written
|
||||
* if we crash after the call completes. Hence if we are writing beyond
|
||||
* EOF we have to log the inode size change as well, which makes it a
|
||||
* full fsync. If we don't write beyond EOF, the inode core will be
|
||||
* clean in memory and so we don't need to log the inode, just like
|
||||
* fsync.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_file_fsync(
|
||||
struct file *file,
|
||||
struct dentry *dentry,
|
||||
int datasync)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(dentry->d_inode);
|
||||
|
||||
xfs_iflags_clear(ip, XFS_ITRUNCATED);
|
||||
return -xfs_fsync(ip);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_file_readdir(
|
||||
struct file *filp,
|
||||
@ -203,9 +959,9 @@ xfs_file_readdir(
|
||||
*
|
||||
* Try to give it an estimate that's good enough, maybe at some
|
||||
* point we can change the ->readdir prototype to include the
|
||||
* buffer size.
|
||||
* buffer size. For now we use the current glibc buffer size.
|
||||
*/
|
||||
bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size);
|
||||
bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
|
||||
|
||||
error = xfs_readdir(ip, dirent, bufsize,
|
||||
(xfs_off_t *)&filp->f_pos, filldir);
|
||||
|
@ -91,6 +91,16 @@ xfs_mark_inode_dirty_sync(
|
||||
mark_inode_dirty_sync(inode);
|
||||
}
|
||||
|
||||
void
|
||||
xfs_mark_inode_dirty(
|
||||
xfs_inode_t *ip)
|
||||
{
|
||||
struct inode *inode = VFS_I(ip);
|
||||
|
||||
if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
|
||||
mark_inode_dirty(inode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Change the requested timestamp in the given inode.
|
||||
* We don't lock across timestamp updates, and we don't log them but
|
||||
|
@ -88,7 +88,6 @@
|
||||
#include <xfs_super.h>
|
||||
#include <xfs_globals.h>
|
||||
#include <xfs_fs_subr.h>
|
||||
#include <xfs_lrw.h>
|
||||
#include <xfs_buf.h>
|
||||
|
||||
/*
|
||||
|
@ -1,796 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_inum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_dir2.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_dmapi.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_bmap_btree.h"
|
||||
#include "xfs_alloc_btree.h"
|
||||
#include "xfs_ialloc_btree.h"
|
||||
#include "xfs_dir2_sf.h"
|
||||
#include "xfs_attr_sf.h"
|
||||
#include "xfs_dinode.h"
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_bmap.h"
|
||||
#include "xfs_btree.h"
|
||||
#include "xfs_ialloc.h"
|
||||
#include "xfs_rtalloc.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_itable.h"
|
||||
#include "xfs_rw.h"
|
||||
#include "xfs_attr.h"
|
||||
#include "xfs_inode_item.h"
|
||||
#include "xfs_buf_item.h"
|
||||
#include "xfs_utils.h"
|
||||
#include "xfs_iomap.h"
|
||||
#include "xfs_vnodeops.h"
|
||||
#include "xfs_trace.h"
|
||||
|
||||
#include <linux/capability.h>
|
||||
#include <linux/writeback.h>
|
||||
|
||||
|
||||
/*
|
||||
* xfs_iozero
|
||||
*
|
||||
* xfs_iozero clears the specified range of buffer supplied,
|
||||
* and marks all the affected blocks as valid and modified. If
|
||||
* an affected block is not allocated, it will be allocated. If
|
||||
* an affected block is not completely overwritten, and is not
|
||||
* valid before the operation, it will be read from disk before
|
||||
* being partially zeroed.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_iozero(
|
||||
struct xfs_inode *ip, /* inode */
|
||||
loff_t pos, /* offset in file */
|
||||
size_t count) /* size of data to zero */
|
||||
{
|
||||
struct page *page;
|
||||
struct address_space *mapping;
|
||||
int status;
|
||||
|
||||
mapping = VFS_I(ip)->i_mapping;
|
||||
do {
|
||||
unsigned offset, bytes;
|
||||
void *fsdata;
|
||||
|
||||
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
|
||||
bytes = PAGE_CACHE_SIZE - offset;
|
||||
if (bytes > count)
|
||||
bytes = count;
|
||||
|
||||
status = pagecache_write_begin(NULL, mapping, pos, bytes,
|
||||
AOP_FLAG_UNINTERRUPTIBLE,
|
||||
&page, &fsdata);
|
||||
if (status)
|
||||
break;
|
||||
|
||||
zero_user(page, offset, bytes);
|
||||
|
||||
status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
|
||||
page, fsdata);
|
||||
WARN_ON(status <= 0); /* can't return less than zero! */
|
||||
pos += bytes;
|
||||
count -= bytes;
|
||||
status = 0;
|
||||
} while (count);
|
||||
|
||||
return (-status);
|
||||
}
|
||||
|
||||
ssize_t /* bytes read, or (-) error */
|
||||
xfs_read(
|
||||
xfs_inode_t *ip,
|
||||
struct kiocb *iocb,
|
||||
const struct iovec *iovp,
|
||||
unsigned int segs,
|
||||
loff_t *offset,
|
||||
int ioflags)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
size_t size = 0;
|
||||
ssize_t ret = 0;
|
||||
xfs_fsize_t n;
|
||||
unsigned long seg;
|
||||
|
||||
|
||||
XFS_STATS_INC(xs_read_calls);
|
||||
|
||||
/* START copy & waste from filemap.c */
|
||||
for (seg = 0; seg < segs; seg++) {
|
||||
const struct iovec *iv = &iovp[seg];
|
||||
|
||||
/*
|
||||
* If any segment has a negative length, or the cumulative
|
||||
* length ever wraps negative then return -EINVAL.
|
||||
*/
|
||||
size += iv->iov_len;
|
||||
if (unlikely((ssize_t)(size|iv->iov_len) < 0))
|
||||
return XFS_ERROR(-EINVAL);
|
||||
}
|
||||
/* END copy & waste from filemap.c */
|
||||
|
||||
if (unlikely(ioflags & IO_ISDIRECT)) {
|
||||
xfs_buftarg_t *target =
|
||||
XFS_IS_REALTIME_INODE(ip) ?
|
||||
mp->m_rtdev_targp : mp->m_ddev_targp;
|
||||
if ((*offset & target->bt_smask) ||
|
||||
(size & target->bt_smask)) {
|
||||
if (*offset == ip->i_size) {
|
||||
return (0);
|
||||
}
|
||||
return -XFS_ERROR(EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
n = XFS_MAXIOFFSET(mp) - *offset;
|
||||
if ((n <= 0) || (size == 0))
|
||||
return 0;
|
||||
|
||||
if (n < size)
|
||||
size = n;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return -EIO;
|
||||
|
||||
if (unlikely(ioflags & IO_ISDIRECT))
|
||||
mutex_lock(&inode->i_mutex);
|
||||
xfs_ilock(ip, XFS_IOLOCK_SHARED);
|
||||
|
||||
if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
|
||||
int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
|
||||
int iolock = XFS_IOLOCK_SHARED;
|
||||
|
||||
ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
|
||||
dmflags, &iolock);
|
||||
if (ret) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
if (unlikely(ioflags & IO_ISDIRECT))
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(ioflags & IO_ISDIRECT)) {
|
||||
if (inode->i_mapping->nrpages)
|
||||
ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
|
||||
-1, FI_REMAPF_LOCKED);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
if (ret) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
trace_xfs_file_read(ip, size, *offset, ioflags);
|
||||
|
||||
iocb->ki_pos = *offset;
|
||||
ret = generic_file_aio_read(iocb, iovp, segs, *offset);
|
||||
if (ret > 0)
|
||||
XFS_STATS_ADD(xs_read_bytes, ret);
|
||||
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t
|
||||
xfs_splice_read(
|
||||
xfs_inode_t *ip,
|
||||
struct file *infilp,
|
||||
loff_t *ppos,
|
||||
struct pipe_inode_info *pipe,
|
||||
size_t count,
|
||||
int flags,
|
||||
int ioflags)
|
||||
{
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
ssize_t ret;
|
||||
|
||||
XFS_STATS_INC(xs_read_calls);
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return -EIO;
|
||||
|
||||
xfs_ilock(ip, XFS_IOLOCK_SHARED);
|
||||
|
||||
if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
|
||||
int iolock = XFS_IOLOCK_SHARED;
|
||||
int error;
|
||||
|
||||
error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
|
||||
FILP_DELAY_FLAG(infilp), &iolock);
|
||||
if (error) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
return -error;
|
||||
}
|
||||
}
|
||||
|
||||
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
|
||||
|
||||
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
|
||||
if (ret > 0)
|
||||
XFS_STATS_ADD(xs_read_bytes, ret);
|
||||
|
||||
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t
|
||||
xfs_splice_write(
|
||||
xfs_inode_t *ip,
|
||||
struct pipe_inode_info *pipe,
|
||||
struct file *outfilp,
|
||||
loff_t *ppos,
|
||||
size_t count,
|
||||
int flags,
|
||||
int ioflags)
|
||||
{
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
ssize_t ret;
|
||||
struct inode *inode = outfilp->f_mapping->host;
|
||||
xfs_fsize_t isize, new_size;
|
||||
|
||||
XFS_STATS_INC(xs_write_calls);
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return -EIO;
|
||||
|
||||
xfs_ilock(ip, XFS_IOLOCK_EXCL);
|
||||
|
||||
if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
|
||||
int iolock = XFS_IOLOCK_EXCL;
|
||||
int error;
|
||||
|
||||
error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
|
||||
FILP_DELAY_FLAG(outfilp), &iolock);
|
||||
if (error) {
|
||||
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
||||
return -error;
|
||||
}
|
||||
}
|
||||
|
||||
new_size = *ppos + count;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (new_size > ip->i_size)
|
||||
ip->i_new_size = new_size;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
|
||||
|
||||
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
|
||||
if (ret > 0)
|
||||
XFS_STATS_ADD(xs_write_bytes, ret);
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
|
||||
*ppos = isize;
|
||||
|
||||
if (*ppos > ip->i_size) {
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (*ppos > ip->i_size)
|
||||
ip->i_size = *ppos;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
if (ip->i_new_size) {
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ip->i_new_size = 0;
|
||||
if (ip->i_d.di_size > ip->i_size)
|
||||
ip->i_d.di_size = ip->i_size;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is called to handle zeroing any space in the last
|
||||
* block of the file that is beyond the EOF. We do this since the
|
||||
* size is being increased without writing anything to that block
|
||||
* and we don't want anyone to read the garbage on the disk.
|
||||
*/
|
||||
STATIC int /* error (positive) */
|
||||
xfs_zero_last_block(
|
||||
xfs_inode_t *ip,
|
||||
xfs_fsize_t offset,
|
||||
xfs_fsize_t isize)
|
||||
{
|
||||
xfs_fileoff_t last_fsb;
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
int nimaps;
|
||||
int zero_offset;
|
||||
int zero_len;
|
||||
int error = 0;
|
||||
xfs_bmbt_irec_t imap;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
||||
|
||||
zero_offset = XFS_B_FSB_OFFSET(mp, isize);
|
||||
if (zero_offset == 0) {
|
||||
/*
|
||||
* There are no extra bytes in the last block on disk to
|
||||
* zero, so return.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
last_fsb = XFS_B_TO_FSBT(mp, isize);
|
||||
nimaps = 1;
|
||||
error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
|
||||
&nimaps, NULL, NULL);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
ASSERT(nimaps > 0);
|
||||
/*
|
||||
* If the block underlying isize is just a hole, then there
|
||||
* is nothing to zero.
|
||||
*/
|
||||
if (imap.br_startblock == HOLESTARTBLOCK) {
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Zero the part of the last block beyond the EOF, and write it
|
||||
* out sync. We need to drop the ilock while we do this so we
|
||||
* don't deadlock when the buffer cache calls back to us.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
zero_len = mp->m_sb.sb_blocksize - zero_offset;
|
||||
if (isize + zero_len > offset)
|
||||
zero_len = offset - isize;
|
||||
error = xfs_iozero(ip, isize, zero_len);
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ASSERT(error >= 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero any on disk space between the current EOF and the new,
|
||||
* larger EOF. This handles the normal case of zeroing the remainder
|
||||
* of the last block in the file and the unusual case of zeroing blocks
|
||||
* out beyond the size of the file. This second case only happens
|
||||
* with fixed size extents and when the system crashes before the inode
|
||||
* size was updated but after blocks were allocated. If fill is set,
|
||||
* then any holes in the range are filled and zeroed. If not, the holes
|
||||
* are left alone as holes.
|
||||
*/
|
||||
|
||||
int /* error (positive) */
|
||||
xfs_zero_eof(
|
||||
xfs_inode_t *ip,
|
||||
xfs_off_t offset, /* starting I/O offset */
|
||||
xfs_fsize_t isize) /* current inode size */
|
||||
{
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
xfs_fileoff_t start_zero_fsb;
|
||||
xfs_fileoff_t end_zero_fsb;
|
||||
xfs_fileoff_t zero_count_fsb;
|
||||
xfs_fileoff_t last_fsb;
|
||||
xfs_fileoff_t zero_off;
|
||||
xfs_fsize_t zero_len;
|
||||
int nimaps;
|
||||
int error = 0;
|
||||
xfs_bmbt_irec_t imap;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
ASSERT(offset > isize);
|
||||
|
||||
/*
|
||||
* First handle zeroing the block on which isize resides.
|
||||
* We only zero a part of that block so it is handled specially.
|
||||
*/
|
||||
error = xfs_zero_last_block(ip, offset, isize);
|
||||
if (error) {
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the range between the new size and the old
|
||||
* where blocks needing to be zeroed may exist. To get the
|
||||
* block where the last byte in the file currently resides,
|
||||
* we need to subtract one from the size and truncate back
|
||||
* to a block boundary. We subtract 1 in case the size is
|
||||
* exactly on a block boundary.
|
||||
*/
|
||||
last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
|
||||
start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
|
||||
end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
|
||||
ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
|
||||
if (last_fsb == end_zero_fsb) {
|
||||
/*
|
||||
* The size was only incremented on its last block.
|
||||
* We took care of that above, so just return.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
ASSERT(start_zero_fsb <= end_zero_fsb);
|
||||
while (start_zero_fsb <= end_zero_fsb) {
|
||||
nimaps = 1;
|
||||
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
|
||||
error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
|
||||
0, NULL, 0, &imap, &nimaps, NULL, NULL);
|
||||
if (error) {
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
|
||||
return error;
|
||||
}
|
||||
ASSERT(nimaps > 0);
|
||||
|
||||
if (imap.br_state == XFS_EXT_UNWRITTEN ||
|
||||
imap.br_startblock == HOLESTARTBLOCK) {
|
||||
/*
|
||||
* This loop handles initializing pages that were
|
||||
* partially initialized by the code below this
|
||||
* loop. It basically zeroes the part of the page
|
||||
* that sits on a hole and sets the page as P_HOLE
|
||||
* and calls remapf if it is a mapped file.
|
||||
*/
|
||||
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
|
||||
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* There are blocks we need to zero.
|
||||
* Drop the inode lock while we're doing the I/O.
|
||||
* We'll still have the iolock to protect us.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
|
||||
zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
|
||||
|
||||
if ((zero_off + zero_len) > offset)
|
||||
zero_len = offset - zero_off;
|
||||
|
||||
error = xfs_iozero(ip, zero_off, zero_len);
|
||||
if (error) {
|
||||
goto out_lock;
|
||||
}
|
||||
|
||||
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
|
||||
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_lock:
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
ASSERT(error >= 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
ssize_t /* bytes written, or (-) error */
|
||||
xfs_write(
|
||||
struct xfs_inode *xip,
|
||||
struct kiocb *iocb,
|
||||
const struct iovec *iovp,
|
||||
unsigned int nsegs,
|
||||
loff_t *offset,
|
||||
int ioflags)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned long segs = nsegs;
|
||||
xfs_mount_t *mp;
|
||||
ssize_t ret = 0, error = 0;
|
||||
xfs_fsize_t isize, new_size;
|
||||
int iolock;
|
||||
int eventsent = 0;
|
||||
size_t ocount = 0, count;
|
||||
loff_t pos;
|
||||
int need_i_mutex;
|
||||
|
||||
XFS_STATS_INC(xs_write_calls);
|
||||
|
||||
error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
count = ocount;
|
||||
pos = *offset;
|
||||
|
||||
if (count == 0)
|
||||
return 0;
|
||||
|
||||
mp = xip->i_mount;
|
||||
|
||||
xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return -EIO;
|
||||
|
||||
relock:
|
||||
if (ioflags & IO_ISDIRECT) {
|
||||
iolock = XFS_IOLOCK_SHARED;
|
||||
need_i_mutex = 0;
|
||||
} else {
|
||||
iolock = XFS_IOLOCK_EXCL;
|
||||
need_i_mutex = 1;
|
||||
mutex_lock(&inode->i_mutex);
|
||||
}
|
||||
|
||||
xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
|
||||
|
||||
start:
|
||||
error = -generic_write_checks(file, &pos, &count,
|
||||
S_ISBLK(inode->i_mode));
|
||||
if (error) {
|
||||
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
|
||||
goto out_unlock_mutex;
|
||||
}
|
||||
|
||||
if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
|
||||
!(ioflags & IO_INVIS) && !eventsent)) {
|
||||
int dmflags = FILP_DELAY_FLAG(file);
|
||||
|
||||
if (need_i_mutex)
|
||||
dmflags |= DM_FLAGS_IMUX;
|
||||
|
||||
xfs_iunlock(xip, XFS_ILOCK_EXCL);
|
||||
error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
|
||||
pos, count, dmflags, &iolock);
|
||||
if (error) {
|
||||
goto out_unlock_internal;
|
||||
}
|
||||
xfs_ilock(xip, XFS_ILOCK_EXCL);
|
||||
eventsent = 1;
|
||||
|
||||
/*
|
||||
* The iolock was dropped and reacquired in XFS_SEND_DATA
|
||||
* so we have to recheck the size when appending.
|
||||
* We will only "goto start;" once, since having sent the
|
||||
* event prevents another call to XFS_SEND_DATA, which is
|
||||
* what allows the size to change in the first place.
|
||||
*/
|
||||
if ((file->f_flags & O_APPEND) && pos != xip->i_size)
|
||||
goto start;
|
||||
}
|
||||
|
||||
if (ioflags & IO_ISDIRECT) {
|
||||
xfs_buftarg_t *target =
|
||||
XFS_IS_REALTIME_INODE(xip) ?
|
||||
mp->m_rtdev_targp : mp->m_ddev_targp;
|
||||
|
||||
if ((pos & target->bt_smask) || (count & target->bt_smask)) {
|
||||
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
|
||||
return XFS_ERROR(-EINVAL);
|
||||
}
|
||||
|
||||
if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
|
||||
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
|
||||
iolock = XFS_IOLOCK_EXCL;
|
||||
need_i_mutex = 1;
|
||||
mutex_lock(&inode->i_mutex);
|
||||
xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
|
||||
goto start;
|
||||
}
|
||||
}
|
||||
|
||||
new_size = pos + count;
|
||||
if (new_size > xip->i_size)
|
||||
xip->i_new_size = new_size;
|
||||
|
||||
if (likely(!(ioflags & IO_INVIS)))
|
||||
file_update_time(file);
|
||||
|
||||
/*
|
||||
* If the offset is beyond the size of the file, we have a couple
|
||||
* of things to do. First, if there is already space allocated
|
||||
* we need to either create holes or zero the disk or ...
|
||||
*
|
||||
* If there is a page where the previous size lands, we need
|
||||
* to zero it out up to the new size.
|
||||
*/
|
||||
|
||||
if (pos > xip->i_size) {
|
||||
error = xfs_zero_eof(xip, pos, xip->i_size);
|
||||
if (error) {
|
||||
xfs_iunlock(xip, XFS_ILOCK_EXCL);
|
||||
goto out_unlock_internal;
|
||||
}
|
||||
}
|
||||
xfs_iunlock(xip, XFS_ILOCK_EXCL);
|
||||
|
||||
/*
|
||||
* If we're writing the file then make sure to clear the
|
||||
* setuid and setgid bits if the process is not being run
|
||||
* by root. This keeps people from modifying setuid and
|
||||
* setgid binaries.
|
||||
*/
|
||||
error = -file_remove_suid(file);
|
||||
if (unlikely(error))
|
||||
goto out_unlock_internal;
|
||||
|
||||
/* We can write back this queue in page reclaim */
|
||||
current->backing_dev_info = mapping->backing_dev_info;
|
||||
|
||||
if ((ioflags & IO_ISDIRECT)) {
|
||||
if (mapping->nrpages) {
|
||||
WARN_ON(need_i_mutex == 0);
|
||||
error = xfs_flushinval_pages(xip,
|
||||
(pos & PAGE_CACHE_MASK),
|
||||
-1, FI_REMAPF_LOCKED);
|
||||
if (error)
|
||||
goto out_unlock_internal;
|
||||
}
|
||||
|
||||
if (need_i_mutex) {
|
||||
/* demote the lock now the cached pages are gone */
|
||||
xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
iolock = XFS_IOLOCK_SHARED;
|
||||
need_i_mutex = 0;
|
||||
}
|
||||
|
||||
trace_xfs_file_direct_write(xip, count, *offset, ioflags);
|
||||
ret = generic_file_direct_write(iocb, iovp,
|
||||
&segs, pos, offset, count, ocount);
|
||||
|
||||
/*
|
||||
* direct-io write to a hole: fall through to buffered I/O
|
||||
* for completing the rest of the request.
|
||||
*/
|
||||
if (ret >= 0 && ret != count) {
|
||||
XFS_STATS_ADD(xs_write_bytes, ret);
|
||||
|
||||
pos += ret;
|
||||
count -= ret;
|
||||
|
||||
ioflags &= ~IO_ISDIRECT;
|
||||
xfs_iunlock(xip, iolock);
|
||||
goto relock;
|
||||
}
|
||||
} else {
|
||||
int enospc = 0;
|
||||
ssize_t ret2 = 0;
|
||||
|
||||
write_retry:
|
||||
trace_xfs_file_buffered_write(xip, count, *offset, ioflags);
|
||||
ret2 = generic_file_buffered_write(iocb, iovp, segs,
|
||||
pos, offset, count, ret);
|
||||
/*
|
||||
* if we just got an ENOSPC, flush the inode now we
|
||||
* aren't holding any page locks and retry *once*
|
||||
*/
|
||||
if (ret2 == -ENOSPC && !enospc) {
|
||||
error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
|
||||
if (error)
|
||||
goto out_unlock_internal;
|
||||
enospc = 1;
|
||||
goto write_retry;
|
||||
}
|
||||
ret = ret2;
|
||||
}
|
||||
|
||||
current->backing_dev_info = NULL;
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
|
||||
*offset = isize;
|
||||
|
||||
if (*offset > xip->i_size) {
|
||||
xfs_ilock(xip, XFS_ILOCK_EXCL);
|
||||
if (*offset > xip->i_size)
|
||||
xip->i_size = *offset;
|
||||
xfs_iunlock(xip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
if (ret == -ENOSPC &&
|
||||
DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
|
||||
xfs_iunlock(xip, iolock);
|
||||
if (need_i_mutex)
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
|
||||
DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
|
||||
0, 0, 0); /* Delay flag intentionally unused */
|
||||
if (need_i_mutex)
|
||||
mutex_lock(&inode->i_mutex);
|
||||
xfs_ilock(xip, iolock);
|
||||
if (error)
|
||||
goto out_unlock_internal;
|
||||
goto start;
|
||||
}
|
||||
|
||||
error = -ret;
|
||||
if (ret <= 0)
|
||||
goto out_unlock_internal;
|
||||
|
||||
XFS_STATS_ADD(xs_write_bytes, ret);
|
||||
|
||||
/* Handle various SYNC-type writes */
|
||||
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
|
||||
loff_t end = pos + ret - 1;
|
||||
int error2;
|
||||
|
||||
xfs_iunlock(xip, iolock);
|
||||
if (need_i_mutex)
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
error2 = filemap_write_and_wait_range(mapping, pos, end);
|
||||
if (!error)
|
||||
error = error2;
|
||||
if (need_i_mutex)
|
||||
mutex_lock(&inode->i_mutex);
|
||||
xfs_ilock(xip, iolock);
|
||||
|
||||
error2 = xfs_fsync(xip);
|
||||
if (!error)
|
||||
error = error2;
|
||||
}
|
||||
|
||||
out_unlock_internal:
|
||||
if (xip->i_new_size) {
|
||||
xfs_ilock(xip, XFS_ILOCK_EXCL);
|
||||
xip->i_new_size = 0;
|
||||
/*
|
||||
* If this was a direct or synchronous I/O that failed (such
|
||||
* as ENOSPC) then part of the I/O may have been written to
|
||||
* disk before the error occurred. In this case the on-disk
|
||||
* file size may have been adjusted beyond the in-memory file
|
||||
* size and now needs to be truncated back.
|
||||
*/
|
||||
if (xip->i_d.di_size > xip->i_size)
|
||||
xip->i_d.di_size = xip->i_size;
|
||||
xfs_iunlock(xip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
xfs_iunlock(xip, iolock);
|
||||
out_unlock_mutex:
|
||||
if (need_i_mutex)
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return -error;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the underlying (data/log/rt) device is readonly, there are some
|
||||
* operations that cannot proceed.
|
||||
*/
|
||||
int
|
||||
xfs_dev_is_read_only(
|
||||
xfs_mount_t *mp,
|
||||
char *message)
|
||||
{
|
||||
if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
|
||||
xfs_readonly_buftarg(mp->m_logdev_targp) ||
|
||||
(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
|
||||
cmn_err(CE_NOTE,
|
||||
"XFS: %s required on read-only device.", message);
|
||||
cmn_err(CE_NOTE,
|
||||
"XFS: write access unavailable, cannot proceed.");
|
||||
return EROFS;
|
||||
}
|
||||
return 0;
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#ifndef __XFS_LRW_H__
|
||||
#define __XFS_LRW_H__
|
||||
|
||||
struct xfs_mount;
|
||||
struct xfs_inode;
|
||||
struct xfs_buf;
|
||||
|
||||
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
|
||||
|
||||
extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
|
||||
|
||||
#endif /* __XFS_LRW_H__ */
|
@ -607,7 +607,8 @@ xfssyncd(
|
||||
set_freezable();
|
||||
timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
|
||||
for (;;) {
|
||||
timeleft = schedule_timeout_interruptible(timeleft);
|
||||
if (list_empty(&mp->m_sync_list))
|
||||
timeleft = schedule_timeout_interruptible(timeleft);
|
||||
/* swsusp */
|
||||
try_to_freeze();
|
||||
if (kthread_should_stop() && list_empty(&mp->m_sync_list))
|
||||
@ -627,8 +628,7 @@ xfssyncd(
|
||||
list_add_tail(&mp->m_sync_work.w_list,
|
||||
&mp->m_sync_list);
|
||||
}
|
||||
list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
|
||||
list_move(&work->w_list, &tmp);
|
||||
list_splice_init(&mp->m_sync_list, &tmp);
|
||||
spin_unlock(&mp->m_sync_lock);
|
||||
|
||||
list_for_each_entry_safe(work, n, &tmp, w_list) {
|
||||
@ -688,12 +688,12 @@ xfs_inode_set_reclaim_tag(
|
||||
struct xfs_perag *pag;
|
||||
|
||||
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
write_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
__xfs_inode_set_reclaim_tag(pag, ip);
|
||||
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
|
||||
|
@ -51,22 +51,6 @@
|
||||
#include "quota/xfs_dquot_item.h"
|
||||
#include "quota/xfs_dquot.h"
|
||||
|
||||
/*
|
||||
* Format fsblock number into a static buffer & return it.
|
||||
*/
|
||||
STATIC char *xfs_fmtfsblock(xfs_fsblock_t bno)
|
||||
{
|
||||
static char rval[50];
|
||||
|
||||
if (bno == NULLFSBLOCK)
|
||||
sprintf(rval, "NULLFSBLOCK");
|
||||
else if (isnullstartblock(bno))
|
||||
sprintf(rval, "NULLSTARTBLOCK(%lld)", startblockval(bno));
|
||||
else
|
||||
sprintf(rval, "%lld", (xfs_dfsbno_t)bno);
|
||||
return rval;
|
||||
}
|
||||
|
||||
/*
|
||||
* We include this last to have the helpers above available for the trace
|
||||
* event implementations.
|
||||
|
@ -197,13 +197,13 @@ TRACE_EVENT(xfs_iext_insert,
|
||||
__entry->caller_ip = caller_ip;
|
||||
),
|
||||
TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
|
||||
"offset %lld block %s count %lld flag %d caller %pf",
|
||||
"offset %lld block %lld count %lld flag %d caller %pf",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->ino,
|
||||
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
|
||||
(long)__entry->idx,
|
||||
__entry->startoff,
|
||||
xfs_fmtfsblock(__entry->startblock),
|
||||
(__int64_t)__entry->startblock,
|
||||
__entry->blockcount,
|
||||
__entry->state,
|
||||
(char *)__entry->caller_ip)
|
||||
@ -241,13 +241,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
|
||||
__entry->caller_ip = caller_ip;
|
||||
),
|
||||
TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
|
||||
"offset %lld block %s count %lld flag %d caller %pf",
|
||||
"offset %lld block %lld count %lld flag %d caller %pf",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->ino,
|
||||
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
|
||||
(long)__entry->idx,
|
||||
__entry->startoff,
|
||||
xfs_fmtfsblock(__entry->startblock),
|
||||
(__int64_t)__entry->startblock,
|
||||
__entry->blockcount,
|
||||
__entry->state,
|
||||
(char *)__entry->caller_ip)
|
||||
@ -593,7 +593,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
|
||||
TP_ARGS(dqp),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(__be32, id)
|
||||
__field(u32, id)
|
||||
__field(unsigned, flags)
|
||||
__field(unsigned, nrefs)
|
||||
__field(unsigned long long, res_bcount)
|
||||
@ -606,7 +606,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
|
||||
), \
|
||||
TP_fast_assign(
|
||||
__entry->dev = dqp->q_mount->m_super->s_dev;
|
||||
__entry->id = dqp->q_core.d_id;
|
||||
__entry->id = be32_to_cpu(dqp->q_core.d_id);
|
||||
__entry->flags = dqp->dq_flags;
|
||||
__entry->nrefs = dqp->q_nrefs;
|
||||
__entry->res_bcount = dqp->q_res_bcount;
|
||||
@ -622,10 +622,10 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
|
||||
be64_to_cpu(dqp->q_core.d_ino_softlimit);
|
||||
),
|
||||
TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
|
||||
"bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
|
||||
"icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
|
||||
"bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
|
||||
"icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
be32_to_cpu(__entry->id),
|
||||
__entry->id,
|
||||
__print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
|
||||
__entry->nrefs,
|
||||
__entry->res_bcount,
|
||||
@ -881,7 +881,7 @@ TRACE_EVENT(name, \
|
||||
), \
|
||||
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
|
||||
"offset 0x%llx count %zd flags %s " \
|
||||
"startoff 0x%llx startblock %s blockcount 0x%llx", \
|
||||
"startoff 0x%llx startblock %lld blockcount 0x%llx", \
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev), \
|
||||
__entry->ino, \
|
||||
__entry->size, \
|
||||
@ -890,7 +890,7 @@ TRACE_EVENT(name, \
|
||||
__entry->count, \
|
||||
__print_flags(__entry->flags, "|", BMAPI_FLAGS), \
|
||||
__entry->startoff, \
|
||||
xfs_fmtfsblock(__entry->startblock), \
|
||||
(__int64_t)__entry->startblock, \
|
||||
__entry->blockcount) \
|
||||
)
|
||||
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
|
||||
|
@ -2549,6 +2549,121 @@ xfs_bmap_rtalloc(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Set up the allocation arguments for the case where the caller has no
 * first block yet (the "nullfb" branch of xfs_bmap_btalloc).
 *
 * ap:   bmap allocation request; ap->alen, ap->minlen, ap->total,
 *       ap->userdata and ap->ip are read, ap->rval may be written.
 * args: allocator arguments; type, total and minlen are filled in, and
 *       fsbno is read as the starting point (and rewritten for filestream
 *       inodes).
 * blen: in/out best free extent length seen so far; it is only ever
 *       raised here, to the longest free extent of an AG that was looked at.
 *
 * The AGs are walked starting at the AG of args->fsbno (AG 0 if that is
 * NULLAGNUMBER), wrapping at sb_agcount, until *blen reaches ap->alen or
 * the walk is back at the starting AG.  For filestream inodes with user
 * data the walk instead asks xfs_filestream_new_ag() for one replacement
 * AG and stops after examining it.
 *
 * Returns 0 on success, or the error from xfs_alloc_pagf_init() or
 * xfs_filestream_new_ag().  The perag reference taken for the AG being
 * examined is dropped on every return path.
 */
STATIC int
|
||||
xfs_bmap_btalloc_nullfb(
|
||||
struct xfs_bmalloca *ap,
|
||||
struct xfs_alloc_arg *args,
|
||||
xfs_extlen_t *blen)
|
||||
{
|
||||
struct xfs_mount *mp = ap->ip->i_mount;
|
||||
struct xfs_perag *pag;
|
||||
xfs_agnumber_t ag, startag;
|
||||
int notinit = 0;
|
||||
int error;
|
||||
|
||||
if (ap->userdata && xfs_inode_is_filestream(ap->ip))
|
||||
args->type = XFS_ALLOCTYPE_NEAR_BNO;
|
||||
else
|
||||
args->type = XFS_ALLOCTYPE_START_BNO;
|
||||
args->total = ap->total;
|
||||
|
||||
/*
|
||||
* Search for an allocation group with a single extent large enough
|
||||
* for the request. If one isn't found, then adjust the minimum
|
||||
* allocation size to the largest space found.
|
||||
*/
|
||||
startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
|
||||
if (startag == NULLAGNUMBER)
|
||||
startag = ag = 0;
|
||||
|
||||
pag = xfs_perag_get(mp, ag);
|
||||
while (*blen < ap->alen) {
|
||||
/* the init is requested with TRYLOCK; pagf_init is re-tested below */
if (!pag->pagf_init) {
|
||||
error = xfs_alloc_pagf_init(mp, args->tp, ag,
|
||||
XFS_ALLOC_FLAG_TRYLOCK);
|
||||
if (error) {
|
||||
xfs_perag_put(pag);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* See xfs_alloc_fix_freelist...
|
||||
*/
|
||||
if (pag->pagf_init) {
|
||||
xfs_extlen_t longest;
|
||||
longest = xfs_alloc_longest_free_extent(mp, pag);
|
||||
if (*blen < longest)
|
||||
*blen = longest;
|
||||
} else
|
||||
notinit = 1;
|
||||
|
||||
if (xfs_inode_is_filestream(ap->ip)) {
|
||||
if (*blen >= ap->alen)
|
||||
break;
|
||||
|
||||
if (ap->userdata) {
|
||||
/*
|
||||
* If startag is an invalid AG, we've
|
||||
* come here once before and
|
||||
* xfs_filestream_new_ag picked the
|
||||
* best currently available.
|
||||
*
|
||||
* Don't continue looping, since we
|
||||
* could loop forever.
|
||||
*/
|
||||
if (startag == NULLAGNUMBER)
|
||||
break;
|
||||
|
||||
error = xfs_filestream_new_ag(ap, &ag);
|
||||
/* drop the old AG's reference before returning or switching AGs */
xfs_perag_put(pag);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* loop again to set 'blen'*/
|
||||
startag = NULLAGNUMBER;
|
||||
pag = xfs_perag_get(mp, ag);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* advance to the next AG, wrapping to 0; stop once back at the start */
if (++ag == mp->m_sb.sb_agcount)
|
||||
ag = 0;
|
||||
if (ag == startag)
|
||||
break;
|
||||
xfs_perag_put(pag);
|
||||
pag = xfs_perag_get(mp, ag);
|
||||
}
|
||||
/* every exit from the loop above still holds one perag reference */
xfs_perag_put(pag);
|
||||
|
||||
/*
|
||||
* Since the above loop did a BUF_TRYLOCK, it is
|
||||
* possible that there is space for this request.
|
||||
*/
|
||||
if (notinit || *blen < ap->minlen)
|
||||
args->minlen = ap->minlen;
|
||||
/*
|
||||
* If the best seen length is less than the request
|
||||
* length, use the best as the minimum.
|
||||
*/
|
||||
else if (*blen < ap->alen)
|
||||
args->minlen = *blen;
|
||||
/*
|
||||
* Otherwise we've seen an extent as big as alen,
|
||||
* use that as the minimum.
|
||||
*/
|
||||
else
|
||||
args->minlen = ap->alen;
|
||||
|
||||
/*
|
||||
* set the failure fallback case to look in the selected
|
||||
* AG as the stream may have moved.
|
||||
*/
|
||||
if (xfs_inode_is_filestream(ap->ip))
|
||||
ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_bmap_btalloc(
|
||||
xfs_bmalloca_t *ap) /* bmap alloc argument struct */
|
||||
@ -2556,16 +2671,13 @@ xfs_bmap_btalloc(
|
||||
xfs_mount_t *mp; /* mount point structure */
|
||||
xfs_alloctype_t atype = 0; /* type for allocation routines */
|
||||
xfs_extlen_t align; /* minimum allocation alignment */
|
||||
xfs_agnumber_t ag;
|
||||
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
|
||||
xfs_agnumber_t startag;
|
||||
xfs_agnumber_t ag;
|
||||
xfs_alloc_arg_t args;
|
||||
xfs_extlen_t blen;
|
||||
xfs_extlen_t nextminlen = 0;
|
||||
xfs_perag_t *pag;
|
||||
int nullfb; /* true if ap->firstblock isn't set */
|
||||
int isaligned;
|
||||
int notinit;
|
||||
int tryagain;
|
||||
int error;
|
||||
|
||||
@ -2612,103 +2724,9 @@ xfs_bmap_btalloc(
|
||||
args.firstblock = ap->firstblock;
|
||||
blen = 0;
|
||||
if (nullfb) {
|
||||
if (ap->userdata && xfs_inode_is_filestream(ap->ip))
|
||||
args.type = XFS_ALLOCTYPE_NEAR_BNO;
|
||||
else
|
||||
args.type = XFS_ALLOCTYPE_START_BNO;
|
||||
args.total = ap->total;
|
||||
|
||||
/*
|
||||
* Search for an allocation group with a single extent
|
||||
* large enough for the request.
|
||||
*
|
||||
* If one isn't found, then adjust the minimum allocation
|
||||
* size to the largest space found.
|
||||
*/
|
||||
startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
|
||||
if (startag == NULLAGNUMBER)
|
||||
startag = ag = 0;
|
||||
notinit = 0;
|
||||
pag = xfs_perag_get(mp, ag);
|
||||
while (blen < ap->alen) {
|
||||
if (!pag->pagf_init &&
|
||||
(error = xfs_alloc_pagf_init(mp, args.tp,
|
||||
ag, XFS_ALLOC_FLAG_TRYLOCK))) {
|
||||
xfs_perag_put(pag);
|
||||
return error;
|
||||
}
|
||||
/*
|
||||
* See xfs_alloc_fix_freelist...
|
||||
*/
|
||||
if (pag->pagf_init) {
|
||||
xfs_extlen_t longest;
|
||||
longest = xfs_alloc_longest_free_extent(mp, pag);
|
||||
if (blen < longest)
|
||||
blen = longest;
|
||||
} else
|
||||
notinit = 1;
|
||||
|
||||
if (xfs_inode_is_filestream(ap->ip)) {
|
||||
if (blen >= ap->alen)
|
||||
break;
|
||||
|
||||
if (ap->userdata) {
|
||||
/*
|
||||
* If startag is an invalid AG, we've
|
||||
* come here once before and
|
||||
* xfs_filestream_new_ag picked the
|
||||
* best currently available.
|
||||
*
|
||||
* Don't continue looping, since we
|
||||
* could loop forever.
|
||||
*/
|
||||
if (startag == NULLAGNUMBER)
|
||||
break;
|
||||
|
||||
error = xfs_filestream_new_ag(ap, &ag);
|
||||
xfs_perag_put(pag);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* loop again to set 'blen'*/
|
||||
startag = NULLAGNUMBER;
|
||||
pag = xfs_perag_get(mp, ag);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (++ag == mp->m_sb.sb_agcount)
|
||||
ag = 0;
|
||||
if (ag == startag)
|
||||
break;
|
||||
xfs_perag_put(pag);
|
||||
pag = xfs_perag_get(mp, ag);
|
||||
}
|
||||
xfs_perag_put(pag);
|
||||
/*
|
||||
* Since the above loop did a BUF_TRYLOCK, it is
|
||||
* possible that there is space for this request.
|
||||
*/
|
||||
if (notinit || blen < ap->minlen)
|
||||
args.minlen = ap->minlen;
|
||||
/*
|
||||
* If the best seen length is less than the request
|
||||
* length, use the best as the minimum.
|
||||
*/
|
||||
else if (blen < ap->alen)
|
||||
args.minlen = blen;
|
||||
/*
|
||||
* Otherwise we've seen an extent as big as alen,
|
||||
* use that as the minimum.
|
||||
*/
|
||||
else
|
||||
args.minlen = ap->alen;
|
||||
|
||||
/*
|
||||
* set the failure fallback case to look in the selected
|
||||
* AG as the stream may have moved.
|
||||
*/
|
||||
if (xfs_inode_is_filestream(ap->ip))
|
||||
ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
|
||||
error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
|
||||
if (error)
|
||||
return error;
|
||||
} else if (ap->low) {
|
||||
if (xfs_inode_is_filestream(ap->ip))
|
||||
args.type = XFS_ALLOCTYPE_FIRST_AG;
|
||||
|
@ -292,7 +292,8 @@ typedef struct xfs_bstat {
|
||||
__s32 bs_extents; /* number of extents */
|
||||
__u32 bs_gen; /* generation count */
|
||||
__u16 bs_projid; /* project id */
|
||||
unsigned char bs_pad[14]; /* pad space, unused */
|
||||
__u16 bs_forkoff; /* inode fork offset in bytes */
|
||||
unsigned char bs_pad[12]; /* pad space, unused */
|
||||
__u32 bs_dmevmask; /* DMIG event mask */
|
||||
__u16 bs_dmstate; /* DMIG state info */
|
||||
__u16 bs_aextents; /* attribute number of extents */
|
||||
|
@ -190,13 +190,12 @@ xfs_iget_cache_hit(
|
||||
trace_xfs_iget_reclaim(ip);
|
||||
|
||||
/*
|
||||
* We need to set XFS_INEW atomically with clearing the
|
||||
* reclaimable tag so that we do have an indicator of the
|
||||
* inode still being initialized.
|
||||
* We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
|
||||
* from stomping over us while we recycle the inode. We can't
|
||||
* clear the radix tree reclaimable tag yet as it requires
|
||||
* pag_ici_lock to be held exclusive.
|
||||
*/
|
||||
ip->i_flags |= XFS_INEW;
|
||||
ip->i_flags &= ~XFS_IRECLAIMABLE;
|
||||
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
|
||||
ip->i_flags |= XFS_IRECLAIM;
|
||||
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
@ -216,7 +215,15 @@ xfs_iget_cache_hit(
|
||||
trace_xfs_iget_reclaim(ip);
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
write_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
|
||||
ip->i_flags |= XFS_INEW;
|
||||
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
|
||||
inode->i_state = I_NEW;
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
} else {
|
||||
/* If the VFS inode is being torn down, pause and try again. */
|
||||
if (!igrab(inode)) {
|
||||
|
@ -2439,75 +2439,31 @@ xfs_idestroy_fork(
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment the pin count of the given buffer.
|
||||
* This value is protected by ipinlock spinlock in the mount structure.
|
||||
* This is called to unpin an inode. The caller must have the inode locked
|
||||
* in at least shared mode so that the buffer cannot be subsequently pinned
|
||||
* once someone is waiting for it to be unpinned.
|
||||
*/
|
||||
void
|
||||
xfs_ipin(
|
||||
xfs_inode_t *ip)
|
||||
static void
|
||||
xfs_iunpin_nowait(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
||||
|
||||
atomic_inc(&ip->i_pincount);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrement the pin count of the given inode, and wake up
|
||||
* anyone in xfs_iunpin_wait() if the count goes to 0. The
|
||||
* inode must have been previously pinned with a call to xfs_ipin().
|
||||
*/
|
||||
void
|
||||
xfs_iunpin(
|
||||
xfs_inode_t *ip)
|
||||
{
|
||||
ASSERT(atomic_read(&ip->i_pincount) > 0);
|
||||
|
||||
if (atomic_dec_and_test(&ip->i_pincount))
|
||||
wake_up(&ip->i_ipin_wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called to unpin an inode. It can be directed to wait or to return
|
||||
* immediately without waiting for the inode to be unpinned. The caller must
|
||||
* have the inode locked in at least shared mode so that the buffer cannot be
|
||||
* subsequently pinned once someone is waiting for it to be unpinned.
|
||||
*/
|
||||
STATIC void
|
||||
__xfs_iunpin_wait(
|
||||
xfs_inode_t *ip,
|
||||
int wait)
|
||||
{
|
||||
xfs_inode_log_item_t *iip = ip->i_itemp;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
|
||||
if (atomic_read(&ip->i_pincount) == 0)
|
||||
return;
|
||||
|
||||
/* Give the log a push to start the unpinning I/O */
|
||||
if (iip && iip->ili_last_lsn)
|
||||
xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0);
|
||||
else
|
||||
xfs_log_force(ip->i_mount, 0);
|
||||
xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
|
||||
|
||||
if (wait)
|
||||
wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
|
||||
}
|
||||
|
||||
void
|
||||
xfs_iunpin_wait(
|
||||
xfs_inode_t *ip)
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
__xfs_iunpin_wait(ip, 1);
|
||||
if (xfs_ipincount(ip)) {
|
||||
xfs_iunpin_nowait(ip);
|
||||
wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
xfs_iunpin_nowait(
|
||||
xfs_inode_t *ip)
|
||||
{
|
||||
__xfs_iunpin_wait(ip, 0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* xfs_iextents_copy()
|
||||
*
|
||||
|
@ -471,8 +471,6 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
|
||||
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
|
||||
|
||||
void xfs_iext_realloc(xfs_inode_t *, int, int);
|
||||
void xfs_ipin(xfs_inode_t *);
|
||||
void xfs_iunpin(xfs_inode_t *);
|
||||
void xfs_iunpin_wait(xfs_inode_t *);
|
||||
int xfs_iflush(xfs_inode_t *, uint);
|
||||
void xfs_ichgtime(xfs_inode_t *, int);
|
||||
@ -480,6 +478,7 @@ void xfs_lock_inodes(xfs_inode_t **, int, uint);
|
||||
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
|
||||
|
||||
void xfs_synchronize_times(xfs_inode_t *);
|
||||
void xfs_mark_inode_dirty(xfs_inode_t *);
|
||||
void xfs_mark_inode_dirty_sync(xfs_inode_t *);
|
||||
|
||||
#define IHOLD(ip) \
|
||||
|
@ -535,23 +535,23 @@ xfs_inode_item_format(
|
||||
|
||||
/*
|
||||
* This is called to pin the inode associated with the inode log
|
||||
* item in memory so it cannot be written out. Do this by calling
|
||||
* xfs_ipin() to bump the pin count in the inode while holding the
|
||||
* inode pin lock.
|
||||
* item in memory so it cannot be written out.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_inode_item_pin(
|
||||
xfs_inode_log_item_t *iip)
|
||||
{
|
||||
ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
|
||||
xfs_ipin(iip->ili_inode);
|
||||
|
||||
atomic_inc(&iip->ili_inode->i_pincount);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This is called to unpin the inode associated with the inode log
|
||||
* item which was previously pinned with a call to xfs_inode_item_pin().
|
||||
* Just call xfs_iunpin() on the inode to do this.
|
||||
*
|
||||
* Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
STATIC void
|
||||
@ -559,7 +559,11 @@ xfs_inode_item_unpin(
|
||||
xfs_inode_log_item_t *iip,
|
||||
int stale)
|
||||
{
|
||||
xfs_iunpin(iip->ili_inode);
|
||||
struct xfs_inode *ip = iip->ili_inode;
|
||||
|
||||
ASSERT(atomic_read(&ip->i_pincount) > 0);
|
||||
if (atomic_dec_and_test(&ip->i_pincount))
|
||||
wake_up(&ip->i_ipin_wait);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
@ -568,7 +572,7 @@ xfs_inode_item_unpin_remove(
|
||||
xfs_inode_log_item_t *iip,
|
||||
xfs_trans_t *tp)
|
||||
{
|
||||
xfs_iunpin(iip->ili_inode);
|
||||
xfs_inode_item_unpin(iip, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -106,6 +106,7 @@ xfs_bulkstat_one_iget(
|
||||
buf->bs_dmevmask = dic->di_dmevmask;
|
||||
buf->bs_dmstate = dic->di_dmstate;
|
||||
buf->bs_aextents = dic->di_anextents;
|
||||
buf->bs_forkoff = XFS_IFORK_BOFF(ip);
|
||||
|
||||
switch (dic->di_format) {
|
||||
case XFS_DINODE_FMT_DEV:
|
||||
@ -176,6 +177,7 @@ xfs_bulkstat_one_dinode(
|
||||
buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
|
||||
buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
|
||||
buf->bs_aextents = be16_to_cpu(dic->di_anextents);
|
||||
buf->bs_forkoff = XFS_DFORK_BOFF(dic);
|
||||
|
||||
switch (dic->di_format) {
|
||||
case XFS_DINODE_FMT_DEV:
|
||||
|
106
fs/xfs/xfs_log.c
106
fs/xfs/xfs_log.c
@ -60,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
|
||||
STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
|
||||
STATIC void xlog_dealloc_log(xlog_t *log);
|
||||
STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
|
||||
int nentries, xfs_log_ticket_t tic,
|
||||
int nentries, struct xlog_ticket *tic,
|
||||
xfs_lsn_t *start_lsn,
|
||||
xlog_in_core_t **commit_iclog,
|
||||
uint flags);
|
||||
@ -243,14 +243,14 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
|
||||
* out when the next write occurs.
|
||||
*/
|
||||
xfs_lsn_t
|
||||
xfs_log_done(xfs_mount_t *mp,
|
||||
xfs_log_ticket_t xtic,
|
||||
void **iclog,
|
||||
uint flags)
|
||||
xfs_log_done(
|
||||
struct xfs_mount *mp,
|
||||
struct xlog_ticket *ticket,
|
||||
struct xlog_in_core **iclog,
|
||||
uint flags)
|
||||
{
|
||||
xlog_t *log = mp->m_log;
|
||||
xlog_ticket_t *ticket = (xfs_log_ticket_t) xtic;
|
||||
xfs_lsn_t lsn = 0;
|
||||
struct log *log = mp->m_log;
|
||||
xfs_lsn_t lsn = 0;
|
||||
|
||||
if (XLOG_FORCED_SHUTDOWN(log) ||
|
||||
/*
|
||||
@ -258,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp,
|
||||
* If we get an error, just continue and give back the log ticket.
|
||||
*/
|
||||
(((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
|
||||
(xlog_commit_record(mp, ticket,
|
||||
(xlog_in_core_t **)iclog, &lsn)))) {
|
||||
(xlog_commit_record(mp, ticket, iclog, &lsn)))) {
|
||||
lsn = (xfs_lsn_t) -1;
|
||||
if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
|
||||
flags |= XFS_LOG_REL_PERM_RESERV;
|
||||
@ -289,7 +288,7 @@ xfs_log_done(xfs_mount_t *mp,
|
||||
}
|
||||
|
||||
return lsn;
|
||||
} /* xfs_log_done */
|
||||
}
|
||||
|
||||
/*
|
||||
* Attaches a new iclog I/O completion callback routine during
|
||||
@ -298,11 +297,11 @@ xfs_log_done(xfs_mount_t *mp,
|
||||
* executing the callback at an appropriate time.
|
||||
*/
|
||||
int
|
||||
xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
|
||||
void *iclog_hndl, /* iclog to hang callback off */
|
||||
xfs_log_callback_t *cb)
|
||||
xfs_log_notify(
|
||||
struct xfs_mount *mp,
|
||||
struct xlog_in_core *iclog,
|
||||
xfs_log_callback_t *cb)
|
||||
{
|
||||
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
|
||||
int abortflg;
|
||||
|
||||
spin_lock(&iclog->ic_callback_lock);
|
||||
@ -316,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
|
||||
}
|
||||
spin_unlock(&iclog->ic_callback_lock);
|
||||
return abortflg;
|
||||
} /* xfs_log_notify */
|
||||
}
|
||||
|
||||
int
|
||||
xfs_log_release_iclog(xfs_mount_t *mp,
|
||||
void *iclog_hndl)
|
||||
xfs_log_release_iclog(
|
||||
struct xfs_mount *mp,
|
||||
struct xlog_in_core *iclog)
|
||||
{
|
||||
xlog_t *log = mp->m_log;
|
||||
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
|
||||
|
||||
if (xlog_state_release_iclog(log, iclog)) {
|
||||
if (xlog_state_release_iclog(mp->m_log, iclog)) {
|
||||
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
|
||||
return EIO;
|
||||
}
|
||||
@ -344,17 +341,18 @@ xfs_log_release_iclog(xfs_mount_t *mp,
|
||||
* reservation, we prevent over allocation problems.
|
||||
*/
|
||||
int
|
||||
xfs_log_reserve(xfs_mount_t *mp,
|
||||
int unit_bytes,
|
||||
int cnt,
|
||||
xfs_log_ticket_t *ticket,
|
||||
__uint8_t client,
|
||||
uint flags,
|
||||
uint t_type)
|
||||
xfs_log_reserve(
|
||||
struct xfs_mount *mp,
|
||||
int unit_bytes,
|
||||
int cnt,
|
||||
struct xlog_ticket **ticket,
|
||||
__uint8_t client,
|
||||
uint flags,
|
||||
uint t_type)
|
||||
{
|
||||
xlog_t *log = mp->m_log;
|
||||
xlog_ticket_t *internal_ticket;
|
||||
int retval = 0;
|
||||
struct log *log = mp->m_log;
|
||||
struct xlog_ticket *internal_ticket;
|
||||
int retval = 0;
|
||||
|
||||
ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
|
||||
ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
|
||||
@ -367,7 +365,7 @@ xfs_log_reserve(xfs_mount_t *mp,
|
||||
|
||||
if (*ticket != NULL) {
|
||||
ASSERT(flags & XFS_LOG_PERM_RESERV);
|
||||
internal_ticket = (xlog_ticket_t *)*ticket;
|
||||
internal_ticket = *ticket;
|
||||
|
||||
trace_xfs_log_reserve(log, internal_ticket);
|
||||
|
||||
@ -519,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|
||||
xlog_in_core_t *first_iclog;
|
||||
#endif
|
||||
xfs_log_iovec_t reg[1];
|
||||
xfs_log_ticket_t tic = NULL;
|
||||
xlog_ticket_t *tic = NULL;
|
||||
xfs_lsn_t lsn;
|
||||
int error;
|
||||
|
||||
@ -656,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp)
|
||||
* transaction occur with one call to xfs_log_write().
|
||||
*/
|
||||
int
|
||||
xfs_log_write(xfs_mount_t * mp,
|
||||
xfs_log_iovec_t reg[],
|
||||
int nentries,
|
||||
xfs_log_ticket_t tic,
|
||||
xfs_lsn_t *start_lsn)
|
||||
xfs_log_write(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_log_iovec reg[],
|
||||
int nentries,
|
||||
struct xlog_ticket *tic,
|
||||
xfs_lsn_t *start_lsn)
|
||||
{
|
||||
int error;
|
||||
xlog_t *log = mp->m_log;
|
||||
struct log *log = mp->m_log;
|
||||
int error;
|
||||
|
||||
if (XLOG_FORCED_SHUTDOWN(log))
|
||||
return XFS_ERROR(EIO);
|
||||
|
||||
if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
|
||||
error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
|
||||
if (error)
|
||||
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
|
||||
}
|
||||
return error;
|
||||
} /* xfs_log_write */
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
xfs_log_move_tail(xfs_mount_t *mp,
|
||||
@ -1642,16 +1640,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
|
||||
* bytes have been written out.
|
||||
*/
|
||||
STATIC int
|
||||
xlog_write(xfs_mount_t * mp,
|
||||
xfs_log_iovec_t reg[],
|
||||
int nentries,
|
||||
xfs_log_ticket_t tic,
|
||||
xfs_lsn_t *start_lsn,
|
||||
xlog_in_core_t **commit_iclog,
|
||||
uint flags)
|
||||
xlog_write(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_log_iovec reg[],
|
||||
int nentries,
|
||||
struct xlog_ticket *ticket,
|
||||
xfs_lsn_t *start_lsn,
|
||||
struct xlog_in_core **commit_iclog,
|
||||
uint flags)
|
||||
{
|
||||
xlog_t *log = mp->m_log;
|
||||
xlog_ticket_t *ticket = (xlog_ticket_t *)tic;
|
||||
xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */
|
||||
xlog_op_header_t *logop_head; /* ptr to log operation header */
|
||||
__psint_t ptr; /* copy address into data region */
|
||||
@ -1765,7 +1763,7 @@ xlog_write(xfs_mount_t * mp,
|
||||
default:
|
||||
xfs_fs_cmn_err(CE_WARN, mp,
|
||||
"Bad XFS transaction clientid 0x%x in ticket 0x%p",
|
||||
logop_head->oh_clientid, tic);
|
||||
logop_head->oh_clientid, ticket);
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
|
||||
|
@ -110,8 +110,6 @@ typedef struct xfs_log_iovec {
|
||||
uint i_type; /* type of region */
|
||||
} xfs_log_iovec_t;
|
||||
|
||||
typedef void* xfs_log_ticket_t;
|
||||
|
||||
/*
|
||||
* Structure used to pass callback function and the function's argument
|
||||
* to the log manager.
|
||||
@ -126,10 +124,12 @@ typedef struct xfs_log_callback {
|
||||
#ifdef __KERNEL__
|
||||
/* Log manager interfaces */
|
||||
struct xfs_mount;
|
||||
struct xlog_in_core;
|
||||
struct xlog_ticket;
|
||||
|
||||
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
|
||||
xfs_log_ticket_t ticket,
|
||||
void **iclog,
|
||||
struct xlog_ticket *ticket,
|
||||
struct xlog_in_core **iclog,
|
||||
uint flags);
|
||||
int _xfs_log_force(struct xfs_mount *mp,
|
||||
uint flags,
|
||||
@ -151,21 +151,21 @@ int xfs_log_mount_finish(struct xfs_mount *mp);
|
||||
void xfs_log_move_tail(struct xfs_mount *mp,
|
||||
xfs_lsn_t tail_lsn);
|
||||
int xfs_log_notify(struct xfs_mount *mp,
|
||||
void *iclog,
|
||||
struct xlog_in_core *iclog,
|
||||
xfs_log_callback_t *callback_entry);
|
||||
int xfs_log_release_iclog(struct xfs_mount *mp,
|
||||
void *iclog_hndl);
|
||||
struct xlog_in_core *iclog);
|
||||
int xfs_log_reserve(struct xfs_mount *mp,
|
||||
int length,
|
||||
int count,
|
||||
xfs_log_ticket_t *ticket,
|
||||
struct xlog_ticket **ticket,
|
||||
__uint8_t clientid,
|
||||
uint flags,
|
||||
uint t_type);
|
||||
int xfs_log_write(struct xfs_mount *mp,
|
||||
xfs_log_iovec_t region[],
|
||||
int nentries,
|
||||
xfs_log_ticket_t ticket,
|
||||
struct xlog_ticket *ticket,
|
||||
xfs_lsn_t *start_lsn);
|
||||
int xfs_log_unmount_write(struct xfs_mount *mp);
|
||||
void xfs_log_unmount(struct xfs_mount *mp);
|
||||
|
@ -1097,13 +1097,15 @@ xfs_default_resblks(xfs_mount_t *mp)
|
||||
__uint64_t resblks;
|
||||
|
||||
/*
|
||||
* We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
|
||||
* This may drive us straight to ENOSPC on mount, but that implies
|
||||
* we were already there on the last unmount. Warn if this occurs.
|
||||
* We default to 5% or 8192 fsbs of space reserved, whichever is
|
||||
* smaller. This is intended to cover concurrent allocation
|
||||
* transactions when we initially hit enospc. These each require a 4
|
||||
* block reservation. Hence by default we cover roughly 2000 concurrent
|
||||
* allocation reservations.
|
||||
*/
|
||||
resblks = mp->m_sb.sb_dblocks;
|
||||
do_div(resblks, 20);
|
||||
resblks = min_t(__uint64_t, resblks, 1024);
|
||||
resblks = min_t(__uint64_t, resblks, 8192);
|
||||
return resblks;
|
||||
}
|
||||
|
||||
@ -1417,6 +1419,9 @@ xfs_mountfs(
|
||||
* when at ENOSPC. This is needed for operations like create with
|
||||
* attr, unwritten extent conversion at ENOSPC, etc. Data allocations
|
||||
* are not allowed to use this reserved space.
|
||||
*
|
||||
* This may drive us straight to ENOSPC on mount, but that implies
|
||||
* we were already there on the last unmount. Warn if this occurs.
|
||||
*/
|
||||
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
|
||||
resblks = xfs_default_resblks(mp);
|
||||
@ -1725,26 +1730,30 @@ xfs_mod_incore_sb_unlocked(
|
||||
lcounter += rem;
|
||||
}
|
||||
} else { /* Taking blocks away */
|
||||
|
||||
lcounter += delta;
|
||||
|
||||
/*
|
||||
* If we're out of blocks, use any available reserved blocks if
|
||||
* we're allowed to.
|
||||
*/
|
||||
|
||||
if (lcounter < 0) {
|
||||
if (rsvd) {
|
||||
lcounter = (long long)mp->m_resblks_avail + delta;
|
||||
if (lcounter < 0) {
|
||||
return XFS_ERROR(ENOSPC);
|
||||
}
|
||||
mp->m_resblks_avail = lcounter;
|
||||
return 0;
|
||||
} else { /* not reserved */
|
||||
return XFS_ERROR(ENOSPC);
|
||||
}
|
||||
if (lcounter >= 0) {
|
||||
mp->m_sb.sb_fdblocks = lcounter +
|
||||
XFS_ALLOC_SET_ASIDE(mp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We are out of blocks, use any available reserved
|
||||
* blocks if we're allowed to.
|
||||
*/
|
||||
if (!rsvd)
|
||||
return XFS_ERROR(ENOSPC);
|
||||
|
||||
lcounter = (long long)mp->m_resblks_avail + delta;
|
||||
if (lcounter >= 0) {
|
||||
mp->m_resblks_avail = lcounter;
|
||||
return 0;
|
||||
}
|
||||
printk_once(KERN_WARNING
|
||||
"Filesystem \"%s\": reserve blocks depleted! "
|
||||
"Consider increasing reserve pool size.",
|
||||
mp->m_fsname);
|
||||
return XFS_ERROR(ENOSPC);
|
||||
}
|
||||
|
||||
mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
|
||||
@ -2052,6 +2061,26 @@ xfs_mount_log_sb(
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the underlying (data/log/rt) device is readonly, there are some
|
||||
* operations that cannot proceed.
|
||||
*/
|
||||
int
|
||||
xfs_dev_is_read_only(
|
||||
struct xfs_mount *mp,
|
||||
char *message)
|
||||
{
|
||||
if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
|
||||
xfs_readonly_buftarg(mp->m_logdev_targp) ||
|
||||
(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
|
||||
cmn_err(CE_NOTE,
|
||||
"XFS: %s required on read-only device.", message);
|
||||
cmn_err(CE_NOTE,
|
||||
"XFS: write access unavailable, cannot proceed.");
|
||||
return EROFS;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_PERCPU_SB
|
||||
/*
|
||||
|
@ -436,6 +436,8 @@ extern void xfs_freesb(xfs_mount_t *);
|
||||
extern int xfs_fs_writable(xfs_mount_t *);
|
||||
extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
|
||||
|
||||
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
|
||||
|
||||
extern int xfs_dmops_get(struct xfs_mount *);
|
||||
extern void xfs_dmops_put(struct xfs_mount *);
|
||||
|
||||
|
@ -796,7 +796,7 @@ _xfs_trans_commit(
|
||||
int sync;
|
||||
#define XFS_TRANS_LOGVEC_COUNT 16
|
||||
xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
|
||||
void *commit_iclog;
|
||||
struct xlog_in_core *commit_iclog;
|
||||
int shutdown;
|
||||
|
||||
commit_lsn = -1;
|
||||
|
@ -910,7 +910,7 @@ typedef struct xfs_trans {
|
||||
unsigned int t_blk_res_used; /* # of resvd blocks used */
|
||||
unsigned int t_rtx_res; /* # of rt extents resvd */
|
||||
unsigned int t_rtx_res_used; /* # of resvd rt extents used */
|
||||
xfs_log_ticket_t t_ticket; /* log mgr ticket */
|
||||
struct xlog_ticket *t_ticket; /* log mgr ticket */
|
||||
xfs_lsn_t t_lsn; /* log seq num of start of
|
||||
* transaction. */
|
||||
xfs_lsn_t t_commit_lsn; /* log seq num of end of
|
||||
|
@ -46,6 +46,65 @@ STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
|
||||
STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
|
||||
xfs_daddr_t, int);
|
||||
|
||||
/*
|
||||
* Add the locked buffer to the transaction.
|
||||
*
|
||||
* The buffer must be locked, and it cannot be associated with any
|
||||
* transaction.
|
||||
*
|
||||
* If the buffer does not yet have a buf log item associated with it,
|
||||
* then allocate one for it. Then add the buf item to the transaction.
|
||||
*/
|
||||
STATIC void
|
||||
_xfs_trans_bjoin(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *bp,
|
||||
int reset_recur)
|
||||
{
|
||||
struct xfs_buf_log_item *bip;
|
||||
|
||||
ASSERT(XFS_BUF_ISBUSY(bp));
|
||||
ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
|
||||
|
||||
/*
|
||||
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
|
||||
* it doesn't have one yet, then allocate one and initialize it.
|
||||
* The checks to see if one is there are in xfs_buf_item_init().
|
||||
*/
|
||||
xfs_buf_item_init(bp, tp->t_mountp);
|
||||
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
||||
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
|
||||
if (reset_recur)
|
||||
bip->bli_recur = 0;
|
||||
|
||||
/*
|
||||
* Take a reference for this transaction on the buf item.
|
||||
*/
|
||||
atomic_inc(&bip->bli_refcount);
|
||||
|
||||
/*
|
||||
* Get a log_item_desc to point at the new item.
|
||||
*/
|
||||
(void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
|
||||
|
||||
/*
|
||||
* Initialize b_fsprivate2 so we can find it with incore_match()
|
||||
* in xfs_trans_get_buf() and friends above.
|
||||
*/
|
||||
XFS_BUF_SET_FSPRIVATE2(bp, tp);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
xfs_trans_bjoin(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
_xfs_trans_bjoin(tp, bp, 0);
|
||||
trace_xfs_trans_bjoin(bp->b_fspriv);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get and lock the buffer for the caller if it is not already
|
||||
@ -132,40 +191,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
|
||||
|
||||
ASSERT(!XFS_BUF_GETERROR(bp));
|
||||
|
||||
/*
|
||||
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
|
||||
* it doesn't have one yet, then allocate one and initialize it.
|
||||
* The checks to see if one is there are in xfs_buf_item_init().
|
||||
*/
|
||||
xfs_buf_item_init(bp, tp->t_mountp);
|
||||
|
||||
/*
|
||||
* Set the recursion count for the buffer within this transaction
|
||||
* to 0.
|
||||
*/
|
||||
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
||||
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
|
||||
bip->bli_recur = 0;
|
||||
|
||||
/*
|
||||
* Take a reference for this transaction on the buf item.
|
||||
*/
|
||||
atomic_inc(&bip->bli_refcount);
|
||||
|
||||
/*
|
||||
* Get a log_item_desc to point at the new item.
|
||||
*/
|
||||
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
|
||||
|
||||
/*
|
||||
* Initialize b_fsprivate2 so we can find it with incore_match()
|
||||
* above.
|
||||
*/
|
||||
XFS_BUF_SET_FSPRIVATE2(bp, tp);
|
||||
|
||||
trace_xfs_trans_get_buf(bip);
|
||||
_xfs_trans_bjoin(tp, bp, 1);
|
||||
trace_xfs_trans_get_buf(bp->b_fspriv);
|
||||
return (bp);
|
||||
}
|
||||
|
||||
@ -210,44 +237,11 @@ xfs_trans_getsb(xfs_trans_t *tp,
|
||||
}
|
||||
|
||||
bp = xfs_getsb(mp, flags);
|
||||
if (bp == NULL) {
|
||||
if (bp == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
|
||||
* it doesn't have one yet, then allocate one and initialize it.
|
||||
* The checks to see if one is there are in xfs_buf_item_init().
|
||||
*/
|
||||
xfs_buf_item_init(bp, mp);
|
||||
|
||||
/*
|
||||
* Set the recursion count for the buffer within this transaction
|
||||
* to 0.
|
||||
*/
|
||||
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
||||
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
|
||||
bip->bli_recur = 0;
|
||||
|
||||
/*
|
||||
* Take a reference for this transaction on the buf item.
|
||||
*/
|
||||
atomic_inc(&bip->bli_refcount);
|
||||
|
||||
/*
|
||||
* Get a log_item_desc to point at the new item.
|
||||
*/
|
||||
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
|
||||
|
||||
/*
|
||||
* Initialize b_fsprivate2 so we can find it with incore_match()
|
||||
* above.
|
||||
*/
|
||||
XFS_BUF_SET_FSPRIVATE2(bp, tp);
|
||||
|
||||
trace_xfs_trans_getsb(bip);
|
||||
_xfs_trans_bjoin(tp, bp, 1);
|
||||
trace_xfs_trans_getsb(bp->b_fspriv);
|
||||
return (bp);
|
||||
}
|
||||
|
||||
@ -425,40 +419,9 @@ xfs_trans_read_buf(
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
goto shutdown_abort;
|
||||
|
||||
/*
|
||||
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
|
||||
* it doesn't have one yet, then allocate one and initialize it.
|
||||
* The checks to see if one is there are in xfs_buf_item_init().
|
||||
*/
|
||||
xfs_buf_item_init(bp, tp->t_mountp);
|
||||
_xfs_trans_bjoin(tp, bp, 1);
|
||||
trace_xfs_trans_read_buf(bp->b_fspriv);
|
||||
|
||||
/*
|
||||
* Set the recursion count for the buffer within this transaction
|
||||
* to 0.
|
||||
*/
|
||||
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
||||
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
|
||||
bip->bli_recur = 0;
|
||||
|
||||
/*
|
||||
* Take a reference for this transaction on the buf item.
|
||||
*/
|
||||
atomic_inc(&bip->bli_refcount);
|
||||
|
||||
/*
|
||||
* Get a log_item_desc to point at the new item.
|
||||
*/
|
||||
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
|
||||
|
||||
/*
|
||||
* Initialize b_fsprivate2 so we can find it with incore_match()
|
||||
* above.
|
||||
*/
|
||||
XFS_BUF_SET_FSPRIVATE2(bp, tp);
|
||||
|
||||
trace_xfs_trans_read_buf(bip);
|
||||
*bpp = bp;
|
||||
return 0;
|
||||
|
||||
@ -622,53 +585,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add the locked buffer to the transaction.
|
||||
* The buffer must be locked, and it cannot be associated with any
|
||||
* transaction.
|
||||
*
|
||||
* If the buffer does not yet have a buf log item associated with it,
|
||||
* then allocate one for it. Then add the buf item to the transaction.
|
||||
*/
|
||||
void
|
||||
xfs_trans_bjoin(xfs_trans_t *tp,
|
||||
xfs_buf_t *bp)
|
||||
{
|
||||
xfs_buf_log_item_t *bip;
|
||||
|
||||
ASSERT(XFS_BUF_ISBUSY(bp));
|
||||
ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
|
||||
|
||||
/*
|
||||
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
|
||||
* it doesn't have one yet, then allocate one and initialize it.
|
||||
* The checks to see if one is there are in xfs_buf_item_init().
|
||||
*/
|
||||
xfs_buf_item_init(bp, tp->t_mountp);
|
||||
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
|
||||
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
|
||||
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
|
||||
|
||||
/*
|
||||
* Take a reference for this transaction on the buf item.
|
||||
*/
|
||||
atomic_inc(&bip->bli_refcount);
|
||||
|
||||
/*
|
||||
* Get a log_item_desc to point at the new item.
|
||||
*/
|
||||
(void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
|
||||
|
||||
/*
|
||||
* Initialize b_fsprivate2 so we can find it with incore_match()
|
||||
* in xfs_trans_get_buf() and friends above.
|
||||
*/
|
||||
XFS_BUF_SET_FSPRIVATE2(bp, tp);
|
||||
|
||||
trace_xfs_trans_bjoin(bip);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the buffer as not needing to be unlocked when the buf item's
|
||||
* IOP_UNLOCK() routine is called. The buffer must already be locked
|
||||
|
@ -583,113 +583,6 @@ xfs_readlink(
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_fsync
|
||||
*
|
||||
* This is called to sync the inode and its data out to disk. We need to hold
|
||||
* the I/O lock while flushing the data, and the inode lock while flushing the
|
||||
* inode. The inode lock CANNOT be held while flushing the data, so acquire
|
||||
* after we're done with that.
|
||||
*/
|
||||
int
|
||||
xfs_fsync(
|
||||
xfs_inode_t *ip)
|
||||
{
|
||||
xfs_trans_t *tp;
|
||||
int error = 0;
|
||||
int log_flushed = 0;
|
||||
|
||||
xfs_itrace_entry(ip);
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return XFS_ERROR(EIO);
|
||||
|
||||
/*
|
||||
* We always need to make sure that the required inode state is safe on
|
||||
* disk. The inode might be clean but we still might need to force the
|
||||
* log because of committed transactions that haven't hit the disk yet.
|
||||
* Likewise, there could be unflushed non-transactional changes to the
|
||||
* inode core that have to go to disk and this requires us to issue
|
||||
* a synchronous transaction to capture these changes correctly.
|
||||
*
|
||||
* This code relies on the assumption that if the update_* fields
|
||||
* of the inode are clear and the inode is unpinned then it is clean
|
||||
* and no action is required.
|
||||
*/
|
||||
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||
|
||||
if (!ip->i_update_core) {
|
||||
/*
|
||||
* Timestamps/size haven't changed since last inode flush or
|
||||
* inode transaction commit. That means either nothing got
|
||||
* written or a transaction committed which caught the updates.
|
||||
* If the latter happened and the transaction hasn't hit the
|
||||
* disk yet, the inode will be still be pinned. If it is,
|
||||
* force the log.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
if (xfs_ipincount(ip)) {
|
||||
if (ip->i_itemp->ili_last_lsn) {
|
||||
error = _xfs_log_force_lsn(ip->i_mount,
|
||||
ip->i_itemp->ili_last_lsn,
|
||||
XFS_LOG_SYNC, &log_flushed);
|
||||
} else {
|
||||
error = _xfs_log_force(ip->i_mount,
|
||||
XFS_LOG_SYNC, &log_flushed);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Kick off a transaction to log the inode core to get the
|
||||
* updates. The sync transaction will also force the log.
|
||||
*/
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
|
||||
error = xfs_trans_reserve(tp, 0,
|
||||
XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
|
||||
if (error) {
|
||||
xfs_trans_cancel(tp, 0);
|
||||
return error;
|
||||
}
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
/*
|
||||
* Note - it's possible that we might have pushed ourselves out
|
||||
* of the way during trans_reserve which would flush the inode.
|
||||
* But there's no guarantee that the inode buffer has actually
|
||||
* gone out yet (it's delwri). Plus the buffer could be pinned
|
||||
* anyway if it's part of an inode in another recent
|
||||
* transaction. So we play it safe and fire off the
|
||||
* transaction anyway.
|
||||
*/
|
||||
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
|
||||
xfs_trans_ihold(tp, ip);
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
xfs_trans_set_sync(tp);
|
||||
error = _xfs_trans_commit(tp, 0, &log_flushed);
|
||||
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
|
||||
/*
|
||||
* If the log write didn't issue an ordered tag we need
|
||||
* to flush the disk cache for the data device now.
|
||||
*/
|
||||
if (!log_flushed)
|
||||
xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
|
||||
|
||||
/*
|
||||
* If this inode is on the RT dev we need to flush that
|
||||
* cache as well.
|
||||
*/
|
||||
if (XFS_IS_REALTIME_INODE(ip))
|
||||
xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flags for xfs_free_eofblocks
|
||||
*/
|
||||
|
@ -21,7 +21,6 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
|
||||
#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
|
||||
|
||||
int xfs_readlink(struct xfs_inode *ip, char *link);
|
||||
int xfs_fsync(struct xfs_inode *ip);
|
||||
int xfs_release(struct xfs_inode *ip);
|
||||
int xfs_inactive(struct xfs_inode *ip);
|
||||
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
|
||||
@ -50,18 +49,6 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
|
||||
int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
|
||||
int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
|
||||
int flags, struct attrlist_cursor_kern *cursor);
|
||||
ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
|
||||
const struct iovec *iovp, unsigned int segs,
|
||||
loff_t *offset, int ioflags);
|
||||
ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
|
||||
loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
|
||||
int flags, int ioflags);
|
||||
ssize_t xfs_splice_write(struct xfs_inode *ip,
|
||||
struct pipe_inode_info *pipe, struct file *outfilp,
|
||||
loff_t *ppos, size_t count, int flags, int ioflags);
|
||||
ssize_t xfs_write(struct xfs_inode *xip, struct kiocb *iocb,
|
||||
const struct iovec *iovp, unsigned int nsegs,
|
||||
loff_t *offset, int ioflags);
|
||||
int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
|
||||
int flags, struct xfs_iomap *iomapp, int *niomaps);
|
||||
void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first,
|
||||
@ -72,4 +59,6 @@ int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
|
||||
xfs_off_t last, uint64_t flags, int fiopt);
|
||||
int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
|
||||
|
||||
int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
|
||||
|
||||
#endif /* _XFS_VNODEOPS_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user