mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-10 15:58:47 +00:00
fe8bc91c4c
We cannot rely on buffer dirty bits during fsync because pdflush can come before fsync is called and clear dirty bits without forcing a transaction commit. What we do is that we track which transaction has last changed the inode and which transaction last changed allocation and force it to disk on fsync. Signed-off-by: Jan Kara <jack@suse.cz> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
152 lines
4.5 KiB
C
152 lines
4.5 KiB
C
/*
 *  linux/include/linux/ext3_fs_i.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/include/linux/minix_fs_i.h
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
|
|
|
|
#ifndef _LINUX_EXT3_FS_I
|
|
#define _LINUX_EXT3_FS_I
|
|
|
|
#include <linux/rwsem.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/seqlock.h>
|
|
#include <linux/mutex.h>
|
|
|
|
/* Data type for the offset of a block within its block group (signed). */
typedef int ext3_grpblk_t;

/* Data type for a filesystem-wide block number. */
typedef unsigned long ext3_fsblk_t;

/* printf-style conversion matching ext3_fsblk_t (unsigned long). */
#define E3FSBLK "%lu"
|
|
|
|
struct ext3_reserve_window {
|
|
ext3_fsblk_t _rsv_start; /* First byte reserved */
|
|
ext3_fsblk_t _rsv_end; /* Last byte reserved or 0 */
|
|
};
|
|
|
|
struct ext3_reserve_window_node {
|
|
struct rb_node rsv_node;
|
|
__u32 rsv_goal_size;
|
|
__u32 rsv_alloc_hit;
|
|
struct ext3_reserve_window rsv_window;
|
|
};
|
|
|
|
struct ext3_block_alloc_info {
|
|
/* information about reservation window */
|
|
struct ext3_reserve_window_node rsv_window_node;
|
|
/*
|
|
* was i_next_alloc_block in ext3_inode_info
|
|
* is the logical (file-relative) number of the
|
|
* most-recently-allocated block in this file.
|
|
* We use this for detecting linearly ascending allocation requests.
|
|
*/
|
|
__u32 last_alloc_logical_block;
|
|
/*
|
|
* Was i_next_alloc_goal in ext3_inode_info
|
|
* is the *physical* companion to i_next_alloc_block.
|
|
* it the physical block number of the block which was most-recentl
|
|
* allocated to this file. This give us the goal (target) for the next
|
|
* allocation when we detect linearly ascending requests.
|
|
*/
|
|
ext3_fsblk_t last_alloc_physical_block;
|
|
};
|
|
|
|
/*
 * Shorthand for reaching the window bounds through any struct that has a
 * member named rsv_window: x->rsv_start expands to
 * x->rsv_window._rsv_start, and likewise for rsv_end.
 *
 * These are object-like macros, so every later use of the bare
 * identifiers rsv_start / rsv_end in an including file is rewritten too.
 */
#define rsv_start rsv_window._rsv_start
#define rsv_end rsv_window._rsv_end
|
|
|
|
/*
|
|
* third extended file system inode data in memory
|
|
*/
|
|
struct ext3_inode_info {
|
|
__le32 i_data[15]; /* unconverted */
|
|
__u32 i_flags;
|
|
#ifdef EXT3_FRAGMENTS
|
|
__u32 i_faddr;
|
|
__u8 i_frag_no;
|
|
__u8 i_frag_size;
|
|
#endif
|
|
ext3_fsblk_t i_file_acl;
|
|
__u32 i_dir_acl;
|
|
__u32 i_dtime;
|
|
|
|
/*
|
|
* i_block_group is the number of the block group which contains
|
|
* this file's inode. Constant across the lifetime of the inode,
|
|
* it is ued for making block allocation decisions - we try to
|
|
* place a file's data blocks near its inode block, and new inodes
|
|
* near to their parent directory's inode.
|
|
*/
|
|
__u32 i_block_group;
|
|
__u32 i_state; /* Dynamic state flags for ext3 */
|
|
|
|
/* block reservation info */
|
|
struct ext3_block_alloc_info *i_block_alloc_info;
|
|
|
|
__u32 i_dir_start_lookup;
|
|
#ifdef CONFIG_EXT3_FS_XATTR
|
|
/*
|
|
* Extended attributes can be read independently of the main file
|
|
* data. Taking i_mutex even when reading would cause contention
|
|
* between readers of EAs and writers of regular file data, so
|
|
* instead we synchronize on xattr_sem when reading or changing
|
|
* EAs.
|
|
*/
|
|
struct rw_semaphore xattr_sem;
|
|
#endif
|
|
|
|
struct list_head i_orphan; /* unlinked but open inodes */
|
|
|
|
/*
|
|
* i_disksize keeps track of what the inode size is ON DISK, not
|
|
* in memory. During truncate, i_size is set to the new size by
|
|
* the VFS prior to calling ext3_truncate(), but the filesystem won't
|
|
* set i_disksize to 0 until the truncate is actually under way.
|
|
*
|
|
* The intent is that i_disksize always represents the blocks which
|
|
* are used by this file. This allows recovery to restart truncate
|
|
* on orphans if we crash during truncate. We actually write i_disksize
|
|
* into the on-disk inode when writing inodes out, instead of i_size.
|
|
*
|
|
* The only time when i_disksize and i_size may be different is when
|
|
* a truncate is in progress. The only things which change i_disksize
|
|
* are ext3_get_block (growth) and ext3_truncate (shrinkth).
|
|
*/
|
|
loff_t i_disksize;
|
|
|
|
/* on-disk additional length */
|
|
__u16 i_extra_isize;
|
|
|
|
/*
|
|
* truncate_mutex is for serialising ext3_truncate() against
|
|
* ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's
|
|
* data tree are chopped off during truncate. We can't do that in
|
|
* ext3 because whenever we perform intermediate commits during
|
|
* truncate, the inode and all the metadata blocks *must* be in a
|
|
* consistent state which allows truncation of the orphans to restart
|
|
* during recovery. Hence we must fix the get_block-vs-truncate race
|
|
* by other means, so we have truncate_mutex.
|
|
*/
|
|
struct mutex truncate_mutex;
|
|
|
|
/*
|
|
* Transactions that contain inode's metadata needed to complete
|
|
* fsync and fdatasync, respectively.
|
|
*/
|
|
atomic_t i_sync_tid;
|
|
atomic_t i_datasync_tid;
|
|
|
|
struct inode vfs_inode;
|
|
};
|
|
|
|
#endif /* _LINUX_EXT3_FS_I */
|